use crate::arch::asm;
use crate::core_arch::{simd::*, x86::*};
use crate::intrinsics::{fmaf16, simd::*};
use crate::ptr;

/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_set_ph(
    e7: f16,
    e6: f16,
    e5: f16,
    e4: f16,
    e3: f16,
    e2: f16,
    e1: f16,
    e0: f16,
) -> __m128h {
    __m128h([e0, e1, e2, e3, e4, e5, e6, e7])
}
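
// A minimal usage sketch (illustrative, not part of the original source): the
// arguments of `_mm_set_ph` run from the highest lane (`e7`) down to the lowest
// (`e0`), so lane 0 of the result holds the last argument.
//
//     let v = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
//     // lane 0 == 1.0 (e0), lane 7 == 8.0 (e7)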

/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_set_ph(
    e15: f16,
    e14: f16,
    e13: f16,
    e12: f16,
    e11: f16,
    e10: f16,
    e9: f16,
    e8: f16,
    e7: f16,
    e6: f16,
    e5: f16,
    e4: f16,
    e3: f16,
    e2: f16,
    e1: f16,
    e0: f16,
) -> __m256h {
    __m256h([
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    ])
}

/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set_ph(
    e31: f16,
    e30: f16,
    e29: f16,
    e28: f16,
    e27: f16,
    e26: f16,
    e25: f16,
    e24: f16,
    e23: f16,
    e22: f16,
    e21: f16,
    e20: f16,
    e19: f16,
    e18: f16,
    e17: f16,
    e16: f16,
    e15: f16,
    e14: f16,
    e13: f16,
    e12: f16,
    e11: f16,
    e10: f16,
    e9: f16,
    e8: f16,
    e7: f16,
    e6: f16,
    e5: f16,
    e4: f16,
    e3: f16,
    e2: f16,
    e1: f16,
    e0: f16,
) -> __m512h {
    __m512h([
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
        e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
    ])
}

/// Copy half-precision (16-bit) floating-point element a to the lower element of dst, and zero
/// the upper 7 elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_set_sh(a: f16) -> __m128h {
    __m128h([a, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
}

/// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_set1_ph(a: f16) -> __m128h {
    unsafe { transmute(f16x8::splat(a)) }
}

/// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_set1_ph(a: f16) -> __m256h {
    unsafe { transmute(f16x16::splat(a)) }
}

/// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set1_ph(a: f16) -> __m512h {
    unsafe { transmute(f16x32::splat(a)) }
}

/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_setr_ph(
    e0: f16,
    e1: f16,
    e2: f16,
    e3: f16,
    e4: f16,
    e5: f16,
    e6: f16,
    e7: f16,
) -> __m128h {
    __m128h([e0, e1, e2, e3, e4, e5, e6, e7])
}
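
// Hedged illustration (not in the original source): `_mm_setr_ph` takes its
// arguments in memory order, so it is the mirror image of `_mm_set_ph`.
//
//     // Both of these produce the same vector, with 1.0 in lane 0:
//     let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
//     let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);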

/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_setr_ph(
    e0: f16,
    e1: f16,
    e2: f16,
    e3: f16,
    e4: f16,
    e5: f16,
    e6: f16,
    e7: f16,
    e8: f16,
    e9: f16,
    e10: f16,
    e11: f16,
    e12: f16,
    e13: f16,
    e14: f16,
    e15: f16,
) -> __m256h {
    __m256h([
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    ])
}

/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setr_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setr_ph(
    e0: f16,
    e1: f16,
    e2: f16,
    e3: f16,
    e4: f16,
    e5: f16,
    e6: f16,
    e7: f16,
    e8: f16,
    e9: f16,
    e10: f16,
    e11: f16,
    e12: f16,
    e13: f16,
    e14: f16,
    e15: f16,
    e16: f16,
    e17: f16,
    e18: f16,
    e19: f16,
    e20: f16,
    e21: f16,
    e22: f16,
    e23: f16,
    e24: f16,
    e25: f16,
    e26: f16,
    e27: f16,
    e28: f16,
    e29: f16,
    e30: f16,
    e31: f16,
) -> __m512h {
    __m512h([
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
        e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
    ])
}

/// Return vector of type `__m128h` with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_setzero_ph() -> __m128h {
    unsafe { transmute(f16x8::ZERO) }
}

/// Return vector of type `__m256h` with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setzero_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_setzero_ph() -> __m256h {
    f16x16::ZERO.as_m256h()
}

/// Return vector of type `__m512h` with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setzero_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setzero_ph() -> __m512h {
    f16x32::ZERO.as_m512h()
}

/// Return vector of type `__m128h` with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`](crate::mem::MaybeUninit).
/// In practice, this is typically equivalent to [`mem::zeroed`](crate::mem::zeroed).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_undefined_ph() -> __m128h {
    f16x8::ZERO.as_m128h()
}

/// Return vector of type `__m256h` with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`](crate::mem::MaybeUninit).
/// In practice, this is typically equivalent to [`mem::zeroed`](crate::mem::zeroed).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_undefined_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_undefined_ph() -> __m256h {
    f16x16::ZERO.as_m256h()
}

/// Return vector of type `__m512h` with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`](crate::mem::MaybeUninit).
/// In practice, this is typically equivalent to [`mem::zeroed`](crate::mem::zeroed).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_undefined_ph() -> __m512h {
    f16x32::ZERO.as_m512h()
}
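
// Sketch of intended use (an assumption about typical callers, not taken from
// this file): `_mm*_undefined_ph` supplies a "don't care" operand, e.g. as the
// ignored source of a fully masked operation; its contents must not be relied on.
//
//     let dont_care = _mm_undefined_ph();
//     // pass `dont_care` where a source vector is required but every lane is masked out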

/// Cast vector of type `__m128d` to type `__m128h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_castpd_ph(a: __m128d) -> __m128h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256d` to type `__m256h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castpd_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_castpd_ph(a: __m256d) -> __m256h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m512d` to type `__m512h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castpd_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd_ph(a: __m512d) -> __m512h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m128h` to type `__m128d`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_pd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_castph_pd(a: __m128h) -> __m128d {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256h` to type `__m256d`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_pd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_castph_pd(a: __m256h) -> __m256d {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m512h` to type `__m512d`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_pd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castph_pd(a: __m512h) -> __m512d {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m128` to type `__m128h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_castps_ph(a: __m128) -> __m128h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256` to type `__m256h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castps_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_castps_ph(a: __m256) -> __m256h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m512` to type `__m512h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castps_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps_ph(a: __m512) -> __m512h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m128h` to type `__m128`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_ps)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_castph_ps(a: __m128h) -> __m128 {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256h` to type `__m256`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_ps)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_castph_ps(a: __m256h) -> __m256 {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m512h` to type `__m512`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_ps)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castph_ps(a: __m512h) -> __m512 {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m128i` to type `__m128h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_castsi128_ph(a: __m128i) -> __m128h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256i` to type `__m256h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castsi256_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_castsi256_ph(a: __m256i) -> __m256h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m512i` to type `__m512h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castsi512_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi512_ph(a: __m512i) -> __m512h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m128h` to type `__m128i`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_si128)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_castph_si128(a: __m128h) -> __m128i {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256h` to type `__m256i`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_si256)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_castph_si256(a: __m256h) -> __m256i {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m512h` to type `__m512i`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_si512)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castph_si512(a: __m512h) -> __m512i {
    unsafe { transmute(a) }
}
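
// Illustrative note (not part of the original source): the `_mm*_cast*`
// intrinsics above are bit-pattern-preserving reinterpretations, so casting to
// an integer vector and back returns the original half-precision data unchanged.
//
//     let h = _mm_set1_ph(1.5);
//     let bits = _mm_castph_si128(h);    // reinterpret as __m128i, no conversion
//     let back = _mm_castsi128_ph(bits); // identical bit pattern to `h`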

/// Cast vector of type `__m256h` to type `__m128h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph256_ph128)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_castph256_ph128(a: __m256h) -> __m128h {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
}

/// Cast vector of type `__m512h` to type `__m128h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph512_ph128)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castph512_ph128(a: __m512h) -> __m128h {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
}

/// Cast vector of type `__m512h` to type `__m256h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph512_ph256)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castph512_ph256(a: __m512h) -> __m256h {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) }
}

/// Cast vector of type `__m128h` to type `__m256h`. The upper 8 elements of the result are undefined.
/// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction,
/// but most of the time it does not generate any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph128_ph256)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_castph128_ph256(a: __m128h) -> __m256h {
    unsafe {
        simd_shuffle!(
            a,
            _mm_undefined_ph(),
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]
        )
    }
}

/// Cast vector of type `__m128h` to type `__m512h`. The upper 24 elements of the result are undefined.
/// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction,
/// but most of the time it does not generate any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph128_ph512)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castph128_ph512(a: __m128h) -> __m512h {
    unsafe {
        simd_shuffle!(
            a,
            _mm_undefined_ph(),
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
                8, 8, 8, 8
            ]
        )
    }
}

/// Cast vector of type `__m256h` to type `__m512h`. The upper 16 elements of the result are undefined.
/// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction,
/// but most of the time it does not generate any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph256_ph512)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castph256_ph512(a: __m256h) -> __m512h {
    unsafe {
        simd_shuffle!(
            a,
            _mm256_undefined_ph(),
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16,
                16, 16, 16, 16, 16, 16, 16, 16, 16
            ]
        )
    }
}

/// Cast vector of type `__m128h` to type `__m256h`. The upper 8 elements of the result are zeroed.
/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate
/// any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zextph128_ph256)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_zextph128_ph256(a: __m128h) -> __m256h {
    unsafe {
        simd_shuffle!(
            a,
            _mm_setzero_ph(),
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]
        )
    }
}

/// Cast vector of type `__m256h` to type `__m512h`. The upper 16 elements of the result are zeroed.
/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate
/// any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextph256_ph512)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_zextph256_ph512(a: __m256h) -> __m512h {
    unsafe {
        simd_shuffle!(
            a,
            _mm256_setzero_ph(),
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16,
                16, 16, 16, 16, 16, 16, 16, 16, 16
            ]
        )
    }
}

/// Cast vector of type `__m128h` to type `__m512h`. The upper 24 elements of the result are zeroed.
/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate
/// any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextph128_ph512)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_zextph128_ph512(a: __m128h) -> __m512h {
    unsafe {
        simd_shuffle!(
            a,
            _mm_setzero_ph(),
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
                8, 8, 8, 8
            ]
        )
    }
}
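
// Hedged comparison (illustration only, not from the original source): unlike
// `_mm256_castph128_ph256`, whose upper lanes are left unspecified, the `zext`
// variants guarantee the widened lanes are zero.
//
//     let lo = _mm_set1_ph(2.0);
//     let widened = _mm256_zextph128_ph256(lo); // lanes 0..8 == 2.0, lanes 8..16 == 0.0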

macro_rules! cmp_asm { // FIXME: use LLVM intrinsics
    ($mask_type: ty, $reg: ident, $a: expr, $b: expr) => {{
        let dst: $mask_type;
        asm!(
            "vcmpph {k}, {a}, {b}, {imm8}",
            k = lateout(kreg) dst,
            a = in($reg) $a,
            b = in($reg) $b,
            imm8 = const IMM5,
            options(pure, nomem, nostack)
        );
        dst
    }};
    ($mask_type: ty, $mask: expr, $reg: ident, $a: expr, $b: expr) => {{
        let dst: $mask_type;
        asm!(
            "vcmpph {k} {{ {mask} }}, {a}, {b}, {imm8}",
            k = lateout(kreg) dst,
            mask = in(kreg) $mask,
            a = in($reg) $a,
            b = in($reg) $b,
            imm8 = const IMM5,
            options(pure, nomem, nostack)
        );
        dst
    }};
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_cmp_ph_mask<const IMM5: i32>(a: __m128h, b: __m128h) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask8, xmm_reg, a, b)
    }
}
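
// A small usage sketch (assumes the standard `_CMP_*` predicate constants from
// the SSE/AVX comparison intrinsics; not taken from this file):
//
//     let a = _mm_set1_ph(1.0);
//     let b = _mm_set1_ph(2.0);
//     let k = _mm_cmp_ph_mask::<_CMP_LT_OQ>(a, b); // all 8 lanes compare true => k == 0xff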

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_mask_cmp_ph_mask<const IMM5: i32>(k1: __mmask8, a: __m128h, b: __m128h) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask8, k1, xmm_reg, a, b)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_cmp_ph_mask<const IMM5: i32>(a: __m256h, b: __m256h) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask16, ymm_reg, a, b)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_mask_cmp_ph_mask<const IMM5: i32>(
    k1: __mmask16,
    a: __m256h,
    b: __m256h,
) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask16, k1, ymm_reg, a, b)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_cmp_ph_mask<const IMM5: i32>(a: __m512h, b: __m512h) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask32, zmm_reg, a, b)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_mask_cmp_ph_mask<const IMM5: i32>(
    k1: __mmask32,
    a: __m512h,
    b: __m512h,
) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask32, k1, zmm_reg, a, b)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k.
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_round_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2, 3)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_cmp_round_ph_mask<const IMM5: i32, const SAE: i32>(
    a: __m512h,
    b: __m512h,
) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_sae!(SAE);
        if SAE == _MM_FROUND_NO_EXC {
            let dst: __mmask32;
            asm!(
                "vcmpph {k}, {a}, {b}, {{sae}}, {imm8}",
                k = lateout(kreg) dst,
                a = in(zmm_reg) a,
                b = in(zmm_reg) b,
                imm8 = const IMM5,
                options(pure, nomem, nostack)
            );
            dst
        } else {
            cmp_asm!(__mmask32, zmm_reg, a, b)
        }
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_round_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(3, 4)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_mask_cmp_round_ph_mask<const IMM5: i32, const SAE: i32>(
    k1: __mmask32,
    a: __m512h,
    b: __m512h,
) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_sae!(SAE);
        if SAE == _MM_FROUND_NO_EXC {
            let dst: __mmask32;
            asm!(
                "vcmpph {k} {{{k1}}}, {a}, {b}, {{sae}}, {imm8}",
                k = lateout(kreg) dst,
                k1 = in(kreg) k1,
                a = in(zmm_reg) a,
                b = in(zmm_reg) b,
                imm8 = const IMM5,
                options(pure, nomem, nostack)
            );
            dst
        } else {
            cmp_asm!(__mmask32, k1, zmm_reg, a, b)
        }
    }
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the result in mask vector k. Exceptions can be suppressed by
/// passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_sh_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2, 3)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_cmp_round_sh_mask<const IMM5: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __mmask8 {
    static_assert_uimm_bits!(IMM5, 5);
    static_assert_sae!(SAE);
    _mm_mask_cmp_round_sh_mask::<IMM5, SAE>(0xff, a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the result in mask vector k using zeromask k1. Exceptions can be
/// suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_sh_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(3, 4)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_mask_cmp_round_sh_mask<const IMM5: i32, const SAE: i32>(
    k1: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_sae!(SAE);
        vcmpsh(a, b, IMM5, k1, SAE)
    }
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the result in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sh_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_cmp_sh_mask<const IMM5: i32>(a: __m128h, b: __m128h) -> __mmask8 {
    static_assert_uimm_bits!(IMM5, 5);
    _mm_cmp_round_sh_mask::<IMM5, _MM_FROUND_CUR_DIRECTION>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the result in mask vector k using zeromask k1.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_sh_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_mask_cmp_sh_mask<const IMM5: i32>(k1: __mmask8, a: __m128h, b: __m128h) -> __mmask8 {
    static_assert_uimm_bits!(IMM5, 5);
    _mm_mask_cmp_round_sh_mask::<IMM5, _MM_FROUND_CUR_DIRECTION>(k1, a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and return the boolean result (0 or 1).
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2, 3)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_comi_round_sh<const IMM5: i32, const SAE: i32>(a: __m128h, b: __m128h) -> i32 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_sae!(SAE);
        vcomish(a, b, IMM5, SAE)
    }
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and return the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_comi_sh<const IMM5: i32>(a: __m128h, b: __m128h) -> i32 {
    static_assert_uimm_bits!(IMM5, 5);
    _mm_comi_round_sh::<IMM5, _MM_FROUND_CUR_DIRECTION>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for equality, and return
/// the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_comieq_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_EQ_OS>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than-or-equal,
/// and return the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_comige_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_GE_OS>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than, and return
/// the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_comigt_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_GT_OS>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than-or-equal, and
/// return the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_comile_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_LE_OS>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than, and return
/// the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_comilt_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_LT_OS>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for not-equal, and return
/// the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_comineq_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_NEQ_US>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for equality, and
/// return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_ucomieq_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_EQ_OQ>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than-or-equal,
/// and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_ucomige_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_GE_OQ>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than, and return
/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_ucomigt_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_GT_OQ>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than-or-equal, and
/// return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_ucomile_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_LE_OQ>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than, and return
/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_ucomilt_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_LT_OQ>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for not-equal, and return
/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_ucomineq_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_NEQ_UQ>(a, b)
}

/// Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address must be aligned to 16 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_load_ph(mem_addr: *const f16) -> __m128h {
    *mem_addr.cast()
}

/// Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address must be aligned to 32 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_load_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_load_ph(mem_addr: *const f16) -> __m256h {
    *mem_addr.cast()
}

/// Load 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address must be aligned to 64 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_load_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_load_ph(mem_addr: *const f16) -> __m512h {
    *mem_addr.cast()
}

/// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector,
/// and zero the upper elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_load_sh(mem_addr: *const f16) -> __m128h {
    _mm_set_sh(*mem_addr)
}

/// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector
/// using writemask k (the element is copied from src when mask bit 0 is not set), and zero the upper elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_mask_load_sh(src: __m128h, k: __mmask8, mem_addr: *const f16) -> __m128h {
    let mut dst: __m128h = src;
    asm!(
        vpl!("vmovsh {dst}{{{k}}}"),
        dst = inout(xmm_reg) dst,
        k = in(kreg) k,
        p = in(reg) mem_addr,
        options(pure, readonly, nostack, preserves_flags)
    );
    dst
}

/// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector
/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and zero the upper elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_maskz_load_sh(k: __mmask8, mem_addr: *const f16) -> __m128h {
    let mut dst: __m128h;
    asm!(
        vpl!("vmovsh {dst}{{{k}}}{{z}}"),
        dst = out(xmm_reg) dst,
        k = in(kreg) k,
        p = in(reg) mem_addr,
        options(pure, readonly, nostack, preserves_flags)
    );
    dst
}

/// Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address does not need to be aligned to any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_loadu_ph(mem_addr: *const f16) -> __m128h {
    ptr::read_unaligned(mem_addr.cast())
}

/// Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address does not need to be aligned to any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_loadu_ph(mem_addr: *const f16) -> __m256h {
    ptr::read_unaligned(mem_addr.cast())
}

/// Load 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address does not need to be aligned to any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_loadu_ph(mem_addr: *const f16) -> __m512h {
    ptr::read_unaligned(mem_addr.cast())
}
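
// Minimal sketch of loading from a slice (illustrative; the surrounding setup is
// an assumption, not part of this file). `_mm_loadu_ph` has no alignment
// requirement, so a plain array of `f16` works as a source:
//
//     let data: [f16; 8] = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0];
//     // Safety: `data` holds at least 8 contiguous `f16` values.
//     let v = unsafe { _mm_loadu_ph(data.as_ptr()) };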

/// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst
/// using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper
/// 7 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_move_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_move_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe {
        let mut mov: f16 = simd_extract!(src, 0);
        if (k & 1) != 0 {
            mov = simd_extract!(b, 0);
        }
        simd_insert!(a, 0, mov)
    }
}

/// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst
/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed
/// elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_move_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_move_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe {
        let mut mov: f16 = 0.;
        if (k & 1) != 0 {
            mov = simd_extract!(b, 0);
        }
        simd_insert!(a, 0, mov)
    }
}

/// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst,
/// and copy the upper 7 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_move_sh(a: __m128h, b: __m128h) -> __m128h {
    unsafe {
        let mov: f16 = simd_extract!(b, 0);
        simd_insert!(a, 0, mov)
    }
}
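
// Hedged example (not from the original source): `_mm_mask_move_sh` merges the
// low lane from `b` only when bit 0 of `k` is set, otherwise it keeps the low
// lane of `src`; the upper 7 lanes always come from `a`.
//
//     let src = _mm_set1_ph(9.0);
//     let a = _mm_set1_ph(1.0);
//     let b = _mm_set1_ph(2.0);
//     let r0 = _mm_mask_move_sh(src, 0, a, b); // lane 0 == 9.0 (from src), lanes 1..8 == 1.0
//     let r1 = _mm_mask_move_sh(src, 1, a, b); // lane 0 == 2.0 (from b),   lanes 1..8 == 1.0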

/// Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from a into memory.
/// The address must be aligned to 16 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_store_ph(mem_addr: *mut f16, a: __m128h) {
    *mem_addr.cast() = a;
}

/// Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from a into memory.
/// The address must be aligned to 32 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_store_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_store_ph(mem_addr: *mut f16, a: __m256h) {
    *mem_addr.cast() = a;
}

/// Store 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from a into memory.
/// The address must be aligned to 64 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_store_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_store_ph(mem_addr: *mut f16, a: __m512h) {
    *mem_addr.cast() = a;
}

/// Store the lower half-precision (16-bit) floating-point element from a into memory.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_store_sh(mem_addr: *mut f16, a: __m128h) {
    *mem_addr = simd_extract!(a, 0);
}
1343
/// Store the lower half-precision (16-bit) floating-point element from a into memory using writemask k
/// (the element is only stored when mask bit 0 is set).
1345///
1346/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sh)
1347#[inline]
1348#[target_feature(enable = "avx512fp16")]
1349#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1350pub unsafe fn _mm_mask_store_sh(mem_addr: *mut f16, k: __mmask8, a: __m128h) {
1351 asm!(
1352 vps!("vmovdqu16", "{{{k}}}, {src}"),
1353 p = in(reg) mem_addr,
1354 k = in(kreg) k,
1355 src = in(xmm_reg) a,
1356 options(nostack, preserves_flags)
1357 );
1358}
1359
1360/// Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from a into memory.
1361/// The address does not need to be aligned to any particular boundary.
1362///
1363/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_ph)
1364#[inline]
1365#[target_feature(enable = "avx512fp16,avx512vl")]
1366#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1367#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1368pub const unsafe fn _mm_storeu_ph(mem_addr: *mut f16, a: __m128h) {
    ptr::write_unaligned(mem_addr.cast(), a);
1370}
1371
1372/// Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from a into memory.
1373/// The address does not need to be aligned to any particular boundary.
1374///
1375/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu_ph)
1376#[inline]
1377#[target_feature(enable = "avx512fp16,avx512vl")]
1378#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1379#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1380pub const unsafe fn _mm256_storeu_ph(mem_addr: *mut f16, a: __m256h) {
    ptr::write_unaligned(mem_addr.cast(), a);
1382}
1383
1384/// Store 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from a into memory.
1385/// The address does not need to be aligned to any particular boundary.
1386///
1387/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ph)
1388#[inline]
1389#[target_feature(enable = "avx512fp16")]
1390#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1391#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1392pub const unsafe fn _mm512_storeu_ph(mem_addr: *mut f16, a: __m512h) {
    ptr::write_unaligned(mem_addr.cast(), a);
1394}
1395
1396/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
1397///
1398/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_ph)
1399#[inline]
1400#[target_feature(enable = "avx512fp16,avx512vl")]
1401#[cfg_attr(test, assert_instr(vaddph))]
1402#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1403#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1404pub const fn _mm_add_ph(a: __m128h, b: __m128h) -> __m128h {
    unsafe { simd_add(a, b) }
1406}
1407
1408/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1409/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1410///
1411/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_ph)
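///
/// A short sketch of the writemask semantics (illustrative; the lane values are made up for
/// the example):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let src = _mm_set1_ph(-1.0);
///     let a = _mm_set1_ph(1.0);
///     let b = _mm_set1_ph(2.0);
///     // Only lanes 0 and 1 are computed; the remaining lanes are copied from src.
///     let r = _mm_mask_add_ph(src, 0b0000_0011, a, b);
///     // r = [3.0, 3.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0]
/// }
/// ```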
1412#[inline]
1413#[target_feature(enable = "avx512fp16,avx512vl")]
1414#[cfg_attr(test, assert_instr(vaddph))]
1415#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1416#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1417pub const fn _mm_mask_add_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1418 unsafe {
1419 let r: __m128h = _mm_add_ph(a, b);
        simd_select_bitmask(k, r, src)
1421 }
1422}
1423
1424/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1425/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1426///
1427/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_ph)
1428#[inline]
1429#[target_feature(enable = "avx512fp16,avx512vl")]
1430#[cfg_attr(test, assert_instr(vaddph))]
1431#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1432#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1433pub const fn _mm_maskz_add_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1434 unsafe {
1435 let r: __m128h = _mm_add_ph(a, b);
        simd_select_bitmask(k, r, _mm_setzero_ph())
1437 }
1438}
1439
1440/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
1441///
1442/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_ph)
1443#[inline]
1444#[target_feature(enable = "avx512fp16,avx512vl")]
1445#[cfg_attr(test, assert_instr(vaddph))]
1446#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1447#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1448pub const fn _mm256_add_ph(a: __m256h, b: __m256h) -> __m256h {
    unsafe { simd_add(a, b) }
1450}
1451
1452/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1453/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1454///
1455/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_add_ph)
1456#[inline]
1457#[target_feature(enable = "avx512fp16,avx512vl")]
1458#[cfg_attr(test, assert_instr(vaddph))]
1459#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1460#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1461pub const fn _mm256_mask_add_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
1462 unsafe {
1463 let r: __m256h = _mm256_add_ph(a, b);
        simd_select_bitmask(k, r, src)
1465 }
1466}
1467
1468/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1469/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1470///
1471/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_add_ph)
1472#[inline]
1473#[target_feature(enable = "avx512fp16,avx512vl")]
1474#[cfg_attr(test, assert_instr(vaddph))]
1475#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1476#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1477pub const fn _mm256_maskz_add_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
1478 unsafe {
1479 let r: __m256h = _mm256_add_ph(a, b);
        simd_select_bitmask(k, r, _mm256_setzero_ph())
1481 }
1482}
1483
1484/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
1485///
1486/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_ph)
1487#[inline]
1488#[target_feature(enable = "avx512fp16")]
1489#[cfg_attr(test, assert_instr(vaddph))]
1490#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1491#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1492pub const fn _mm512_add_ph(a: __m512h, b: __m512h) -> __m512h {
    unsafe { simd_add(a, b) }
1494}
1495
1496/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1497/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1498///
1499/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_ph)
1500#[inline]
1501#[target_feature(enable = "avx512fp16")]
1502#[cfg_attr(test, assert_instr(vaddph))]
1503#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1504#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1505pub const fn _mm512_mask_add_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
1506 unsafe {
1507 let r: __m512h = _mm512_add_ph(a, b);
        simd_select_bitmask(k, r, src)
1509 }
1510}
1511
1512/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1513/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1514///
1515/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_ph)
1516#[inline]
1517#[target_feature(enable = "avx512fp16")]
1518#[cfg_attr(test, assert_instr(vaddph))]
1519#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1520#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1521pub const fn _mm512_maskz_add_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
1522 unsafe {
1523 let r: __m512h = _mm512_add_ph(a, b);
        simd_select_bitmask(k, r, _mm512_setzero_ph())
1525 }
1526}
1527
1528/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
1529/// Rounding is done according to the rounding parameter, which can be one of:
1530///
1531/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1532/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1533/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1534/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1535/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1536///
1537/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_round_ph)
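///
/// A minimal sketch of selecting an explicit rounding mode (illustrative only):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ph(1.0);
///     let b = _mm512_set1_ph(2.0);
///     // Round toward zero and suppress exceptions for this one operation,
///     // independent of the current MXCSR rounding mode.
///     let r = _mm512_add_round_ph::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
/// }
/// ```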
1538#[inline]
1539#[target_feature(enable = "avx512fp16")]
1540#[cfg_attr(test, assert_instr(vaddph, ROUNDING = 8))]
1541#[rustc_legacy_const_generics(2)]
1542#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1543pub fn _mm512_add_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
1544 unsafe {
1545 static_assert_rounding!(ROUNDING);
1546 vaddph(a, b, ROUNDING)
1547 }
1548}
1549
1550/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1551/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1552/// Rounding is done according to the rounding parameter, which can be one of:
1553///
1554/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1555/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1556/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1557/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1558/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1559///
1560/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_round_ph)
1561#[inline]
1562#[target_feature(enable = "avx512fp16")]
1563#[cfg_attr(test, assert_instr(vaddph, ROUNDING = 8))]
1564#[rustc_legacy_const_generics(4)]
1565#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1566pub fn _mm512_mask_add_round_ph<const ROUNDING: i32>(
1567 src: __m512h,
1568 k: __mmask32,
1569 a: __m512h,
1570 b: __m512h,
1571) -> __m512h {
1572 unsafe {
1573 static_assert_rounding!(ROUNDING);
1574 let r: __m512h = _mm512_add_round_ph::<ROUNDING>(a, b);
        simd_select_bitmask(k, r, src)
1576 }
1577}
1578
1579/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1580/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1581/// Rounding is done according to the rounding parameter, which can be one of:
1582///
1583/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1584/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1585/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
1588/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_round_ph)
1589#[inline]
1590#[target_feature(enable = "avx512fp16")]
1591#[cfg_attr(test, assert_instr(vaddph, ROUNDING = 8))]
1592#[rustc_legacy_const_generics(3)]
1593#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1594pub fn _mm512_maskz_add_round_ph<const ROUNDING: i32>(
1595 k: __mmask32,
1596 a: __m512h,
1597 b: __m512h,
1598) -> __m512h {
1599 unsafe {
1600 static_assert_rounding!(ROUNDING);
1601 let r: __m512h = _mm512_add_round_ph::<ROUNDING>(a, b);
        simd_select_bitmask(k, r, _mm512_setzero_ph())
1603 }
1604}
1605
1606/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1607/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
1608/// Rounding is done according to the rounding parameter, which can be one of:
1609///
1610/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1611/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1612/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1613/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1614/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1615///
1616/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_round_sh)
1617#[inline]
1618#[target_feature(enable = "avx512fp16")]
1619#[cfg_attr(test, assert_instr(vaddsh, ROUNDING = 8))]
1620#[rustc_legacy_const_generics(2)]
1621#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1622pub fn _mm_add_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
1623 static_assert_rounding!(ROUNDING);
    _mm_mask_add_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
1625}
1626
1627/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1628/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1629/// writemask k (the element is copied from src when mask bit 0 is not set).
1630/// Rounding is done according to the rounding parameter, which can be one of:
1631///
1632/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1633/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1634/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1635/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1636/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1637///
1638/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_round_sh)
1639#[inline]
1640#[target_feature(enable = "avx512fp16")]
1641#[cfg_attr(test, assert_instr(vaddsh, ROUNDING = 8))]
1642#[rustc_legacy_const_generics(4)]
1643#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1644pub fn _mm_mask_add_round_sh<const ROUNDING: i32>(
1645 src: __m128h,
1646 k: __mmask8,
1647 a: __m128h,
1648 b: __m128h,
1649) -> __m128h {
1650 unsafe {
1651 static_assert_rounding!(ROUNDING);
1652 vaddsh(a, b, src, k, ROUNDING)
1653 }
1654}
1655
1656/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1657/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1658/// zeromask k (the element is zeroed out when mask bit 0 is not set).
1659/// Rounding is done according to the rounding parameter, which can be one of:
1660///
1661/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1662/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1663/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1664/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1665/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1666///
1667/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_round_sh)
1668#[inline]
1669#[target_feature(enable = "avx512fp16")]
1670#[cfg_attr(test, assert_instr(vaddsh, ROUNDING = 8))]
1671#[rustc_legacy_const_generics(3)]
1672#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1673pub fn _mm_maskz_add_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1674 static_assert_rounding!(ROUNDING);
    _mm_mask_add_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
1676}
1677
1678/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1679/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
1680///
1681/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sh)
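///
/// A small sketch of the scalar behaviour (illustrative; lane values chosen for the example):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set_ph(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 10.0);
///     let b = _mm_set1_ph(0.5);
///     // Only lane 0 is added (10.0 + 0.5 = 10.5); lanes 1..=7 are copied from a.
///     let r = _mm_add_sh(a, b);
/// }
/// ```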
1682#[inline]
1683#[target_feature(enable = "avx512fp16")]
1684#[cfg_attr(test, assert_instr(vaddsh))]
1685#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1686#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1687pub const fn _mm_add_sh(a: __m128h, b: __m128h) -> __m128h {
1688 unsafe { simd_insert!(a, 0, _mm_cvtsh_h(a) + _mm_cvtsh_h(b)) }
1689}
1690
1691/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1692/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1693/// writemask k (the element is copied from src when mask bit 0 is not set).
1694///
1695/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_sh)
1696#[inline]
1697#[target_feature(enable = "avx512fp16")]
1698#[cfg_attr(test, assert_instr(vaddsh))]
1699#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1700#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1701pub const fn _mm_mask_add_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1702 unsafe {
1703 let extractsrc: f16 = simd_extract!(src, 0);
1704 let mut add: f16 = extractsrc;
1705 if (k & 0b00000001) != 0 {
1706 let extracta: f16 = simd_extract!(a, 0);
1707 let extractb: f16 = simd_extract!(b, 0);
1708 add = extracta + extractb;
1709 }
1710 simd_insert!(a, 0, add)
1711 }
1712}
1713
1714/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1715/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1716/// zeromask k (the element is zeroed out when mask bit 0 is not set).
1717///
1718/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_sh)
1719#[inline]
1720#[target_feature(enable = "avx512fp16")]
1721#[cfg_attr(test, assert_instr(vaddsh))]
1722#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1723#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1724pub const fn _mm_maskz_add_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1725 unsafe {
1726 let mut add: f16 = 0.;
1727 if (k & 0b00000001) != 0 {
1728 let extracta: f16 = simd_extract!(a, 0);
1729 let extractb: f16 = simd_extract!(b, 0);
1730 add = extracta + extractb;
1731 }
1732 simd_insert!(a, 0, add)
1733 }
1734}
1735
1736/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst.
1737///
1738/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_ph)
1739#[inline]
1740#[target_feature(enable = "avx512fp16,avx512vl")]
1741#[cfg_attr(test, assert_instr(vsubph))]
1742#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1743#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1744pub const fn _mm_sub_ph(a: __m128h, b: __m128h) -> __m128h {
    unsafe { simd_sub(a, b) }
1746}
1747
1748/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1749/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1750///
1751/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_ph)
1752#[inline]
1753#[target_feature(enable = "avx512fp16,avx512vl")]
1754#[cfg_attr(test, assert_instr(vsubph))]
1755#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1756#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1757pub const fn _mm_mask_sub_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1758 unsafe {
1759 let r: __m128h = _mm_sub_ph(a, b);
        simd_select_bitmask(k, r, src)
1761 }
1762}
1763
1764/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1765/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1766///
1767/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_ph)
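///
/// A short sketch contrasting the zeromask with the writemask form (illustrative only; lane
/// values chosen for the example):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set1_ph(5.0);
///     let b = _mm_set1_ph(2.0);
///     // Lanes whose mask bit is clear are zeroed instead of being copied from a source vector.
///     let r = _mm_maskz_sub_ph(0b0000_1111, a, b);
///     // r = [3.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0]
/// }
/// ```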
1768#[inline]
1769#[target_feature(enable = "avx512fp16,avx512vl")]
1770#[cfg_attr(test, assert_instr(vsubph))]
1771#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1772#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1773pub const fn _mm_maskz_sub_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1774 unsafe {
1775 let r: __m128h = _mm_sub_ph(a, b);
        simd_select_bitmask(k, r, _mm_setzero_ph())
1777 }
1778}
1779
1780/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst.
1781///
1782/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_ph)
1783#[inline]
1784#[target_feature(enable = "avx512fp16,avx512vl")]
1785#[cfg_attr(test, assert_instr(vsubph))]
1786#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1787#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1788pub const fn _mm256_sub_ph(a: __m256h, b: __m256h) -> __m256h {
    unsafe { simd_sub(a, b) }
1790}
1791
1792/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1793/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1794///
1795/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sub_ph)
1796#[inline]
1797#[target_feature(enable = "avx512fp16,avx512vl")]
1798#[cfg_attr(test, assert_instr(vsubph))]
1799#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1800#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1801pub const fn _mm256_mask_sub_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
1802 unsafe {
1803 let r: __m256h = _mm256_sub_ph(a, b);
        simd_select_bitmask(k, r, src)
1805 }
1806}
1807
1808/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1809/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1810///
1811/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sub_ph)
1812#[inline]
1813#[target_feature(enable = "avx512fp16,avx512vl")]
1814#[cfg_attr(test, assert_instr(vsubph))]
1815#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1816#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1817pub const fn _mm256_maskz_sub_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
1818 unsafe {
1819 let r: __m256h = _mm256_sub_ph(a, b);
        simd_select_bitmask(k, r, _mm256_setzero_ph())
1821 }
1822}
1823
1824/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst.
1825///
1826/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_ph)
1827#[inline]
1828#[target_feature(enable = "avx512fp16")]
1829#[cfg_attr(test, assert_instr(vsubph))]
1830#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1831#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1832pub const fn _mm512_sub_ph(a: __m512h, b: __m512h) -> __m512h {
    unsafe { simd_sub(a, b) }
1834}
1835
1836/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1837/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1838///
1839/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_ph)
1840#[inline]
1841#[target_feature(enable = "avx512fp16")]
1842#[cfg_attr(test, assert_instr(vsubph))]
1843#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1844#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1845pub const fn _mm512_mask_sub_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
1846 unsafe {
1847 let r: __m512h = _mm512_sub_ph(a, b);
        simd_select_bitmask(k, r, src)
1849 }
1850}
1851
1852/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1853/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1854///
1855/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_ph)
1856#[inline]
1857#[target_feature(enable = "avx512fp16")]
1858#[cfg_attr(test, assert_instr(vsubph))]
1859#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1860#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1861pub const fn _mm512_maskz_sub_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
1862 unsafe {
1863 let r: __m512h = _mm512_sub_ph(a, b);
        simd_select_bitmask(k, r, _mm512_setzero_ph())
1865 }
1866}
1867
1868/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst.
1869/// Rounding is done according to the rounding parameter, which can be one of:
1870///
1871/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1872/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1873/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1874/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1875/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1876///
1877/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_round_ph)
1878#[inline]
1879#[target_feature(enable = "avx512fp16")]
1880#[cfg_attr(test, assert_instr(vsubph, ROUNDING = 8))]
1881#[rustc_legacy_const_generics(2)]
1882#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1883pub fn _mm512_sub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
1884 unsafe {
1885 static_assert_rounding!(ROUNDING);
1886 vsubph(a, b, ROUNDING)
1887 }
1888}
1889
1890/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1891/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1892/// Rounding is done according to the rounding parameter, which can be one of:
1893///
1894/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1895/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1896/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1897/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1898/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1899///
1900/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_round_ph)
1901#[inline]
1902#[target_feature(enable = "avx512fp16")]
1903#[cfg_attr(test, assert_instr(vsubph, ROUNDING = 8))]
1904#[rustc_legacy_const_generics(4)]
1905#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1906pub fn _mm512_mask_sub_round_ph<const ROUNDING: i32>(
1907 src: __m512h,
1908 k: __mmask32,
1909 a: __m512h,
1910 b: __m512h,
1911) -> __m512h {
1912 unsafe {
1913 static_assert_rounding!(ROUNDING);
1914 let r: __m512h = _mm512_sub_round_ph::<ROUNDING>(a, b);
        simd_select_bitmask(k, r, src)
1916 }
1917}
1918
1919/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1920/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1921/// Rounding is done according to the rounding parameter, which can be one of:
1922///
1923/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1924/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1925/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1926/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1927/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1928///
1929/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_round_ph)
1930#[inline]
1931#[target_feature(enable = "avx512fp16")]
1932#[cfg_attr(test, assert_instr(vsubph, ROUNDING = 8))]
1933#[rustc_legacy_const_generics(3)]
1934#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1935pub fn _mm512_maskz_sub_round_ph<const ROUNDING: i32>(
1936 k: __mmask32,
1937 a: __m512h,
1938 b: __m512h,
1939) -> __m512h {
1940 unsafe {
1941 static_assert_rounding!(ROUNDING);
1942 let r: __m512h = _mm512_sub_round_ph::<ROUNDING>(a, b);
        simd_select_bitmask(k, r, _mm512_setzero_ph())
1944 }
1945}
1946
1947/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1948/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
1949/// Rounding is done according to the rounding parameter, which can be one of:
1950///
1951/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1952/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1953/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1954/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1955/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1956///
1957/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_round_sh)
1958#[inline]
1959#[target_feature(enable = "avx512fp16")]
1960#[cfg_attr(test, assert_instr(vsubsh, ROUNDING = 8))]
1961#[rustc_legacy_const_generics(2)]
1962#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1963pub fn _mm_sub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
1964 static_assert_rounding!(ROUNDING);
    _mm_mask_sub_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
1966}
1967
1968/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1969/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1970/// writemask k (the element is copied from src when mask bit 0 is not set).
1971/// Rounding is done according to the rounding parameter, which can be one of:
1972///
1973/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1974/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1975/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1976/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1977/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1978///
1979/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_round_sh)
1980#[inline]
1981#[target_feature(enable = "avx512fp16")]
1982#[cfg_attr(test, assert_instr(vsubsh, ROUNDING = 8))]
1983#[rustc_legacy_const_generics(4)]
1984#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
1985pub fn _mm_mask_sub_round_sh<const ROUNDING: i32>(
1986 src: __m128h,
1987 k: __mmask8,
1988 a: __m128h,
1989 b: __m128h,
1990) -> __m128h {
1991 unsafe {
1992 static_assert_rounding!(ROUNDING);
1993 vsubsh(a, b, src, k, ROUNDING)
1994 }
1995}
1996
1997/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1998/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1999/// zeromask k (the element is zeroed out when mask bit 0 is not set).
2000/// Rounding is done according to the rounding parameter, which can be one of:
2001///
2002/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2003/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2004/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2005/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2006/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2007///
2008/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_round_sh)
2009#[inline]
2010#[target_feature(enable = "avx512fp16")]
2011#[cfg_attr(test, assert_instr(vsubsh, ROUNDING = 8))]
2012#[rustc_legacy_const_generics(3)]
2013#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2014pub fn _mm_maskz_sub_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2015 static_assert_rounding!(ROUNDING);
    _mm_mask_sub_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
2017}
2018
2019/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
2020/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
2021///
2022/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sh)
2023#[inline]
2024#[target_feature(enable = "avx512fp16")]
2025#[cfg_attr(test, assert_instr(vsubsh))]
2026#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2027#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2028pub const fn _mm_sub_sh(a: __m128h, b: __m128h) -> __m128h {
2029 unsafe { simd_insert!(a, 0, _mm_cvtsh_h(a) - _mm_cvtsh_h(b)) }
2030}
2031
2032/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
2033/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2034/// writemask k (the element is copied from src when mask bit 0 is not set).
2035///
2036/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_sh)
2037#[inline]
2038#[target_feature(enable = "avx512fp16")]
2039#[cfg_attr(test, assert_instr(vsubsh))]
2040#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2041#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2042pub const fn _mm_mask_sub_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2043 unsafe {
2044 let extractsrc: f16 = simd_extract!(src, 0);
        let mut sub: f16 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f16 = simd_extract!(a, 0);
            let extractb: f16 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
2052 }
2053}
2054
2055/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
2056/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2057/// zeromask k (the element is zeroed out when mask bit 0 is not set).
2058///
2059/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_sh)
2060#[inline]
2061#[target_feature(enable = "avx512fp16")]
2062#[cfg_attr(test, assert_instr(vsubsh))]
2063#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2064#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2065pub const fn _mm_maskz_sub_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2066 unsafe {
        let mut sub: f16 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f16 = simd_extract!(a, 0);
            let extractb: f16 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
2074 }
2075}
2076
2077/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
2078///
2079/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_ph)
2080#[inline]
2081#[target_feature(enable = "avx512fp16,avx512vl")]
2082#[cfg_attr(test, assert_instr(vmulph))]
2083#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2084#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2085pub const fn _mm_mul_ph(a: __m128h, b: __m128h) -> __m128h {
    unsafe { simd_mul(a, b) }
2087}
2088
2089/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2090/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2091///
2092/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_ph)
2093#[inline]
2094#[target_feature(enable = "avx512fp16,avx512vl")]
2095#[cfg_attr(test, assert_instr(vmulph))]
2096#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2097#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2098pub const fn _mm_mask_mul_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2099 unsafe {
2100 let r: __m128h = _mm_mul_ph(a, b);
        simd_select_bitmask(k, r, src)
2102 }
2103}
2104
2105/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2106/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2107///
2108/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_ph)
2109#[inline]
2110#[target_feature(enable = "avx512fp16,avx512vl")]
2111#[cfg_attr(test, assert_instr(vmulph))]
2112#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2113#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2114pub const fn _mm_maskz_mul_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2115 unsafe {
2116 let r: __m128h = _mm_mul_ph(a, b);
        simd_select_bitmask(k, r, _mm_setzero_ph())
2118 }
2119}
2120
2121/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
2122///
2123/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_ph)
2124#[inline]
2125#[target_feature(enable = "avx512fp16,avx512vl")]
2126#[cfg_attr(test, assert_instr(vmulph))]
2127#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2128#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2129pub const fn _mm256_mul_ph(a: __m256h, b: __m256h) -> __m256h {
    unsafe { simd_mul(a, b) }
2131}
2132
2133/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2134/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2135///
2136/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_ph)
2137#[inline]
2138#[target_feature(enable = "avx512fp16,avx512vl")]
2139#[cfg_attr(test, assert_instr(vmulph))]
2140#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2141#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2142pub const fn _mm256_mask_mul_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
2143 unsafe {
2144 let r: __m256h = _mm256_mul_ph(a, b);
        simd_select_bitmask(k, r, src)
2146 }
2147}
2148
2149/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2150/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2151///
2152/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_ph)
2153#[inline]
2154#[target_feature(enable = "avx512fp16,avx512vl")]
2155#[cfg_attr(test, assert_instr(vmulph))]
2156#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2157#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2158pub const fn _mm256_maskz_mul_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
2159 unsafe {
2160 let r: __m256h = _mm256_mul_ph(a, b);
        simd_select_bitmask(k, r, _mm256_setzero_ph())
2162 }
2163}
2164
2165/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
2166///
2167/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_ph)
2168#[inline]
2169#[target_feature(enable = "avx512fp16")]
2170#[cfg_attr(test, assert_instr(vmulph))]
2171#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2172#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2173pub const fn _mm512_mul_ph(a: __m512h, b: __m512h) -> __m512h {
    unsafe { simd_mul(a, b) }
2175}
2176
2177/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2178/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2179///
2180/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_ph)
2181#[inline]
2182#[target_feature(enable = "avx512fp16")]
2183#[cfg_attr(test, assert_instr(vmulph))]
2184#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2185#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2186pub const fn _mm512_mask_mul_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
2187 unsafe {
2188 let r: __m512h = _mm512_mul_ph(a, b);
        simd_select_bitmask(k, r, src)
2190 }
2191}
2192
2193/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2194/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2195///
2196/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_ph)
2197#[inline]
2198#[target_feature(enable = "avx512fp16")]
2199#[cfg_attr(test, assert_instr(vmulph))]
2200#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2201#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2202pub const fn _mm512_maskz_mul_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
2203 unsafe {
2204 let r: __m512h = _mm512_mul_ph(a, b);
        simd_select_bitmask(k, r, _mm512_setzero_ph())
2206 }
2207}
2208
2209/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
2210/// Rounding is done according to the rounding parameter, which can be one of:
2211///
2212/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2213/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2214/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2215/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2216/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2217///
2218/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_ph)
2219#[inline]
2220#[target_feature(enable = "avx512fp16")]
2221#[cfg_attr(test, assert_instr(vmulph, ROUNDING = 8))]
2222#[rustc_legacy_const_generics(2)]
2223#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2224pub fn _mm512_mul_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
2225 unsafe {
2226 static_assert_rounding!(ROUNDING);
2227 vmulph(a, b, ROUNDING)
2228 }
2229}
2230
2231/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2232/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2233/// Rounding is done according to the rounding parameter, which can be one of:
2234///
2235/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2236/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2237/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2238/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2239/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2240///
2241/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_ph)
2242#[inline]
2243#[target_feature(enable = "avx512fp16")]
2244#[cfg_attr(test, assert_instr(vmulph, ROUNDING = 8))]
2245#[rustc_legacy_const_generics(4)]
2246#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2247pub fn _mm512_mask_mul_round_ph<const ROUNDING: i32>(
2248 src: __m512h,
2249 k: __mmask32,
2250 a: __m512h,
2251 b: __m512h,
2252) -> __m512h {
2253 unsafe {
2254 static_assert_rounding!(ROUNDING);
2255 let r: __m512h = _mm512_mul_round_ph::<ROUNDING>(a, b);
        simd_select_bitmask(k, r, src)
2257 }
2258}
2259
2260/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2261/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2262/// Rounding is done according to the rounding parameter, which can be one of:
2263///
2264/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2265/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2266/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2267/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2268/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2269///
2270/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_ph)
2271#[inline]
2272#[target_feature(enable = "avx512fp16")]
2273#[cfg_attr(test, assert_instr(vmulph, ROUNDING = 8))]
2274#[rustc_legacy_const_generics(3)]
2275#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2276pub fn _mm512_maskz_mul_round_ph<const ROUNDING: i32>(
2277 k: __mmask32,
2278 a: __m512h,
2279 b: __m512h,
2280) -> __m512h {
2281 unsafe {
2282 static_assert_rounding!(ROUNDING);
2283 let r: __m512h = _mm512_mul_round_ph::<ROUNDING>(a, b);
        simd_select_bitmask(k, r, _mm512_setzero_ph())
2285 }
2286}
2287
2288/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2289/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
2290/// Rounding is done according to the rounding parameter, which can be one of:
2291///
2292/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2293/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2294/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2295/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2296/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2297///
2298/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_round_sh)
2299#[inline]
2300#[target_feature(enable = "avx512fp16")]
2301#[cfg_attr(test, assert_instr(vmulsh, ROUNDING = 8))]
2302#[rustc_legacy_const_generics(2)]
2303#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2304pub fn _mm_mul_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
2305 static_assert_rounding!(ROUNDING);
    _mm_mask_mul_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
2307}
2308
2309/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2310/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2311/// writemask k (the element is copied from src when mask bit 0 is not set).
2312/// Rounding is done according to the rounding parameter, which can be one of:
2313///
2314/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2315/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2316/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2317/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2318/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2319///
2320/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_round_sh)
2321#[inline]
2322#[target_feature(enable = "avx512fp16")]
2323#[cfg_attr(test, assert_instr(vmulsh, ROUNDING = 8))]
2324#[rustc_legacy_const_generics(4)]
2325#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2326pub fn _mm_mask_mul_round_sh<const ROUNDING: i32>(
2327 src: __m128h,
2328 k: __mmask8,
2329 a: __m128h,
2330 b: __m128h,
2331) -> __m128h {
2332 unsafe {
2333 static_assert_rounding!(ROUNDING);
2334 vmulsh(a, b, src, k, ROUNDING)
2335 }
2336}
2337
2338/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2339/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2340/// zeromask k (the element is zeroed out when mask bit 0 is not set).
2341/// Rounding is done according to the rounding parameter, which can be one of:
2342///
2343/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2344/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2345/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2346/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2347/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2348///
2349/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_round_sh)
2350#[inline]
2351#[target_feature(enable = "avx512fp16")]
2352#[cfg_attr(test, assert_instr(vmulsh, ROUNDING = 8))]
2353#[rustc_legacy_const_generics(3)]
2354#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2355pub fn _mm_maskz_mul_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2356 static_assert_rounding!(ROUNDING);
    _mm_mask_mul_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
2358}
2359
2360/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2361/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
2362///
2363/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sh)
2364#[inline]
2365#[target_feature(enable = "avx512fp16")]
2366#[cfg_attr(test, assert_instr(vmulsh))]
2367#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2368#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2369pub const fn _mm_mul_sh(a: __m128h, b: __m128h) -> __m128h {
2370 unsafe { simd_insert!(a, 0, _mm_cvtsh_h(a) * _mm_cvtsh_h(b)) }
2371}
2372
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
/// lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and
/// copy the upper 7 packed elements from a to the upper elements of dst.
2376///
2377/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_sh)
2378#[inline]
2379#[target_feature(enable = "avx512fp16")]
2380#[cfg_attr(test, assert_instr(vmulsh))]
2381#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2382#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2383pub const fn _mm_mask_mul_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2384 unsafe {
2385 let extractsrc: f16 = simd_extract!(src, 0);
        let mut mul: f16 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f16 = simd_extract!(a, 0);
            let extractb: f16 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
2393 }
2394}
2395
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
/// lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy
/// the upper 7 packed elements from a to the upper elements of dst.
2399///
2400/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_sh)
2401#[inline]
2402#[target_feature(enable = "avx512fp16")]
2403#[cfg_attr(test, assert_instr(vmulsh))]
2404#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2405#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2406pub const fn _mm_maskz_mul_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2407 unsafe {
        let mut mul: f16 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f16 = simd_extract!(a, 0);
            let extractb: f16 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
2415 }
2416}
2417
2418/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst.
2419///
2420/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_ph)
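///
/// A minimal usage sketch (illustrative values only; it assumes the `avx512fp16` and `avx512vl`
/// target features are enabled in the calling context):
///
/// ```ignore
/// let a = _mm_set1_ph(6.0);
/// let b = _mm_set1_ph(3.0);
/// let r = _mm_div_ph(a, b); // all 8 lanes hold 6.0 / 3.0 = 2.0
/// ```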
2421#[inline]
2422#[target_feature(enable = "avx512fp16,avx512vl")]
2423#[cfg_attr(test, assert_instr(vdivph))]
2424#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2425#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2426pub const fn _mm_div_ph(a: __m128h, b: __m128h) -> __m128h {
    unsafe { simd_div(a, b) }
2428}
2429
2430/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2431/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2432///
2433/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_ph)
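///
/// A minimal usage sketch (illustrative values only; it assumes the `avx512fp16` and `avx512vl`
/// target features are enabled in the calling context):
///
/// ```ignore
/// let src = _mm_set1_ph(9.0);
/// let a = _mm_set1_ph(6.0);
/// let b = _mm_set1_ph(3.0);
/// // Only lanes 0 and 2 are written with 6.0 / 3.0 = 2.0; the other lanes are copied from src.
/// let r = _mm_mask_div_ph(src, 0b0000_0101, a, b);
/// ```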
2434#[inline]
2435#[target_feature(enable = "avx512fp16,avx512vl")]
2436#[cfg_attr(test, assert_instr(vdivph))]
2437#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2438#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2439pub const fn _mm_mask_div_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2440 unsafe {
2441 let r: __m128h = _mm_div_ph(a, b);
        simd_select_bitmask(k, r, src)
2443 }
2444}
2445
2446/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2447/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2448///
2449/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_ph)
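///
/// A minimal usage sketch (illustrative values only; it assumes the `avx512fp16` and `avx512vl`
/// target features are enabled in the calling context):
///
/// ```ignore
/// let a = _mm_set1_ph(6.0);
/// let b = _mm_set1_ph(3.0);
/// // Lanes 0 and 1 hold 2.0; every other lane is zeroed because its mask bit is clear.
/// let r = _mm_maskz_div_ph(0b0000_0011, a, b);
/// ```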
2450#[inline]
2451#[target_feature(enable = "avx512fp16,avx512vl")]
2452#[cfg_attr(test, assert_instr(vdivph))]
2453#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2454#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2455pub const fn _mm_maskz_div_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2456 unsafe {
2457 let r: __m128h = _mm_div_ph(a, b);
        simd_select_bitmask(k, r, _mm_setzero_ph())
2459 }
2460}
2461
2462/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst.
2463///
2464/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_div_ph)
2465#[inline]
2466#[target_feature(enable = "avx512fp16,avx512vl")]
2467#[cfg_attr(test, assert_instr(vdivph))]
2468#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2469#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2470pub const fn _mm256_div_ph(a: __m256h, b: __m256h) -> __m256h {
    unsafe { simd_div(a, b) }
2472}
2473
2474/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2475/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2476///
2477/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_div_ph)
2478#[inline]
2479#[target_feature(enable = "avx512fp16,avx512vl")]
2480#[cfg_attr(test, assert_instr(vdivph))]
2481#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2482#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2483pub const fn _mm256_mask_div_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
2484 unsafe {
2485 let r: __m256h = _mm256_div_ph(a, b);
        simd_select_bitmask(k, r, src)
2487 }
2488}
2489
2490/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2491/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2492///
2493/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_div_ph)
2494#[inline]
2495#[target_feature(enable = "avx512fp16,avx512vl")]
2496#[cfg_attr(test, assert_instr(vdivph))]
2497#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2498#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2499pub const fn _mm256_maskz_div_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
2500 unsafe {
2501 let r: __m256h = _mm256_div_ph(a, b);
        simd_select_bitmask(k, r, _mm256_setzero_ph())
2503 }
2504}
2505
2506/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst.
2507///
2508/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_ph)
2509#[inline]
2510#[target_feature(enable = "avx512fp16")]
2511#[cfg_attr(test, assert_instr(vdivph))]
2512#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2513#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2514pub const fn _mm512_div_ph(a: __m512h, b: __m512h) -> __m512h {
    unsafe { simd_div(a, b) }
2516}
2517
2518/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2519/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2520///
2521/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_ph)
2522#[inline]
2523#[target_feature(enable = "avx512fp16")]
2524#[cfg_attr(test, assert_instr(vdivph))]
2525#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2526#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2527pub const fn _mm512_mask_div_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
2528 unsafe {
2529 let r: __m512h = _mm512_div_ph(a, b);
        simd_select_bitmask(k, r, src)
2531 }
2532}
2533
2534/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2535/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2536///
2537/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_ph)
2538#[inline]
2539#[target_feature(enable = "avx512fp16")]
2540#[cfg_attr(test, assert_instr(vdivph))]
2541#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2542#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2543pub const fn _mm512_maskz_div_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
2544 unsafe {
2545 let r: __m512h = _mm512_div_ph(a, b);
        simd_select_bitmask(k, r, _mm512_setzero_ph())
2547 }
2548}
2549
2550/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst.
2551/// Rounding is done according to the rounding parameter, which can be one of:
2552///
2553/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2554/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2555/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2556/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2557/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2558///
2559/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_round_ph)
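///
/// A minimal usage sketch (illustrative values only; it assumes the `avx512fp16` target feature is
/// enabled in the calling context):
///
/// ```ignore
/// let a = _mm512_set1_ph(1.0);
/// let b = _mm512_set1_ph(3.0);
/// // Divide with truncation toward zero and suppressed exceptions.
/// let r = _mm512_div_round_ph::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
/// ```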
2560#[inline]
2561#[target_feature(enable = "avx512fp16")]
2562#[cfg_attr(test, assert_instr(vdivph, ROUNDING = 8))]
2563#[rustc_legacy_const_generics(2)]
2564#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2565pub fn _mm512_div_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
2566 unsafe {
2567 static_assert_rounding!(ROUNDING);
2568 vdivph(a, b, ROUNDING)
2569 }
2570}
2571
2572/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2573/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2574/// Rounding is done according to the rounding parameter, which can be one of:
2575///
2576/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2577/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2578/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2579/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2580/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2581///
2582/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_round_ph)
2583#[inline]
2584#[target_feature(enable = "avx512fp16")]
2585#[cfg_attr(test, assert_instr(vdivph, ROUNDING = 8))]
2586#[rustc_legacy_const_generics(4)]
2587#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2588pub fn _mm512_mask_div_round_ph<const ROUNDING: i32>(
2589 src: __m512h,
2590 k: __mmask32,
2591 a: __m512h,
2592 b: __m512h,
2593) -> __m512h {
2594 unsafe {
2595 static_assert_rounding!(ROUNDING);
2596 let r: __m512h = _mm512_div_round_ph::<ROUNDING>(a, b);
        simd_select_bitmask(k, r, src)
2598 }
2599}
2600
2601/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2602/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2603/// Rounding is done according to the rounding parameter, which can be one of:
2604///
2605/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2606/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2607/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2608/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2609/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2610///
2611/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_round_ph)
2612#[inline]
2613#[target_feature(enable = "avx512fp16")]
2614#[cfg_attr(test, assert_instr(vdivph, ROUNDING = 8))]
2615#[rustc_legacy_const_generics(3)]
2616#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2617pub fn _mm512_maskz_div_round_ph<const ROUNDING: i32>(
2618 k: __mmask32,
2619 a: __m512h,
2620 b: __m512h,
2621) -> __m512h {
2622 unsafe {
2623 static_assert_rounding!(ROUNDING);
2624 let r: __m512h = _mm512_div_round_ph::<ROUNDING>(a, b);
        simd_select_bitmask(k, r, _mm512_setzero_ph())
2626 }
2627}
2628
2629/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2630/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
2631/// Rounding is done according to the rounding parameter, which can be one of:
2632///
2633/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2634/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2635/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2636/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2637/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2638///
2639/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_round_sh)
2640#[inline]
2641#[target_feature(enable = "avx512fp16")]
2642#[cfg_attr(test, assert_instr(vdivsh, ROUNDING = 8))]
2643#[rustc_legacy_const_generics(2)]
2644#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2645pub fn _mm_div_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
2646 static_assert_rounding!(ROUNDING);
    _mm_mask_div_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
2648}
2649
/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
/// lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and
/// copy the upper 7 packed elements from a to the upper elements of dst.
2653/// Rounding is done according to the rounding parameter, which can be one of:
2654///
2655/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2656/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2657/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2658/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2659/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2660///
2661/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_round_sh)
2662#[inline]
2663#[target_feature(enable = "avx512fp16")]
2664#[cfg_attr(test, assert_instr(vdivsh, ROUNDING = 8))]
2665#[rustc_legacy_const_generics(4)]
2666#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2667pub fn _mm_mask_div_round_sh<const ROUNDING: i32>(
2668 src: __m128h,
2669 k: __mmask8,
2670 a: __m128h,
2671 b: __m128h,
2672) -> __m128h {
2673 unsafe {
2674 static_assert_rounding!(ROUNDING);
2675 vdivsh(a, b, src, k, ROUNDING)
2676 }
2677}
2678
/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
/// lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy
/// the upper 7 packed elements from a to the upper elements of dst.
2682/// Rounding is done according to the rounding parameter, which can be one of:
2683///
2684/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2685/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2686/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2687/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2688/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2689///
2690/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_round_sh)
2691#[inline]
2692#[target_feature(enable = "avx512fp16")]
2693#[cfg_attr(test, assert_instr(vdivsh, ROUNDING = 8))]
2694#[rustc_legacy_const_generics(3)]
2695#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2696pub fn _mm_maskz_div_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2697 static_assert_rounding!(ROUNDING);
    _mm_mask_div_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
2699}
2700
2701/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2702/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
2703///
2704/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sh)
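///
/// A minimal usage sketch (illustrative values only; it assumes the `avx512fp16` target feature is
/// enabled in the calling context):
///
/// ```ignore
/// let a = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
/// let b = _mm_set_sh(4.0);
/// let r = _mm_div_sh(a, b); // lane 0 = 1.0 / 4.0 = 0.25, lanes 1..=7 are copied from a
/// ```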
2705#[inline]
2706#[target_feature(enable = "avx512fp16")]
2707#[cfg_attr(test, assert_instr(vdivsh))]
2708#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2709#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2710pub const fn _mm_div_sh(a: __m128h, b: __m128h) -> __m128h {
2711 unsafe { simd_insert!(a, 0, _mm_cvtsh_h(a) / _mm_cvtsh_h(b)) }
2712}
2713
/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
/// lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and
/// copy the upper 7 packed elements from a to the upper elements of dst.
2717///
2718/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_sh)
2719#[inline]
2720#[target_feature(enable = "avx512fp16")]
2721#[cfg_attr(test, assert_instr(vdivsh))]
2722#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2723#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2724pub const fn _mm_mask_div_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2725 unsafe {
2726 let extractsrc: f16 = simd_extract!(src, 0);
        let mut div: f16 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f16 = simd_extract!(a, 0);
            let extractb: f16 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
2734 }
2735}
2736
/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
/// lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy
/// the upper 7 packed elements from a to the upper elements of dst.
2740///
2741/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_sh)
2742#[inline]
2743#[target_feature(enable = "avx512fp16")]
2744#[cfg_attr(test, assert_instr(vdivsh))]
2745#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2746#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2747pub const fn _mm_maskz_div_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2748 unsafe {
        let mut div: f16 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f16 = simd_extract!(a, 0);
            let extractb: f16 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
2756 }
2757}
2758
2759/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
2760/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2761/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2762///
2763/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pch)
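///
/// A minimal usage sketch (illustrative values only; it assumes the `avx512fp16` and `avx512vl`
/// target features are enabled in the calling context). Each even/odd lane pair is treated as one
/// complex number, real part first:
///
/// ```ignore
/// let a = _mm_set1_ph(1.0); // every pair is 1 + 1i
/// let b = _mm_set1_ph(2.0); // every pair is 2 + 2i
/// let r = _mm_mul_pch(a, b); // (1 + 1i) * (2 + 2i) = 0 + 4i, so each pair holds [0.0, 4.0]
/// ```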
2764#[inline]
2765#[target_feature(enable = "avx512fp16,avx512vl")]
2766#[cfg_attr(test, assert_instr(vfmulcph))]
2767#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2768pub fn _mm_mul_pch(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_mul_pch(_mm_undefined_ph(), 0xff, a, b)
2770}
2771
2772/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
2773/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
2774/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2775///
2776/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_pch)
2777#[inline]
2778#[target_feature(enable = "avx512fp16,avx512vl")]
2779#[cfg_attr(test, assert_instr(vfmulcph))]
2780#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2781pub fn _mm_mask_mul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe { transmute(vfmulcph_128(transmute(a), transmute(b), transmute(src), k)) }
2783}
2784
2785/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2786/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
2787/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2788///
2789/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_pch)
2790#[inline]
2791#[target_feature(enable = "avx512fp16,avx512vl")]
2792#[cfg_attr(test, assert_instr(vfmulcph))]
2793#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2794pub fn _mm_maskz_mul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_mul_pch(_mm_setzero_ph(), k, a, b)
2796}
2797
2798/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
2799/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2800/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2801///
2802/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_pch)
2803#[inline]
2804#[target_feature(enable = "avx512fp16,avx512vl")]
2805#[cfg_attr(test, assert_instr(vfmulcph))]
2806#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2807pub fn _mm256_mul_pch(a: __m256h, b: __m256h) -> __m256h {
    _mm256_mask_mul_pch(_mm256_undefined_ph(), 0xff, a, b)
2809}
2810
2811/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
2812/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
2813/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2814///
2815/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_pch)
2816#[inline]
2817#[target_feature(enable = "avx512fp16,avx512vl")]
2818#[cfg_attr(test, assert_instr(vfmulcph))]
2819#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2820pub fn _mm256_mask_mul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
    unsafe { transmute(vfmulcph_256(transmute(a), transmute(b), transmute(src), k)) }
2822}
2823
2824/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2825/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
2826/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2827///
2828/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_pch)
2829#[inline]
2830#[target_feature(enable = "avx512fp16,avx512vl")]
2831#[cfg_attr(test, assert_instr(vfmulcph))]
2832#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2833pub fn _mm256_maskz_mul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
    _mm256_mask_mul_pch(_mm256_setzero_ph(), k, a, b)
2835}
2836
2837/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
2838/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2839/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2840///
2841/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_pch)
2842#[inline]
2843#[target_feature(enable = "avx512fp16")]
2844#[cfg_attr(test, assert_instr(vfmulcph))]
2845#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2846pub fn _mm512_mul_pch(a: __m512h, b: __m512h) -> __m512h {
    _mm512_mask_mul_pch(_mm512_undefined_ph(), 0xffff, a, b)
2848}
2849
2850/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
2851/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
2852/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2853///
2854/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_pch)
2855#[inline]
2856#[target_feature(enable = "avx512fp16")]
2857#[cfg_attr(test, assert_instr(vfmulcph))]
2858#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2859pub fn _mm512_mask_mul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
2860 _mm512_mask_mul_round_pch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
2861}
2862
2863/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2864/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
2865/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2866///
2867/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_pch)
2868#[inline]
2869#[target_feature(enable = "avx512fp16")]
2870#[cfg_attr(test, assert_instr(vfmulcph))]
2871#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2872pub fn _mm512_maskz_mul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
    _mm512_mask_mul_pch(_mm512_setzero_ph(), k, a, b)
2874}
2875
2876/// Multiply the packed complex numbers in a and b, and store the results in dst. Each complex number is
2877/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2878/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2879///
2880/// Rounding is done according to the rounding parameter, which can be one of:
2881///
2882/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2883/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2884/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2885/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2886/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2887///
2888/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_pch)
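///
/// A minimal usage sketch (illustrative values only; it assumes the `avx512fp16` target feature is
/// enabled in the calling context):
///
/// ```ignore
/// let a = _mm512_set1_ph(1.0); // every pair is 1 + 1i
/// let b = _mm512_set1_ph(2.0); // every pair is 2 + 2i
/// let r = _mm512_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
/// // every complex pair holds 0 + 4i, i.e. lanes [0.0, 4.0]
/// ```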
2889#[inline]
2890#[target_feature(enable = "avx512fp16")]
2891#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
2892#[rustc_legacy_const_generics(2)]
2893#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2894pub fn _mm512_mul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
2895 static_assert_rounding!(ROUNDING);
    _mm512_mask_mul_round_pch::<ROUNDING>(_mm512_undefined_ph(), 0xffff, a, b)
2897}
2898
2899/// Multiply the packed complex numbers in a and b, and store the results in dst using writemask k (the element
2900/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
2901/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2902///
2903/// Rounding is done according to the rounding parameter, which can be one of:
2904///
2905/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2906/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2907/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2908/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2909/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2910///
2911/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_pch)
2912#[inline]
2913#[target_feature(enable = "avx512fp16")]
2914#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
2915#[rustc_legacy_const_generics(4)]
2916#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2917pub fn _mm512_mask_mul_round_pch<const ROUNDING: i32>(
2918 src: __m512h,
2919 k: __mmask16,
2920 a: __m512h,
2921 b: __m512h,
2922) -> __m512h {
2923 unsafe {
2924 static_assert_rounding!(ROUNDING);
        transmute(vfmulcph_512(
            transmute(a),
            transmute(b),
            transmute(src),
            k,
            ROUNDING,
        ))
2932 }
2933}
2934
2935/// Multiply the packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2936/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
2937/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2938///
2939/// Rounding is done according to the rounding parameter, which can be one of:
2940///
2941/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2942/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2943/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2944/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2945/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2946///
2947/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_pch)
2948#[inline]
2949#[target_feature(enable = "avx512fp16")]
2950#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
2951#[rustc_legacy_const_generics(3)]
2952#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2953pub fn _mm512_maskz_mul_round_pch<const ROUNDING: i32>(
2954 k: __mmask16,
2955 a: __m512h,
2956 b: __m512h,
2957) -> __m512h {
2958 static_assert_rounding!(ROUNDING);
    _mm512_mask_mul_round_pch::<ROUNDING>(_mm512_setzero_ph(), k, a, b)
2960}
2961
2962/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst,
2963/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
2964/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2965/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2966///
2967/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sch)
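///
/// A minimal usage sketch (illustrative values only; it assumes the `avx512fp16` target feature is
/// enabled in the calling context):
///
/// ```ignore
/// let a = _mm_set1_ph(1.0); // the lower complex number is 1 + 1i
/// let b = _mm_set1_ph(2.0); // the lower complex number is 2 + 2i
/// let r = _mm_mul_sch(a, b); // lanes 0..=1 hold 0 + 4i, lanes 2..=7 are copied from a
/// ```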
2968#[inline]
2969#[target_feature(enable = "avx512fp16")]
2970#[cfg_attr(test, assert_instr(vfmulcsh))]
2971#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2972pub fn _mm_mul_sch(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_mul_sch(f16x8::ZERO.as_m128h(), 0xff, a, b)
2974}
2975
2976/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using
2977/// writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 6 packed
2978/// elements from a to the upper elements of dst. Each complex number is composed of two adjacent
2979/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2980///
2981/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_sch)
2982#[inline]
2983#[target_feature(enable = "avx512fp16")]
2984#[cfg_attr(test, assert_instr(vfmulcsh))]
2985#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
2986pub fn _mm_mask_mul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2987 _mm_mask_mul_round_sch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
2988}
2989
2990/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using
2991/// zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements
2992/// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision
2993/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2994///
2995/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_sch)
2996#[inline]
2997#[target_feature(enable = "avx512fp16")]
2998#[cfg_attr(test, assert_instr(vfmulcsh))]
2999#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3000pub fn _mm_maskz_mul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_mul_sch(f16x8::ZERO.as_m128h(), k, a, b)
3002}
3003
3004/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst,
3005/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
3006/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
3007/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3008///
3009/// Rounding is done according to the rounding parameter, which can be one of:
3010///
3011/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3012/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3013/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3014/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3015/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3016///
3017/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_round_sch)
3018#[inline]
3019#[target_feature(enable = "avx512fp16")]
3020#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
3021#[rustc_legacy_const_generics(2)]
3022#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3023pub fn _mm_mul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
3024 static_assert_rounding!(ROUNDING);
    _mm_mask_mul_round_sch::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
3026}
3027
3028/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using
3029/// writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 6 packed
3030/// elements from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision
3031/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3032///
3033/// Rounding is done according to the rounding parameter, which can be one of:
3034///
3035/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3036/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3037/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3038/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3039/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3040///
3041/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_round_sch)
3042#[inline]
3043#[target_feature(enable = "avx512fp16")]
3044#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
3045#[rustc_legacy_const_generics(4)]
3046#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3047pub fn _mm_mask_mul_round_sch<const ROUNDING: i32>(
3048 src: __m128h,
3049 k: __mmask8,
3050 a: __m128h,
3051 b: __m128h,
3052) -> __m128h {
3053 unsafe {
3054 static_assert_rounding!(ROUNDING);
        transmute(vfmulcsh(
            transmute(a),
            transmute(b),
            transmute(src),
            k,
            ROUNDING,
        ))
3062 }
3063}
3064
3065/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using
3066/// zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements
3067/// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision
3068/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3069///
3070/// Rounding is done according to the rounding parameter, which can be one of:
3071///
3072/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3073/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3074/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3075/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3076/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3077///
3078/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_round_sch)
3079#[inline]
3080#[target_feature(enable = "avx512fp16")]
3081#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
3082#[rustc_legacy_const_generics(3)]
3083#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3084pub fn _mm_maskz_mul_round_sch<const ROUNDING: i32>(
3085 k: __mmask8,
3086 a: __m128h,
3087 b: __m128h,
3088) -> __m128h {
3089 static_assert_rounding!(ROUNDING);
    _mm_mask_mul_round_sch::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
3091}
3092
3093/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
3094/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
3095/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3096///
3097/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_pch)
3098#[inline]
3099#[target_feature(enable = "avx512fp16,avx512vl")]
3100#[cfg_attr(test, assert_instr(vfmulcph))]
3101#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3102pub fn _mm_fmul_pch(a: __m128h, b: __m128h) -> __m128h {
3103 _mm_mul_pch(a, b)
3104}
3105
3106/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
3107/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
3108/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3109///
3110/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_pch)
3111#[inline]
3112#[target_feature(enable = "avx512fp16,avx512vl")]
3113#[cfg_attr(test, assert_instr(vfmulcph))]
3114#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3115pub fn _mm_mask_fmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3116 _mm_mask_mul_pch(src, k, a, b)
3117}
3118
3119/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
3120/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3121/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3122///
3123/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_pch)
3124#[inline]
3125#[target_feature(enable = "avx512fp16,avx512vl")]
3126#[cfg_attr(test, assert_instr(vfmulcph))]
3127#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3128pub fn _mm_maskz_fmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3129 _mm_maskz_mul_pch(k, a, b)
3130}
3131
3132/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
3133/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
3134/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3135///
3136/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmul_pch)
3137#[inline]
3138#[target_feature(enable = "avx512fp16,avx512vl")]
3139#[cfg_attr(test, assert_instr(vfmulcph))]
3140#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3141pub fn _mm256_fmul_pch(a: __m256h, b: __m256h) -> __m256h {
3142 _mm256_mul_pch(a, b)
3143}
3144
3145/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
3146/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3147/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3148///
3149/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmul_pch)
3150#[inline]
3151#[target_feature(enable = "avx512fp16,avx512vl")]
3152#[cfg_attr(test, assert_instr(vfmulcph))]
3153#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3154pub fn _mm256_mask_fmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
3155 _mm256_mask_mul_pch(src, k, a, b)
3156}
3157
3158/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
3159/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3160/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3161///
3162/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmul_pch)
3163#[inline]
3164#[target_feature(enable = "avx512fp16,avx512vl")]
3165#[cfg_attr(test, assert_instr(vfmulcph))]
3166#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3167pub fn _mm256_maskz_fmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
3168 _mm256_maskz_mul_pch(k, a, b)
3169}
3170
3171/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is composed
3172/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3173///
3174/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmul_pch)
3175#[inline]
3176#[target_feature(enable = "avx512fp16")]
3177#[cfg_attr(test, assert_instr(vfmulcph))]
3178#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3179pub fn _mm512_fmul_pch(a: __m512h, b: __m512h) -> __m512h {
3180 _mm512_mul_pch(a, b)
3181}
3182
3183/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
3184/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3185/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3186///
3187/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmul_pch)
3188#[inline]
3189#[target_feature(enable = "avx512fp16")]
3190#[cfg_attr(test, assert_instr(vfmulcph))]
3191#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3192pub fn _mm512_mask_fmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
3193 _mm512_mask_mul_pch(src, k, a, b)
3194}
3195
3196/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
3197/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3198/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3199///
3200/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmul_pch)
3201#[inline]
3202#[target_feature(enable = "avx512fp16")]
3203#[cfg_attr(test, assert_instr(vfmulcph))]
3204#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3205pub fn _mm512_maskz_fmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
3206 _mm512_maskz_mul_pch(k, a, b)
3207}
3208
3209/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is composed
3210/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3211/// Rounding is done according to the rounding parameter, which can be one of:
3212///
3213/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3214/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3215/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3216/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3217/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3218///
3219/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmul_round_pch)
3220#[inline]
3221#[target_feature(enable = "avx512fp16")]
3222#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
3223#[rustc_legacy_const_generics(2)]
3224#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3225pub fn _mm512_fmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
3226 static_assert_rounding!(ROUNDING);
3227 _mm512_mul_round_pch::<ROUNDING>(a, b)
3228}
3229
3230/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
3231/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3232/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3233/// Rounding is done according to the rounding parameter, which can be one of:
3234///
3235/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3236/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3237/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3238/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3239/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3240///
3241/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmul_round_pch)
3242#[inline]
3243#[target_feature(enable = "avx512fp16")]
3244#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
3245#[rustc_legacy_const_generics(4)]
3246#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3247pub fn _mm512_mask_fmul_round_pch<const ROUNDING: i32>(
3248 src: __m512h,
3249 k: __mmask16,
3250 a: __m512h,
3251 b: __m512h,
3252) -> __m512h {
3253 static_assert_rounding!(ROUNDING);
3254 _mm512_mask_mul_round_pch::<ROUNDING>(src, k, a, b)
3255}
3256
3257/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
3258/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3259/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3260/// Rounding is done according to the rounding parameter, which can be one of:
3261///
3262/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3263/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3264/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3265/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3266/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3267///
3268/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmul_round_pch)
3269#[inline]
3270#[target_feature(enable = "avx512fp16")]
3271#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
3272#[rustc_legacy_const_generics(3)]
3273#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3274pub fn _mm512_maskz_fmul_round_pch<const ROUNDING: i32>(
3275 k: __mmask16,
3276 a: __m512h,
3277 b: __m512h,
3278) -> __m512h {
3279 static_assert_rounding!(ROUNDING);
3280 _mm512_maskz_mul_round_pch::<ROUNDING>(k, a, b)
3281}
3282
3283/// Multiply the lower complex numbers in a and b, and store the results in dst. Each complex number is
3284/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
3285/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3286///
3287/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_sch)
3288#[inline]
3289#[target_feature(enable = "avx512fp16")]
3290#[cfg_attr(test, assert_instr(vfmulcsh))]
3291#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3292pub fn _mm_fmul_sch(a: __m128h, b: __m128h) -> __m128h {
3293 _mm_mul_sch(a, b)
3294}
3295
3296/// Multiply the lower complex numbers in a and b, and store the results in dst using writemask k (the element
3297/// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
3298/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3299///
3300/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_sch)
3301#[inline]
3302#[target_feature(enable = "avx512fp16")]
3303#[cfg_attr(test, assert_instr(vfmulcsh))]
3304#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3305pub fn _mm_mask_fmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3306 _mm_mask_mul_sch(src, k, a, b)
3307}
3308
3309/// Multiply the lower complex numbers in a and b, and store the results in dst using zeromask k (the element
3310/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
3311/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3312///
3313/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_sch)
3314#[inline]
3315#[target_feature(enable = "avx512fp16")]
3316#[cfg_attr(test, assert_instr(vfmulcsh))]
3317#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3318pub fn _mm_maskz_fmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3319 _mm_maskz_mul_sch(k, a, b)
3320}
3321
3322/// Multiply the lower complex numbers in a and b, and store the results in dst. Each complex number is composed
3323/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3324///
3325/// Rounding is done according to the rounding parameter, which can be one of:
3326///
3327/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3328/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3329/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3330/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3331/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3332///
3333/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_round_sch)
3334#[inline]
3335#[target_feature(enable = "avx512fp16")]
3336#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
3337#[rustc_legacy_const_generics(2)]
3338#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3339pub fn _mm_fmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
3340 static_assert_rounding!(ROUNDING);
3341 _mm_mul_round_sch::<ROUNDING>(a, b)
3342}
3343
3344/// Multiply the lower complex numbers in a and b, and store the results in dst using writemask k (the element
3345/// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
3346/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3347///
3348/// Rounding is done according to the rounding parameter, which can be one of:
3349///
3350/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3351/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3352/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3353/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3354/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3355///
3356/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_round_sch)
3357#[inline]
3358#[target_feature(enable = "avx512fp16")]
3359#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
3360#[rustc_legacy_const_generics(4)]
3361#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3362pub fn _mm_mask_fmul_round_sch<const ROUNDING: i32>(
3363 src: __m128h,
3364 k: __mmask8,
3365 a: __m128h,
3366 b: __m128h,
3367) -> __m128h {
3368 static_assert_rounding!(ROUNDING);
3369 _mm_mask_mul_round_sch::<ROUNDING>(src, k, a, b)
3370}
3371
3372/// Multiply the lower complex numbers in a and b, and store the results in dst using zeromask k (the element
3373/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
3374/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3375///
3376/// Rounding is done according to the rounding parameter, which can be one of:
3377///
3378/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3379/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3380/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3381/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3382/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3383///
3384/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_round_sch)
3385#[inline]
3386#[target_feature(enable = "avx512fp16")]
3387#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
3388#[rustc_legacy_const_generics(3)]
3389#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3390pub fn _mm_maskz_fmul_round_sch<const ROUNDING: i32>(
3391 k: __mmask8,
3392 a: __m128h,
3393 b: __m128h,
3394) -> __m128h {
3395 static_assert_rounding!(ROUNDING);
3396 _mm_maskz_mul_round_sch::<ROUNDING>(k, a, b)
3397}
3398
3399/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3400/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3401/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3402/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3403///
3404/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_pch)
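///
/// A minimal usage sketch (illustrative, not from Intel's documentation; the values are
/// hypothetical and runtime `avx512fp16`/`avx512vl` support is assumed):
///
/// ```ignore
/// unsafe {
///     // Low complex pair: a = 1 + 2i, b = 3 + 4i, so a * conj(b) = (1 + 2i)(3 - 4i) = 11 + 2i.
///     let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0);
///     let b = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 3.0);
///     let r = _mm_cmul_pch(a, b); // low pair of r holds (11.0, 2.0)
/// }
/// ```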
3405#[inline]
3406#[target_feature(enable = "avx512fp16,avx512vl")]
3407#[cfg_attr(test, assert_instr(vfcmulcph))]
3408#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3409pub fn _mm_cmul_pch(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_cmul_pch(_mm_undefined_ph(), 0xff, a, b)
3411}
3412
3413/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3414/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3415/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3416/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3417///
3418/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_pch)
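///
/// A minimal sketch of the writemask form (illustrative; `src`, `a` and `b` are hypothetical
/// `__m128h` values): each mask bit selects one complex number, i.e. one pair of `f16` lanes,
/// so a `__m128h` holds four maskable complex numbers. With `k = 0b0011` the two low products
/// are computed and the two upper pairs are copied from `src`.
///
/// ```ignore
/// let r = unsafe { _mm_mask_cmul_pch(src, 0b0011, a, b) };
/// ```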
3419#[inline]
3420#[target_feature(enable = "avx512fp16,avx512vl")]
3421#[cfg_attr(test, assert_instr(vfcmulcph))]
3422#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3423pub fn _mm_mask_cmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe { transmute(vfcmulcph_128(transmute(a), transmute(b), transmute(src), k)) }
3425}
3426
3427/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3428/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3429/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3430/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3431///
3432/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_pch)
3433#[inline]
3434#[target_feature(enable = "avx512fp16,avx512vl")]
3435#[cfg_attr(test, assert_instr(vfcmulcph))]
3436#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3437pub fn _mm_maskz_cmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_cmul_pch(_mm_setzero_ph(), k, a, b)
3439}
3440
3441/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3442/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3443/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3444/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3445///
3446/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmul_pch)
3447#[inline]
3448#[target_feature(enable = "avx512fp16,avx512vl")]
3449#[cfg_attr(test, assert_instr(vfcmulcph))]
3450#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3451pub fn _mm256_cmul_pch(a: __m256h, b: __m256h) -> __m256h {
    _mm256_mask_cmul_pch(_mm256_undefined_ph(), 0xff, a, b)
3453}
3454
3455/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3456/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3457/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3458/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3459///
3460/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmul_pch)
3461#[inline]
3462#[target_feature(enable = "avx512fp16,avx512vl")]
3463#[cfg_attr(test, assert_instr(vfcmulcph))]
3464#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3465pub fn _mm256_mask_cmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
    unsafe { transmute(vfcmulcph_256(transmute(a), transmute(b), transmute(src), k)) }
3467}
3468
3469/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3470/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3471/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3472/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3473///
3474/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cmul_pch)
3475#[inline]
3476#[target_feature(enable = "avx512fp16,avx512vl")]
3477#[cfg_attr(test, assert_instr(vfcmulcph))]
3478#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3479pub fn _mm256_maskz_cmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
    _mm256_mask_cmul_pch(_mm256_setzero_ph(), k, a, b)
3481}
3482
3483/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3484/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3485/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3486/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3487///
3488/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmul_pch)
3489#[inline]
3490#[target_feature(enable = "avx512fp16")]
3491#[cfg_attr(test, assert_instr(vfcmulcph))]
3492#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3493pub fn _mm512_cmul_pch(a: __m512h, b: __m512h) -> __m512h {
    _mm512_mask_cmul_pch(_mm512_undefined_ph(), 0xffff, a, b)
3495}
3496
3497/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3498/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3499/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3500/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3501///
3502/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmul_pch)
3503#[inline]
3504#[target_feature(enable = "avx512fp16")]
3505#[cfg_attr(test, assert_instr(vfcmulcph))]
3506#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3507pub fn _mm512_mask_cmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
3508 _mm512_mask_cmul_round_pch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
3509}
3510
3511/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3512/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3513/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3514/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3515///
3516/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cmul_pch)
3517#[inline]
3518#[target_feature(enable = "avx512fp16")]
3519#[cfg_attr(test, assert_instr(vfcmulcph))]
3520#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3521pub fn _mm512_maskz_cmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
    _mm512_mask_cmul_pch(_mm512_setzero_ph(), k, a, b)
3523}
3524
3525/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3526/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3527/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3528/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3529///
3530/// Rounding is done according to the rounding parameter, which can be one of:
3531///
3532/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3533/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3534/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3535/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3536/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3537///
3538/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmul_round_pch)
3539#[inline]
3540#[target_feature(enable = "avx512fp16")]
3541#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
3542#[rustc_legacy_const_generics(2)]
3543#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3544pub fn _mm512_cmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
3545 static_assert_rounding!(ROUNDING);
    _mm512_mask_cmul_round_pch::<ROUNDING>(_mm512_undefined_ph(), 0xffff, a, b)
3547}
3548
3549/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3550/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3551/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3552/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3553///
3554/// Rounding is done according to the rounding parameter, which can be one of:
3555///
3556/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3557/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3558/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3559/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3560/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3561///
3562/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmul_round_pch)
3563#[inline]
3564#[target_feature(enable = "avx512fp16")]
3565#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
3566#[rustc_legacy_const_generics(4)]
3567#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3568pub fn _mm512_mask_cmul_round_pch<const ROUNDING: i32>(
3569 src: __m512h,
3570 k: __mmask16,
3571 a: __m512h,
3572 b: __m512h,
3573) -> __m512h {
3574 unsafe {
3575 static_assert_rounding!(ROUNDING);
        transmute(vfcmulcph_512(
            transmute(a),
            transmute(b),
            transmute(src),
            k,
            ROUNDING,
        ))
3583 }
3584}
3585
3586/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3587/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3588/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3589/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3590///
3591/// Rounding is done according to the rounding parameter, which can be one of:
3592///
3593/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3594/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3595/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3596/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3597/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3598///
3599/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cmul_round_pch)
3600#[inline]
3601#[target_feature(enable = "avx512fp16")]
3602#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
3603#[rustc_legacy_const_generics(3)]
3604#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3605pub fn _mm512_maskz_cmul_round_pch<const ROUNDING: i32>(
3606 k: __mmask16,
3607 a: __m512h,
3608 b: __m512h,
3609) -> __m512h {
3610 static_assert_rounding!(ROUNDING);
    _mm512_mask_cmul_round_pch::<ROUNDING>(_mm512_setzero_ph(), k, a, b)
3612}
3613
3614/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3615/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3617///
3618/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_sch)
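///
/// A minimal usage sketch (illustrative; `a` and `b` are hypothetical `__m128h` values and
/// runtime `avx512fp16` support is assumed). Only the lower complex number of the result is
/// described by this documentation:
///
/// ```ignore
/// // If the low pairs of `a` and `b` hold 2 + 1i and 0 + 1i, then
/// // a * conj(b) = (2 + 1i)(0 - 1i) = 1 - 2i in the low pair of the result.
/// let r = unsafe { _mm_cmul_sch(a, b) };
/// ```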
3619#[inline]
3620#[target_feature(enable = "avx512fp16")]
3621#[cfg_attr(test, assert_instr(vfcmulcsh))]
3622#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3623pub fn _mm_cmul_sch(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_cmul_sch(f16x8::ZERO.as_m128h(), 0xff, a, b)
3625}
3626
3627/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3628/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
3629/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate
/// `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3631///
3632/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_sch)
3633#[inline]
3634#[target_feature(enable = "avx512fp16")]
3635#[cfg_attr(test, assert_instr(vfcmulcsh))]
3636#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3637pub fn _mm_mask_cmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3638 _mm_mask_cmul_round_sch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
3639}
3640
3641/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3642/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
3643/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate
/// `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3645///
3646/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_sch)
3647#[inline]
3648#[target_feature(enable = "avx512fp16")]
3649#[cfg_attr(test, assert_instr(vfcmulcsh))]
3650#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3651pub fn _mm_maskz_cmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_cmul_sch(f16x8::ZERO.as_m128h(), k, a, b)
3653}
3654
3655/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3656/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3658///
3659/// Rounding is done according to the rounding parameter, which can be one of:
3660///
3661/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3662/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3663/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3664/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3665/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3666///
3667/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_round_sch)
3668#[inline]
3669#[target_feature(enable = "avx512fp16")]
3670#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
3671#[rustc_legacy_const_generics(2)]
3672#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3673pub fn _mm_cmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
3674 static_assert_rounding!(ROUNDING);
    _mm_mask_cmul_round_sch::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
3676}
3677
3678/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3679/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
3680/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3681/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3682///
3683/// Rounding is done according to the rounding parameter, which can be one of:
3684///
3685/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3686/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3687/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3688/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3689/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3690///
3691/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_round_sch)
3692#[inline]
3693#[target_feature(enable = "avx512fp16")]
3694#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
3695#[rustc_legacy_const_generics(4)]
3696#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3697pub fn _mm_mask_cmul_round_sch<const ROUNDING: i32>(
3698 src: __m128h,
3699 k: __mmask8,
3700 a: __m128h,
3701 b: __m128h,
3702) -> __m128h {
3703 unsafe {
3704 static_assert_rounding!(ROUNDING);
        transmute(vfcmulcsh(
            transmute(a),
            transmute(b),
            transmute(src),
            k,
            ROUNDING,
        ))
3712 }
3713}
3714
3715/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3716/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
3717/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3718/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3719///
3720/// Rounding is done according to the rounding parameter, which can be one of:
3721///
3722/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3723/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3724/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3725/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3726/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3727///
3728/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_round_sch)
3729#[inline]
3730#[target_feature(enable = "avx512fp16")]
3731#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
3732#[rustc_legacy_const_generics(3)]
3733#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3734pub fn _mm_maskz_cmul_round_sch<const ROUNDING: i32>(
3735 k: __mmask8,
3736 a: __m128h,
3737 b: __m128h,
3738) -> __m128h {
3739 static_assert_rounding!(ROUNDING);
    _mm_mask_cmul_round_sch::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
3741}
3742
3743/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3744/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3745/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3746/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3747///
3748/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_pch)
3749#[inline]
3750#[target_feature(enable = "avx512fp16,avx512vl")]
3751#[cfg_attr(test, assert_instr(vfcmulcph))]
3752#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3753pub fn _mm_fcmul_pch(a: __m128h, b: __m128h) -> __m128h {
3754 _mm_cmul_pch(a, b)
3755}
3756
3757/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3758/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3759/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3760/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3761///
3762/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_pch)
3763#[inline]
3764#[target_feature(enable = "avx512fp16,avx512vl")]
3765#[cfg_attr(test, assert_instr(vfcmulcph))]
3766#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3767pub fn _mm_mask_fcmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3768 _mm_mask_cmul_pch(src, k, a, b)
3769}
3770
3771/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3772/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3773/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3774/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3775///
3776/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_pch)
3777#[inline]
3778#[target_feature(enable = "avx512fp16,avx512vl")]
3779#[cfg_attr(test, assert_instr(vfcmulcph))]
3780#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3781pub fn _mm_maskz_fcmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3782 _mm_maskz_cmul_pch(k, a, b)
3783}
3784
3785/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3786/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3787/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3788/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3789///
3790/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fcmul_pch)
3791#[inline]
3792#[target_feature(enable = "avx512fp16,avx512vl")]
3793#[cfg_attr(test, assert_instr(vfcmulcph))]
3794#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3795pub fn _mm256_fcmul_pch(a: __m256h, b: __m256h) -> __m256h {
3796 _mm256_cmul_pch(a, b)
3797}
3798
3799/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3800/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3801/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3802/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3803///
3804/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fcmul_pch)
3805#[inline]
3806#[target_feature(enable = "avx512fp16,avx512vl")]
3807#[cfg_attr(test, assert_instr(vfcmulcph))]
3808#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3809pub fn _mm256_mask_fcmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
3810 _mm256_mask_cmul_pch(src, k, a, b)
3811}
3812
3813/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3814/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3815/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3816/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3817///
3818/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fcmul_pch)
3819#[inline]
3820#[target_feature(enable = "avx512fp16,avx512vl")]
3821#[cfg_attr(test, assert_instr(vfcmulcph))]
3822#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3823pub fn _mm256_maskz_fcmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
3824 _mm256_maskz_cmul_pch(k, a, b)
3825}
3826
3827/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3828/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3829/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3830/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3831///
3832/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmul_pch)
3833#[inline]
3834#[target_feature(enable = "avx512fp16")]
3835#[cfg_attr(test, assert_instr(vfcmulcph))]
3836#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3837pub fn _mm512_fcmul_pch(a: __m512h, b: __m512h) -> __m512h {
3838 _mm512_cmul_pch(a, b)
3839}
3840
3841/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3842/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3843/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3844/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3845///
3846/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmul_pch)
3847#[inline]
3848#[target_feature(enable = "avx512fp16")]
3849#[cfg_attr(test, assert_instr(vfcmulcph))]
3850#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3851pub fn _mm512_mask_fcmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
3852 _mm512_mask_cmul_pch(src, k, a, b)
3853}
3854
3855/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3856/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3857/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3858/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3859///
3860/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmul_pch)
3861#[inline]
3862#[target_feature(enable = "avx512fp16")]
3863#[cfg_attr(test, assert_instr(vfcmulcph))]
3864#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3865pub fn _mm512_maskz_fcmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
3866 _mm512_maskz_cmul_pch(k, a, b)
3867}
3868
3869/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3870/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3872///
3873/// Rounding is done according to the rounding parameter, which can be one of:
3874///
3875/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3876/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3877/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3878/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3879/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3880///
3881/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmul_round_pch)
3882#[inline]
3883#[target_feature(enable = "avx512fp16")]
3884#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
3885#[rustc_legacy_const_generics(2)]
3886#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3887pub fn _mm512_fcmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
3888 static_assert_rounding!(ROUNDING);
3889 _mm512_cmul_round_pch::<ROUNDING>(a, b)
3890}
3891
3892/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3893/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3894/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3895/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3896///
3897/// Rounding is done according to the rounding parameter, which can be one of:
3898///
3899/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3900/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3901/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3902/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3903/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3904///
3905/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmul_round_pch)
3906#[inline]
3907#[target_feature(enable = "avx512fp16")]
3908#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
3909#[rustc_legacy_const_generics(4)]
3910#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3911pub fn _mm512_mask_fcmul_round_pch<const ROUNDING: i32>(
3912 src: __m512h,
3913 k: __mmask16,
3914 a: __m512h,
3915 b: __m512h,
3916) -> __m512h {
3917 static_assert_rounding!(ROUNDING);
3918 _mm512_mask_cmul_round_pch::<ROUNDING>(src, k, a, b)
3919}
3920
3921/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3922/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3923/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3924/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3925///
3926/// Rounding is done according to the rounding parameter, which can be one of:
3927///
3928/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3929/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3930/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3931/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3932/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3933///
3934/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmul_round_pch)
3935#[inline]
3936#[target_feature(enable = "avx512fp16")]
3937#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
3938#[rustc_legacy_const_generics(3)]
3939#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3940pub fn _mm512_maskz_fcmul_round_pch<const ROUNDING: i32>(
3941 k: __mmask16,
3942 a: __m512h,
3943 b: __m512h,
3944) -> __m512h {
3945 static_assert_rounding!(ROUNDING);
3946 _mm512_maskz_cmul_round_pch::<ROUNDING>(k, a, b)
3947}
3948
3949/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3950/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3951/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3952/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3953///
3954/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_sch)
3955#[inline]
3956#[target_feature(enable = "avx512fp16")]
3957#[cfg_attr(test, assert_instr(vfcmulcsh))]
3958#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3959pub fn _mm_fcmul_sch(a: __m128h, b: __m128h) -> __m128h {
3960 _mm_cmul_sch(a, b)
3961}
3962
3963/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3964/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
3965/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3966/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3967///
3968/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_sch)
3969#[inline]
3970#[target_feature(enable = "avx512fp16")]
3971#[cfg_attr(test, assert_instr(vfcmulcsh))]
3972#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3973pub fn _mm_mask_fcmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3974 _mm_mask_cmul_sch(src, k, a, b)
3975}
3976
3977/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3978/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
3979/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3980/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3981///
3982/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_sch)
3983#[inline]
3984#[target_feature(enable = "avx512fp16")]
3985#[cfg_attr(test, assert_instr(vfcmulcsh))]
3986#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
3987pub fn _mm_maskz_fcmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3988 _mm_maskz_cmul_sch(k, a, b)
3989}
3990
3991/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3992/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3994///
3995/// Rounding is done according to the rounding parameter, which can be one of:
3996///
3997/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3998/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3999/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4000/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4001/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4002///
4003/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_round_sch)
4004#[inline]
4005#[target_feature(enable = "avx512fp16")]
4006#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
4007#[rustc_legacy_const_generics(2)]
4008#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4009pub fn _mm_fcmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
4010 static_assert_rounding!(ROUNDING);
4011 _mm_cmul_round_sch::<ROUNDING>(a, b)
4012}
4013
4014/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
4015/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
4016/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
4017/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4018///
4019/// Rounding is done according to the rounding parameter, which can be one of:
4020///
4021/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4022/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4023/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4024/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4025/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4026///
4027/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_round_sch)
4028#[inline]
4029#[target_feature(enable = "avx512fp16")]
4030#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
4031#[rustc_legacy_const_generics(4)]
4032#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4033pub fn _mm_mask_fcmul_round_sch<const ROUNDING: i32>(
4034 src: __m128h,
4035 k: __mmask8,
4036 a: __m128h,
4037 b: __m128h,
4038) -> __m128h {
4039 static_assert_rounding!(ROUNDING);
4040 _mm_mask_cmul_round_sch::<ROUNDING>(src, k, a, b)
4041}
4042
4043/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
4044/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
4045/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
4046/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4047///
4048/// Rounding is done according to the rounding parameter, which can be one of:
4049///
4050/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4051/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4052/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4053/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4054/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4055///
4056/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_round_sch)
4057#[inline]
4058#[target_feature(enable = "avx512fp16")]
4059#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
4060#[rustc_legacy_const_generics(3)]
4061#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4062pub fn _mm_maskz_fcmul_round_sch<const ROUNDING: i32>(
4063 k: __mmask8,
4064 a: __m128h,
4065 b: __m128h,
4066) -> __m128h {
4067 static_assert_rounding!(ROUNDING);
4068 _mm_maskz_cmul_round_sch::<ROUNDING>(k, a, b)
4069}
4070
4071/// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing
4072/// the results in dst.
4073///
4074/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_ph)
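///
/// A minimal usage sketch (illustrative; runtime `avx512fp16`/`avx512vl` support is assumed).
/// As the implementation below shows, the absolute value is taken by clearing the sign bit of
/// every 16-bit lane, so `-0.0` becomes `0.0` and NaNs keep their payload:
///
/// ```ignore
/// unsafe {
///     let v = _mm_set1_ph(-1.5);
///     let r = _mm_abs_ph(v); // every lane of r is 1.5
/// }
/// ```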
4075#[inline]
4076#[target_feature(enable = "avx512fp16,avx512vl")]
4077#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4078#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4079pub const fn _mm_abs_ph(v2: __m128h) -> __m128h {
    unsafe { transmute(_mm_and_si128(transmute(v2), _mm_set1_epi16(i16::MAX))) }
4081}
4082
4083/// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing
4084/// the result in dst.
4085///
4086/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_ph)
4087#[inline]
4088#[target_feature(enable = "avx512fp16,avx512vl")]
4089#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4090#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4091pub const fn _mm256_abs_ph(v2: __m256h) -> __m256h {
    unsafe { transmute(_mm256_and_si256(transmute(v2), _mm256_set1_epi16(i16::MAX))) }
4093}
4094
4095/// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing
4096/// the result in dst.
4097///
4098/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_ph)
4099#[inline]
4100#[target_feature(enable = "avx512fp16")]
4101#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4102#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4103pub const fn _mm512_abs_ph(v2: __m512h) -> __m512h {
    unsafe { transmute(_mm512_and_si512(transmute(v2), _mm512_set1_epi16(i16::MAX))) }
4105}
4106
4107/// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex
4108/// number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines
4109/// the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate
4110/// `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4111///
4112/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_conj_pch)
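///
/// A minimal usage sketch (illustrative; the values are hypothetical and runtime
/// `avx512fp16`/`avx512vl` support is assumed). Conjugation only flips the sign of the
/// imaginary half of each complex pair, which is why the implementation below XORs each
/// 32-bit pair with `i32::MIN`, the sign bit of the upper `f16`:
///
/// ```ignore
/// unsafe {
///     // Low complex pair: 1 + 2i becomes 1 - 2i.
///     let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0);
///     let r = _mm_conj_pch(a); // low pair of r holds (1.0, -2.0)
/// }
/// ```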
4113#[inline]
4114#[target_feature(enable = "avx512fp16,avx512vl")]
4115#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4116#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4117pub const fn _mm_conj_pch(a: __m128h) -> __m128h {
    unsafe { transmute(_mm_xor_si128(transmute(a), _mm_set1_epi32(i32::MIN))) }
4119}
4120
4121/// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k
4122/// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two
4123/// adjacent half-precision (16-bit) floating-point elements, which defines the complex number
4124/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4125///
4126/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_conj_pch)
4127#[inline]
4128#[target_feature(enable = "avx512fp16,avx512vl")]
4129#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4130#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4131pub const fn _mm_mask_conj_pch(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
4132 unsafe {
        let r: __m128 = transmute(_mm_conj_pch(a));
        transmute(simd_select_bitmask(k, r, transmute(src)))
4135 }
4136}
4137
4138/// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k
4139/// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
4140/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4141/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4142///
4143/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_conj_pch)
4144#[inline]
4145#[target_feature(enable = "avx512fp16,avx512vl")]
4146#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4147#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4148pub const fn _mm_maskz_conj_pch(k: __mmask8, a: __m128h) -> __m128h {
    _mm_mask_conj_pch(_mm_setzero_ph(), k, a)
4150}
4151
4152/// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex number
4153/// is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
4154/// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4155///
4156/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_conj_pch)
4157#[inline]
4158#[target_feature(enable = "avx512fp16,avx512vl")]
4159#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4160#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4161pub const fn _mm256_conj_pch(a: __m256h) -> __m256h {
    unsafe { transmute(_mm256_xor_si256(transmute(a), _mm256_set1_epi32(i32::MIN))) }
4163}
4164
4165/// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k
4166/// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two
4167/// adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4168/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4169///
4170/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_conj_pch)
4171#[inline]
4172#[target_feature(enable = "avx512fp16,avx512vl")]
4173#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4174#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4175pub const fn _mm256_mask_conj_pch(src: __m256h, k: __mmask8, a: __m256h) -> __m256h {
4176 unsafe {
        let r: __m256 = transmute(_mm256_conj_pch(a));
        transmute(simd_select_bitmask(k, r, transmute(src)))
4179 }
4180}
4181
4182/// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k
4183/// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
4184/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4185/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4186///
4187/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_conj_pch)
4188#[inline]
4189#[target_feature(enable = "avx512fp16,avx512vl")]
4190#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4191#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4192pub const fn _mm256_maskz_conj_pch(k: __mmask8, a: __m256h) -> __m256h {
    _mm256_mask_conj_pch(_mm256_setzero_ph(), k, a)
4194}
4195
4196/// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex number
4197/// is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
4198/// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4199///
4200/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_conj_pch)
4201#[inline]
4202#[target_feature(enable = "avx512fp16")]
4203#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4204#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4205pub const fn _mm512_conj_pch(a: __m512h) -> __m512h {
    unsafe { transmute(_mm512_xor_si512(transmute(a), _mm512_set1_epi32(i32::MIN))) }
4207}
4208
4209/// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k
4210/// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two
4211/// adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4212/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4213///
4214/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_conj_pch)
4215#[inline]
4216#[target_feature(enable = "avx512fp16")]
4217#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4218#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4219pub const fn _mm512_mask_conj_pch(src: __m512h, k: __mmask16, a: __m512h) -> __m512h {
4220 unsafe {
        let r: __m512 = transmute(_mm512_conj_pch(a));
        transmute(simd_select_bitmask(k, r, transmute(src)))
4223 }
4224}
4225
4226/// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k
4227/// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
4228/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4229/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4230///
4231/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_conj_pch)
4232#[inline]
4233#[target_feature(enable = "avx512fp16")]
4234#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4235#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4236pub const fn _mm512_maskz_conj_pch(k: __mmask16, a: __m512h) -> __m512h {
    _mm512_mask_conj_pch(_mm512_setzero_ph(), k, a)
4238}
4239
4240/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4241/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
4242/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4243///
4244/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_pch)
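///
/// A minimal usage sketch (illustrative; the values are hypothetical and runtime
/// `avx512fp16`/`avx512vl` support is assumed):
///
/// ```ignore
/// unsafe {
///     // Low complex pair: (1 + 2i) * (3 + 4i) + (5 + 6i) = (-5 + 10i) + (5 + 6i) = 0 + 16i.
///     let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0);
///     let b = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 3.0);
///     let c = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.0, 5.0);
///     let r = _mm_fmadd_pch(a, b, c); // low pair of r holds (0.0, 16.0)
/// }
/// ```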
4245#[inline]
4246#[target_feature(enable = "avx512fp16,avx512vl")]
4247#[cfg_attr(test, assert_instr(vfmaddcph))]
4248#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4249pub fn _mm_fmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    _mm_mask3_fmadd_pch(a, b, c, 0xff)
4251}
4252
4253/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4254/// and store the results in dst using writemask k (the element is copied from a when the corresponding
4255/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
4256/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4257///
4258/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_pch)
4259#[inline]
4260#[target_feature(enable = "avx512fp16,avx512vl")]
4261#[cfg_attr(test, assert_instr(vfmaddcph))]
4262#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4263pub fn _mm_mask_fmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
4264 unsafe {
        let r: __m128 = transmute(_mm_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
        transmute(simd_select_bitmask(k, r, transmute(a)))
4267 }
4268}
4269
4270/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4271/// and store the results in dst using writemask k (the element is copied from c when the corresponding
4272/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
4273/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4274///
4275/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_pch)
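///
/// A minimal sketch of the mask3 form (illustrative; `a`, `b` and `c` are hypothetical
/// `__m128h` values): with `k = 0b0001` only the lowest complex pair receives `a * b + c`,
/// while the remaining pairs are copied from `c`.
///
/// ```ignore
/// let r = unsafe { _mm_mask3_fmadd_pch(a, b, c, 0b0001) };
/// ```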
4276#[inline]
4277#[target_feature(enable = "avx512fp16,avx512vl")]
4278#[cfg_attr(test, assert_instr(vfmaddcph))]
4279#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4280pub fn _mm_mask3_fmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
4281 unsafe {
        transmute(vfmaddcph_mask3_128(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
4288 }
4289}
4290
4291/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4292/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask
4293/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4294/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4295///
4296/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_pch)
4297#[inline]
4298#[target_feature(enable = "avx512fp16,avx512vl")]
4299#[cfg_attr(test, assert_instr(vfmaddcph))]
4300#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4301pub fn _mm_maskz_fmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4302 unsafe {
        transmute(vfmaddcph_maskz_128(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
4309 }
4310}
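
// Hedged sketch of the zero-masking semantics (the helper name and values are illustrative
// only): in the 128-bit form there are four complex numbers, and each of the low four bits of
// `k` keeps or zeroes one whole real/imaginary pair.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn maskz_fmadd_pch_sketch() -> __m128h {
    let a = _mm_set1_ph(1.0);
    let b = _mm_set1_ph(2.0);
    let c = _mm_set1_ph(0.0);
    // k = 0b0101: complex lanes 0 and 2 are computed, complex lanes 1 and 3 are zeroed out.
    _mm_maskz_fmadd_pch(0b0101, a, b, c)
}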
4311
4312/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4313/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
4314/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4315///
4316/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_pch)
4317#[inline]
4318#[target_feature(enable = "avx512fp16,avx512vl")]
4319#[cfg_attr(test, assert_instr(vfmaddcph))]
4320#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4321pub fn _mm256_fmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    _mm256_mask3_fmadd_pch(a, b, c, 0xff)
4323}
4324
4325/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4326/// and store the results in dst using writemask k (the element is copied from a when the corresponding mask
4327/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4328/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4329///
4330/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmadd_pch)
4331#[inline]
4332#[target_feature(enable = "avx512fp16,avx512vl")]
4333#[cfg_attr(test, assert_instr(vfmaddcph))]
4334#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4335pub fn _mm256_mask_fmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h {
4336 unsafe {
        let r: __m256 = transmute(_mm256_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
        transmute(simd_select_bitmask(k, r, transmute(a)))
4339 }
4340}
4341
4342/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4343/// and store the results in dst using writemask k (the element is copied from c when the corresponding
4344/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
4345/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4346///
4347/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmadd_pch)
4348#[inline]
4349#[target_feature(enable = "avx512fp16,avx512vl")]
4350#[cfg_attr(test, assert_instr(vfmaddcph))]
4351#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4352pub fn _mm256_mask3_fmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h {
4353 unsafe {
        transmute(vfmaddcph_mask3_256(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
4360 }
4361}
4362
4363/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4364/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask
4365/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4366/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4367///
4368/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmadd_pch)
4369#[inline]
4370#[target_feature(enable = "avx512fp16,avx512vl")]
4371#[cfg_attr(test, assert_instr(vfmaddcph))]
4372#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4373pub fn _mm256_maskz_fmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
4374 unsafe {
        transmute(vfmaddcph_maskz_256(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
4381 }
4382}
4383
4384/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4385/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
4386/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4387///
4388/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_pch)
4389#[inline]
4390#[target_feature(enable = "avx512fp16")]
4391#[cfg_attr(test, assert_instr(vfmaddcph))]
4392#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4393pub fn _mm512_fmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
4394 _mm512_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
4395}
4396
4397/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4398/// and store the results in dst using writemask k (the element is copied from a when the corresponding mask
4399/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4400/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4401///
4402/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_pch)
4403#[inline]
4404#[target_feature(enable = "avx512fp16")]
4405#[cfg_attr(test, assert_instr(vfmaddcph))]
4406#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4407pub fn _mm512_mask_fmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h {
4408 _mm512_mask_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
4409}
4410
4411/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4412/// and store the results in dst using writemask k (the element is copied from c when the corresponding
4413/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
4414/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4415///
4416/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_pch)
4417#[inline]
4418#[target_feature(enable = "avx512fp16")]
4419#[cfg_attr(test, assert_instr(vfmaddcph))]
4420#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4421pub fn _mm512_mask3_fmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h {
4422 _mm512_mask3_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
4423}
4424
4425/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4426/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask
4427/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4428/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4429///
4430/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_pch)
4431#[inline]
4432#[target_feature(enable = "avx512fp16")]
4433#[cfg_attr(test, assert_instr(vfmaddcph))]
4434#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4435pub fn _mm512_maskz_fmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
4436 _mm512_maskz_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
4437}
4438
4439/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4440/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
4441/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4442///
4443/// Rounding is done according to the rounding parameter, which can be one of:
4444///
4445/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4446/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4447/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4448/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4449/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4450///
4451/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_round_pch)
4452#[inline]
4453#[target_feature(enable = "avx512fp16")]
4454#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))]
4455#[rustc_legacy_const_generics(3)]
4456#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4457pub fn _mm512_fmadd_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
4458 static_assert_rounding!(ROUNDING);
    _mm512_mask3_fmadd_round_pch::<ROUNDING>(a, b, c, 0xffff)
4460}
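
// Illustrative only: the rounding mode is a const generic, so it is spelled at the call site as
// shown below (the helper name is hypothetical). `_MM_FROUND_NO_EXC` must accompany an explicit
// rounding direction, while `_MM_FROUND_CUR_DIRECTION` defers to `MXCSR.RC`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn fmadd_round_pch_sketch(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    // Round toward zero and suppress floating-point exceptions for this one operation.
    _mm512_fmadd_round_pch::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c)
}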
4461
4462/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4463/// and store the results in dst using writemask k (the element is copied from a when the corresponding mask
4464/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4465/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4466///
4467/// Rounding is done according to the rounding parameter, which can be one of:
4468///
4469/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4470/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4471/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4472/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4473/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4474///
4475/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_round_pch)
4476#[inline]
4477#[target_feature(enable = "avx512fp16")]
4478#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))]
4479#[rustc_legacy_const_generics(4)]
4480#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4481pub fn _mm512_mask_fmadd_round_pch<const ROUNDING: i32>(
4482 a: __m512h,
4483 k: __mmask16,
4484 b: __m512h,
4485 c: __m512h,
4486) -> __m512h {
4487 unsafe {
4488 static_assert_rounding!(ROUNDING);
        let r: __m512 = transmute(_mm512_mask3_fmadd_round_pch::<ROUNDING>(a, b, c, k)); // using `0xffff` would have been fine here, but this is what Clang does
        transmute(simd_select_bitmask(k, r, transmute(a)))
4491 }
4492}
4493
4494/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4495/// and store the results in dst using writemask k (the element is copied from c when the corresponding
4496/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
4497/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4498///
4499/// Rounding is done according to the rounding parameter, which can be one of:
4500///
4501/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4502/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4503/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4504/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4505/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4506///
4507/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_round_pch)
4508#[inline]
4509#[target_feature(enable = "avx512fp16")]
4510#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))]
4511#[rustc_legacy_const_generics(4)]
4512#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4513pub fn _mm512_mask3_fmadd_round_pch<const ROUNDING: i32>(
4514 a: __m512h,
4515 b: __m512h,
4516 c: __m512h,
4517 k: __mmask16,
4518) -> __m512h {
4519 unsafe {
4520 static_assert_rounding!(ROUNDING);
        transmute(vfmaddcph_mask3_512(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
            ROUNDING,
        ))
4528 }
4529}
4530
4531/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4532/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask
4533/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4534/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4535///
4536/// Rounding is done according to the rounding parameter, which can be one of:
4537///
4538/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4539/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4540/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4541/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4542/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4543///
4544/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_round_pch)
4545#[inline]
4546#[target_feature(enable = "avx512fp16")]
4547#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))]
4548#[rustc_legacy_const_generics(4)]
4549#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4550pub fn _mm512_maskz_fmadd_round_pch<const ROUNDING: i32>(
4551 k: __mmask16,
4552 a: __m512h,
4553 b: __m512h,
4554 c: __m512h,
4555) -> __m512h {
4556 unsafe {
4557 static_assert_rounding!(ROUNDING);
        transmute(vfmaddcph_maskz_512(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
            ROUNDING,
        ))
4565 }
4566}
4567
4568/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4569/// store the result in the lower elements of dst, and copy the upper 6 packed elements from a to the
4570/// upper elements of dst. Each complex number is composed of two adjacent half-precision (16-bit)
4571/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4572///
4573/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sch)
4574#[inline]
4575#[target_feature(enable = "avx512fp16")]
4576#[cfg_attr(test, assert_instr(vfmaddcsh))]
4577#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4578pub fn _mm_fmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4579 _mm_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
4580}
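
// Minimal sketch of the scalar ("sch") behaviour, with a hypothetical helper and arbitrary
// values: only the lowest complex number (elements 0 and 1) is computed, and elements 2..7 of
// the result are copied from `a`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn fmadd_sch_sketch() -> __m128h {
    let a = _mm_set_ph(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 1.0); // lowest complex lane: 1 + 1i
    let b = _mm_set1_ph(2.0); // lowest complex lane: 2 + 2i
    let c = _mm_set1_ph(0.0);
    // Lower pair becomes (1 + 1i) * (2 + 2i) = 0 + 4i; the upper six elements stay 2.0..=7.0 from `a`.
    _mm_fmadd_sch(a, b, c)
}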
4581
4582/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4583/// store the result in the lower elements of dst using writemask k (elements are copied from a when
4584/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst.
4585/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements,
4586/// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4587///
4588/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_sch)
4589#[inline]
4590#[target_feature(enable = "avx512fp16")]
4591#[cfg_attr(test, assert_instr(vfmaddcsh))]
4592#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4593pub fn _mm_mask_fmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
4594 _mm_mask_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
4595}
4596
4597/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4598/// store the result in the lower elements of dst using writemask k (elements are copied from c when
4599/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst.
4600/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements,
4601/// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4602///
4603/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_sch)
4604#[inline]
4605#[target_feature(enable = "avx512fp16")]
4606#[cfg_attr(test, assert_instr(vfmaddcsh))]
4607#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4608pub fn _mm_mask3_fmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
4609 _mm_mask3_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
4610}
4611
4612/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4613/// store the result in the lower elements of dst using zeromask k (elements are zeroed out when mask
4614/// bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. Each
4615/// complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
4616/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4617///
4618/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_sch)
4619#[inline]
4620#[target_feature(enable = "avx512fp16")]
4621#[cfg_attr(test, assert_instr(vfmaddcsh))]
4622#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4623pub fn _mm_maskz_fmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4624 _mm_maskz_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
4625}
4626
/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
/// store the result in the lower elements of dst, and copy the upper 6 packed elements from a to the
/// upper elements of dst. Each complex number is composed of two adjacent half-precision (16-bit)
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4630///
4631/// Rounding is done according to the rounding parameter, which can be one of:
4632///
4633/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4634/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4635/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4636/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4637/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4638///
4639/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_sch)
4640#[inline]
4641#[target_feature(enable = "avx512fp16")]
4642#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))]
4643#[rustc_legacy_const_generics(3)]
4644#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4645pub fn _mm_fmadd_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4646 unsafe {
4647 static_assert_rounding!(ROUNDING);
        transmute(vfmaddcsh_mask(
            transmute(a),
            transmute(b),
            transmute(c),
            0xff,
            ROUNDING,
        ))
4655 }
4656}
4657
4658/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4659/// store the result in the lower elements of dst using writemask k (elements are copied from a when
4660/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst.
4661/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements,
4662/// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4663///
4664/// Rounding is done according to the rounding parameter, which can be one of:
4665///
4666/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4667/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4668/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4669/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4670/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4671///
4672/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_sch)
4673#[inline]
4674#[target_feature(enable = "avx512fp16")]
4675#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))]
4676#[rustc_legacy_const_generics(4)]
4677#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4678pub fn _mm_mask_fmadd_round_sch<const ROUNDING: i32>(
4679 a: __m128h,
4680 k: __mmask8,
4681 b: __m128h,
4682 c: __m128h,
4683) -> __m128h {
4684 unsafe {
4685 static_assert_rounding!(ROUNDING);
        let a: __m128 = transmute(a);
        let r: __m128 = vfmaddcsh_mask(a, transmute(b), transmute(c), k, ROUNDING); // using `0xff` would have been fine here, but this is what Clang does
        transmute(_mm_mask_move_ss(a, k, a, r))
4689 }
4690}
4691
4692/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4693/// store the result in the lower elements of dst using writemask k (elements are copied from c when
4694/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst.
4695/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements,
4696/// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4697///
4698/// Rounding is done according to the rounding parameter, which can be one of:
4699///
4700/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4701/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4702/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4703/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4704/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4705///
4706/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_round_sch)
4707#[inline]
4708#[target_feature(enable = "avx512fp16")]
4709#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))]
4710#[rustc_legacy_const_generics(4)]
4711#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4712pub fn _mm_mask3_fmadd_round_sch<const ROUNDING: i32>(
4713 a: __m128h,
4714 b: __m128h,
4715 c: __m128h,
4716 k: __mmask8,
4717) -> __m128h {
4718 unsafe {
4719 static_assert_rounding!(ROUNDING);
        let c: __m128 = transmute(c);
        let r: __m128 = vfmaddcsh_mask(transmute(a), transmute(b), c, k, ROUNDING);
        transmute(_mm_move_ss(c, r))
4723 }
4724}
4725
4726/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4727/// store the result in the lower elements of dst using zeromask k (elements are zeroed out when mask
4728/// bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. Each
4729/// complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
4730/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4731///
4732/// Rounding is done according to the rounding parameter, which can be one of:
4733///
4734/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4735/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4736/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4737/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4738/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4739///
4740/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_sch)
4741#[inline]
4742#[target_feature(enable = "avx512fp16")]
4743#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))]
4744#[rustc_legacy_const_generics(4)]
4745#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4746pub fn _mm_maskz_fmadd_round_sch<const ROUNDING: i32>(
4747 k: __mmask8,
4748 a: __m128h,
4749 b: __m128h,
4750 c: __m128h,
4751) -> __m128h {
4752 unsafe {
4753 static_assert_rounding!(ROUNDING);
        transmute(vfmaddcsh_maskz(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
            ROUNDING,
        ))
4761 }
4762}
4763
4764/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4765/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed
4766/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number
4767/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4768///
4769/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_pch)
4770#[inline]
4771#[target_feature(enable = "avx512fp16,avx512vl")]
4772#[cfg_attr(test, assert_instr(vfcmaddcph))]
4773#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4774pub fn _mm_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    _mm_mask3_fcmadd_pch(a, b, c, 0xff)
4776}
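
// Hedged sketch (hypothetical helper, arbitrary values) of the conjugating variant: `b` is
// conjugated before the multiply, i.e. each complex lane computes a * conj(b) + c.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn fcmadd_pch_sketch() -> __m128h {
    let a = _mm_set1_ph(1.0); // every complex lane is 1 + 1i
    let b = _mm_set_ph(4.0, 3.0, 4.0, 3.0, 4.0, 3.0, 4.0, 3.0); // every complex lane is 3 + 4i
    let c = _mm_set1_ph(0.0);
    // (1 + 1i) * conj(3 + 4i) = (1 + 1i) * (3 - 4i) = (3 + 4) + (3 - 4)i = 7 - 1i per lane
    _mm_fcmadd_pch(a, b, c)
}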
4777
4778/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4779/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
4780/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4781/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4782/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4783///
4784/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_pch)
4785#[inline]
4786#[target_feature(enable = "avx512fp16,avx512vl")]
4787#[cfg_attr(test, assert_instr(vfcmaddcph))]
4788#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4789pub fn _mm_mask_fcmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
4790 unsafe {
        let r: __m128 = transmute(_mm_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
        transmute(simd_select_bitmask(k, r, transmute(a)))
4793 }
4794}
4795
4796/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4797/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
4798/// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4799/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4800/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4801///
4802/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_pch)
4803#[inline]
4804#[target_feature(enable = "avx512fp16,avx512vl")]
4805#[cfg_attr(test, assert_instr(vfcmaddcph))]
4806#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4807pub fn _mm_mask3_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
4808 unsafe {
        transmute(vfcmaddcph_mask3_128(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
4815 }
4816}
4817
4818/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4819/// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is
4820/// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4821/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4822/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4823///
4824/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_pch)
4825#[inline]
4826#[target_feature(enable = "avx512fp16,avx512vl")]
4827#[cfg_attr(test, assert_instr(vfcmaddcph))]
4828#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4829pub fn _mm_maskz_fcmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4830 unsafe {
        transmute(vfcmaddcph_maskz_128(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
4837 }
4838}
4839
4840/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4841/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed
4842/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number
4843/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4844///
4845/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fcmadd_pch)
4846#[inline]
4847#[target_feature(enable = "avx512fp16,avx512vl")]
4848#[cfg_attr(test, assert_instr(vfcmaddcph))]
4849#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4850pub fn _mm256_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    _mm256_mask3_fcmadd_pch(a, b, c, 0xff)
4852}
4853
4854/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4855/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
4856/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4857/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4858/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4859///
4860/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fcmadd_pch)
4861#[inline]
4862#[target_feature(enable = "avx512fp16,avx512vl")]
4863#[cfg_attr(test, assert_instr(vfcmaddcph))]
4864#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4865pub fn _mm256_mask_fcmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h {
4866 unsafe {
        let r: __m256 = transmute(_mm256_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
        transmute(simd_select_bitmask(k, r, transmute(a)))
4869 }
4870}
4871
4872/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4873/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
4874/// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4875/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4876/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4877///
4878/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fcmadd_pch)
4879#[inline]
4880#[target_feature(enable = "avx512fp16,avx512vl")]
4881#[cfg_attr(test, assert_instr(vfcmaddcph))]
4882#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4883pub fn _mm256_mask3_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h {
4884 unsafe {
        transmute(vfcmaddcph_mask3_256(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
4891 }
4892}
4893
4894/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4895/// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is
4896/// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4897/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4898/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4899///
4900/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fcmadd_pch)
4901#[inline]
4902#[target_feature(enable = "avx512fp16,avx512vl")]
4903#[cfg_attr(test, assert_instr(vfcmaddcph))]
4904#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4905pub fn _mm256_maskz_fcmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
4906 unsafe {
        transmute(vfcmaddcph_maskz_256(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
4913 }
4914}
4915
4916/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4917/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed
4918/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number
4919/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4920///
4921/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_pch)
4922#[inline]
4923#[target_feature(enable = "avx512fp16")]
4924#[cfg_attr(test, assert_instr(vfcmaddcph))]
4925#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4926pub fn _mm512_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
4927 _mm512_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
4928}
4929
4930/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4931/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
4932/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4933/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4934/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4935///
4936/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_pch)
4937#[inline]
4938#[target_feature(enable = "avx512fp16")]
4939#[cfg_attr(test, assert_instr(vfcmaddcph))]
4940#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4941pub fn _mm512_mask_fcmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h {
4942 _mm512_mask_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
4943}
4944
4945/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4946/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
4947/// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4948/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4949/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4950///
4951/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fcmadd_pch)
4952#[inline]
4953#[target_feature(enable = "avx512fp16")]
4954#[cfg_attr(test, assert_instr(vfcmaddcph))]
4955#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4956pub fn _mm512_mask3_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h {
4957 _mm512_mask3_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
4958}
4959
4960/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4961/// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is
4962/// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4963/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4964/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4965///
4966/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_pch)
4967#[inline]
4968#[target_feature(enable = "avx512fp16")]
4969#[cfg_attr(test, assert_instr(vfcmaddcph))]
4970#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4971pub fn _mm512_maskz_fcmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
4972 _mm512_maskz_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
4973}
4974
4975/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4976/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed
4977/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number
4978/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4979///
4980/// Rounding is done according to the rounding parameter, which can be one of:
4981///
4982/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4983/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4984/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4985/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4986/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4987///
4988/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_round_pch)
4989#[inline]
4990#[target_feature(enable = "avx512fp16")]
4991#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
4992#[rustc_legacy_const_generics(3)]
4993#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
4994pub fn _mm512_fcmadd_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
4995 static_assert_rounding!(ROUNDING);
    _mm512_mask3_fcmadd_round_pch::<ROUNDING>(a, b, c, 0xffff)
4997}
4998
4999/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
5000/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
5001/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
5002/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
5003/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
5004///
5005/// Rounding is done according to the rounding parameter, which can be one of:
5006///
5007/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5008/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5009/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5010/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5011/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5012///
5013/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_round_pch)
5014#[inline]
5015#[target_feature(enable = "avx512fp16")]
5016#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
5017#[rustc_legacy_const_generics(4)]
5018#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5019pub fn _mm512_mask_fcmadd_round_pch<const ROUNDING: i32>(
5020 a: __m512h,
5021 k: __mmask16,
5022 b: __m512h,
5023 c: __m512h,
5024) -> __m512h {
5025 unsafe {
5026 static_assert_rounding!(ROUNDING);
        let r: __m512 = transmute(_mm512_mask3_fcmadd_round_pch::<ROUNDING>(a, b, c, k)); // using `0xffff` would have been fine here, but this is what Clang does
        transmute(simd_select_bitmask(k, r, transmute(a)))
5029 }
5030}
5031
5032/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
5033/// to the corresponding complex numbers in c using writemask k (the element is copied from c when the corresponding
5034/// mask bit is not set), and store the results in dst. Each complex number is composed of two adjacent half-precision
/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex
5036/// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
5037///
5038/// Rounding is done according to the rounding parameter, which can be one of:
5039///
5040/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5041/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5042/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5043/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5044/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5045///
5046/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fcmadd_round_pch)
5047#[inline]
5048#[target_feature(enable = "avx512fp16")]
5049#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
5050#[rustc_legacy_const_generics(4)]
5051#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5052pub fn _mm512_mask3_fcmadd_round_pch<const ROUNDING: i32>(
5053 a: __m512h,
5054 b: __m512h,
5055 c: __m512h,
5056 k: __mmask16,
5057) -> __m512h {
5058 unsafe {
5059 static_assert_rounding!(ROUNDING);
        transmute(vfcmaddcph_mask3_512(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
            ROUNDING,
        ))
5067 }
5068}
5069
5070/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
5071/// to the corresponding complex numbers in c using zeromask k (the element is zeroed out when the corresponding
5072/// mask bit is not set), and store the results in dst. Each complex number is composed of two adjacent half-precision
/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex
5074/// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
5075///
5076/// Rounding is done according to the rounding parameter, which can be one of:
5077///
5078/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5079/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5080/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5081/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5082/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5083///
5084/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_round_pch)
5085#[inline]
5086#[target_feature(enable = "avx512fp16")]
5087#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
5088#[rustc_legacy_const_generics(4)]
5089#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5090pub fn _mm512_maskz_fcmadd_round_pch<const ROUNDING: i32>(
5091 k: __mmask16,
5092 a: __m512h,
5093 b: __m512h,
5094 c: __m512h,
5095) -> __m512h {
5096 unsafe {
5097 static_assert_rounding!(ROUNDING);
        transmute(vfcmaddcph_maskz_512(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
            ROUNDING,
        ))
5105 }
5106}
5107
5108/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
5109/// accumulate to the lower complex number in c, and store the result in the lower elements of dst,
5110/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
5111/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
5112/// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
5113///
5114/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_sch)
5115#[inline]
5116#[target_feature(enable = "avx512fp16")]
5117#[cfg_attr(test, assert_instr(vfcmaddcsh))]
5118#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5119pub fn _mm_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5120 _mm_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
5121}
5122
5123/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
5124/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
5125/// writemask k (the element is copied from a when the corresponding mask bit is not set), and copy the upper
5126/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
5127/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
5128/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
5129///
5130/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_sch)
5131#[inline]
5132#[target_feature(enable = "avx512fp16")]
5133#[cfg_attr(test, assert_instr(vfcmaddcsh))]
5134#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5135pub fn _mm_mask_fcmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
5136 _mm_mask_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
5137}
5138
5139/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
5140/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
5141/// writemask k (the element is copied from c when the corresponding mask bit is not set), and copy the upper
5142/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
5143/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
5144/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
5145///
5146/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_sch)
5147#[inline]
5148#[target_feature(enable = "avx512fp16")]
5149#[cfg_attr(test, assert_instr(vfcmaddcsh))]
5150#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5151pub fn _mm_mask3_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
5152 _mm_mask3_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
5153}
5154
5155/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
5156/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
5157/// zeromask k (the element is zeroed out when the corresponding mask bit is not set), and copy the upper
5158/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
5159/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
5160/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
5161///
5162/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_sch)
5163#[inline]
5164#[target_feature(enable = "avx512fp16")]
5165#[cfg_attr(test, assert_instr(vfcmaddcsh))]
5166#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5167pub fn _mm_maskz_fcmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5168 _mm_maskz_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
5169}
5170
5171/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
5172/// accumulate to the lower complex number in c, and store the result in the lower elements of dst,
5173/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
5174/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
5175/// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
5176///
5177/// Rounding is done according to the rounding parameter, which can be one of:
5178///
5179/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5180/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5181/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5182/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5183/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5184///
5185/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_round_sch)
5186#[inline]
5187#[target_feature(enable = "avx512fp16")]
5188#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
5189#[rustc_legacy_const_generics(3)]
5190#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5191pub fn _mm_fcmadd_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5192 unsafe {
5193 static_assert_rounding!(ROUNDING);
        transmute(vfcmaddcsh_mask(
            transmute(a),
            transmute(b),
            transmute(c),
            0xff,
            ROUNDING,
        ))
5201 }
5202}
5203
5204/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
5205/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
5206/// writemask k (the element is copied from a when the corresponding mask bit is not set), and copy the upper
5207/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
5208/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
5209/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
5210///
5211/// Rounding is done according to the rounding parameter, which can be one of:
5212///
5213/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5214/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5215/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5216/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5217/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5218///
5219/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_round_sch)
5220#[inline]
5221#[target_feature(enable = "avx512fp16")]
5222#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
5223#[rustc_legacy_const_generics(4)]
5224#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5225pub fn _mm_mask_fcmadd_round_sch<const ROUNDING: i32>(
5226 a: __m128h,
5227 k: __mmask8,
5228 b: __m128h,
5229 c: __m128h,
5230) -> __m128h {
5231 unsafe {
5232 static_assert_rounding!(ROUNDING);
        let a: __m128 = transmute(a);
        let r: __m128 = vfcmaddcsh_mask(a, transmute(b), transmute(c), k, ROUNDING);
        transmute(_mm_mask_move_ss(a, k, a, r))
5236 }
5237}
5238
5239/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
5240/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
5241/// writemask k (the element is copied from c when the corresponding mask bit is not set), and copy the upper
5242/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
5243/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
5244/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
5245///
5246/// Rounding is done according to the rounding parameter, which can be one of:
5247///
5248/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5249/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5250/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5251/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5252/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5253///
5254/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_round_sch)
5255#[inline]
5256#[target_feature(enable = "avx512fp16")]
5257#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
5258#[rustc_legacy_const_generics(4)]
5259#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5260pub fn _mm_mask3_fcmadd_round_sch<const ROUNDING: i32>(
5261 a: __m128h,
5262 b: __m128h,
5263 c: __m128h,
5264 k: __mmask8,
5265) -> __m128h {
5266 unsafe {
5267 static_assert_rounding!(ROUNDING);
        let c: __m128 = transmute(c);
        let r: __m128 = vfcmaddcsh_mask(transmute(a), transmute(b), c, k, ROUNDING);
        transmute(_mm_move_ss(c, r))
5271 }
5272}
5273
5274/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
5275/// accumulate to the lower complex number in c using zeromask k (the element is zeroed out when the corresponding
5276/// mask bit is not set), and store the result in the lower elements of dst, and copy the upper 6 packed elements
5277/// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision (16-bit)
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex
5279/// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
5280///
5281/// Rounding is done according to the rounding parameter, which can be one of:
5282///
5283/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5284/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5285/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5286/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5287/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5288///
5289/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_round_sch)
5290#[inline]
5291#[target_feature(enable = "avx512fp16")]
5292#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
5293#[rustc_legacy_const_generics(4)]
5294#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5295pub fn _mm_maskz_fcmadd_round_sch<const ROUNDING: i32>(
5296 k: __mmask8,
5297 a: __m128h,
5298 b: __m128h,
5299 c: __m128h,
5300) -> __m128h {
5301 unsafe {
5302 static_assert_rounding!(ROUNDING);
5303 transmute(src:vfcmaddcsh_maskz(
5304 a:transmute(a),
5305 b:transmute(b),
5306 c:transmute(src:c),
5307 k,
5308 ROUNDING,
5309 ))
5310 }
5311}
5312
5313/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5314/// result to packed elements in c, and store the results in dst.
5315///
5316/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_ph)
5317#[inline]
5318#[target_feature(enable = "avx512fp16,avx512vl")]
5319#[cfg_attr(test, assert_instr(vfmadd))]
5320#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5321#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5322pub const fn _mm_fmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5323 unsafe { simd_fma(x:a, y:b, z:c) }
5324}
5325
5326/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5327/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5328/// from a when the corresponding mask bit is not set).
5329///
5330/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_ph)
5331#[inline]
5332#[target_feature(enable = "avx512fp16,avx512vl")]
5333#[cfg_attr(test, assert_instr(vfmadd))]
5334#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5335#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5336pub const fn _mm_mask_fmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
5337 unsafe { simd_select_bitmask(m:k, yes:_mm_fmadd_ph(a, b, c), no:a) }
5338}
5339
5340/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5341/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5342/// from c when the corresponding mask bit is not set).
5343///
5344/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_ph)
5345#[inline]
5346#[target_feature(enable = "avx512fp16,avx512vl")]
5347#[cfg_attr(test, assert_instr(vfmadd))]
5348#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5349#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5350pub const fn _mm_mask3_fmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
5351 unsafe { simd_select_bitmask(m:k, yes:_mm_fmadd_ph(a, b, c), no:c) }
5352}
5353
5354/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5355/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed
5356/// out when the corresponding mask bit is not set).
5357///
5358/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_ph)
5359#[inline]
5360#[target_feature(enable = "avx512fp16,avx512vl")]
5361#[cfg_attr(test, assert_instr(vfmadd))]
5362#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5363#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5364pub const fn _mm_maskz_fmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5365 unsafe { simd_select_bitmask(m:k, yes:_mm_fmadd_ph(a, b, c), no:_mm_setzero_ph()) }
5366}
5367
5368/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5369/// result to packed elements in c, and store the results in dst.
5370///
5371/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_ph)
5372#[inline]
5373#[target_feature(enable = "avx512fp16,avx512vl")]
5374#[cfg_attr(test, assert_instr(vfmadd))]
5375#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5376#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5377pub const fn _mm256_fmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
5378 unsafe { simd_fma(x:a, y:b, z:c) }
5379}
5380
5381/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5382/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5383/// from a when the corresponding mask bit is not set).
5384///
5385/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmadd_ph)
5386#[inline]
5387#[target_feature(enable = "avx512fp16,avx512vl")]
5388#[cfg_attr(test, assert_instr(vfmadd))]
5389#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5390#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5391pub const fn _mm256_mask_fmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
5392 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmadd_ph(a, b, c), no:a) }
5393}
5394
5395/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5396/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5397/// from c when the corresponding mask bit is not set).
5398///
5399/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmadd_ph)
5400#[inline]
5401#[target_feature(enable = "avx512fp16,avx512vl")]
5402#[cfg_attr(test, assert_instr(vfmadd))]
5403#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5404#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5405pub const fn _mm256_mask3_fmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
5406 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmadd_ph(a, b, c), no:c) }
5407}
5408
5409/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5410/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed
5411/// out when the corresponding mask bit is not set).
5412///
5413/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmadd_ph)
5414#[inline]
5415#[target_feature(enable = "avx512fp16,avx512vl")]
5416#[cfg_attr(test, assert_instr(vfmadd))]
5417#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5418#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5419pub const fn _mm256_maskz_fmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
5420 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmadd_ph(a, b, c), no:_mm256_setzero_ph()) }
5421}
5422
5423/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5424/// result to packed elements in c, and store the results in dst.
5425///
5426/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_ph)
5427#[inline]
5428#[target_feature(enable = "avx512fp16")]
5429#[cfg_attr(test, assert_instr(vfmadd))]
5430#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5431#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5432pub const fn _mm512_fmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
5433 unsafe { simd_fma(x:a, y:b, z:c) }
5434}
5435
5436/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5437/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5438/// from a when the corresponding mask bit is not set).
5439///
5440/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_ph)
5441#[inline]
5442#[target_feature(enable = "avx512fp16")]
5443#[cfg_attr(test, assert_instr(vfmadd))]
5444#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5445#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5446pub const fn _mm512_mask_fmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
5447 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmadd_ph(a, b, c), no:a) }
5448}
5449
5450/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5451/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5452/// from c when the corresponding mask bit is not set).
5453///
5454/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_ph)
5455#[inline]
5456#[target_feature(enable = "avx512fp16")]
5457#[cfg_attr(test, assert_instr(vfmadd))]
5458#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5459#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5460pub const fn _mm512_mask3_fmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
5461 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmadd_ph(a, b, c), no:c) }
5462}
5463
5464/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5465/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed
5466/// out when the corresponding mask bit is not set).
5467///
5468/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_ph)
5469#[inline]
5470#[target_feature(enable = "avx512fp16")]
5471#[cfg_attr(test, assert_instr(vfmadd))]
5472#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5473#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5474pub const fn _mm512_maskz_fmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
5475 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmadd_ph(a, b, c), no:_mm512_setzero_ph()) }
5476}
5477
5478/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5479/// result to packed elements in c, and store the results in dst.
5480///
5481/// Rounding is done according to the rounding parameter, which can be one of:
5482///
5483/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5484/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5485/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5486/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5487/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5488///
5489/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_round_ph)
5490#[inline]
5491#[target_feature(enable = "avx512fp16")]
5492#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5493#[rustc_legacy_const_generics(3)]
5494#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5495pub fn _mm512_fmadd_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
5496 unsafe {
5497 static_assert_rounding!(ROUNDING);
5498 vfmaddph_512(a, b, c, ROUNDING)
5499 }
5500}
5501
5502/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5503/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5504/// from a when the corresponding mask bit is not set).
5505///
5506/// Rounding is done according to the rounding parameter, which can be one of:
5507///
5508/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5509/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5510/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5511/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5512/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5513///
5514/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_round_ph)
5515#[inline]
5516#[target_feature(enable = "avx512fp16")]
5517#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5518#[rustc_legacy_const_generics(4)]
5519#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5520pub fn _mm512_mask_fmadd_round_ph<const ROUNDING: i32>(
5521 a: __m512h,
5522 k: __mmask32,
5523 b: __m512h,
5524 c: __m512h,
5525) -> __m512h {
5526 unsafe {
5527 static_assert_rounding!(ROUNDING);
5528 simd_select_bitmask(m:k, yes:_mm512_fmadd_round_ph::<ROUNDING>(a, b, c), no:a)
5529 }
5530}
5531
5532/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5533/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5534/// from c when the corresponding mask bit is not set).
5535///
5536/// Rounding is done according to the rounding parameter, which can be one of:
5537///
5538/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5539/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5540/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5541/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5542/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5543///
5544/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_round_ph)
5545#[inline]
5546#[target_feature(enable = "avx512fp16")]
5547#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5548#[rustc_legacy_const_generics(4)]
5549#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5550pub fn _mm512_mask3_fmadd_round_ph<const ROUNDING: i32>(
5551 a: __m512h,
5552 b: __m512h,
5553 c: __m512h,
5554 k: __mmask32,
5555) -> __m512h {
5556 unsafe {
5557 static_assert_rounding!(ROUNDING);
5558 simd_select_bitmask(m:k, yes:_mm512_fmadd_round_ph::<ROUNDING>(a, b, c), no:c)
5559 }
5560}
5561
5562/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5563/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed
5564/// out when the corresponding mask bit is not set).
5565///
5566/// Rounding is done according to the rounding parameter, which can be one of:
5567///
5568/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5569/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5570/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5571/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5572/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5573///
5574/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_round_ph)
5575#[inline]
5576#[target_feature(enable = "avx512fp16")]
5577#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5578#[rustc_legacy_const_generics(4)]
5579#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5580pub fn _mm512_maskz_fmadd_round_ph<const ROUNDING: i32>(
5581 k: __mmask32,
5582 a: __m512h,
5583 b: __m512h,
5584 c: __m512h,
5585) -> __m512h {
5586 unsafe {
5587 static_assert_rounding!(ROUNDING);
5588 simd_select_bitmask(
5589 m:k,
5590 yes:_mm512_fmadd_round_ph::<ROUNDING>(a, b, c),
5591 no:_mm512_setzero_ph(),
5592 )
5593 }
5594}
5595
5596/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5597/// result to the lower element in c. Store the result in the lower element of dst, and copy the upper
5598/// 7 packed elements from a to the upper elements of dst.
5599///
5600/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sh)
5601#[inline]
5602#[target_feature(enable = "avx512fp16")]
5603#[cfg_attr(test, assert_instr(vfmadd))]
5604#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5605#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5606pub const fn _mm_fmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5607 unsafe {
5608 let extracta: f16 = simd_extract!(a, 0);
5609 let extractb: f16 = simd_extract!(b, 0);
5610 let extractc: f16 = simd_extract!(c, 0);
5611 let r: f16 = fmaf16(a:extracta, b:extractb, c:extractc);
5612 simd_insert!(a, 0, r)
5613 }
5614}
5615
5616/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5617/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
5618/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
5619/// upper elements of dst.
5620///
5621/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_sh)
5622#[inline]
5623#[target_feature(enable = "avx512fp16")]
5624#[cfg_attr(test, assert_instr(vfmadd))]
5625#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5626#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5627pub const fn _mm_mask_fmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
5628 unsafe {
5629 let mut fmadd: f16 = simd_extract!(a, 0);
5630 if k & 1 != 0 {
5631 let extractb: f16 = simd_extract!(b, 0);
5632 let extractc: f16 = simd_extract!(c, 0);
5633 fmadd = fmaf16(a:fmadd, b:extractb, c:extractc);
5634 }
5635 simd_insert!(a, 0, fmadd)
5636 }
5637}
5638
5639/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5640/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
5641/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the
5642/// upper elements of dst.
5643///
5644/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_sh)
5645#[inline]
5646#[target_feature(enable = "avx512fp16")]
5647#[cfg_attr(test, assert_instr(vfmadd))]
5648#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5649#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5650pub const fn _mm_mask3_fmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
5651 unsafe {
5652 let mut fmadd: f16 = simd_extract!(c, 0);
5653 if k & 1 != 0 {
5654 let extracta: f16 = simd_extract!(a, 0);
5655 let extractb: f16 = simd_extract!(b, 0);
5656 fmadd = fmaf16(a:extracta, b:extractb, c:fmadd);
5657 }
5658 simd_insert!(c, 0, fmadd)
5659 }
5660}
5661
5662/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5663/// result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element
5664/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
5665/// upper elements of dst.
5666///
5667/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_sh)
5668#[inline]
5669#[target_feature(enable = "avx512fp16")]
5670#[cfg_attr(test, assert_instr(vfmadd))]
5671#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5672#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5673pub const fn _mm_maskz_fmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5674 unsafe {
5675 let mut fmadd: f16 = 0.0;
5676 if k & 1 != 0 {
5677 let extracta: f16 = simd_extract!(a, 0);
5678 let extractb: f16 = simd_extract!(b, 0);
5679 let extractc: f16 = simd_extract!(c, 0);
5680 fmadd = fmaf16(a:extracta, b:extractb, c:extractc);
5681 }
5682 simd_insert!(a, 0, fmadd)
5683 }
5684}
5685
5686/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5687/// result to the lower element in c. Store the result in the lower element of dst, and copy the upper
5688/// 7 packed elements from a to the upper elements of dst.
5689///
5690/// Rounding is done according to the rounding parameter, which can be one of:
5691///
5692/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5693/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5694/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5695/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5696/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5697///
5698/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_sh)
5699#[inline]
5700#[target_feature(enable = "avx512fp16")]
5701#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5702#[rustc_legacy_const_generics(3)]
5703#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5704pub fn _mm_fmadd_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5705 unsafe {
5706 static_assert_rounding!(ROUNDING);
5707 let extracta: f16 = simd_extract!(a, 0);
5708 let extractb: f16 = simd_extract!(b, 0);
5709 let extractc: f16 = simd_extract!(c, 0);
5710 let r: f16 = vfmaddsh(a:extracta, b:extractb, c:extractc, ROUNDING);
5711 simd_insert!(a, 0, r)
5712 }
5713}
5714
5715/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5716/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
5717/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
5718/// upper elements of dst.
5719///
5720/// Rounding is done according to the rounding parameter, which can be one of:
5721///
5722/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5723/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5724/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5725/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5726/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5727///
5728/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_sh)
5729#[inline]
5730#[target_feature(enable = "avx512fp16")]
5731#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5732#[rustc_legacy_const_generics(4)]
5733#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5734pub fn _mm_mask_fmadd_round_sh<const ROUNDING: i32>(
5735 a: __m128h,
5736 k: __mmask8,
5737 b: __m128h,
5738 c: __m128h,
5739) -> __m128h {
5740 unsafe {
5741 static_assert_rounding!(ROUNDING);
5742 let mut fmadd: f16 = simd_extract!(a, 0);
5743 if k & 1 != 0 {
5744 let extractb: f16 = simd_extract!(b, 0);
5745 let extractc: f16 = simd_extract!(c, 0);
5746 fmadd = vfmaddsh(a:fmadd, b:extractb, c:extractc, ROUNDING);
5747 }
5748 simd_insert!(a, 0, fmadd)
5749 }
5750}
5751
5752/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5753/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
5754/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the
5755/// upper elements of dst.
5756///
5757/// Rounding is done according to the rounding parameter, which can be one of:
5758///
5759/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5760/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5761/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5762/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5763/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5764///
5765/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_round_sh)
5766#[inline]
5767#[target_feature(enable = "avx512fp16")]
5768#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5769#[rustc_legacy_const_generics(4)]
5770#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5771pub fn _mm_mask3_fmadd_round_sh<const ROUNDING: i32>(
5772 a: __m128h,
5773 b: __m128h,
5774 c: __m128h,
5775 k: __mmask8,
5776) -> __m128h {
5777 unsafe {
5778 static_assert_rounding!(ROUNDING);
5779 let mut fmadd: f16 = simd_extract!(c, 0);
5780 if k & 1 != 0 {
5781 let extracta: f16 = simd_extract!(a, 0);
5782 let extractb: f16 = simd_extract!(b, 0);
5783 fmadd = vfmaddsh(a:extracta, b:extractb, c:fmadd, ROUNDING);
5784 }
5785 simd_insert!(c, 0, fmadd)
5786 }
5787}
5788
5789/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5790/// result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element
5791/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
5792/// upper elements of dst.
5793///
5794/// Rounding is done according to the rounding parameter, which can be one of:
5795///
5796/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5797/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5798/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5799/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5800/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5801///
5802/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_sh)
5803#[inline]
5804#[target_feature(enable = "avx512fp16")]
5805#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5806#[rustc_legacy_const_generics(4)]
5807#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5808pub fn _mm_maskz_fmadd_round_sh<const ROUNDING: i32>(
5809 k: __mmask8,
5810 a: __m128h,
5811 b: __m128h,
5812 c: __m128h,
5813) -> __m128h {
5814 unsafe {
5815 static_assert_rounding!(ROUNDING);
5816 let mut fmadd: f16 = 0.0;
5817 if k & 1 != 0 {
5818 let extracta: f16 = simd_extract!(a, 0);
5819 let extractb: f16 = simd_extract!(b, 0);
5820 let extractc: f16 = simd_extract!(c, 0);
5821 fmadd = vfmaddsh(a:extracta, b:extractb, c:extractc, ROUNDING);
5822 }
5823 simd_insert!(a, 0, fmadd)
5824 }
5825}
5826
5827/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5828/// in c from the intermediate result, and store the results in dst.
5829/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5830///
5831/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_ph)
5832#[inline]
5833#[target_feature(enable = "avx512fp16,avx512vl")]
5834#[cfg_attr(test, assert_instr(vfmsub))]
5835#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5836#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5837pub const fn _mm_fmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5838 unsafe { simd_fma(x:a, y:b, z:simd_neg(c)) }
5839}
5840
5841/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5842/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5843/// from a when the corresponding mask bit is not set).
5844///
5845/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_ph)
5846#[inline]
5847#[target_feature(enable = "avx512fp16,avx512vl")]
5848#[cfg_attr(test, assert_instr(vfmsub))]
5849#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5850#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5851pub const fn _mm_mask_fmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
5852 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsub_ph(a, b, c), no:a) }
5853}
5854
5855/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5856/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5857/// from c when the corresponding mask bit is not set).
5858///
5859/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_ph)
5860#[inline]
5861#[target_feature(enable = "avx512fp16,avx512vl")]
5862#[cfg_attr(test, assert_instr(vfmsub))]
5863#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5864#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5865pub const fn _mm_mask3_fmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
5866 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsub_ph(a, b, c), no:c) }
5867}
5868
5869/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5870/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed
5871/// out when the corresponding mask bit is not set).
5872///
5873/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_ph)
5874#[inline]
5875#[target_feature(enable = "avx512fp16,avx512vl")]
5876#[cfg_attr(test, assert_instr(vfmsub))]
5877#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5878#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5879pub const fn _mm_maskz_fmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5880 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsub_ph(a, b, c), no:_mm_setzero_ph()) }
5881}
5882
5883/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5884/// in c from the intermediate result, and store the results in dst.
5885///
5886/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsub_ph)
5887#[inline]
5888#[target_feature(enable = "avx512fp16,avx512vl")]
5889#[cfg_attr(test, assert_instr(vfmsub))]
5890#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5891#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5892pub const fn _mm256_fmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
5893 unsafe { simd_fma(x:a, y:b, z:simd_neg(c)) }
5894}
5895
5896/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5897/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5898/// from a when the corresponding mask bit is not set).
5899///
5900/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmsub_ph)
5901#[inline]
5902#[target_feature(enable = "avx512fp16,avx512vl")]
5903#[cfg_attr(test, assert_instr(vfmsub))]
5904#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5905#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5906pub const fn _mm256_mask_fmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
5907 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsub_ph(a, b, c), no:a) }
5908}
5909
5910/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5911/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5912/// from c when the corresponding mask bit is not set).
5913///
5914/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmsub_ph)
5915#[inline]
5916#[target_feature(enable = "avx512fp16,avx512vl")]
5917#[cfg_attr(test, assert_instr(vfmsub))]
5918#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5919#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5920pub const fn _mm256_mask3_fmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
5921 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsub_ph(a, b, c), no:c) }
5922}
5923
5924/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5925/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed
5926/// out when the corresponding mask bit is not set).
5927///
5928/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmsub_ph)
5929#[inline]
5930#[target_feature(enable = "avx512fp16,avx512vl")]
5931#[cfg_attr(test, assert_instr(vfmsub))]
5932#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5933#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5934pub const fn _mm256_maskz_fmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
5935 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsub_ph(a, b, c), no:_mm256_setzero_ph()) }
5936}
5937
5938/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5939/// in c from the intermediate result, and store the results in dst.
5940///
5941/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_ph)
5942#[inline]
5943#[target_feature(enable = "avx512fp16")]
5944#[cfg_attr(test, assert_instr(vfmsub))]
5945#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5946#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5947pub const fn _mm512_fmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
5948 unsafe { simd_fma(x:a, y:b, z:simd_neg(c)) }
5949}
5950
5951/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5952/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5953/// from a when the corresponding mask bit is not set).
5954///
5955/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_ph)
5956#[inline]
5957#[target_feature(enable = "avx512fp16")]
5958#[cfg_attr(test, assert_instr(vfmsub))]
5959#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5960#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5961pub const fn _mm512_mask_fmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
5962 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsub_ph(a, b, c), no:a) }
5963}
5964
5965/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5966/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5967/// from c when the corresponding mask bit is not set).
5968///
5969/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_ph)
5970#[inline]
5971#[target_feature(enable = "avx512fp16")]
5972#[cfg_attr(test, assert_instr(vfmsub))]
5973#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5974#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5975pub const fn _mm512_mask3_fmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
5976 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsub_ph(a, b, c), no:c) }
5977}
5978
5979/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5980/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed
5981/// out when the corresponding mask bit is not set).
5982///
5983/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_ph)
5984#[inline]
5985#[target_feature(enable = "avx512fp16")]
5986#[cfg_attr(test, assert_instr(vfmsub))]
5987#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
5988#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5989pub const fn _mm512_maskz_fmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
5990 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsub_ph(a, b, c), no:_mm512_setzero_ph()) }
5991}
5992
5993/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5994/// in c from the intermediate result, and store the results in dst.
5995///
5996/// Rounding is done according to the rounding parameter, which can be one of:
5997///
5998/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5999/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6000/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6001/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6002/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6003///
6004/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_round_ph)
6005#[inline]
6006#[target_feature(enable = "avx512fp16")]
6007#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
6008#[rustc_legacy_const_generics(3)]
6009#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6010pub fn _mm512_fmsub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
6011 unsafe {
6012 static_assert_rounding!(ROUNDING);
6013 vfmaddph_512(a, b, c:simd_neg(c), ROUNDING)
6014 }
6015}
6016
6017/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6018/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
6019/// from a when the corresponding mask bit is not set).
6020///
6021/// Rounding is done according to the rounding parameter, which can be one of:
6022///
6023/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6024/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6025/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6026/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6027/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6028///
6029/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_round_ph)
6030#[inline]
6031#[target_feature(enable = "avx512fp16")]
6032#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
6033#[rustc_legacy_const_generics(4)]
6034#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6035pub fn _mm512_mask_fmsub_round_ph<const ROUNDING: i32>(
6036 a: __m512h,
6037 k: __mmask32,
6038 b: __m512h,
6039 c: __m512h,
6040) -> __m512h {
6041 unsafe {
6042 static_assert_rounding!(ROUNDING);
6043 simd_select_bitmask(m:k, yes:_mm512_fmsub_round_ph::<ROUNDING>(a, b, c), no:a)
6044 }
6045}
6046
6047/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6048/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
6049/// from c when the corresponding mask bit is not set).
6050///
6051/// Rounding is done according to the rounding parameter, which can be one of:
6052///
6053/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6054/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6055/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6056/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6057/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6058///
6059/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_round_ph)
6060#[inline]
6061#[target_feature(enable = "avx512fp16")]
6062#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
6063#[rustc_legacy_const_generics(4)]
6064#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6065pub fn _mm512_mask3_fmsub_round_ph<const ROUNDING: i32>(
6066 a: __m512h,
6067 b: __m512h,
6068 c: __m512h,
6069 k: __mmask32,
6070) -> __m512h {
6071 unsafe {
6072 static_assert_rounding!(ROUNDING);
6073 simd_select_bitmask(m:k, yes:_mm512_fmsub_round_ph::<ROUNDING>(a, b, c), no:c)
6074 }
6075}
6076
6077/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6078/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed
6079/// out when the corresponding mask bit is not set).
6080///
6081/// Rounding is done according to the rounding parameter, which can be one of:
6082///
6083/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6084/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6085/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6086/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6087/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6088///
6089/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_round_ph)
6090#[inline]
6091#[target_feature(enable = "avx512fp16")]
6092#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
6093#[rustc_legacy_const_generics(4)]
6094#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6095pub fn _mm512_maskz_fmsub_round_ph<const ROUNDING: i32>(
6096 k: __mmask32,
6097 a: __m512h,
6098 b: __m512h,
6099 c: __m512h,
6100) -> __m512h {
6101 unsafe {
6102 static_assert_rounding!(ROUNDING);
6103 simd_select_bitmask(
6104 m:k,
6105 yes:_mm512_fmsub_round_ph::<ROUNDING>(a, b, c),
6106 no:_mm512_setzero_ph(),
6107 )
6108 }
6109}
6110
6111/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements
6112/// in c from the intermediate result. Store the result in the lower element of dst, and copy the upper
6113/// 7 packed elements from a to the upper elements of dst.
6114///
6115/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_sh)
6116#[inline]
6117#[target_feature(enable = "avx512fp16")]
6118#[cfg_attr(test, assert_instr(vfmsub))]
6119#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6120#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6121pub const fn _mm_fmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6122 unsafe {
6123 let extracta: f16 = simd_extract!(a, 0);
6124 let extractb: f16 = simd_extract!(b, 0);
6125 let extractc: f16 = simd_extract!(c, 0);
6126 let r: f16 = fmaf16(a:extracta, b:extractb, -extractc);
6127 simd_insert!(a, 0, r)
6128 }
6129}
6130
6131/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements
6132/// in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element
6133/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
6134/// upper elements of dst.
6135///
6136/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_sh)
6137#[inline]
6138#[target_feature(enable = "avx512fp16")]
6139#[cfg_attr(test, assert_instr(vfmsub))]
6140#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6141#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6142pub const fn _mm_mask_fmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
6143 unsafe {
6144 let mut fmsub: f16 = simd_extract!(a, 0);
6145 if k & 1 != 0 {
6146 let extractb: f16 = simd_extract!(b, 0);
6147 let extractc: f16 = simd_extract!(c, 0);
6148 fmsub = fmaf16(a:fmsub, b:extractb, -extractc);
6149 }
6150 simd_insert!(a, 0, fmsub)
6151 }
6152}
6153
6154/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements
6155/// in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element
6156/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the
6157/// upper elements of dst.
6158///
6159/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_sh)
6160#[inline]
6161#[target_feature(enable = "avx512fp16")]
6162#[cfg_attr(test, assert_instr(vfmsub))]
6163#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6164#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6165pub const fn _mm_mask3_fmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
6166 unsafe {
6167 let mut fmsub: f16 = simd_extract!(c, 0);
6168 if k & 1 != 0 {
6169 let extracta: f16 = simd_extract!(a, 0);
6170 let extractb: f16 = simd_extract!(b, 0);
6171 fmsub = fmaf16(a:extracta, b:extractb, -fmsub);
6172 }
6173 simd_insert!(c, 0, fmsub)
6174 }
6175}
6176
6177/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements
6178/// in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element
6179/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
6180/// upper elements of dst.
6181///
6182/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_sh)
6183#[inline]
6184#[target_feature(enable = "avx512fp16")]
6185#[cfg_attr(test, assert_instr(vfmsub))]
6186#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6187#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6188pub const fn _mm_maskz_fmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6189 unsafe {
6190 let mut fmsub: f16 = 0.0;
6191 if k & 1 != 0 {
6192 let extracta: f16 = simd_extract!(a, 0);
6193 let extractb: f16 = simd_extract!(b, 0);
6194 let extractc: f16 = simd_extract!(c, 0);
6195 fmsub = fmaf16(a:extracta, b:extractb, -extractc);
6196 }
6197 simd_insert!(a, 0, fmsub)
6198 }
6199}
6200
6201/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements
6202/// in c from the intermediate result. Store the result in the lower element of dst, and copy the upper
6203/// 7 packed elements from a to the upper elements of dst.
6204///
6205/// Rounding is done according to the rounding parameter, which can be one of:
6206///
6207/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6208/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6209/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6210/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6211/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6212///
6213/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_round_sh)
6214#[inline]
6215#[target_feature(enable = "avx512fp16")]
6216#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
6217#[rustc_legacy_const_generics(3)]
6218#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6219pub fn _mm_fmsub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6220 unsafe {
6221 static_assert_rounding!(ROUNDING);
6222 let extracta: f16 = simd_extract!(a, 0);
6223 let extractb: f16 = simd_extract!(b, 0);
6224 let extractc: f16 = simd_extract!(c, 0);
6225 let r: f16 = vfmaddsh(a:extracta, b:extractb, -extractc, ROUNDING);
6226 simd_insert!(a, 0, r)
6227 }
6228}
6229
6230/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements
6231/// in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element
6232/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
6233/// upper elements of dst.
6234///
6235/// Rounding is done according to the rounding parameter, which can be one of:
6236///
6237/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6238/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6239/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6240/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6241/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6242///
6243/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_round_sh)
6244#[inline]
6245#[target_feature(enable = "avx512fp16")]
6246#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
6247#[rustc_legacy_const_generics(4)]
6248#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6249pub fn _mm_mask_fmsub_round_sh<const ROUNDING: i32>(
6250 a: __m128h,
6251 k: __mmask8,
6252 b: __m128h,
6253 c: __m128h,
6254) -> __m128h {
6255 unsafe {
6256 static_assert_rounding!(ROUNDING);
6257 let mut fmsub: f16 = simd_extract!(a, 0);
6258 if k & 1 != 0 {
6259 let extractb: f16 = simd_extract!(b, 0);
6260 let extractc: f16 = simd_extract!(c, 0);
6261 fmsub = vfmaddsh(a:fmsub, b:extractb, -extractc, ROUNDING);
6262 }
6263 simd_insert!(a, 0, fmsub)
6264 }
6265}
6266
6267/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements
6268/// in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element
6269/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the
6270/// upper elements of dst.
6271///
6272/// Rounding is done according to the rounding parameter, which can be one of:
6273///
6274/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6275/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6276/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6277/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6278/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6279///
6280/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_round_sh)
6281#[inline]
6282#[target_feature(enable = "avx512fp16")]
6283#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
6284#[rustc_legacy_const_generics(4)]
6285#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6286pub fn _mm_mask3_fmsub_round_sh<const ROUNDING: i32>(
6287 a: __m128h,
6288 b: __m128h,
6289 c: __m128h,
6290 k: __mmask8,
6291) -> __m128h {
6292 unsafe {
6293 static_assert_rounding!(ROUNDING);
6294 let mut fmsub: f16 = simd_extract!(c, 0);
6295 if k & 1 != 0 {
6296 let extracta: f16 = simd_extract!(a, 0);
6297 let extractb: f16 = simd_extract!(b, 0);
6298 fmsub = vfmaddsh(a:extracta, b:extractb, -fmsub, ROUNDING);
6299 }
6300 simd_insert!(c, 0, fmsub)
6301 }
6302}
6303
6304/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements
6305/// in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element
6306/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
6307/// upper elements of dst.
6308///
6309/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_round_sh)
6310#[inline]
6311#[target_feature(enable = "avx512fp16")]
6312#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
6313#[rustc_legacy_const_generics(4)]
6314#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6315pub fn _mm_maskz_fmsub_round_sh<const ROUNDING: i32>(
6316 k: __mmask8,
6317 a: __m128h,
6318 b: __m128h,
6319 c: __m128h,
6320) -> __m128h {
6321 unsafe {
6322 static_assert_rounding!(ROUNDING);
6323 let mut fmsub: f16 = 0.0;
6324 if k & 1 != 0 {
6325 let extracta: f16 = simd_extract!(a, 0);
6326 let extractb: f16 = simd_extract!(b, 0);
6327 let extractc: f16 = simd_extract!(c, 0);
6328 fmsub = vfmaddsh(a:extracta, b:extractb, -extractc, ROUNDING);
6329 }
6330 simd_insert!(a, 0, fmsub)
6331 }
6332}
6333
6334/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6335/// result from packed elements in c, and store the results in dst.
6336///
6337/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_ph)
6338#[inline]
6339#[target_feature(enable = "avx512fp16,avx512vl")]
6340#[cfg_attr(test, assert_instr(vfnmadd))]
6341#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6342#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6343pub const fn _mm_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6344 unsafe { simd_fma(x:simd_neg(a), y:b, z:c) }
6345}
6346
6347/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6348/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6349/// from a when the corresponding mask bit is not set).
6350///
6351/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_ph)
6352#[inline]
6353#[target_feature(enable = "avx512fp16,avx512vl")]
6354#[cfg_attr(test, assert_instr(vfnmadd))]
6355#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6356#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6357pub const fn _mm_mask_fnmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
6358 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmadd_ph(a, b, c), no:a) }
6359}
6360
6361/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6362/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6363/// from c when the corresponding mask bit is not set).
6364///
6365/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_ph)
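///
/// Illustrative sketch of the mask3 fallback (hypothetical values, not a doctest):
///
/// ```ignore
/// let a = _mm_set1_ph(2.0);
/// let b = _mm_set1_ph(3.0);
/// let c = _mm_set1_ph(10.0);
/// // Lanes 0 and 1 are recomputed as -(a * b) + c == 4.0;
/// // the remaining lanes keep their value from `c` (10.0).
/// let r = _mm_mask3_fnmadd_ph(a, b, c, 0b0000_0011);
/// ```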
6366#[inline]
6367#[target_feature(enable = "avx512fp16,avx512vl")]
6368#[cfg_attr(test, assert_instr(vfnmadd))]
6369#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6370#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6371pub const fn _mm_mask3_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), c) }
6373}
6374
6375/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6376/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed
6377/// out when the corresponding mask bit is not set).
6378///
6379/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_ph)
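///
/// Illustrative sketch of the zeromask behavior (hypothetical values, not a doctest):
///
/// ```ignore
/// let a = _mm_set1_ph(2.0);
/// let b = _mm_set1_ph(3.0);
/// let c = _mm_set1_ph(10.0);
/// // Lanes 0..=3 become -(a * b) + c == 4.0; lanes 4..=7 are zeroed.
/// let r = _mm_maskz_fnmadd_ph(0b0000_1111, a, b, c);
/// ```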
6380#[inline]
6381#[target_feature(enable = "avx512fp16,avx512vl")]
6382#[cfg_attr(test, assert_instr(vfnmadd))]
6383#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6384#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6385pub const fn _mm_maskz_fnmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), _mm_setzero_ph()) }
6387}
6388
6389/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6390/// result from packed elements in c, and store the results in dst.
6391///
6392/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmadd_ph)
6393#[inline]
6394#[target_feature(enable = "avx512fp16,avx512vl")]
6395#[cfg_attr(test, assert_instr(vfnmadd))]
6396#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6397#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6398pub const fn _mm256_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_fma(simd_neg(a), b, c) }
6400}
6401
6402/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6403/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6404/// from a when the corresponding mask bit is not set).
6405///
6406/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fnmadd_ph)
6407#[inline]
6408#[target_feature(enable = "avx512fp16,avx512vl")]
6409#[cfg_attr(test, assert_instr(vfnmadd))]
6410#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6411#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6412pub const fn _mm256_mask_fnmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), a) }
6414}
6415
6416/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6417/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6418/// from c when the corresponding mask bit is not set).
6419///
6420/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fnmadd_ph)
6421#[inline]
6422#[target_feature(enable = "avx512fp16,avx512vl")]
6423#[cfg_attr(test, assert_instr(vfnmadd))]
6424#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6425#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6426pub const fn _mm256_mask3_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), c) }
6428}
6429
6430/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6431/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed
6432/// out when the corresponding mask bit is not set).
6433///
6434/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fnmadd_ph)
6435#[inline]
6436#[target_feature(enable = "avx512fp16,avx512vl")]
6437#[cfg_attr(test, assert_instr(vfnmadd))]
6438#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6439#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6440pub const fn _mm256_maskz_fnmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), _mm256_setzero_ph()) }
6442}
6443
6444/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6445/// result from packed elements in c, and store the results in dst.
6446///
6447/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_ph)
6448#[inline]
6449#[target_feature(enable = "avx512fp16")]
6450#[cfg_attr(test, assert_instr(vfnmadd))]
6451#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6452#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6453pub const fn _mm512_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_fma(simd_neg(a), b, c) }
6455}
6456
6457/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6458/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6459/// from a when the corresponding mask bit is not set).
6460///
6461/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_ph)
6462#[inline]
6463#[target_feature(enable = "avx512fp16")]
6464#[cfg_attr(test, assert_instr(vfnmadd))]
6465#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6466#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6467pub const fn _mm512_mask_fnmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), a) }
6469}
6470
6471/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6472/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6473/// from c when the corresponding mask bit is not set).
6474///
6475/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_ph)
6476#[inline]
6477#[target_feature(enable = "avx512fp16")]
6478#[cfg_attr(test, assert_instr(vfnmadd))]
6479#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6480#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6481pub const fn _mm512_mask3_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), c) }
6483}
6484
6485/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6486/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed
6487/// out when the corresponding mask bit is not set).
6488///
6489/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_ph)
6490#[inline]
6491#[target_feature(enable = "avx512fp16")]
6492#[cfg_attr(test, assert_instr(vfnmadd))]
6493#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6494#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6495pub const fn _mm512_maskz_fnmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), _mm512_setzero_ph()) }
6497}
6498
6499/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6500/// result from packed elements in c, and store the results in dst.
6501///
6502/// Rounding is done according to the rounding parameter, which can be one of:
6503///
6504/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6505/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6506/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6507/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6508/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6509///
6510/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_round_ph)
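///
/// A minimal usage sketch showing how the rounding constant is passed (hypothetical
/// values, not a doctest; requires the `avx512fp16` target feature):
///
/// ```ignore
/// let a = _mm512_set1_ph(2.0);
/// let b = _mm512_set1_ph(3.0);
/// let c = _mm512_set1_ph(10.0);
/// // Each of the 32 lanes is -(2.0 * 3.0) + 10.0 == 4.0, rounded to nearest
/// // with exceptions suppressed.
/// let r = _mm512_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
/// ```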
6511#[inline]
6512#[target_feature(enable = "avx512fp16")]
6513#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6514#[rustc_legacy_const_generics(3)]
6515#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6516pub fn _mm512_fnmadd_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
6517 unsafe {
6518 static_assert_rounding!(ROUNDING);
        vfmaddph_512(simd_neg(a), b, c, ROUNDING)
6520 }
6521}
6522
6523/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6524/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6525/// from a when the corresponding mask bit is not set).
6526///
6527/// Rounding is done according to the rounding parameter, which can be one of:
6528///
6529/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6530/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6531/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6532/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6533/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6534///
6535/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_round_ph)
6536#[inline]
6537#[target_feature(enable = "avx512fp16")]
6538#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6539#[rustc_legacy_const_generics(4)]
6540#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6541pub fn _mm512_mask_fnmadd_round_ph<const ROUNDING: i32>(
6542 a: __m512h,
6543 k: __mmask32,
6544 b: __m512h,
6545 c: __m512h,
6546) -> __m512h {
6547 unsafe {
6548 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fnmadd_round_ph::<ROUNDING>(a, b, c), a)
6550 }
6551}
6552
6553/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6554/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6555/// from c when the corresponding mask bit is not set).
6556///
6557/// Rounding is done according to the rounding parameter, which can be one of:
6558///
6559/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6560/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6561/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6562/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6563/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6564///
6565/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_round_ph)
6566#[inline]
6567#[target_feature(enable = "avx512fp16")]
6568#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6569#[rustc_legacy_const_generics(4)]
6570#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6571pub fn _mm512_mask3_fnmadd_round_ph<const ROUNDING: i32>(
6572 a: __m512h,
6573 b: __m512h,
6574 c: __m512h,
6575 k: __mmask32,
6576) -> __m512h {
6577 unsafe {
6578 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fnmadd_round_ph::<ROUNDING>(a, b, c), c)
6580 }
6581}
6582
6583/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6584/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed
6585/// out when the corresponding mask bit is not set).
6586///
6587/// Rounding is done according to the rounding parameter, which can be one of:
6588///
6589/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6590/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6591/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6592/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6593/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6594///
6595/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_round_ph)
6596#[inline]
6597#[target_feature(enable = "avx512fp16")]
6598#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6599#[rustc_legacy_const_generics(4)]
6600#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6601pub fn _mm512_maskz_fnmadd_round_ph<const ROUNDING: i32>(
6602 k: __mmask32,
6603 a: __m512h,
6604 b: __m512h,
6605 c: __m512h,
6606) -> __m512h {
6607 unsafe {
6608 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(
            k,
            _mm512_fnmadd_round_ph::<ROUNDING>(a, b, c),
            _mm512_setzero_ph(),
        )
6614 }
6615}
6616
6617/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6618/// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed
6619/// elements from a to the upper elements of dst.
6620///
6621/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_sh)
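///
/// Illustrative sketch of the scalar (lower-lane) behavior (hypothetical values, not a doctest):
///
/// ```ignore
/// let a = _mm_set_sh(2.0);
/// let b = _mm_set_sh(3.0);
/// let c = _mm_set_sh(10.0);
/// // Lane 0 becomes -(2.0 * 3.0) + 10.0 == 4.0; lanes 1..=7 are copied from `a`.
/// let r = _mm_fnmadd_sh(a, b, c);
/// ```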
6622#[inline]
6623#[target_feature(enable = "avx512fp16")]
6624#[cfg_attr(test, assert_instr(vfnmadd))]
6625#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6626#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6627pub const fn _mm_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6628 unsafe {
6629 let extracta: f16 = simd_extract!(a, 0);
6630 let extractb: f16 = simd_extract!(b, 0);
6631 let extractc: f16 = simd_extract!(c, 0);
        let r: f16 = fmaf16(-extracta, extractb, extractc);
6633 simd_insert!(a, 0, r)
6634 }
6635}
6636
6637/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6638/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
6639/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6640/// elements of dst.
6641///
6642/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_sh)
6643#[inline]
6644#[target_feature(enable = "avx512fp16")]
6645#[cfg_attr(test, assert_instr(vfnmadd))]
6646#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6647#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6648pub const fn _mm_mask_fnmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
6649 unsafe {
6650 let mut fnmadd: f16 = simd_extract!(a, 0);
6651 if k & 1 != 0 {
6652 let extractb: f16 = simd_extract!(b, 0);
6653 let extractc: f16 = simd_extract!(c, 0);
            fnmadd = fmaf16(-fnmadd, extractb, extractc);
6655 }
6656 simd_insert!(a, 0, fnmadd)
6657 }
6658}
6659
6660/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6661/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
6662/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper
6663/// elements of dst.
6664///
6665/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_sh)
6666#[inline]
6667#[target_feature(enable = "avx512fp16")]
6668#[cfg_attr(test, assert_instr(vfnmadd))]
6669#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6670#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6671pub const fn _mm_mask3_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
6672 unsafe {
6673 let mut fnmadd: f16 = simd_extract!(c, 0);
6674 if k & 1 != 0 {
6675 let extracta: f16 = simd_extract!(a, 0);
6676 let extractb: f16 = simd_extract!(b, 0);
            fnmadd = fmaf16(-extracta, extractb, fnmadd);
6678 }
6679 simd_insert!(c, 0, fnmadd)
6680 }
6681}
6682
6683/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6684/// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element
6685/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6686/// elements of dst.
6687///
6688/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_sh)
6689#[inline]
6690#[target_feature(enable = "avx512fp16")]
6691#[cfg_attr(test, assert_instr(vfnmadd))]
6692#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6693#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6694pub const fn _mm_maskz_fnmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6695 unsafe {
6696 let mut fnmadd: f16 = 0.0;
6697 if k & 1 != 0 {
6698 let extracta: f16 = simd_extract!(a, 0);
6699 let extractb: f16 = simd_extract!(b, 0);
6700 let extractc: f16 = simd_extract!(c, 0);
            fnmadd = fmaf16(-extracta, extractb, extractc);
6702 }
6703 simd_insert!(a, 0, fnmadd)
6704 }
6705}
6706
6707/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6708/// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed
6709/// elements from a to the upper elements of dst.
6710///
6711/// Rounding is done according to the rounding parameter, which can be one of:
6712///
6713/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6714/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6715/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6716/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6717/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6718///
6719/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_round_sh)
6720#[inline]
6721#[target_feature(enable = "avx512fp16")]
6722#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6723#[rustc_legacy_const_generics(3)]
6724#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6725pub fn _mm_fnmadd_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6726 unsafe {
6727 static_assert_rounding!(ROUNDING);
6728 let extracta: f16 = simd_extract!(a, 0);
6729 let extractb: f16 = simd_extract!(b, 0);
6730 let extractc: f16 = simd_extract!(c, 0);
        let r: f16 = vfmaddsh(-extracta, extractb, extractc, ROUNDING);
6732 simd_insert!(a, 0, r)
6733 }
6734}
6735
6736/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6737/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
6738/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6739/// elements of dst.
6740///
6741/// Rounding is done according to the rounding parameter, which can be one of:
6742///
6743/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6744/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6745/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6746/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6747/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6748///
6749/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_round_sh)
6750#[inline]
6751#[target_feature(enable = "avx512fp16")]
6752#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6753#[rustc_legacy_const_generics(4)]
6754#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6755pub fn _mm_mask_fnmadd_round_sh<const ROUNDING: i32>(
6756 a: __m128h,
6757 k: __mmask8,
6758 b: __m128h,
6759 c: __m128h,
6760) -> __m128h {
6761 unsafe {
6762 static_assert_rounding!(ROUNDING);
6763 let mut fnmadd: f16 = simd_extract!(a, 0);
6764 if k & 1 != 0 {
6765 let extractb: f16 = simd_extract!(b, 0);
6766 let extractc: f16 = simd_extract!(c, 0);
            fnmadd = vfmaddsh(-fnmadd, extractb, extractc, ROUNDING);
6768 }
6769 simd_insert!(a, 0, fnmadd)
6770 }
6771}
6772
6773/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6774/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
6775/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper
6776/// elements of dst.
6777///
6778/// Rounding is done according to the rounding parameter, which can be one of:
6779///
6780/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6781/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6782/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6783/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6784/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6785///
6786/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_round_sh)
6787#[inline]
6788#[target_feature(enable = "avx512fp16")]
6789#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6790#[rustc_legacy_const_generics(4)]
6791#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6792pub fn _mm_mask3_fnmadd_round_sh<const ROUNDING: i32>(
6793 a: __m128h,
6794 b: __m128h,
6795 c: __m128h,
6796 k: __mmask8,
6797) -> __m128h {
6798 unsafe {
6799 static_assert_rounding!(ROUNDING);
6800 let mut fnmadd: f16 = simd_extract!(c, 0);
6801 if k & 1 != 0 {
6802 let extracta: f16 = simd_extract!(a, 0);
6803 let extractb: f16 = simd_extract!(b, 0);
            fnmadd = vfmaddsh(-extracta, extractb, fnmadd, ROUNDING);
6805 }
6806 simd_insert!(c, 0, fnmadd)
6807 }
6808}
6809
6810/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6811/// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element
6812/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6813/// elements of dst.
6814///
6815/// Rounding is done according to the rounding parameter, which can be one of:
6816///
6817/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6818/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6819/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6820/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6821/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6822///
6823/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_round_sh)
6824#[inline]
6825#[target_feature(enable = "avx512fp16")]
6826#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6827#[rustc_legacy_const_generics(4)]
6828#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6829pub fn _mm_maskz_fnmadd_round_sh<const ROUNDING: i32>(
6830 k: __mmask8,
6831 a: __m128h,
6832 b: __m128h,
6833 c: __m128h,
6834) -> __m128h {
6835 unsafe {
6836 static_assert_rounding!(ROUNDING);
6837 let mut fnmadd: f16 = 0.0;
6838 if k & 1 != 0 {
6839 let extracta: f16 = simd_extract!(a, 0);
6840 let extractb: f16 = simd_extract!(b, 0);
6841 let extractc: f16 = simd_extract!(c, 0);
            fnmadd = vfmaddsh(-extracta, extractb, extractc, ROUNDING);
6843 }
6844 simd_insert!(a, 0, fnmadd)
6845 }
6846}
6847
6848/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6849/// in c from the negated intermediate result, and store the results in dst.
6850///
6851/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_ph)
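///
/// A minimal usage sketch (hypothetical values, not a doctest; requires the
/// `avx512fp16` and `avx512vl` target features):
///
/// ```ignore
/// let a = _mm_set1_ph(2.0);
/// let b = _mm_set1_ph(3.0);
/// let c = _mm_set1_ph(1.0);
/// // Every lane computes -(2.0 * 3.0) - 1.0 == -7.0.
/// let r = _mm_fnmsub_ph(a, b, c);
/// ```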
6852#[inline]
6853#[target_feature(enable = "avx512fp16,avx512vl")]
6854#[cfg_attr(test, assert_instr(vfnmsub))]
6855#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6856#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6857pub const fn _mm_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
6859}
6860
6861/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6862/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6863/// copied from a when the corresponding mask bit is not set).
6864///
6865/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_ph)
6866#[inline]
6867#[target_feature(enable = "avx512fp16,avx512vl")]
6868#[cfg_attr(test, assert_instr(vfnmsub))]
6869#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6870#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6871pub const fn _mm_mask_fnmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), a) }
6873}
6874
6875/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6876/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6877/// copied from c when the corresponding mask bit is not set).
6878///
6879/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_ph)
6880#[inline]
6881#[target_feature(enable = "avx512fp16,avx512vl")]
6882#[cfg_attr(test, assert_instr(vfnmsub))]
6883#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6884#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6885pub const fn _mm_mask3_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), c) }
6887}
6888
6889/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6890/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
6891/// zeroed out when the corresponding mask bit is not set).
6892///
6893/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_ph)
6894#[inline]
6895#[target_feature(enable = "avx512fp16,avx512vl")]
6896#[cfg_attr(test, assert_instr(vfnmsub))]
6897#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6898#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6899pub const fn _mm_maskz_fnmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), _mm_setzero_ph()) }
6901}
6902
6903/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6904/// in c from the negated intermediate result, and store the results in dst.
6905///
6906/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmsub_ph)
6907#[inline]
6908#[target_feature(enable = "avx512fp16,avx512vl")]
6909#[cfg_attr(test, assert_instr(vfnmsub))]
6910#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6911#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6912pub const fn _mm256_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
6914}
6915
6916/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6917/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6918/// copied from a when the corresponding mask bit is not set).
6919///
6920/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fnmsub_ph)
6921#[inline]
6922#[target_feature(enable = "avx512fp16,avx512vl")]
6923#[cfg_attr(test, assert_instr(vfnmsub))]
6924#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6925#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6926pub const fn _mm256_mask_fnmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), a) }
6928}
6929
6930/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6931/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6932/// copied from c when the corresponding mask bit is not set).
6933///
6934/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fnmsub_ph)
6935#[inline]
6936#[target_feature(enable = "avx512fp16,avx512vl")]
6937#[cfg_attr(test, assert_instr(vfnmsub))]
6938#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6939#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6940pub const fn _mm256_mask3_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), c) }
6942}
6943
6944/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6945/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
6946/// zeroed out when the corresponding mask bit is not set).
6947///
6948/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fnmsub_ph)
6949#[inline]
6950#[target_feature(enable = "avx512fp16,avx512vl")]
6951#[cfg_attr(test, assert_instr(vfnmsub))]
6952#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6953#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6954pub const fn _mm256_maskz_fnmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), _mm256_setzero_ph()) }
6956}
6957
6958/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6959/// in c from the negated intermediate result, and store the results in dst.
6960///
6961/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_ph)
6962#[inline]
6963#[target_feature(enable = "avx512fp16")]
6964#[cfg_attr(test, assert_instr(vfnmsub))]
6965#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6966#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6967pub const fn _mm512_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
6969}
6970
6971/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6972/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6973/// copied from a when the corresponding mask bit is not set).
6974///
6975/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_ph)
6976#[inline]
6977#[target_feature(enable = "avx512fp16")]
6978#[cfg_attr(test, assert_instr(vfnmsub))]
6979#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6980#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6981pub const fn _mm512_mask_fnmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), a) }
6983}
6984
6985/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6986/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6987/// copied from c when the corresponding mask bit is not set).
6988///
6989/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_ph)
6990#[inline]
6991#[target_feature(enable = "avx512fp16")]
6992#[cfg_attr(test, assert_instr(vfnmsub))]
6993#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
6994#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6995pub const fn _mm512_mask3_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), c) }
6997}
6998
6999/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
7000/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
7001/// zeroed out when the corresponding mask bit is not set).
7002///
7003/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_ph)
7004#[inline]
7005#[target_feature(enable = "avx512fp16")]
7006#[cfg_attr(test, assert_instr(vfnmsub))]
7007#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7008#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7009pub const fn _mm512_maskz_fnmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), _mm512_setzero_ph()) }
7011}
7012
7013/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
7014/// in c from the negated intermediate result, and store the results in dst.
7015///
7016/// Rounding is done according to the rounding parameter, which can be one of:
7017///
7018/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7019/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7020/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7021/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7022/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7023///
7024/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_round_ph)
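///
/// A minimal usage sketch showing the rounding constant (hypothetical values, not a doctest):
///
/// ```ignore
/// let a = _mm512_set1_ph(2.0);
/// let b = _mm512_set1_ph(3.0);
/// let c = _mm512_set1_ph(1.0);
/// // Each lane is -(2.0 * 3.0) - 1.0 == -7.0, rounded to nearest with exceptions suppressed.
/// let r = _mm512_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
/// ```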
7025#[inline]
7026#[target_feature(enable = "avx512fp16")]
7027#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
7028#[rustc_legacy_const_generics(3)]
7029#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7030pub fn _mm512_fnmsub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
7031 unsafe {
7032 static_assert_rounding!(ROUNDING);
        vfmaddph_512(simd_neg(a), b, simd_neg(c), ROUNDING)
7034 }
7035}
7036
7037/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
7038/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
7039/// copied from a when the corresponding mask bit is not set).
7040///
7041/// Rounding is done according to the rounding parameter, which can be one of:
7042///
7043/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7044/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7045/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7046/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7047/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7048///
7049/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_round_ph)
7050#[inline]
7051#[target_feature(enable = "avx512fp16")]
7052#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
7053#[rustc_legacy_const_generics(4)]
7054#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7055pub fn _mm512_mask_fnmsub_round_ph<const ROUNDING: i32>(
7056 a: __m512h,
7057 k: __mmask32,
7058 b: __m512h,
7059 c: __m512h,
7060) -> __m512h {
7061 unsafe {
7062 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fnmsub_round_ph::<ROUNDING>(a, b, c), a)
7064 }
7065}
7066
7067/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
7068/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
7069/// copied from c when the corresponding mask bit is not set).
7070///
7071/// Rounding is done according to the rounding parameter, which can be one of:
7072///
7073/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7074/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7075/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7076/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7077/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7078///
7079/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_round_ph)
7080#[inline]
7081#[target_feature(enable = "avx512fp16")]
7082#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
7083#[rustc_legacy_const_generics(4)]
7084#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7085pub fn _mm512_mask3_fnmsub_round_ph<const ROUNDING: i32>(
7086 a: __m512h,
7087 b: __m512h,
7088 c: __m512h,
7089 k: __mmask32,
7090) -> __m512h {
7091 unsafe {
7092 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fnmsub_round_ph::<ROUNDING>(a, b, c), c)
7094 }
7095}
7096
7097/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
7098/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
7099/// zeroed out when the corresponding mask bit is not set).
7100///
7101/// Rounding is done according to the rounding parameter, which can be one of:
7102///
7103/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7104/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7105/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7106/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7107/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7108///
7109/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_round_ph)
7110#[inline]
7111#[target_feature(enable = "avx512fp16")]
7112#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
7113#[rustc_legacy_const_generics(4)]
7114#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7115pub fn _mm512_maskz_fnmsub_round_ph<const ROUNDING: i32>(
7116 k: __mmask32,
7117 a: __m512h,
7118 b: __m512h,
7119 c: __m512h,
7120) -> __m512h {
7121 unsafe {
7122 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(
            k,
            _mm512_fnmsub_round_ph::<ROUNDING>(a, b, c),
            _mm512_setzero_ph(),
        )
7128 }
7129}
7130
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
/// in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper 7 packed
7133/// elements from a to the upper elements of dst.
7134///
7135/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_sh)
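///
/// Illustrative sketch of the scalar (lower-lane) behavior (hypothetical values, not a doctest):
///
/// ```ignore
/// let a = _mm_set_sh(2.0);
/// let b = _mm_set_sh(3.0);
/// let c = _mm_set_sh(1.0);
/// // Lane 0 becomes -(2.0 * 3.0) - 1.0 == -7.0; lanes 1..=7 are copied from `a`.
/// let r = _mm_fnmsub_sh(a, b, c);
/// ```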
7136#[inline]
7137#[target_feature(enable = "avx512fp16")]
7138#[cfg_attr(test, assert_instr(vfnmsub))]
7139#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7140#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7141pub const fn _mm_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
7142 unsafe {
7143 let extracta: f16 = simd_extract!(a, 0);
7144 let extractb: f16 = simd_extract!(b, 0);
7145 let extractc: f16 = simd_extract!(c, 0);
        let r: f16 = fmaf16(-extracta, extractb, -extractc);
7147 simd_insert!(a, 0, r)
7148 }
7149}
7150
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
/// in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element
7153/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
7154/// elements of dst.
7155///
7156/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_sh)
7157#[inline]
7158#[target_feature(enable = "avx512fp16")]
7159#[cfg_attr(test, assert_instr(vfnmsub))]
7160#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7161#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7162pub const fn _mm_mask_fnmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
7163 unsafe {
7164 let mut fnmsub: f16 = simd_extract!(a, 0);
7165 if k & 1 != 0 {
7166 let extractb: f16 = simd_extract!(b, 0);
7167 let extractc: f16 = simd_extract!(c, 0);
            fnmsub = fmaf16(-fnmsub, extractb, -extractc);
7169 }
7170 simd_insert!(a, 0, fnmsub)
7171 }
7172}
7173
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
/// in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element
7176/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper
7177/// elements of dst.
7178///
7179/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_sh)
7180#[inline]
7181#[target_feature(enable = "avx512fp16")]
7182#[cfg_attr(test, assert_instr(vfnmsub))]
7183#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7184#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7185pub const fn _mm_mask3_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
7186 unsafe {
7187 let mut fnmsub: f16 = simd_extract!(c, 0);
7188 if k & 1 != 0 {
7189 let extracta: f16 = simd_extract!(a, 0);
7190 let extractb: f16 = simd_extract!(b, 0);
            fnmsub = fmaf16(-extracta, extractb, -fnmsub);
7192 }
7193 simd_insert!(c, 0, fnmsub)
7194 }
7195}
7196
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
/// in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element
7199/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
7200/// elements of dst.
7201///
7202/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_sh)
7203#[inline]
7204#[target_feature(enable = "avx512fp16")]
7205#[cfg_attr(test, assert_instr(vfnmsub))]
7206#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7207#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7208pub const fn _mm_maskz_fnmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
7209 unsafe {
7210 let mut fnmsub: f16 = 0.0;
7211 if k & 1 != 0 {
7212 let extracta: f16 = simd_extract!(a, 0);
7213 let extractb: f16 = simd_extract!(b, 0);
7214 let extractc: f16 = simd_extract!(c, 0);
            fnmsub = fmaf16(-extracta, extractb, -extractc);
7216 }
7217 simd_insert!(a, 0, fnmsub)
7218 }
7219}
7220
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
/// in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper 7 packed
7223/// elements from a to the upper elements of dst.
7224///
7225/// Rounding is done according to the rounding parameter, which can be one of:
7226///
7227/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7228/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7229/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7230/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7231/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7232///
7233/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_round_sh)
7234#[inline]
7235#[target_feature(enable = "avx512fp16")]
7236#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
7237#[rustc_legacy_const_generics(3)]
7238#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7239pub fn _mm_fnmsub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
7240 unsafe {
7241 static_assert_rounding!(ROUNDING);
7242 let extracta: f16 = simd_extract!(a, 0);
7243 let extractb: f16 = simd_extract!(b, 0);
7244 let extractc: f16 = simd_extract!(c, 0);
        let r: f16 = vfmaddsh(-extracta, extractb, -extractc, ROUNDING);
7246 simd_insert!(a, 0, r)
7247 }
7248}
7249
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
/// in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element
7252/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
7253/// elements of dst.
7254///
7255/// Rounding is done according to the rounding parameter, which can be one of:
7256///
7257/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7258/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7259/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7260/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7261/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7262///
7263/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_round_sh)
7264#[inline]
7265#[target_feature(enable = "avx512fp16")]
7266#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
7267#[rustc_legacy_const_generics(4)]
7268#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7269pub fn _mm_mask_fnmsub_round_sh<const ROUNDING: i32>(
7270 a: __m128h,
7271 k: __mmask8,
7272 b: __m128h,
7273 c: __m128h,
7274) -> __m128h {
7275 unsafe {
7276 static_assert_rounding!(ROUNDING);
7277 let mut fnmsub: f16 = simd_extract!(a, 0);
7278 if k & 1 != 0 {
7279 let extractb: f16 = simd_extract!(b, 0);
7280 let extractc: f16 = simd_extract!(c, 0);
            fnmsub = vfmaddsh(-fnmsub, extractb, -extractc, ROUNDING);
7282 }
7283 simd_insert!(a, 0, fnmsub)
7284 }
7285}
7286
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
/// in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element
7289/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper
7290/// elements of dst.
7291///
7292/// Rounding is done according to the rounding parameter, which can be one of:
7293///
7294/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7295/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7296/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7297/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7298/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7299///
7300/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_round_sh)
7301#[inline]
7302#[target_feature(enable = "avx512fp16")]
7303#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
7304#[rustc_legacy_const_generics(4)]
7305#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7306pub fn _mm_mask3_fnmsub_round_sh<const ROUNDING: i32>(
7307 a: __m128h,
7308 b: __m128h,
7309 c: __m128h,
7310 k: __mmask8,
7311) -> __m128h {
7312 unsafe {
7313 static_assert_rounding!(ROUNDING);
7314 let mut fnmsub: f16 = simd_extract!(c, 0);
7315 if k & 1 != 0 {
7316 let extracta: f16 = simd_extract!(a, 0);
7317 let extractb: f16 = simd_extract!(b, 0);
            fnmsub = vfmaddsh(-extracta, extractb, -fnmsub, ROUNDING);
7319 }
7320 simd_insert!(c, 0, fnmsub)
7321 }
7322}
7323
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
/// in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element
7326/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
7327/// elements of dst.
7328///
7329/// Rounding is done according to the rounding parameter, which can be one of:
7330///
7331/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7332/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7333/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7334/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7335/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7336///
7337/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_round_sh)
7338#[inline]
7339#[target_feature(enable = "avx512fp16")]
7340#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
7341#[rustc_legacy_const_generics(4)]
7342#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7343pub fn _mm_maskz_fnmsub_round_sh<const ROUNDING: i32>(
7344 k: __mmask8,
7345 a: __m128h,
7346 b: __m128h,
7347 c: __m128h,
7348) -> __m128h {
7349 unsafe {
7350 static_assert_rounding!(ROUNDING);
7351 let mut fnmsub: f16 = 0.0;
7352 if k & 1 != 0 {
7353 let extracta: f16 = simd_extract!(a, 0);
7354 let extractb: f16 = simd_extract!(b, 0);
7355 let extractc: f16 = simd_extract!(c, 0);
            fnmsub = vfmaddsh(-extracta, extractb, -extractc, ROUNDING);
7357 }
7358 simd_insert!(a, 0, fnmsub)
7359 }
7360}
7361
7362/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7363/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
7364///
7365/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmaddsub_ph)
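///
/// Illustrative sketch of the alternating pattern (hypothetical values, not a doctest;
/// requires the `avx512fp16` and `avx512vl` target features):
///
/// ```ignore
/// let a = _mm_set1_ph(2.0);
/// let b = _mm_set1_ph(3.0);
/// let c = _mm_set1_ph(1.0);
/// // Even-indexed lanes subtract: 2.0 * 3.0 - 1.0 == 5.0.
/// // Odd-indexed lanes add:       2.0 * 3.0 + 1.0 == 7.0.
/// let r = _mm_fmaddsub_ph(a, b, c);
/// ```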
7366#[inline]
7367#[target_feature(enable = "avx512fp16,avx512vl")]
7368#[cfg_attr(test, assert_instr(vfmaddsub))]
7369#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7370#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7371pub const fn _mm_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
7372 unsafe {
        let add: __m128h = simd_fma(a, b, c);
        let sub: __m128h = simd_fma(a, b, simd_neg(c));
7375 simd_shuffle!(sub, add, [0, 9, 2, 11, 4, 13, 6, 15])
7376 }
7377}
7378
7379/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7380/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7381/// (the element is copied from a when the corresponding mask bit is not set).
7382///
7383/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmaddsub_ph)
7384#[inline]
7385#[target_feature(enable = "avx512fp16,avx512vl")]
7386#[cfg_attr(test, assert_instr(vfmaddsub))]
7387#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7388#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7389pub const fn _mm_mask_fmaddsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), a) }
7391}
7392
7393/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7394/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7395/// (the element is copied from c when the corresponding mask bit is not set).
7396///
7397/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmaddsub_ph)
7398#[inline]
7399#[target_feature(enable = "avx512fp16,avx512vl")]
7400#[cfg_attr(test, assert_instr(vfmaddsub))]
7401#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7402#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7403pub const fn _mm_mask3_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), c) }
7405}
7406
7407/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7408/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7409/// (the element is zeroed out when the corresponding mask bit is not set).
7410///
7411/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmaddsub_ph)
7412#[inline]
7413#[target_feature(enable = "avx512fp16,avx512vl")]
7414#[cfg_attr(test, assert_instr(vfmaddsub))]
7415#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7416#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7417pub const fn _mm_maskz_fmaddsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), _mm_setzero_ph()) }
7419}
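
// Sketch of how the mask, mask3, and maskz variants above differ (illustrative only):
// each one computes the same full fmaddsub result, then selects per lane between that
// result and a fallback: a for the mask form, c for mask3, and zero for maskz.
#[cfg(test)]
#[allow(dead_code)]
fn select_by_mask(k: u8, result: [f32; 8], fallback: [f32; 8]) -> [f32; 8] {
    let mut dst = [0.0f32; 8];
    for i in 0..8 {
        // Bit i of the mask chooses between the computed lane and the fallback lane.
        dst[i] = if (k >> i) & 1 == 1 { result[i] } else { fallback[i] };
    }
    dst
}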
7420
7421/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7422/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
7423///
7424/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmaddsub_ph)
7425#[inline]
7426#[target_feature(enable = "avx512fp16,avx512vl")]
7427#[cfg_attr(test, assert_instr(vfmaddsub))]
7428#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7429#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7430pub const fn _mm256_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
7431 unsafe {
        let add: __m256h = simd_fma(a, b, c);
        let sub: __m256h = simd_fma(a, b, simd_neg(c));
7434 simd_shuffle!(
7435 sub,
7436 add,
7437 [0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31]
7438 )
7439 }
7440}
7441
7442/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7443/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7444/// (the element is copied from a when the corresponding mask bit is not set).
7445///
7446/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmaddsub_ph)
7447#[inline]
7448#[target_feature(enable = "avx512fp16,avx512vl")]
7449#[cfg_attr(test, assert_instr(vfmaddsub))]
7450#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7451#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7452pub const fn _mm256_mask_fmaddsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), a) }
7454}
7455
7456/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7457/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7458/// (the element is copied from c when the corresponding mask bit is not set).
7459///
7460/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmaddsub_ph)
7461#[inline]
7462#[target_feature(enable = "avx512fp16,avx512vl")]
7463#[cfg_attr(test, assert_instr(vfmaddsub))]
7464#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7465#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7466pub const fn _mm256_mask3_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), c) }
7468}
7469
7470/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7471/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7472/// (the element is zeroed out when the corresponding mask bit is not set).
7473///
7474/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmaddsub_ph)
7475#[inline]
7476#[target_feature(enable = "avx512fp16,avx512vl")]
7477#[cfg_attr(test, assert_instr(vfmaddsub))]
7478#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7479#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7480pub const fn _mm256_maskz_fmaddsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), _mm256_setzero_ph()) }
7482}
7483
7484/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7485/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
7486///
7487/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_ph)
7488#[inline]
7489#[target_feature(enable = "avx512fp16")]
7490#[cfg_attr(test, assert_instr(vfmaddsub))]
7491#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7492#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7493pub const fn _mm512_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
7494 unsafe {
        let add: __m512h = simd_fma(a, b, c);
        let sub: __m512h = simd_fma(a, b, simd_neg(c));
7497 simd_shuffle!(
7498 sub,
7499 add,
7500 [
7501 0, 33, 2, 35, 4, 37, 6, 39, 8, 41, 10, 43, 12, 45, 14, 47, 16, 49, 18, 51, 20, 53,
7502 22, 55, 24, 57, 26, 59, 28, 61, 30, 63
7503 ]
7504 )
7505 }
7506}
7507
7508/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7509/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7510/// (the element is copied from a when the corresponding mask bit is not set).
7511///
7512/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_ph)
7513#[inline]
7514#[target_feature(enable = "avx512fp16")]
7515#[cfg_attr(test, assert_instr(vfmaddsub))]
7516#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7517#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7518pub const fn _mm512_mask_fmaddsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), a) }
7520}
7521
7522/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7523/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7524/// (the element is copied from c when the corresponding mask bit is not set).
7525///
7526/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_ph)
7527#[inline]
7528#[target_feature(enable = "avx512fp16")]
7529#[cfg_attr(test, assert_instr(vfmaddsub))]
7530#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7531#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7532pub const fn _mm512_mask3_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), c) }
7534}
7535
7536/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7537/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7538/// (the element is zeroed out when the corresponding mask bit is not set).
7539///
7540/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_ph)
7541#[inline]
7542#[target_feature(enable = "avx512fp16")]
7543#[cfg_attr(test, assert_instr(vfmaddsub))]
7544#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7545#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7546pub const fn _mm512_maskz_fmaddsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), _mm512_setzero_ph()) }
7548}
7549
7550/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7551/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
7552///
7553/// Rounding is done according to the rounding parameter, which can be one of:
7554///
7555/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7556/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7557/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7558/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7559/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7560///
7561/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_round_ph)
7562#[inline]
7563#[target_feature(enable = "avx512fp16")]
7564#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
7565#[rustc_legacy_const_generics(3)]
7566#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7567pub fn _mm512_fmaddsub_round_ph<const ROUNDING: i32>(
7568 a: __m512h,
7569 b: __m512h,
7570 c: __m512h,
7571) -> __m512h {
7572 unsafe {
7573 static_assert_rounding!(ROUNDING);
7574 vfmaddsubph_512(a, b, c, ROUNDING)
7575 }
7576}
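
// Hypothetical usage sketch for the rounded form: the const generic must combine one
// rounding direction with _MM_FROUND_NO_EXC (or be _MM_FROUND_CUR_DIRECTION). The
// wrapper name below is illustrative only and not part of the library.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
unsafe fn fmaddsub_round_toward_zero(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    // Truncate (round toward zero) and suppress floating-point exceptions.
    _mm512_fmaddsub_round_ph::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c)
}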
7577
7578/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7579/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7580/// (the element is copied from a when the corresponding mask bit is not set).
7581///
7582/// Rounding is done according to the rounding parameter, which can be one of:
7583///
7584/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7585/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7586/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7587/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7588/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7589///
7590/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_round_ph)
7591#[inline]
7592#[target_feature(enable = "avx512fp16")]
7593#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
7594#[rustc_legacy_const_generics(4)]
7595#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7596pub fn _mm512_mask_fmaddsub_round_ph<const ROUNDING: i32>(
7597 a: __m512h,
7598 k: __mmask32,
7599 b: __m512h,
7600 c: __m512h,
7601) -> __m512h {
7602 unsafe {
7603 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c), a)
7605 }
7606}
7607
7608/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7609/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7610/// (the element is copied from c when the corresponding mask bit is not set).
7611///
7612/// Rounding is done according to the rounding parameter, which can be one of:
7613///
7614/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7615/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7616/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7617/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7618/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7619///
7620/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_round_ph)
7621#[inline]
7622#[target_feature(enable = "avx512fp16")]
7623#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
7624#[rustc_legacy_const_generics(4)]
7625#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7626pub fn _mm512_mask3_fmaddsub_round_ph<const ROUNDING: i32>(
7627 a: __m512h,
7628 b: __m512h,
7629 c: __m512h,
7630 k: __mmask32,
7631) -> __m512h {
7632 unsafe {
7633 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c), c)
7635 }
7636}
7637
7638/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7639/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7640/// (the element is zeroed out when the corresponding mask bit is not set).
7641///
7642/// Rounding is done according to the rounding parameter, which can be one of:
7643///
7644/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7645/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7646/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7647/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7648/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7649///
7650/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_round_ph)
7651#[inline]
7652#[target_feature(enable = "avx512fp16")]
7653#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
7654#[rustc_legacy_const_generics(4)]
7655#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7656pub fn _mm512_maskz_fmaddsub_round_ph<const ROUNDING: i32>(
7657 k: __mmask32,
7658 a: __m512h,
7659 b: __m512h,
7660 c: __m512h,
7661) -> __m512h {
7662 unsafe {
7663 static_assert_rounding!(ROUNDING);
7664 simd_select_bitmask(
            k,
            _mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c),
            _mm512_setzero_ph(),
7668 )
7669 }
7670}
7671
7672/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7673/// and add packed elements in c to/from the intermediate result, and store the results in dst.
7674///
7675/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsubadd_ph)
7676#[inline]
7677#[target_feature(enable = "avx512fp16,avx512vl")]
7678#[cfg_attr(test, assert_instr(vfmsubadd))]
7679#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7680#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7681pub const fn _mm_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    _mm_fmaddsub_ph(a, b, unsafe { simd_neg(c) })
7683}
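
// Companion sketch to the fmaddsub model above (illustrative only): fmsubadd flips the
// pattern, so even-indexed lanes get a*b + c and odd-indexed lanes get a*b - c, which is
// exactly fmaddsub applied to a negated c, as the implementation above does.
#[cfg(test)]
#[allow(dead_code)]
fn fmsubadd_scalar_model(a: [f32; 8], b: [f32; 8], c: [f32; 8]) -> [f32; 8] {
    let mut dst = [0.0f32; 8];
    for i in 0..8 {
        // Even lane: add c; odd lane: subtract c.
        dst[i] = if i % 2 == 0 {
            a[i] * b[i] + c[i]
        } else {
            a[i] * b[i] - c[i]
        };
    }
    dst
}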
7684
7685/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7686/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7687/// (the element is copied from a when the corresponding mask bit is not set).
7688///
7689/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsubadd_ph)
7690#[inline]
7691#[target_feature(enable = "avx512fp16,avx512vl")]
7692#[cfg_attr(test, assert_instr(vfmsubadd))]
7693#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7694#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7695pub const fn _mm_mask_fmsubadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), a) }
7697}
7698
7699/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7700/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7701/// (the element is copied from c when the corresponding mask bit is not set).
7702///
7703/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsubadd_ph)
7704#[inline]
7705#[target_feature(enable = "avx512fp16,avx512vl")]
7706#[cfg_attr(test, assert_instr(vfmsubadd))]
7707#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7708#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7709pub const fn _mm_mask3_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), c) }
7711}
7712
7713/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7714/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7715/// (the element is zeroed out when the corresponding mask bit is not set).
7716///
7717/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsubadd_ph)
7718#[inline]
7719#[target_feature(enable = "avx512fp16,avx512vl")]
7720#[cfg_attr(test, assert_instr(vfmsubadd))]
7721#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7722#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7723pub const fn _mm_maskz_fmsubadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), _mm_setzero_ph()) }
7725}
7726
7727/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7728/// and add packed elements in c to/from the intermediate result, and store the results in dst.
7729///
7730/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsubadd_ph)
7731#[inline]
7732#[target_feature(enable = "avx512fp16,avx512vl")]
7733#[cfg_attr(test, assert_instr(vfmsubadd))]
7734#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7735#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7736pub const fn _mm256_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    _mm256_fmaddsub_ph(a, b, unsafe { simd_neg(c) })
7738}
7739
7740/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7741/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7742/// (the element is copied from a when the corresponding mask bit is not set).
7743///
7744/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmsubadd_ph)
7745#[inline]
7746#[target_feature(enable = "avx512fp16,avx512vl")]
7747#[cfg_attr(test, assert_instr(vfmsubadd))]
7748#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7749#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7750pub const fn _mm256_mask_fmsubadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), a) }
7752}
7753
7754/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7755/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7756/// (the element is copied from c when the corresponding mask bit is not set).
7757///
7758/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmsubadd_ph)
7759#[inline]
7760#[target_feature(enable = "avx512fp16,avx512vl")]
7761#[cfg_attr(test, assert_instr(vfmsubadd))]
7762#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7763#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7764pub const fn _mm256_mask3_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), c) }
7766}
7767
7768/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7769/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7770/// (the element is zeroed out when the corresponding mask bit is not set).
7771///
7772/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmsubadd_ph)
7773#[inline]
7774#[target_feature(enable = "avx512fp16,avx512vl")]
7775#[cfg_attr(test, assert_instr(vfmsubadd))]
7776#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7777#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7778pub const fn _mm256_maskz_fmsubadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), _mm256_setzero_ph()) }
7780}
7781
7782/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7783/// and add packed elements in c to/from the intermediate result, and store the results in dst.
7784///
7785/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_ph)
7786#[inline]
7787#[target_feature(enable = "avx512fp16")]
7788#[cfg_attr(test, assert_instr(vfmsubadd))]
7789#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7790#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7791pub const fn _mm512_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    _mm512_fmaddsub_ph(a, b, unsafe { simd_neg(c) })
7793}
7794
7795/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7796/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7797/// (the element is copied from a when the corresponding mask bit is not set).
7798///
7799/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_ph)
7800#[inline]
7801#[target_feature(enable = "avx512fp16")]
7802#[cfg_attr(test, assert_instr(vfmsubadd))]
7803#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7804#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7805pub const fn _mm512_mask_fmsubadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), a) }
7807}
7808
7809/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7810/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7811/// (the element is copied from c when the corresponding mask bit is not set).
7812///
7813/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_ph)
7814#[inline]
7815#[target_feature(enable = "avx512fp16")]
7816#[cfg_attr(test, assert_instr(vfmsubadd))]
7817#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7818#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7819pub const fn _mm512_mask3_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), c) }
7821}
7822
7823/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7824/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7825/// (the element is zeroed out when the corresponding mask bit is not set).
7826///
7827/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_ph)
7828#[inline]
7829#[target_feature(enable = "avx512fp16")]
7830#[cfg_attr(test, assert_instr(vfmsubadd))]
7831#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7832#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7833pub const fn _mm512_maskz_fmsubadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), _mm512_setzero_ph()) }
7835}
7836
7837/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7838/// and add packed elements in c to/from the intermediate result, and store the results in dst.
7839///
7840/// Rounding is done according to the rounding parameter, which can be one of:
7841///
7842/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7843/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7844/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7845/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7846/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7847///
7848/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_round_ph)
7849#[inline]
7850#[target_feature(enable = "avx512fp16")]
7851#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
7852#[rustc_legacy_const_generics(3)]
7853#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7854pub fn _mm512_fmsubadd_round_ph<const ROUNDING: i32>(
7855 a: __m512h,
7856 b: __m512h,
7857 c: __m512h,
7858) -> __m512h {
7859 unsafe {
7860 static_assert_rounding!(ROUNDING);
        vfmaddsubph_512(a, b, simd_neg(c), ROUNDING)
7862 }
7863}
7864
7865/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7866/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7867/// (the element is copied from a when the corresponding mask bit is not set).
7868///
7869/// Rounding is done according to the rounding parameter, which can be one of:
7870///
7871/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7872/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7873/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7874/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7875/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7876///
7877/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_round_ph)
7878#[inline]
7879#[target_feature(enable = "avx512fp16")]
7880#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
7881#[rustc_legacy_const_generics(4)]
7882#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7883pub fn _mm512_mask_fmsubadd_round_ph<const ROUNDING: i32>(
7884 a: __m512h,
7885 k: __mmask32,
7886 b: __m512h,
7887 c: __m512h,
7888) -> __m512h {
7889 unsafe {
7890 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c), a)
7892 }
7893}
7894
7895/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7896/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7897/// (the element is copied from c when the corresponding mask bit is not set).
7898///
7899/// Rounding is done according to the rounding parameter, which can be one of:
7900///
7901/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7902/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7903/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7904/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7905/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7906///
7907/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_round_ph)
7908#[inline]
7909#[target_feature(enable = "avx512fp16")]
7910#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
7911#[rustc_legacy_const_generics(4)]
7912#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7913pub fn _mm512_mask3_fmsubadd_round_ph<const ROUNDING: i32>(
7914 a: __m512h,
7915 b: __m512h,
7916 c: __m512h,
7917 k: __mmask32,
7918) -> __m512h {
7919 unsafe {
7920 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c), c)
7922 }
7923}
7924
7925/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7926/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7927/// (the element is zeroed out when the corresponding mask bit is not set).
7928///
7929/// Rounding is done according to the rounding parameter, which can be one of:
7930///
7931/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7932/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7933/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7934/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7935/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7936///
7937/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_round_ph)
7938#[inline]
7939#[target_feature(enable = "avx512fp16")]
7940#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
7941#[rustc_legacy_const_generics(4)]
7942#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7943pub fn _mm512_maskz_fmsubadd_round_ph<const ROUNDING: i32>(
7944 k: __mmask32,
7945 a: __m512h,
7946 b: __m512h,
7947 c: __m512h,
7948) -> __m512h {
7949 unsafe {
7950 static_assert_rounding!(ROUNDING);
7951 simd_select_bitmask(
            k,
            _mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c),
            _mm512_setzero_ph(),
7955 )
7956 }
7957}
7958
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point
/// elements in a, and store the results in dst.
7960/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7961///
7962/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ph)
7963#[inline]
7964#[target_feature(enable = "avx512fp16,avx512vl")]
7965#[cfg_attr(test, assert_instr(vrcpph))]
7966#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7967pub fn _mm_rcp_ph(a: __m128h) -> __m128h {
    _mm_mask_rcp_ph(_mm_undefined_ph(), 0xff, a)
7969}
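
// Illustrative note (an assumption, not library code): the hardware reciprocal is only
// an approximation, with relative error below 1.5*2^-12. When more accuracy is needed, a
// common pattern is one Newton-Raphson refinement step, y1 = y0 * (2 - x * y0), shown
// here per scalar lane in f32.
#[cfg(test)]
#[allow(dead_code)]
fn refine_reciprocal(x: f32, y0: f32) -> f32 {
    // One Newton-Raphson iteration roughly doubles the number of accurate bits.
    y0 * (2.0 - x * y0)
}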
7970
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point
/// elements in a, and store the results in dst using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
7973/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7974///
7975/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_ph)
7976#[inline]
7977#[target_feature(enable = "avx512fp16,avx512vl")]
7978#[cfg_attr(test, assert_instr(vrcpph))]
7979#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7980pub fn _mm_mask_rcp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
7981 unsafe { vrcpph_128(a, src, k) }
7982}
7983
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point
/// elements in a, and store the results in dst using zeromask k (elements are zeroed out
/// when the corresponding mask bit is not set).
7986/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7987///
7988/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_ph)
7989#[inline]
7990#[target_feature(enable = "avx512fp16,avx512vl")]
7991#[cfg_attr(test, assert_instr(vrcpph))]
7992#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
7993pub fn _mm_maskz_rcp_ph(k: __mmask8, a: __m128h) -> __m128h {
    _mm_mask_rcp_ph(_mm_setzero_ph(), k, a)
7995}
7996
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point
/// elements in a, and store the results in dst.
7998/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7999///
8000/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rcp_ph)
8001#[inline]
8002#[target_feature(enable = "avx512fp16,avx512vl")]
8003#[cfg_attr(test, assert_instr(vrcpph))]
8004#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8005pub fn _mm256_rcp_ph(a: __m256h) -> __m256h {
    _mm256_mask_rcp_ph(_mm256_undefined_ph(), 0xffff, a)
8007}
8008
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point
/// elements in a, and store the results in dst using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
8011/// The maximum relative error for this approximation is less than `1.5*2^-12`.
8012///
8013/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rcp_ph)
8014#[inline]
8015#[target_feature(enable = "avx512fp16,avx512vl")]
8016#[cfg_attr(test, assert_instr(vrcpph))]
8017#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8018pub fn _mm256_mask_rcp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
8019 unsafe { vrcpph_256(a, src, k) }
8020}
8021
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point
/// elements in a, and store the results in dst using zeromask k (elements are zeroed out
/// when the corresponding mask bit is not set).
8024/// The maximum relative error for this approximation is less than `1.5*2^-12`.
8025///
8026/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rcp_ph)
8027#[inline]
8028#[target_feature(enable = "avx512fp16,avx512vl")]
8029#[cfg_attr(test, assert_instr(vrcpph))]
8030#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8031pub fn _mm256_maskz_rcp_ph(k: __mmask16, a: __m256h) -> __m256h {
    _mm256_mask_rcp_ph(_mm256_setzero_ph(), k, a)
8033}
8034
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point
/// elements in a, and store the results in dst.
8036/// The maximum relative error for this approximation is less than `1.5*2^-12`.
8037///
8038/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rcp_ph)
8039#[inline]
8040#[target_feature(enable = "avx512fp16")]
8041#[cfg_attr(test, assert_instr(vrcpph))]
8042#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8043pub fn _mm512_rcp_ph(a: __m512h) -> __m512h {
    _mm512_mask_rcp_ph(_mm512_undefined_ph(), 0xffffffff, a)
8045}
8046
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point
/// elements in a, and store the results in dst using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
8049/// The maximum relative error for this approximation is less than `1.5*2^-12`.
8050///
8051/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rcp_ph)
8052#[inline]
8053#[target_feature(enable = "avx512fp16")]
8054#[cfg_attr(test, assert_instr(vrcpph))]
8055#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8056pub fn _mm512_mask_rcp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
8057 unsafe { vrcpph_512(a, src, k) }
8058}
8059
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point
/// elements in a, and store the results in dst using zeromask k (elements are zeroed out
/// when the corresponding mask bit is not set).
8062/// The maximum relative error for this approximation is less than `1.5*2^-12`.
8063///
8064/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rcp_ph)
8065#[inline]
8066#[target_feature(enable = "avx512fp16")]
8067#[cfg_attr(test, assert_instr(vrcpph))]
8068#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8069pub fn _mm512_maskz_rcp_ph(k: __mmask32, a: __m512h) -> __m512h {
    _mm512_mask_rcp_ph(_mm512_setzero_ph(), k, a)
8071}
8072
8073/// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b,
8074/// store the result in the lower element of dst, and copy the upper 7 packed elements from a to the
8075/// upper elements of dst.
8076/// The maximum relative error for this approximation is less than `1.5*2^-12`.
8077///
8078/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_sh)
8079#[inline]
8080#[target_feature(enable = "avx512fp16")]
8081#[cfg_attr(test, assert_instr(vrcpsh))]
8082#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8083pub fn _mm_rcp_sh(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_rcp_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
8085}
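
// Scalar sketch (illustrative only) of the _sh reciprocal semantics: lane 0 of the
// result is the approximate reciprocal of b's lane 0, and lanes 1..7 are copied from a.
// The exact 1.0 / x below stands in for the hardware approximation.
#[cfg(test)]
#[allow(dead_code)]
fn rcp_sh_scalar_model(a: [f32; 8], b: [f32; 8]) -> [f32; 8] {
    let mut dst = a;
    dst[0] = 1.0 / b[0];
    dst
}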
8086
8087/// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b,
8088/// store the result in the lower element of dst using writemask k (the element is copied from src when
8089/// mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
8090/// The maximum relative error for this approximation is less than `1.5*2^-12`.
8091///
8092/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_sh)
8093#[inline]
8094#[target_feature(enable = "avx512fp16")]
8095#[cfg_attr(test, assert_instr(vrcpsh))]
8096#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8097pub fn _mm_mask_rcp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8098 unsafe { vrcpsh(a, b, src, k) }
8099}
8100
8101/// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b,
8102/// store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0
8103/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
8104/// The maximum relative error for this approximation is less than `1.5*2^-12`.
8105///
8106/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_sh)
8107#[inline]
8108#[target_feature(enable = "avx512fp16")]
8109#[cfg_attr(test, assert_instr(vrcpsh))]
8110#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8111pub fn _mm_maskz_rcp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_rcp_sh(f16x8::ZERO.as_m128h(), k, a, b)
8113}
8114
8115/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
8116/// elements in a, and store the results in dst.
8117/// The maximum relative error for this approximation is less than `1.5*2^-12`.
8118///
8119/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ph)
8120#[inline]
8121#[target_feature(enable = "avx512fp16,avx512vl")]
8122#[cfg_attr(test, assert_instr(vrsqrtph))]
8123#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8124pub fn _mm_rsqrt_ph(a: __m128h) -> __m128h {
    _mm_mask_rsqrt_ph(_mm_undefined_ph(), 0xff, a)
8126}
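
// Illustrative note (an assumption, not library code): as with the reciprocal above, the
// reciprocal square root approximation can be refined with one Newton-Raphson step,
// y1 = y0 * (1.5 - 0.5 * x * y0 * y0), shown here per scalar lane in f32.
#[cfg(test)]
#[allow(dead_code)]
fn refine_rsqrt(x: f32, y0: f32) -> f32 {
    y0 * (1.5 - 0.5 * x * y0 * y0)
}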
8127
8128/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
8129/// elements in a, and store the results in dst using writemask k (elements are copied from src when
8130/// the corresponding mask bit is not set).
8131/// The maximum relative error for this approximation is less than `1.5*2^-12`.
8132///
8133/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt_ph)
8134#[inline]
8135#[target_feature(enable = "avx512fp16,avx512vl")]
8136#[cfg_attr(test, assert_instr(vrsqrtph))]
8137#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8138pub fn _mm_mask_rsqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
8139 unsafe { vrsqrtph_128(a, src, k) }
8140}
8141
8142/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
8143/// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the
8144/// corresponding mask bit is not set).
8145/// The maximum relative error for this approximation is less than `1.5*2^-12`.
8146///
8147/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rsqrt_ph)
8148#[inline]
8149#[target_feature(enable = "avx512fp16,avx512vl")]
8150#[cfg_attr(test, assert_instr(vrsqrtph))]
8151#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8152pub fn _mm_maskz_rsqrt_ph(k: __mmask8, a: __m128h) -> __m128h {
    _mm_mask_rsqrt_ph(_mm_setzero_ph(), k, a)
8154}
8155
8156/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
8157/// elements in a, and store the results in dst.
8158/// The maximum relative error for this approximation is less than `1.5*2^-12`.
8159///
8160/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rsqrt_ph)
8161#[inline]
8162#[target_feature(enable = "avx512fp16,avx512vl")]
8163#[cfg_attr(test, assert_instr(vrsqrtph))]
8164#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8165pub fn _mm256_rsqrt_ph(a: __m256h) -> __m256h {
    _mm256_mask_rsqrt_ph(_mm256_undefined_ph(), 0xffff, a)
8167}
8168
8169/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
8170/// elements in a, and store the results in dst using writemask k (elements are copied from src when
8171/// the corresponding mask bit is not set).
8172/// The maximum relative error for this approximation is less than `1.5*2^-12`.
8173///
8174/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rsqrt_ph)
8175#[inline]
8176#[target_feature(enable = "avx512fp16,avx512vl")]
8177#[cfg_attr(test, assert_instr(vrsqrtph))]
8178#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8179pub fn _mm256_mask_rsqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
8180 unsafe { vrsqrtph_256(a, src, k) }
8181}
8182
8183/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
8184/// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the
8185/// corresponding mask bit is not set).
8186/// The maximum relative error for this approximation is less than `1.5*2^-12`.
8187///
8188/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rsqrt_ph)
8189#[inline]
8190#[target_feature(enable = "avx512fp16,avx512vl")]
8191#[cfg_attr(test, assert_instr(vrsqrtph))]
8192#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8193pub fn _mm256_maskz_rsqrt_ph(k: __mmask16, a: __m256h) -> __m256h {
    _mm256_mask_rsqrt_ph(_mm256_setzero_ph(), k, a)
8195}
8196
8197/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
8198/// elements in a, and store the results in dst.
8199/// The maximum relative error for this approximation is less than `1.5*2^-12`.
8200///
8201/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rsqrt_ph)
8202#[inline]
8203#[target_feature(enable = "avx512fp16")]
8204#[cfg_attr(test, assert_instr(vrsqrtph))]
8205#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8206pub fn _mm512_rsqrt_ph(a: __m512h) -> __m512h {
    _mm512_mask_rsqrt_ph(_mm512_undefined_ph(), 0xffffffff, a)
8208}
8209
8210/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
8211/// elements in a, and store the results in dst using writemask k (elements are copied from src when
8212/// the corresponding mask bit is not set).
8213/// The maximum relative error for this approximation is less than `1.5*2^-12`.
8214///
8215/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rsqrt_ph)
8216#[inline]
8217#[target_feature(enable = "avx512fp16")]
8218#[cfg_attr(test, assert_instr(vrsqrtph))]
8219#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8220pub fn _mm512_mask_rsqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
8221 unsafe { vrsqrtph_512(a, src, k) }
8222}
8223
8224/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
8225/// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the
8226/// corresponding mask bit is not set).
8227/// The maximum relative error for this approximation is less than `1.5*2^-12`.
8228///
8229/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rsqrt_ph)
8230#[inline]
8231#[target_feature(enable = "avx512fp16")]
8232#[cfg_attr(test, assert_instr(vrsqrtph))]
8233#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8234pub fn _mm512_maskz_rsqrt_ph(k: __mmask32, a: __m512h) -> __m512h {
    _mm512_mask_rsqrt_ph(_mm512_setzero_ph(), k, a)
8236}
8237
8238/// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point
8239/// element in b, store the result in the lower element of dst, and copy the upper 7 packed elements from a
8240/// to the upper elements of dst.
8241/// The maximum relative error for this approximation is less than `1.5*2^-12`.
8242///
8243/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_sh)
8244#[inline]
8245#[target_feature(enable = "avx512fp16")]
8246#[cfg_attr(test, assert_instr(vrsqrtsh))]
8247#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8248pub fn _mm_rsqrt_sh(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_rsqrt_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
8250}
8251
8252/// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point
8253/// element in b, store the result in the lower element of dst using writemask k (the element is copied from src
8254/// when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
8255/// The maximum relative error for this approximation is less than `1.5*2^-12`.
8256///
8257/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt_sh)
8258#[inline]
8259#[target_feature(enable = "avx512fp16")]
8260#[cfg_attr(test, assert_instr(vrsqrtsh))]
8261#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8262pub fn _mm_mask_rsqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8263 unsafe { vrsqrtsh(a, b, src, k) }
8264}
8265
8266/// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point
8267/// element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when
8268/// mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
8269/// The maximum relative error for this approximation is less than `1.5*2^-12`.
8270///
8271/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rsqrt_sh)
8272#[inline]
8273#[target_feature(enable = "avx512fp16")]
8274#[cfg_attr(test, assert_instr(vrsqrtsh))]
8275#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8276pub fn _mm_maskz_rsqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_rsqrt_sh(f16x8::ZERO.as_m128h(), k, a, b)
8278}
8279
8280/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8281/// results in dst.
8282///
8283/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_ph)
8284#[inline]
8285#[target_feature(enable = "avx512fp16,avx512vl")]
8286#[cfg_attr(test, assert_instr(vsqrtph))]
8287#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8288pub fn _mm_sqrt_ph(a: __m128h) -> __m128h {
8289 unsafe { simd_fsqrt(a) }
8290}
8291
8292/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8293/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8294///
8295/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_ph)
8296#[inline]
8297#[target_feature(enable = "avx512fp16,avx512vl")]
8298#[cfg_attr(test, assert_instr(vsqrtph))]
8299#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8300pub fn _mm_mask_sqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_sqrt_ph(a), src) }
8302}
8303
8304/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8305/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8306///
8307/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_ph)
8308#[inline]
8309#[target_feature(enable = "avx512fp16,avx512vl")]
8310#[cfg_attr(test, assert_instr(vsqrtph))]
8311#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8312pub fn _mm_maskz_sqrt_ph(k: __mmask8, a: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_sqrt_ph(a), _mm_setzero_ph()) }
8314}
8315
8316/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8317/// results in dst.
8318///
8319/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sqrt_ph)
8320#[inline]
8321#[target_feature(enable = "avx512fp16,avx512vl")]
8322#[cfg_attr(test, assert_instr(vsqrtph))]
8323#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8324pub fn _mm256_sqrt_ph(a: __m256h) -> __m256h {
8325 unsafe { simd_fsqrt(a) }
8326}
8327
8328/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8329/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8330///
8331/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sqrt_ph)
8332#[inline]
8333#[target_feature(enable = "avx512fp16,avx512vl")]
8334#[cfg_attr(test, assert_instr(vsqrtph))]
8335#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8336pub fn _mm256_mask_sqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_sqrt_ph(a), src) }
8338}
8339
8340/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8341/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8342///
8343/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sqrt_ph)
8344#[inline]
8345#[target_feature(enable = "avx512fp16,avx512vl")]
8346#[cfg_attr(test, assert_instr(vsqrtph))]
8347#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8348pub fn _mm256_maskz_sqrt_ph(k: __mmask16, a: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_sqrt_ph(a), _mm256_setzero_ph()) }
8350}
8351
8352/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8353/// results in dst.
8354///
8355/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_ph)
8356#[inline]
8357#[target_feature(enable = "avx512fp16")]
8358#[cfg_attr(test, assert_instr(vsqrtph))]
8359#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8360pub fn _mm512_sqrt_ph(a: __m512h) -> __m512h {
8361 unsafe { simd_fsqrt(a) }
8362}
8363
8364/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8365/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8366///
8367/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_ph)
8368#[inline]
8369#[target_feature(enable = "avx512fp16")]
8370#[cfg_attr(test, assert_instr(vsqrtph))]
8371#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8372pub fn _mm512_mask_sqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_sqrt_ph(a), src) }
8374}
8375
8376/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8377/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8378///
8379/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_ph)
8380#[inline]
8381#[target_feature(enable = "avx512fp16")]
8382#[cfg_attr(test, assert_instr(vsqrtph))]
8383#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8384pub fn _mm512_maskz_sqrt_ph(k: __mmask32, a: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_sqrt_ph(a), _mm512_setzero_ph()) }
8386}
8387
8388/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8389/// results in dst.
8390/// Rounding is done according to the rounding parameter, which can be one of:
8391///
8392/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8393/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8394/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8395/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8396/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8397///
8398/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_round_ph)
8399#[inline]
8400#[target_feature(enable = "avx512fp16")]
8401#[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = 8))]
8402#[rustc_legacy_const_generics(1)]
8403#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8404pub fn _mm512_sqrt_round_ph<const ROUNDING: i32>(a: __m512h) -> __m512h {
8405 unsafe {
8406 static_assert_rounding!(ROUNDING);
8407 vsqrtph_512(a, ROUNDING)
8408 }
8409}
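
// Hypothetical usage sketch for the rounded square root: requesting round-to-nearest
// with floating-point exceptions suppressed. The wrapper name is illustrative only and
// not part of the library.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
unsafe fn sqrt_round_nearest(a: __m512h) -> __m512h {
    _mm512_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a)
}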
8410
8411/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8412/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8413/// Rounding is done according to the rounding parameter, which can be one of:
8414///
8415/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8416/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8417/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8418/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8419/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8420///
8421/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_round_ph)
8422#[inline]
8423#[target_feature(enable = "avx512fp16")]
8424#[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = 8))]
8425#[rustc_legacy_const_generics(3)]
8426#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8427pub fn _mm512_mask_sqrt_round_ph<const ROUNDING: i32>(
8428 src: __m512h,
8429 k: __mmask32,
8430 a: __m512h,
8431) -> __m512h {
8432 unsafe {
8433 static_assert_rounding!(ROUNDING);
8434 simd_select_bitmask(m:k, yes:_mm512_sqrt_round_ph::<ROUNDING>(a), no:src)
8435 }
8436}
8437
8438/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8439/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8440/// Rounding is done according to the rounding parameter, which can be one of:
8441///
8442/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8443/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8444/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8445/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8446/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8447///
8448/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_round_ph)
8449#[inline]
8450#[target_feature(enable = "avx512fp16")]
8451#[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = 8))]
8452#[rustc_legacy_const_generics(2)]
8453#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
8454pub fn _mm512_maskz_sqrt_round_ph<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512h {
8455 unsafe {
8456 static_assert_rounding!(ROUNDING);
8457 simd_select_bitmask(m:k, yes:_mm512_sqrt_round_ph::<ROUNDING>(a), no:_mm512_setzero_ph())
8458 }
8459}
8460
8461/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
8462/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
8463/// elements of dst.
8464///
8465/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sh)
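///
/// # Example
///
/// A minimal sketch of the scalar lane semantics (lower lane computed from `b`,
/// upper lanes copied from `a`), assuming an `avx512fp16`-enabled caller and the
/// unstable `f16` type so the float literals can be typed as `f16`:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() -> __m128h {
///     let a = _mm_set1_ph(1.0); // supplies lanes 1..=7 of the result
///     let b = _mm_set1_ph(9.0); // only lane 0 is read
///     // Result: lane 0 = sqrt(9.0) = 3.0, lanes 1..=7 = 1.0.
///     _mm_sqrt_sh(a, b)
/// }
/// ```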
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vsqrtsh))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_sqrt_sh(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_sqrt_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
}

/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
/// the result in the lower element of dst using writemask k (the element is copied from src when mask
/// bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vsqrtsh))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_mask_sqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_sqrt_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
}

/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0
/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vsqrtsh))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_maskz_sqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_sqrt_sh(f16x8::ZERO.as_m128h(), k, a, b)
}

/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
/// elements of dst.
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_sqrt_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
    static_assert_rounding!(ROUNDING);
    _mm_mask_sqrt_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
}

/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
/// the result in the lower element of dst using writemask k (the element is copied from src when mask
/// bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_mask_sqrt_round_sh<const ROUNDING: i32>(
    src: __m128h,
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vsqrtsh(a, b, src, k, ROUNDING)
    }
}

/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0
/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_maskz_sqrt_round_sh<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    static_assert_rounding!(ROUNDING);
    _mm_mask_sqrt_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum
/// value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vmaxph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_max_ph(a: __m128h, b: __m128h) -> __m128h {
    unsafe { vmaxph_128(a, b) }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
/// NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_ph)
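///
/// # Example
///
/// A minimal sketch of the writemask behaviour, assuming a caller compiled with
/// `avx512fp16` and `avx512vl` enabled:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo(src: __m128h, a: __m128h, b: __m128h) -> __m128h {
///     // Lanes 0..=3 receive max(a, b); lanes 4..=7 are copied from `src`.
///     _mm_mask_max_ph(src, 0x0f, a, b)
/// }
/// ```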
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vmaxph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_mask_max_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_max_ph(a, b), src) }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
/// NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vmaxph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_maskz_max_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_max_ph(a, b), _mm_setzero_ph()) }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum
/// value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vmaxph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_max_ph(a: __m256h, b: __m256h) -> __m256h {
    unsafe { vmaxph_256(a, b) }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
/// NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_max_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vmaxph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_mask_max_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_max_ph(a, b), src) }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
/// NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vmaxph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_maskz_max_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_max_ph(a, b), _mm256_setzero_ph()) }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum
/// value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vmaxph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_max_ph(a: __m512h, b: __m512h) -> __m512h {
    _mm512_max_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b)
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
/// NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vmaxph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_mask_max_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_max_ph(a, b), src) }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
/// NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vmaxph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_maskz_max_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_max_ph(a, b), _mm512_setzero_ph()) }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
/// values in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
/// NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_round_ph)
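///
/// # Example
///
/// A minimal sketch of suppressing exceptions through the `SAE` const generic,
/// assuming an `avx512fp16`-enabled caller:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn max_no_exceptions(a: __m512h, b: __m512h) -> __m512h {
///     // Same lanewise maximum as `_mm512_max_ph`, but with floating-point
///     // exceptions suppressed.
///     _mm512_max_round_ph::<_MM_FROUND_NO_EXC>(a, b)
/// }
/// ```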
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vmaxph, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_max_round_ph<const SAE: i32>(a: __m512h, b: __m512h) -> __m512h {
    unsafe {
        static_assert_sae!(SAE);
        vmaxph_512(a, b, SAE)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vmaxph, SAE = 8))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_mask_max_round_ph<const SAE: i32>(
    src: __m512h,
    k: __mmask32,
    a: __m512h,
    b: __m512h,
) -> __m512h {
    unsafe {
        static_assert_sae!(SAE);
        simd_select_bitmask(k, _mm512_max_round_ph::<SAE>(a, b), src)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vmaxph, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_maskz_max_round_ph<const SAE: i32>(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
    unsafe {
        static_assert_sae!(SAE);
        simd_select_bitmask(k, _mm512_max_round_ph::<SAE>(a, b), _mm512_setzero_ph())
    }
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum
/// value in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
/// of dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value
/// when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sh)
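///
/// # Example
///
/// A minimal sketch, assuming a caller compiled with `avx512fp16` and `avx512vl`
/// enabled and the unstable `f16` type so the float literals can be typed as `f16`:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() -> __m128h {
///     let a = _mm_set1_ph(2.0);
///     let b = _mm_set1_ph(5.0);
///     // Lane 0 = max(2.0, 5.0) = 5.0; lanes 1..=7 are copied from `a` (2.0).
///     _mm_max_sh(a, b)
/// }
/// ```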
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vmaxsh))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_max_sh(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_max_sh(_mm_undefined_ph(), 0xff, a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum
/// value in the lower element of dst using writemask k (the element is copied from src when mask bit 0
/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. Does not follow
/// the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_sh)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vmaxsh))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_mask_max_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_max_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value
/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and
/// copy the upper 7 packed elements from a to the upper elements of dst. Does not follow the IEEE Standard
/// for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_sh)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vmaxsh))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_maskz_max_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_max_sh(f16x8::ZERO.as_m128h(), k, a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value
/// in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vmaxsh, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_max_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
    static_assert_sae!(SAE);
    _mm_mask_max_round_sh::<SAE>(_mm_undefined_ph(), 0xff, a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value
/// in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
/// and copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by
/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic
/// (IEEE 754) maximum value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vmaxsh, SAE = 8))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_mask_max_round_sh<const SAE: i32>(
    src: __m128h,
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    unsafe {
        static_assert_sae!(SAE);
        vmaxsh(a, b, src, k, SAE)
    }
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value
/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and
/// copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by
/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic
/// (IEEE 754) maximum value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vmaxsh, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_maskz_max_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    static_assert_sae!(SAE);
    _mm_mask_max_round_sh::<SAE>(f16x8::ZERO.as_m128h(), k, a, b)
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value
/// when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_ph)
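///
/// # Example
///
/// A minimal sketch, assuming a caller compiled with `avx512fp16` and `avx512vl`
/// enabled and the unstable `f16` type so the float literals can be typed as `f16`:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() -> __m128h {
///     let a = _mm_set1_ph(3.0);
///     let b = _mm_set1_ph(-1.0);
///     // Every lane holds min(3.0, -1.0) = -1.0.
///     _mm_min_ph(a, b)
/// }
/// ```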
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vminph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_min_ph(a: __m128h, b: __m128h) -> __m128h {
    unsafe { vminph_128(a, b) }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
/// NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vminph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_mask_min_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_min_ph(a, b), src) }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
/// NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vminph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_maskz_min_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_min_ph(a, b), _mm_setzero_ph()) }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value
/// when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vminph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_min_ph(a: __m256h, b: __m256h) -> __m256h {
    unsafe { vminph_256(a, b) }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
/// NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_min_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vminph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_mask_min_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_min_ph(a, b), src) }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
/// NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_min_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vminph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_maskz_min_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_min_ph(a, b), _mm256_setzero_ph()) }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value
/// when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vminph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_min_ph(a: __m512h, b: __m512h) -> __m512h {
    _mm512_min_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b)
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
/// NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vminph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_mask_min_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_min_ph(a, b), src) }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
/// NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vminph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_maskz_min_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_min_ph(a, b), _mm512_setzero_ph()) }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
/// values in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not
/// follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vminph, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_min_round_ph<const SAE: i32>(a: __m512h, b: __m512h) -> __m512h {
    unsafe {
        static_assert_sae!(SAE);
        vminph_512(a, b, SAE)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vminph, SAE = 8))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_mask_min_round_ph<const SAE: i32>(
    src: __m512h,
    k: __mmask32,
    a: __m512h,
    b: __m512h,
) -> __m512h {
    unsafe {
        static_assert_sae!(SAE);
        simd_select_bitmask(k, _mm512_min_round_ph::<SAE>(a, b), src)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vminph, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_maskz_min_round_ph<const SAE: i32>(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
    unsafe {
        static_assert_sae!(SAE);
        simd_select_bitmask(k, _mm512_min_round_ph::<SAE>(a, b), _mm512_setzero_ph())
    }
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum
/// value in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
/// of dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when
/// inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sh)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vminsh))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_min_sh(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_min_sh(_mm_undefined_ph(), 0xff, a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum
/// value in the lower element of dst using writemask k (the element is copied from src when mask bit 0
/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. Does not follow
/// the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_sh)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vminsh))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_mask_min_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_min_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value
/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and
/// copy the upper 7 packed elements from a to the upper elements of dst. Does not follow the IEEE Standard
/// for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_sh)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vminsh))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_maskz_min_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_min_sh(f16x8::ZERO.as_m128h(), k, a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value
/// in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vminsh, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_min_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
    static_assert_sae!(SAE);
    _mm_mask_min_round_sh::<SAE>(_mm_undefined_ph(), 0xff, a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value
/// in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
/// and copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by
/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic
/// (IEEE 754) minimum value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vminsh, SAE = 8))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_mask_min_round_sh<const SAE: i32>(
    src: __m128h,
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    unsafe {
        static_assert_sae!(SAE);
        vminsh(a, b, src, k, SAE)
    }
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value
/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and
/// copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by
/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic
/// (IEEE 754) minimum value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vminsh, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_maskz_min_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    static_assert_sae!(SAE);
    _mm_mask_min_round_sh::<SAE>(f16x8::ZERO.as_m128h(), k, a, b)
}

/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
/// This intrinsic essentially calculates `floor(log2(x))` for each element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_ph)
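///
/// # Example
///
/// A minimal sketch of the `floor(log2(x))` behaviour, assuming a caller compiled
/// with `avx512fp16` and `avx512vl` enabled and the unstable `f16` type:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() -> __m128h {
///     let a = _mm_set1_ph(10.0);
///     // floor(log2(10.0)) = 3, so every lane of the result holds 3.0.
///     _mm_getexp_ph(a)
/// }
/// ```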
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vgetexpph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_getexp_ph(a: __m128h) -> __m128h {
    _mm_mask_getexp_ph(_mm_undefined_ph(), 0xff, a)
}

/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
/// `floor(log2(x))` for each element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vgetexpph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_mask_getexp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
    unsafe { vgetexpph_128(a, src, k) }
}

/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
/// `floor(log2(x))` for each element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vgetexpph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_maskz_getexp_ph(k: __mmask8, a: __m128h) -> __m128h {
    _mm_mask_getexp_ph(_mm_setzero_ph(), k, a)
}

/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
/// This intrinsic essentially calculates `floor(log2(x))` for each element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getexp_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vgetexpph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_getexp_ph(a: __m256h) -> __m256h {
    _mm256_mask_getexp_ph(_mm256_undefined_ph(), 0xffff, a)
}

/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
/// `floor(log2(x))` for each element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getexp_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vgetexpph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_mask_getexp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
    unsafe { vgetexpph_256(a, src, k) }
}

/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
/// `floor(log2(x))` for each element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getexp_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vgetexpph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_maskz_getexp_ph(k: __mmask16, a: __m256h) -> __m256h {
    _mm256_mask_getexp_ph(_mm256_setzero_ph(), k, a)
}

/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
/// This intrinsic essentially calculates `floor(log2(x))` for each element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vgetexpph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_getexp_ph(a: __m512h) -> __m512h {
    _mm512_mask_getexp_ph(_mm512_undefined_ph(), 0xffffffff, a)
}

/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
/// `floor(log2(x))` for each element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vgetexpph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_mask_getexp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
    _mm512_mask_getexp_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a)
}

/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
/// `floor(log2(x))` for each element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vgetexpph))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_maskz_getexp_ph(k: __mmask32, a: __m512h) -> __m512h {
    _mm512_mask_getexp_ph(_mm512_setzero_ph(), k, a)
}

/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
/// This intrinsic essentially calculates `floor(log2(x))` for each element. Exceptions can be suppressed
/// by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_getexp_round_ph<const SAE: i32>(a: __m512h) -> __m512h {
    static_assert_sae!(SAE);
    _mm512_mask_getexp_round_ph::<SAE>(_mm512_undefined_ph(), 0xffffffff, a)
}

/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
/// `floor(log2(x))` for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_mask_getexp_round_ph<const SAE: i32>(
    src: __m512h,
    k: __mmask32,
    a: __m512h,
) -> __m512h {
    unsafe {
        static_assert_sae!(SAE);
        vgetexpph_512(a, src, k, SAE)
    }
}

/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
/// `floor(log2(x))` for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_maskz_getexp_round_ph<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512h {
    static_assert_sae!(SAE);
    _mm512_mask_getexp_round_ph::<SAE>(_mm512_setzero_ph(), k, a)
}
9314
9315/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9316/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9317/// of dst, and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially
9318/// calculates `floor(log2(x))` for the lower element.
9319///
9320/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_sh)
9321#[inline]
9322#[target_feature(enable = "avx512fp16")]
9323#[cfg_attr(test, assert_instr(vgetexpsh))]
9324#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
9325pub fn _mm_getexp_sh(a: __m128h, b: __m128h) -> __m128h {
9326 _mm_mask_getexp_sh(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
9327}
9328
9329/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9330/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9331/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 7
9332/// packed elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))`
9333/// for the lower element.
9334///
9335/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_sh)
9336#[inline]
9337#[target_feature(enable = "avx512fp16")]
9338#[cfg_attr(test, assert_instr(vgetexpsh))]
9339#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
9340pub fn _mm_mask_getexp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
9341 _mm_mask_getexp_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
9342}
9343
9344/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9345/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9346/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed
9347/// elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` for the
9348/// lower element.
9349///
9350/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_sh)
9351#[inline]
9352#[target_feature(enable = "avx512fp16")]
9353#[cfg_attr(test, assert_instr(vgetexpsh))]
9354#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
9355pub fn _mm_maskz_getexp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
9356 _mm_mask_getexp_sh(src:f16x8::ZERO.as_m128h(), k, a, b)
9357}
9358
9359/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9360/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9361/// of dst, and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially
9362/// calculates `floor(log2(x))` for the lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
9363/// in the sae parameter
9364///
9365/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_round_sh)
9366#[inline]
9367#[target_feature(enable = "avx512fp16")]
9368#[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))]
9369#[rustc_legacy_const_generics(2)]
9370#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
9371pub fn _mm_getexp_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
9372 static_assert_sae!(SAE);
9373 _mm_mask_getexp_round_sh::<SAE>(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
9374}
9375
9376/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9377/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9378/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 7
9379/// packed elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))`
9380/// for the lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9381///
9382/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_round_sh)
9383#[inline]
9384#[target_feature(enable = "avx512fp16")]
9385#[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))]
9386#[rustc_legacy_const_generics(4)]
9387#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
9388pub fn _mm_mask_getexp_round_sh<const SAE: i32>(
9389 src: __m128h,
9390 k: __mmask8,
9391 a: __m128h,
9392 b: __m128h,
9393) -> __m128h {
9394 unsafe {
9395 static_assert_sae!(SAE);
9396 vgetexpsh(a, b, src, k, SAE)
9397 }
9398}
9399
9400/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9401/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9402/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed
9403/// elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` for the
9404/// lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9405///
9406/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_round_sh)
9407#[inline]
9408#[target_feature(enable = "avx512fp16")]
9409#[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))]
9410#[rustc_legacy_const_generics(3)]
9411#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
9412pub fn _mm_maskz_getexp_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
9413 static_assert_sae!(SAE);
    _mm_mask_getexp_round_sh::<SAE>(f16x8::ZERO.as_m128h(), k, a, b)
9415}
9416
9417/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9418/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
9419/// on the interval range defined by norm and the sign depends on sign and the source sign.
9420///
9421/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9422///
9423/// _MM_MANT_NORM_1_2 // interval [1, 2)
9424/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9425/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9426/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9427///
9428/// The sign is determined by sc which can take the following values:
9429///
9430/// _MM_MANT_SIGN_src // sign = sign(src)
9431/// _MM_MANT_SIGN_zero // sign = 0
9432/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9433///
9434/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_ph)
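///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation). It assumes a
/// CPU with `avx512fp16` and `avx512vl`; the mantissa constants and `_mm_cvtsh_h` come from
/// elsewhere in `core::arch`:
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn getmant_demo() -> f16 {
///     let a = _mm_set1_ph(12.0); // 12.0 = 1.5 * 2^3
///     // Normalize every mantissa into [1, 2), keeping the source sign
///     let r = _mm_getmant_ph::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a);
///     _mm_cvtsh_h(r) // 1.5
/// }
/// ```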
9435#[inline]
9436#[target_feature(enable = "avx512fp16,avx512vl")]
9437#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9438#[rustc_legacy_const_generics(1, 2)]
9439#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
9440pub fn _mm_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
9441 a: __m128h,
9442) -> __m128h {
9443 static_assert_uimm_bits!(NORM, 4);
9444 static_assert_uimm_bits!(SIGN, 2);
    _mm_mask_getmant_ph::<NORM, SIGN>(_mm_undefined_ph(), 0xff, a)
9446}
9447
9448/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9449/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9450/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9451/// by norm and the sign depends on sign and the source sign.
9452///
9453/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9454///
9455/// _MM_MANT_NORM_1_2 // interval [1, 2)
9456/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9457/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9458/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9459///
9460/// The sign is determined by sc which can take the following values:
9461///
9462/// _MM_MANT_SIGN_src // sign = sign(src)
9463/// _MM_MANT_SIGN_zero // sign = 0
9464/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9465///
9466/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_ph)
9467#[inline]
9468#[target_feature(enable = "avx512fp16,avx512vl")]
9469#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9470#[rustc_legacy_const_generics(3, 4)]
9471#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
9472pub fn _mm_mask_getmant_ph<
9473 const NORM: _MM_MANTISSA_NORM_ENUM,
9474 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9475>(
9476 src: __m128h,
9477 k: __mmask8,
9478 a: __m128h,
9479) -> __m128h {
9480 unsafe {
9481 static_assert_uimm_bits!(NORM, 4);
9482 static_assert_uimm_bits!(SIGN, 2);
9483 vgetmantph_128(a, (SIGN << 2) | NORM, src, k)
9484 }
9485}
9486
9487/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9488/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9489/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9490/// by norm and the sign depends on sign and the source sign.
9491///
9492/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9493///
9494/// _MM_MANT_NORM_1_2 // interval [1, 2)
9495/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9496/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9497/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9498///
9499/// The sign is determined by sc which can take the following values:
9500///
9501/// _MM_MANT_SIGN_src // sign = sign(src)
9502/// _MM_MANT_SIGN_zero // sign = 0
9503/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9504///
9505/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_ph)
9506#[inline]
9507#[target_feature(enable = "avx512fp16,avx512vl")]
9508#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9509#[rustc_legacy_const_generics(2, 3)]
9510#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
9511pub fn _mm_maskz_getmant_ph<
9512 const NORM: _MM_MANTISSA_NORM_ENUM,
9513 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9514>(
9515 k: __mmask8,
9516 a: __m128h,
9517) -> __m128h {
9518 static_assert_uimm_bits!(NORM, 4);
9519 static_assert_uimm_bits!(SIGN, 2);
    _mm_mask_getmant_ph::<NORM, SIGN>(_mm_setzero_ph(), k, a)
9521}
9522
9523/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9524/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
9525/// on the interval range defined by norm and the sign depends on sign and the source sign.
9526///
9527/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9528///
9529/// _MM_MANT_NORM_1_2 // interval [1, 2)
9530/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9531/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9532/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9533///
9534/// The sign is determined by sc which can take the following values:
9535///
9536/// _MM_MANT_SIGN_src // sign = sign(src)
9537/// _MM_MANT_SIGN_zero // sign = 0
9538/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9539///
9540/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getmant_ph)
9541#[inline]
9542#[target_feature(enable = "avx512fp16,avx512vl")]
9543#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9544#[rustc_legacy_const_generics(1, 2)]
9545#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
9546pub fn _mm256_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
9547 a: __m256h,
9548) -> __m256h {
9549 static_assert_uimm_bits!(NORM, 4);
9550 static_assert_uimm_bits!(SIGN, 2);
    _mm256_mask_getmant_ph::<NORM, SIGN>(_mm256_undefined_ph(), 0xffff, a)
9552}
9553
9554/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9555/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9556/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9557/// by norm and the sign depends on sign and the source sign.
9558///
9559/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9560///
9561/// _MM_MANT_NORM_1_2 // interval [1, 2)
9562/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9563/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9564/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9565///
9566/// The sign is determined by sc which can take the following values:
9567///
9568/// _MM_MANT_SIGN_src // sign = sign(src)
9569/// _MM_MANT_SIGN_zero // sign = 0
9570/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9571///
9572/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getmant_ph)
9573#[inline]
9574#[target_feature(enable = "avx512fp16,avx512vl")]
9575#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9576#[rustc_legacy_const_generics(3, 4)]
9577#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
9578pub fn _mm256_mask_getmant_ph<
9579 const NORM: _MM_MANTISSA_NORM_ENUM,
9580 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9581>(
9582 src: __m256h,
9583 k: __mmask16,
9584 a: __m256h,
9585) -> __m256h {
9586 unsafe {
9587 static_assert_uimm_bits!(NORM, 4);
9588 static_assert_uimm_bits!(SIGN, 2);
9589 vgetmantph_256(a, (SIGN << 2) | NORM, src, k)
9590 }
9591}
9592
9593/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9594/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9595/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9596/// by norm and the sign depends on sign and the source sign.
9597///
9598/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9599///
9600/// _MM_MANT_NORM_1_2 // interval [1, 2)
9601/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9602/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9603/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9604///
9605/// The sign is determined by sc which can take the following values:
9606///
9607/// _MM_MANT_SIGN_src // sign = sign(src)
9608/// _MM_MANT_SIGN_zero // sign = 0
9609/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9610///
9611/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getmant_ph)
9612#[inline]
9613#[target_feature(enable = "avx512fp16,avx512vl")]
9614#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9615#[rustc_legacy_const_generics(2, 3)]
9616#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
9617pub fn _mm256_maskz_getmant_ph<
9618 const NORM: _MM_MANTISSA_NORM_ENUM,
9619 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9620>(
9621 k: __mmask16,
9622 a: __m256h,
9623) -> __m256h {
9624 static_assert_uimm_bits!(NORM, 4);
9625 static_assert_uimm_bits!(SIGN, 2);
    _mm256_mask_getmant_ph::<NORM, SIGN>(_mm256_setzero_ph(), k, a)
9627}
9628
9629/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9630/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
9631/// on the interval range defined by norm and the sign depends on sign and the source sign.
9632///
9633/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9634///
9635/// _MM_MANT_NORM_1_2 // interval [1, 2)
9636/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9637/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9638/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9639///
9640/// The sign is determined by sc which can take the following values:
9641///
9642/// _MM_MANT_SIGN_src // sign = sign(src)
9643/// _MM_MANT_SIGN_zero // sign = 0
9644/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9645///
9646/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_ph)
9647#[inline]
9648#[target_feature(enable = "avx512fp16")]
9649#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9650#[rustc_legacy_const_generics(1, 2)]
9651#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
9652pub fn _mm512_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
9653 a: __m512h,
9654) -> __m512h {
9655 static_assert_uimm_bits!(NORM, 4);
9656 static_assert_uimm_bits!(SIGN, 2);
    _mm512_mask_getmant_ph::<NORM, SIGN>(_mm512_undefined_ph(), 0xffffffff, a)
9658}
9659
9660/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9661/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9662/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9663/// by norm and the sign depends on sign and the source sign.
9664///
9665/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9666///
9667/// _MM_MANT_NORM_1_2 // interval [1, 2)
9668/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9669/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9670/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9671///
9672/// The sign is determined by sc which can take the following values:
9673///
9674/// _MM_MANT_SIGN_src // sign = sign(src)
9675/// _MM_MANT_SIGN_zero // sign = 0
9676/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9677///
9678/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_ph)
9679#[inline]
9680#[target_feature(enable = "avx512fp16")]
9681#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9682#[rustc_legacy_const_generics(3, 4)]
9683#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
9684pub fn _mm512_mask_getmant_ph<
9685 const NORM: _MM_MANTISSA_NORM_ENUM,
9686 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9687>(
9688 src: __m512h,
9689 k: __mmask32,
9690 a: __m512h,
9691) -> __m512h {
9692 static_assert_uimm_bits!(NORM, 4);
9693 static_assert_uimm_bits!(SIGN, 2);
9694 _mm512_mask_getmant_round_ph::<NORM, SIGN, _MM_FROUND_CUR_DIRECTION>(src, k, a)
9695}
9696
9697/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9698/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9699/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9700/// by norm and the sign depends on sign and the source sign.
9701///
9702/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9703///
9704/// _MM_MANT_NORM_1_2 // interval [1, 2)
9705/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9706/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9707/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9708///
9709/// The sign is determined by sc which can take the following values:
9710///
9711/// _MM_MANT_SIGN_src // sign = sign(src)
9712/// _MM_MANT_SIGN_zero // sign = 0
9713/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9714///
9715/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_ph)
9716#[inline]
9717#[target_feature(enable = "avx512fp16")]
9718#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9719#[rustc_legacy_const_generics(2, 3)]
9720#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
9721pub fn _mm512_maskz_getmant_ph<
9722 const NORM: _MM_MANTISSA_NORM_ENUM,
9723 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9724>(
9725 k: __mmask32,
9726 a: __m512h,
9727) -> __m512h {
9728 static_assert_uimm_bits!(NORM, 4);
9729 static_assert_uimm_bits!(SIGN, 2);
    _mm512_mask_getmant_ph::<NORM, SIGN>(_mm512_setzero_ph(), k, a)
9731}
9732
9733/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9734/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
9735/// on the interval range defined by norm and the sign depends on sign and the source sign. Exceptions can
9736/// be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9737///
9738/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9739///
9740/// _MM_MANT_NORM_1_2 // interval [1, 2)
9741/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9742/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9743/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9744///
9745/// The sign is determined by sc which can take the following values:
9746///
9747/// _MM_MANT_SIGN_src // sign = sign(src)
9748/// _MM_MANT_SIGN_zero // sign = 0
9749/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9750///
9753/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_round_ph)
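///
/// A minimal sketch of suppressing exceptions through the `SAE` parameter (illustrative only;
/// `_MM_FROUND_NO_EXC` and the mantissa constants are defined elsewhere in `core::arch`):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn getmant_no_exc(a: __m512h) -> __m512h {
///     // Same normalization as _mm512_getmant_ph, but no floating-point exceptions are raised
///     _mm512_getmant_round_ph::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_NO_EXC>(a)
/// }
/// ```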
9754#[inline]
9755#[target_feature(enable = "avx512fp16")]
9756#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))]
9757#[rustc_legacy_const_generics(1, 2, 3)]
9758#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
9759pub fn _mm512_getmant_round_ph<
9760 const NORM: _MM_MANTISSA_NORM_ENUM,
9761 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9762 const SAE: i32,
9763>(
9764 a: __m512h,
9765) -> __m512h {
9766 static_assert_uimm_bits!(NORM, 4);
9767 static_assert_uimm_bits!(SIGN, 2);
9768 static_assert_sae!(SAE);
    _mm512_mask_getmant_round_ph::<NORM, SIGN, SAE>(_mm512_undefined_ph(), 0xffffffff, a)
9770}
9771
9772/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9773/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9774/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9775/// by norm and the sign depends on sign and the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
9776/// in the sae parameter
9777///
9778/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9779///
9780/// _MM_MANT_NORM_1_2 // interval [1, 2)
9781/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9782/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9783/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9784///
9785/// The sign is determined by sc which can take the following values:
9786///
9787/// _MM_MANT_SIGN_src // sign = sign(src)
9788/// _MM_MANT_SIGN_zero // sign = 0
9789/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9790///
9793/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_round_ph)
9794#[inline]
9795#[target_feature(enable = "avx512fp16")]
9796#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))]
9797#[rustc_legacy_const_generics(3, 4, 5)]
9798#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
9799pub fn _mm512_mask_getmant_round_ph<
9800 const NORM: _MM_MANTISSA_NORM_ENUM,
9801 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9802 const SAE: i32,
9803>(
9804 src: __m512h,
9805 k: __mmask32,
9806 a: __m512h,
9807) -> __m512h {
9808 unsafe {
9809 static_assert_uimm_bits!(NORM, 4);
9810 static_assert_uimm_bits!(SIGN, 2);
9811 static_assert_sae!(SAE);
9812 vgetmantph_512(a, (SIGN << 2) | NORM, src, k, SAE)
9813 }
9814}
9815
9816/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9817/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9818/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9819/// by norm and the sign depends on sign and the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
9820/// in the sae parameter
9821///
9822/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9823///
9824/// _MM_MANT_NORM_1_2 // interval [1, 2)
9825/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9826/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9827/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9828///
9829/// The sign is determined by sc which can take the following values:
9830///
9831/// _MM_MANT_SIGN_src // sign = sign(src)
9832/// _MM_MANT_SIGN_zero // sign = 0
9833/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9834///
9837/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_round_ph)
9838#[inline]
9839#[target_feature(enable = "avx512fp16")]
9840#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))]
9841#[rustc_legacy_const_generics(2, 3, 4)]
9842#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
9843pub fn _mm512_maskz_getmant_round_ph<
9844 const NORM: _MM_MANTISSA_NORM_ENUM,
9845 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9846 const SAE: i32,
9847>(
9848 k: __mmask32,
9849 a: __m512h,
9850) -> __m512h {
9851 static_assert_uimm_bits!(NORM, 4);
9852 static_assert_uimm_bits!(SIGN, 2);
9853 static_assert_sae!(SAE);
    _mm512_mask_getmant_round_ph::<NORM, SIGN, SAE>(_mm512_setzero_ph(), k, a)
9855}
9856
9857/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
9858/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
9859/// elements of dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
9860/// on the interval range defined by norm and the sign depends on sign and the source sign.
9861///
9862/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9863///
9864/// _MM_MANT_NORM_1_2 // interval [1, 2)
9865/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9866/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9867/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9868///
9869/// The sign is determined by sc which can take the following values:
9870///
9871/// _MM_MANT_SIGN_src // sign = sign(src)
9872/// _MM_MANT_SIGN_zero // sign = 0
9873/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9874///
9875/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_sh)
9876#[inline]
9877#[target_feature(enable = "avx512fp16")]
9878#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))]
9879#[rustc_legacy_const_generics(2, 3)]
9880#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
9881pub fn _mm_getmant_sh<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
9882 a: __m128h,
9883 b: __m128h,
9884) -> __m128h {
9885 static_assert_uimm_bits!(NORM, 4);
9886 static_assert_uimm_bits!(SIGN, 2);
    _mm_mask_getmant_sh::<NORM, SIGN>(f16x8::ZERO.as_m128h(), 0xff, a, b)
9888}
9889
9890/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
9891/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
9892/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
9893/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and
9894/// the source sign.
9895///
9896/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9897///
9898/// _MM_MANT_NORM_1_2 // interval [1, 2)
9899/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9900/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9901/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9902///
9903/// The sign is determined by sc which can take the following values:
9904///
9905/// _MM_MANT_SIGN_src // sign = sign(src)
9906/// _MM_MANT_SIGN_zero // sign = 0
9907/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9908///
9909/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_sh)
9910#[inline]
9911#[target_feature(enable = "avx512fp16")]
9912#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))]
9913#[rustc_legacy_const_generics(4, 5)]
9914#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
9915pub fn _mm_mask_getmant_sh<
9916 const NORM: _MM_MANTISSA_NORM_ENUM,
9917 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9918>(
9919 src: __m128h,
9920 k: __mmask8,
9921 a: __m128h,
9922 b: __m128h,
9923) -> __m128h {
9924 static_assert_uimm_bits!(NORM, 4);
9925 static_assert_uimm_bits!(SIGN, 2);
9926 _mm_mask_getmant_round_sh::<NORM, SIGN, _MM_FROUND_CUR_DIRECTION>(src, k, a, b)
9927}
9928
9929/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
9930/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
9931/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
9932/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and
9933/// the source sign.
9934///
9935/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9936///
9937/// _MM_MANT_NORM_1_2 // interval [1, 2)
9938/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9939/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9940/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9941///
9942/// The sign is determined by sc which can take the following values:
9943///
9944/// _MM_MANT_SIGN_src // sign = sign(src)
9945/// _MM_MANT_SIGN_zero // sign = 0
9946/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9947///
9948/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_sh)
9949#[inline]
9950#[target_feature(enable = "avx512fp16")]
9951#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))]
9952#[rustc_legacy_const_generics(3, 4)]
9953#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
9954pub fn _mm_maskz_getmant_sh<
9955 const NORM: _MM_MANTISSA_NORM_ENUM,
9956 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9957>(
9958 k: __mmask8,
9959 a: __m128h,
9960 b: __m128h,
9961) -> __m128h {
9962 static_assert_uimm_bits!(NORM, 4);
9963 static_assert_uimm_bits!(SIGN, 2);
    _mm_mask_getmant_sh::<NORM, SIGN>(f16x8::ZERO.as_m128h(), k, a, b)
9965}
9966
9967/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
9968/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
9969/// elements of dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
9970/// on the interval range defined by norm and the sign depends on sign and the source sign. Exceptions can
9971/// be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9972///
9973/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9974///
9975/// _MM_MANT_NORM_1_2 // interval [1, 2)
9976/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9977/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9978/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9979///
9980/// The sign is determined by sc which can take the following values:
9981///
9982/// _MM_MANT_SIGN_src // sign = sign(src)
9983/// _MM_MANT_SIGN_zero // sign = 0
9984/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9985///
9988/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_round_sh)
9989#[inline]
9990#[target_feature(enable = "avx512fp16")]
9991#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))]
9992#[rustc_legacy_const_generics(2, 3, 4)]
9993#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
9994pub fn _mm_getmant_round_sh<
9995 const NORM: _MM_MANTISSA_NORM_ENUM,
9996 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9997 const SAE: i32,
9998>(
9999 a: __m128h,
10000 b: __m128h,
10001) -> __m128h {
10002 static_assert_uimm_bits!(NORM, 4);
10003 static_assert_uimm_bits!(SIGN, 2);
10004 static_assert_sae!(SAE);
    _mm_mask_getmant_round_sh::<NORM, SIGN, SAE>(f16x8::ZERO.as_m128h(), 0xff, a, b)
10006}
10007
10008/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
10009/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
10010/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
10011/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and
10012/// the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
10013///
10014/// The mantissa is normalized to the interval specified by interv, which can take the following values:
10015///
10016/// _MM_MANT_NORM_1_2 // interval [1, 2)
10017/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
10018/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
10019/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
10020///
10021/// The sign is determined by sc which can take the following values:
10022///
10023/// _MM_MANT_SIGN_src // sign = sign(src)
10024/// _MM_MANT_SIGN_zero // sign = 0
10025/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
10026///
10029/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_round_sh)
10030#[inline]
10031#[target_feature(enable = "avx512fp16")]
10032#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))]
10033#[rustc_legacy_const_generics(4, 5, 6)]
10034#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10035pub fn _mm_mask_getmant_round_sh<
10036 const NORM: _MM_MANTISSA_NORM_ENUM,
10037 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10038 const SAE: i32,
10039>(
10040 src: __m128h,
10041 k: __mmask8,
10042 a: __m128h,
10043 b: __m128h,
10044) -> __m128h {
10045 unsafe {
10046 static_assert_uimm_bits!(NORM, 4);
10047 static_assert_uimm_bits!(SIGN, 2);
10048 static_assert_sae!(SAE);
10049 vgetmantsh(a, b, (SIGN << 2) | NORM, src, k, SAE)
10050 }
10051}
10052
10053/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
10054/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
10055/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
10056/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and
10057/// the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
10058///
10059/// The mantissa is normalized to the interval specified by interv, which can take the following values:
10060///
10061/// _MM_MANT_NORM_1_2 // interval [1, 2)
10062/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
10063/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
10064/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
10065///
10066/// The sign is determined by sc which can take the following values:
10067///
10068/// _MM_MANT_SIGN_src // sign = sign(src)
10069/// _MM_MANT_SIGN_zero // sign = 0
10070/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
10071///
10074/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_round_sh)
10075#[inline]
10076#[target_feature(enable = "avx512fp16")]
10077#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))]
10078#[rustc_legacy_const_generics(3, 4, 5)]
10079#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10080pub fn _mm_maskz_getmant_round_sh<
10081 const NORM: _MM_MANTISSA_NORM_ENUM,
10082 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10083 const SAE: i32,
10084>(
10085 k: __mmask8,
10086 a: __m128h,
10087 b: __m128h,
10088) -> __m128h {
10089 static_assert_uimm_bits!(NORM, 4);
10090 static_assert_uimm_bits!(SIGN, 2);
10091 static_assert_sae!(SAE);
    _mm_mask_getmant_round_sh::<NORM, SIGN, SAE>(f16x8::ZERO.as_m128h(), k, a, b)
10093}
10094
10095/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
10096/// specified by imm8, and store the results in dst.
10097///
10098/// Rounding is done according to the imm8 parameter, which can be one of:
10099///
10100/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10101/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10102/// * [`_MM_FROUND_TO_POS_INF`] : round up
10103/// * [`_MM_FROUND_TO_ZERO`] : truncate
10104/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10105///
10106/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_ph)
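///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation); it assumes a
/// CPU with `avx512fp16` and `avx512vl`, and uses `_mm_cvtsh_h` from elsewhere in this module:
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn roundscale_demo() -> f16 {
///     let a = _mm_set1_ph(2.75);
///     // With zero fraction bits, _MM_FROUND_TO_NEG_INF rounds every element down to an integer
///     let r = _mm_roundscale_ph::<_MM_FROUND_TO_NEG_INF>(a);
///     _mm_cvtsh_h(r) // 2.0
/// }
/// ```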
10107#[inline]
10108#[target_feature(enable = "avx512fp16,avx512vl")]
10109#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
10110#[rustc_legacy_const_generics(1)]
10111#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10112pub fn _mm_roundscale_ph<const IMM8: i32>(a: __m128h) -> __m128h {
10113 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_roundscale_ph::<IMM8>(_mm_undefined_ph(), 0xff, a)
10115}
10116
10117/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
10118/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
10119/// the corresponding mask bit is not set).
10120///
10121/// Rounding is done according to the imm8 parameter, which can be one of:
10122///
10123/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10124/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10125/// * [`_MM_FROUND_TO_POS_INF`] : round up
10126/// * [`_MM_FROUND_TO_ZERO`] : truncate
10127/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10128///
10129/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_ph)
10130#[inline]
10131#[target_feature(enable = "avx512fp16,avx512vl")]
10132#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
10133#[rustc_legacy_const_generics(3)]
10134#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10135pub fn _mm_mask_roundscale_ph<const IMM8: i32>(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
10136 unsafe {
10137 static_assert_uimm_bits!(IMM8, 8);
10138 vrndscaleph_128(a, IMM8, src, k)
10139 }
10140}
10141
10142/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
10143/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
10144/// mask bit is not set).
10145///
10146/// Rounding is done according to the imm8 parameter, which can be one of:
10147///
10148/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10149/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10150/// * [`_MM_FROUND_TO_POS_INF`] : round up
10151/// * [`_MM_FROUND_TO_ZERO`] : truncate
10152/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10153///
10154/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_ph)
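///
/// A small sketch contrasting the write-masked and zero-masked forms (illustrative only, not
/// part of Intel's documentation):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn mask_vs_maskz() -> (f16, f16) {
///     let src = _mm_set1_ph(9.0);
///     let a = _mm_set1_ph(1.25);
///     let k = 0b1111_1110; // mask bit 0 is clear
///     // Write-masked: element 0 is copied from src, so it stays 9.0
///     let m = _mm_mask_roundscale_ph::<_MM_FROUND_TO_NEAREST_INT>(src, k, a);
///     // Zero-masked: element 0 is zeroed out instead
///     let z = _mm_maskz_roundscale_ph::<_MM_FROUND_TO_NEAREST_INT>(k, a);
///     (_mm_cvtsh_h(m), _mm_cvtsh_h(z)) // (9.0, 0.0)
/// }
/// ```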
10155#[inline]
10156#[target_feature(enable = "avx512fp16,avx512vl")]
10157#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
10158#[rustc_legacy_const_generics(2)]
10159#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10160pub fn _mm_maskz_roundscale_ph<const IMM8: i32>(k: __mmask8, a: __m128h) -> __m128h {
10161 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_roundscale_ph::<IMM8>(_mm_setzero_ph(), k, a)
10163}
10164
10165/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
10166/// specified by imm8, and store the results in dst.
10167///
10168/// Rounding is done according to the imm8 parameter, which can be one of:
10169///
10170/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10171/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10172/// * [`_MM_FROUND_TO_POS_INF`] : round up
10173/// * [`_MM_FROUND_TO_ZERO`] : truncate
10174/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10175///
10176/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_roundscale_ph)
10177#[inline]
10178#[target_feature(enable = "avx512fp16,avx512vl")]
10179#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
10180#[rustc_legacy_const_generics(1)]
10181#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10182pub fn _mm256_roundscale_ph<const IMM8: i32>(a: __m256h) -> __m256h {
10183 static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_roundscale_ph::<IMM8>(_mm256_undefined_ph(), 0xffff, a)
10185}
10186
10187/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
10188/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
10189/// the corresponding mask bit is not set).
10190///
10191/// Rounding is done according to the imm8 parameter, which can be one of:
10192///
10193/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10194/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10195/// * [`_MM_FROUND_TO_POS_INF`] : round up
10196/// * [`_MM_FROUND_TO_ZERO`] : truncate
10197/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10198///
10199/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_roundscale_ph)
10200#[inline]
10201#[target_feature(enable = "avx512fp16,avx512vl")]
10202#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
10203#[rustc_legacy_const_generics(3)]
10204#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10205pub fn _mm256_mask_roundscale_ph<const IMM8: i32>(
10206 src: __m256h,
10207 k: __mmask16,
10208 a: __m256h,
10209) -> __m256h {
10210 unsafe {
10211 static_assert_uimm_bits!(IMM8, 8);
10212 vrndscaleph_256(a, IMM8, src, k)
10213 }
10214}
10215
10216/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
10217/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
10218/// mask bit is not set).
10219///
10220/// Rounding is done according to the imm8 parameter, which can be one of:
10221///
10222/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10223/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10224/// * [`_MM_FROUND_TO_POS_INF`] : round up
10225/// * [`_MM_FROUND_TO_ZERO`] : truncate
10226/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10227///
10228/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_roundscale_ph)
10229#[inline]
10230#[target_feature(enable = "avx512fp16,avx512vl")]
10231#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
10232#[rustc_legacy_const_generics(2)]
10233#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10234pub fn _mm256_maskz_roundscale_ph<const IMM8: i32>(k: __mmask16, a: __m256h) -> __m256h {
10235 static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_roundscale_ph::<IMM8>(_mm256_setzero_ph(), k, a)
10237}
10238
10239/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
10240/// specified by imm8, and store the results in dst.
10241///
10242/// Rounding is done according to the imm8 parameter, which can be one of:
10243///
10244/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10245/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10246/// * [`_MM_FROUND_TO_POS_INF`] : round up
10247/// * [`_MM_FROUND_TO_ZERO`] : truncate
10248/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10249///
10250/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_ph)
10251#[inline]
10252#[target_feature(enable = "avx512fp16")]
10253#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
10254#[rustc_legacy_const_generics(1)]
10255#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10256pub fn _mm512_roundscale_ph<const IMM8: i32>(a: __m512h) -> __m512h {
10257 static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_roundscale_ph::<IMM8>(_mm512_undefined_ph(), 0xffffffff, a)
10259}
10260
10261/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
10262/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
10263/// the corresponding mask bit is not set).
10264///
10265/// Rounding is done according to the imm8 parameter, which can be one of:
10266///
10267/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10268/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10269/// * [`_MM_FROUND_TO_POS_INF`] : round up
10270/// * [`_MM_FROUND_TO_ZERO`] : truncate
10271/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10272///
10273/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_ph)
10274#[inline]
10275#[target_feature(enable = "avx512fp16")]
10276#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
10277#[rustc_legacy_const_generics(3)]
10278#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10279pub fn _mm512_mask_roundscale_ph<const IMM8: i32>(
10280 src: __m512h,
10281 k: __mmask32,
10282 a: __m512h,
10283) -> __m512h {
10284 static_assert_uimm_bits!(IMM8, 8);
10285 _mm512_mask_roundscale_round_ph::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a)
10286}
10287
10288/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
10289/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
10290/// mask bit is not set).
10291///
10292/// Rounding is done according to the imm8 parameter, which can be one of:
10293///
10294/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10295/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10296/// * [`_MM_FROUND_TO_POS_INF`] : round up
10297/// * [`_MM_FROUND_TO_ZERO`] : truncate
10298/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10299///
10300/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_ph)
10301#[inline]
10302#[target_feature(enable = "avx512fp16")]
10303#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
10304#[rustc_legacy_const_generics(2)]
10305#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10306pub fn _mm512_maskz_roundscale_ph<const IMM8: i32>(k: __mmask32, a: __m512h) -> __m512h {
10307 static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_roundscale_ph::<IMM8>(_mm512_setzero_ph(), k, a)
10309}
10310
10311/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
10312/// specified by imm8, and store the results in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
10313/// in the sae parameter
10314///
10315/// Rounding is done according to the imm8 parameter, which can be one of:
10316///
10317/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10318/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10319/// * [`_MM_FROUND_TO_POS_INF`] : round up
10320/// * [`_MM_FROUND_TO_ZERO`] : truncate
10321/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10322///
10323/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_round_ph)
10324#[inline]
10325#[target_feature(enable = "avx512fp16")]
10326#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))]
10327#[rustc_legacy_const_generics(1, 2)]
10328#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10329pub fn _mm512_roundscale_round_ph<const IMM8: i32, const SAE: i32>(a: __m512h) -> __m512h {
10330 static_assert_uimm_bits!(IMM8, 8);
10331 static_assert_sae!(SAE);
    _mm512_mask_roundscale_round_ph::<IMM8, SAE>(_mm512_undefined_ph(), 0xffffffff, a)
10333}
10334
10335/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
10336/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
10337/// the corresponding mask bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
10338/// in the sae parameter
10339///
10340/// Rounding is done according to the imm8 parameter, which can be one of:
10341///
10342/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10343/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10344/// * [`_MM_FROUND_TO_POS_INF`] : round up
10345/// * [`_MM_FROUND_TO_ZERO`] : truncate
10346/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10347///
10348/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_round_ph)
10349#[inline]
10350#[target_feature(enable = "avx512fp16")]
10351#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))]
10352#[rustc_legacy_const_generics(3, 4)]
10353#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10354pub fn _mm512_mask_roundscale_round_ph<const IMM8: i32, const SAE: i32>(
10355 src: __m512h,
10356 k: __mmask32,
10357 a: __m512h,
10358) -> __m512h {
10359 unsafe {
10360 static_assert_uimm_bits!(IMM8, 8);
10361 static_assert_sae!(SAE);
10362 vrndscaleph_512(a, IMM8, src, k, SAE)
10363 }
10364}
10365
10366/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
10367/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
10368/// mask bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
10369///
10370/// Rounding is done according to the imm8 parameter, which can be one of:
10371///
10372/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10373/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10374/// * [`_MM_FROUND_TO_POS_INF`] : round up
10375/// * [`_MM_FROUND_TO_ZERO`] : truncate
10376/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10377///
10378/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_round_ph)
10379#[inline]
10380#[target_feature(enable = "avx512fp16")]
10381#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))]
10382#[rustc_legacy_const_generics(2, 3)]
10383#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10384pub fn _mm512_maskz_roundscale_round_ph<const IMM8: i32, const SAE: i32>(
10385 k: __mmask32,
10386 a: __m512h,
10387) -> __m512h {
10388 static_assert_uimm_bits!(IMM8, 8);
10389 static_assert_sae!(SAE);
    _mm512_mask_roundscale_round_ph::<IMM8, SAE>(_mm512_setzero_ph(), k, a)
10391}
10392
10393/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
10394/// specified by imm8, store the result in the lower element of dst, and copy the upper 7 packed elements
10395/// from a to the upper elements of dst.
10396///
10397/// Rounding is done according to the imm8 parameter, which can be one of:
10398///
10399/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10400/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10401/// * [`_MM_FROUND_TO_POS_INF`] : round up
10402/// * [`_MM_FROUND_TO_ZERO`] : truncate
10403/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10404///
10405/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_sh)
10406#[inline]
10407#[target_feature(enable = "avx512fp16")]
10408#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))]
10409#[rustc_legacy_const_generics(2)]
10410#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10411pub fn _mm_roundscale_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> __m128h {
10412 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_roundscale_sh::<IMM8>(f16x8::ZERO.as_m128h(), 0xff, a, b)
10414}
10415
10416/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
10417/// specified by imm8, store the result in the lower element of dst using writemask k (the element is copied
10418/// from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
10419///
10420/// Rounding is done according to the imm8 parameter, which can be one of:
10421///
10422/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10423/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10424/// * [`_MM_FROUND_TO_POS_INF`] : round up
10425/// * [`_MM_FROUND_TO_ZERO`] : truncate
10426/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10427///
10428/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_sh)
10429#[inline]
10430#[target_feature(enable = "avx512fp16")]
10431#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))]
10432#[rustc_legacy_const_generics(4)]
10433#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10434pub fn _mm_mask_roundscale_sh<const IMM8: i32>(
10435 src: __m128h,
10436 k: __mmask8,
10437 a: __m128h,
10438 b: __m128h,
10439) -> __m128h {
10440 static_assert_uimm_bits!(IMM8, 8);
10441 _mm_mask_roundscale_round_sh::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a, b)
10442}
10443
10444/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
10445/// specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed
10446/// out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
10447///
10448/// Rounding is done according to the imm8 parameter, which can be one of:
10449///
10450/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10451/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10452/// * [`_MM_FROUND_TO_POS_INF`] : round up
10453/// * [`_MM_FROUND_TO_ZERO`] : truncate
10454/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10455///
10456/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_sh)
10457#[inline]
10458#[target_feature(enable = "avx512fp16")]
10459#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))]
10460#[rustc_legacy_const_generics(3)]
10461#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10462pub fn _mm_maskz_roundscale_sh<const IMM8: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
10463 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_roundscale_sh::<IMM8>(f16x8::ZERO.as_m128h(), k, a, b)
10465}
10466
10467/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
10468/// specified by imm8, store the result in the lower element of dst, and copy the upper 7 packed elements
10469/// from a to the upper elements of dst.
10470///
10471/// Rounding is done according to the imm8 parameter, which can be one of:
10472///
10473/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10474/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10475/// * [`_MM_FROUND_TO_POS_INF`] : round up
10476/// * [`_MM_FROUND_TO_ZERO`] : truncate
10477/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10478///
10479/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
10480///
10481/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_round_sh)
10482#[inline]
10483#[target_feature(enable = "avx512fp16")]
10484#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))]
10485#[rustc_legacy_const_generics(2, 3)]
10486#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10487pub fn _mm_roundscale_round_sh<const IMM8: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
10488 static_assert_uimm_bits!(IMM8, 8);
10489 static_assert_sae!(SAE);
    _mm_mask_roundscale_round_sh::<IMM8, SAE>(f16x8::ZERO.as_m128h(), 0xff, a, b)
10491}
10492
10493/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
10494/// specified by imm8, store the result in the lower element of dst using writemask k (the element is copied
10495/// from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
10496///
10497/// Rounding is done according to the imm8 parameter, which can be one of:
10498///
10499/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10500/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10501/// * [`_MM_FROUND_TO_POS_INF`] : round up
10502/// * [`_MM_FROUND_TO_ZERO`] : truncate
10503/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10504///
10505/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
10506///
10507/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_round_sh)
10508#[inline]
10509#[target_feature(enable = "avx512fp16")]
10510#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))]
10511#[rustc_legacy_const_generics(4, 5)]
10512#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10513pub fn _mm_mask_roundscale_round_sh<const IMM8: i32, const SAE: i32>(
10514 src: __m128h,
10515 k: __mmask8,
10516 a: __m128h,
10517 b: __m128h,
10518) -> __m128h {
10519 unsafe {
10520 static_assert_uimm_bits!(IMM8, 8);
10521 static_assert_sae!(SAE);
10522 vrndscalesh(a, b, src, k, IMM8, SAE)
10523 }
10524}
10525
10526/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
10527/// specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed
10528/// out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
10529///
10530/// Rounding is done according to the imm8 parameter, which can be one of:
10531///
10532/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10533/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10534/// * [`_MM_FROUND_TO_POS_INF`] : round up
10535/// * [`_MM_FROUND_TO_ZERO`] : truncate
10536/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10537///
10538/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
10539///
10540/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_round_sh)
10541#[inline]
10542#[target_feature(enable = "avx512fp16")]
10543#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))]
10544#[rustc_legacy_const_generics(3, 4)]
10545#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10546pub fn _mm_maskz_roundscale_round_sh<const IMM8: i32, const SAE: i32>(
10547 k: __mmask8,
10548 a: __m128h,
10549 b: __m128h,
10550) -> __m128h {
10551 static_assert_uimm_bits!(IMM8, 8);
10552 static_assert_sae!(SAE);
    _mm_mask_roundscale_round_sh::<IMM8, SAE>(f16x8::ZERO.as_m128h(), k, a, b)
10554}
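
// Illustrative usage sketch (not part of the original source; values are hypothetical).
// For the roundscale family the upper four bits of `IMM8` choose how many fraction bits
// to keep and the low bits choose the rounding mode, so the lower lane of `b` is rounded
// to that fixed-point precision while the upper lanes are copied from `a`:
//
//     let a = _mm_set1_ph(8.0);
//     let b = _mm_set_sh(1.25);
//     // Keep one fraction bit (IMM8 = 1 << 4); the rounding-mode bits are zero (nearest).
//     let r = _mm_roundscale_round_sh::<{ 1 << 4 }, _MM_FROUND_CUR_DIRECTION>(a, b);
//     // Lane 0 is expected to become 1.0 (nearest multiple of 0.5, ties to even);
//     // lanes 1..=7 are copied from `a`, i.e. 8.0.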
10555
10556/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10557/// the results in dst.
10558///
10559/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_ph)
10560#[inline]
10561#[target_feature(enable = "avx512fp16,avx512vl")]
10562#[cfg_attr(test, assert_instr(vscalefph))]
10563#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10564pub fn _mm_scalef_ph(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_scalef_ph(_mm_undefined_ph(), 0xff, a, b)
10566}
10567
10568/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10569/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10570///
10571/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_ph)
10572#[inline]
10573#[target_feature(enable = "avx512fp16,avx512vl")]
10574#[cfg_attr(test, assert_instr(vscalefph))]
10575#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10576pub fn _mm_mask_scalef_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
10577 unsafe { vscalefph_128(a, b, src, k) }
10578}
10579
10580/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10581/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10582///
10583/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_ph)
10584#[inline]
10585#[target_feature(enable = "avx512fp16,avx512vl")]
10586#[cfg_attr(test, assert_instr(vscalefph))]
10587#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10588pub fn _mm_maskz_scalef_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_scalef_ph(_mm_setzero_ph(), k, a, b)
10590}
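
// Illustrative sketch (not from the original source): `scalef` multiplies each element of
// `a` by two raised to the power of the corresponding element of `b` (the exponent operand
// is effectively rounded toward negative infinity first), i.e. a vectorised `ldexp`-style
// scaling with no integer conversion:
//
//     let a = _mm_set1_ph(3.0);
//     let b = _mm_set1_ph(2.0);
//     let r = _mm_scalef_ph(a, b); // every lane is expected to be 3.0 * 2^2 = 12.0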
10591
10592/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10593/// the results in dst.
10594///
10595/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_scalef_ph)
10596#[inline]
10597#[target_feature(enable = "avx512fp16,avx512vl")]
10598#[cfg_attr(test, assert_instr(vscalefph))]
10599#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10600pub fn _mm256_scalef_ph(a: __m256h, b: __m256h) -> __m256h {
    _mm256_mask_scalef_ph(_mm256_undefined_ph(), 0xffff, a, b)
10602}
10603
10604/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10605/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10606///
10607/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_scalef_ph)
10608#[inline]
10609#[target_feature(enable = "avx512fp16,avx512vl")]
10610#[cfg_attr(test, assert_instr(vscalefph))]
10611#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10612pub fn _mm256_mask_scalef_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
10613 unsafe { vscalefph_256(a, b, src, k) }
10614}
10615
10616/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10617/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10618///
10619/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_scalef_ph)
10620#[inline]
10621#[target_feature(enable = "avx512fp16,avx512vl")]
10622#[cfg_attr(test, assert_instr(vscalefph))]
10623#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10624pub fn _mm256_maskz_scalef_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
    _mm256_mask_scalef_ph(_mm256_setzero_ph(), k, a, b)
10626}
10627
10628/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10629/// the results in dst.
10630///
10631/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_ph)
10632#[inline]
10633#[target_feature(enable = "avx512fp16")]
10634#[cfg_attr(test, assert_instr(vscalefph))]
10635#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10636pub fn _mm512_scalef_ph(a: __m512h, b: __m512h) -> __m512h {
    _mm512_mask_scalef_ph(_mm512_undefined_ph(), 0xffffffff, a, b)
10638}
10639
10640/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10641/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10642///
10643/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_ph)
10644#[inline]
10645#[target_feature(enable = "avx512fp16")]
10646#[cfg_attr(test, assert_instr(vscalefph))]
10647#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10648pub fn _mm512_mask_scalef_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
10649 _mm512_mask_scalef_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
10650}
10651
10652/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10653/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10654///
10655/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_ph)
10656#[inline]
10657#[target_feature(enable = "avx512fp16")]
10658#[cfg_attr(test, assert_instr(vscalefph))]
10659#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10660pub fn _mm512_maskz_scalef_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
    _mm512_mask_scalef_ph(_mm512_setzero_ph(), k, a, b)
10662}
10663
10664/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10665/// the results in dst.
10666///
10667/// Rounding is done according to the rounding parameter, which can be one of:
10668///
10669/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10670/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10671/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10672/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10673/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10674///
10675/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_round_ph)
10676#[inline]
10677#[target_feature(enable = "avx512fp16")]
10678#[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))]
10679#[rustc_legacy_const_generics(2)]
10680#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10681pub fn _mm512_scalef_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
10682 static_assert_rounding!(ROUNDING);
    _mm512_mask_scalef_round_ph::<ROUNDING>(_mm512_undefined_ph(), 0xffffffff, a, b)
10684}
10685
10686/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10687/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10688///
10689/// Rounding is done according to the rounding parameter, which can be one of:
10690///
10691/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10692/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10693/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10694/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10695/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10696///
10697/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_round_ph)
10698#[inline]
10699#[target_feature(enable = "avx512fp16")]
10700#[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))]
10701#[rustc_legacy_const_generics(4)]
10702#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10703pub fn _mm512_mask_scalef_round_ph<const ROUNDING: i32>(
10704 src: __m512h,
10705 k: __mmask32,
10706 a: __m512h,
10707 b: __m512h,
10708) -> __m512h {
10709 unsafe {
10710 static_assert_rounding!(ROUNDING);
10711 vscalefph_512(a, b, src, k, ROUNDING)
10712 }
10713}
10714
10715/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10716/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10717///
10718/// Rounding is done according to the rounding parameter, which can be one of:
10719///
10720/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10721/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10722/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10723/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10724/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10725///
10726/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_round_ph)
10727#[inline]
10728#[target_feature(enable = "avx512fp16")]
10729#[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))]
10730#[rustc_legacy_const_generics(3)]
10731#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10732pub fn _mm512_maskz_scalef_round_ph<const ROUNDING: i32>(
10733 k: __mmask32,
10734 a: __m512h,
10735 b: __m512h,
10736) -> __m512h {
10737 static_assert_rounding!(ROUNDING);
    _mm512_mask_scalef_round_ph::<ROUNDING>(_mm512_setzero_ph(), k, a, b)
10739}
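
// Masked-usage sketch (hypothetical values, not from the original source): only lanes whose
// mask bit is set are scaled; the remaining lanes come from `src` in the writemask form and
// are zeroed in the zeromask form:
//
//     let src = _mm512_set1_ph(-1.0);
//     let a = _mm512_set1_ph(3.0);
//     let b = _mm512_set1_ph(1.0);
//     let r = _mm512_mask_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
//         src, 0x0000_ffff, a, b,
//     );
//     // lanes 0..=15 are expected to be 6.0; lanes 16..=31 keep the -1.0 from `src`.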
10740
/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
10742/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
10743/// elements of dst.
10744///
10745/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_sh)
10746#[inline]
10747#[target_feature(enable = "avx512fp16")]
10748#[cfg_attr(test, assert_instr(vscalefsh))]
10749#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10750pub fn _mm_scalef_sh(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_scalef_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
10752}
10753
/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
10755/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
10756/// and copy the upper 7 packed elements from a to the upper elements of dst.
10757///
10758/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_sh)
10759#[inline]
10760#[target_feature(enable = "avx512fp16")]
10761#[cfg_attr(test, assert_instr(vscalefsh))]
10762#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10763pub fn _mm_mask_scalef_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
10764 _mm_mask_scalef_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
10765}
10766
/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
10768/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
10769/// and copy the upper 7 packed elements from a to the upper elements of dst.
10770///
10771/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_sh)
10772#[inline]
10773#[target_feature(enable = "avx512fp16")]
10774#[cfg_attr(test, assert_instr(vscalefsh))]
10775#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10776pub fn _mm_maskz_scalef_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_scalef_sh(f16x8::ZERO.as_m128h(), k, a, b)
10778}
10779
/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
10781/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
10782/// elements of dst.
10783///
10784/// Rounding is done according to the rounding parameter, which can be one of:
10785///
10786/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10787/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10788/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10789/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10790/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10791///
10792/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_round_sh)
10793#[inline]
10794#[target_feature(enable = "avx512fp16")]
10795#[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))]
10796#[rustc_legacy_const_generics(2)]
10797#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10798pub fn _mm_scalef_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
10799 static_assert_rounding!(ROUNDING);
    _mm_mask_scalef_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
10801}
10802
/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
10804/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
10805/// and copy the upper 7 packed elements from a to the upper elements of dst.
10806///
10807/// Rounding is done according to the rounding parameter, which can be one of:
10808///
10809/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10810/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10811/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10812/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10813/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10814///
10815/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_round_sh)
10816#[inline]
10817#[target_feature(enable = "avx512fp16")]
10818#[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))]
10819#[rustc_legacy_const_generics(4)]
10820#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10821pub fn _mm_mask_scalef_round_sh<const ROUNDING: i32>(
10822 src: __m128h,
10823 k: __mmask8,
10824 a: __m128h,
10825 b: __m128h,
10826) -> __m128h {
10827 unsafe {
10828 static_assert_rounding!(ROUNDING);
10829 vscalefsh(a, b, src, k, ROUNDING)
10830 }
10831}
10832
/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
10834/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
10835/// and copy the upper 7 packed elements from a to the upper elements of dst.
10836///
10837/// Rounding is done according to the rounding parameter, which can be one of:
10838///
10839/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10840/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10841/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10842/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10843/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10844///
10845/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_round_sh)
10846#[inline]
10847#[target_feature(enable = "avx512fp16")]
10848#[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))]
10849#[rustc_legacy_const_generics(3)]
10850#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10851pub fn _mm_maskz_scalef_round_sh<const ROUNDING: i32>(
10852 k: __mmask8,
10853 a: __m128h,
10854 b: __m128h,
10855) -> __m128h {
10856 static_assert_rounding!(ROUNDING);
    _mm_mask_scalef_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
10858}
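
// Illustrative sketch (hypothetical values): the `_sh` forms scale only lane 0 and pass the
// upper seven lanes of `a` through unchanged:
//
//     let a = _mm_set_ph(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.5);
//     let b = _mm_set_sh(3.0);
//     let r = _mm_scalef_sh(a, b); // lane 0: 0.5 * 2^3 = 4.0; lanes 1..=7 copied from `a`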
10859
10860/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10861/// number of bits specified by imm8, and store the results in dst.
10862///
10863/// Rounding is done according to the imm8 parameter, which can be one of:
10864///
10865/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10866/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10867/// * [`_MM_FROUND_TO_POS_INF`] : round up
10868/// * [`_MM_FROUND_TO_ZERO`] : truncate
10869/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10870///
10871/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_ph)
10872#[inline]
10873#[target_feature(enable = "avx512fp16,avx512vl")]
10874#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10875#[rustc_legacy_const_generics(1)]
10876#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10877pub fn _mm_reduce_ph<const IMM8: i32>(a: __m128h) -> __m128h {
10878 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_ph::<IMM8>(_mm_undefined_ph(), 0xff, a)
10880}
10881
10882/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10883/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
10884/// from src when the corresponding mask bit is not set).
10885///
10886/// Rounding is done according to the imm8 parameter, which can be one of:
10887///
10888/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10889/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10890/// * [`_MM_FROUND_TO_POS_INF`] : round up
10891/// * [`_MM_FROUND_TO_ZERO`] : truncate
10892/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10893///
10894/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_ph)
10895#[inline]
10896#[target_feature(enable = "avx512fp16,avx512vl")]
10897#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10898#[rustc_legacy_const_generics(3)]
10899#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10900pub fn _mm_mask_reduce_ph<const IMM8: i32>(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
10901 unsafe {
10902 static_assert_uimm_bits!(IMM8, 8);
10903 vreduceph_128(a, IMM8, src, k)
10904 }
10905}
10906
10907/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10908/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
10909/// out when the corresponding mask bit is not set).
10910///
10911/// Rounding is done according to the imm8 parameter, which can be one of:
10912///
10913/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10914/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10915/// * [`_MM_FROUND_TO_POS_INF`] : round up
10916/// * [`_MM_FROUND_TO_ZERO`] : truncate
10917/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10918///
10919/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_ph)
10920#[inline]
10921#[target_feature(enable = "avx512fp16,avx512vl")]
10922#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10923#[rustc_legacy_const_generics(2)]
10924#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10925pub fn _mm_maskz_reduce_ph<const IMM8: i32>(k: __mmask8, a: __m128h) -> __m128h {
10926 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_ph::<IMM8>(_mm_setzero_ph(), k, a)
10928}
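
// Illustrative sketch (not from the original source): `reduce` returns what is left of each
// element after rounding it to `IMM8 >> 4` fraction bits, i.e. roughly `a - roundscale(a)`:
//
//     let a = _mm_set1_ph(1.25);
//     // Keep zero fraction bits and truncate, so the integer part (1.0) is removed.
//     let r = _mm_reduce_ph::<_MM_FROUND_TO_ZERO>(a); // every lane is expected to be 0.25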
10929
10930/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10931/// number of bits specified by imm8, and store the results in dst.
10932///
10933/// Rounding is done according to the imm8 parameter, which can be one of:
10934///
10935/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10936/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10937/// * [`_MM_FROUND_TO_POS_INF`] : round up
10938/// * [`_MM_FROUND_TO_ZERO`] : truncate
10939/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10940///
10941/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_ph)
10942#[inline]
10943#[target_feature(enable = "avx512fp16,avx512vl")]
10944#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10945#[rustc_legacy_const_generics(1)]
10946#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10947pub fn _mm256_reduce_ph<const IMM8: i32>(a: __m256h) -> __m256h {
10948 static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_reduce_ph::<IMM8>(_mm256_undefined_ph(), 0xffff, a)
10950}
10951
10952/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10953/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
10954/// from src when the corresponding mask bit is not set).
10955///
10956/// Rounding is done according to the imm8 parameter, which can be one of:
10957///
10958/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10959/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10960/// * [`_MM_FROUND_TO_POS_INF`] : round up
10961/// * [`_MM_FROUND_TO_ZERO`] : truncate
10962/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10963///
10964/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_reduce_ph)
10965#[inline]
10966#[target_feature(enable = "avx512fp16,avx512vl")]
10967#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10968#[rustc_legacy_const_generics(3)]
10969#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10970pub fn _mm256_mask_reduce_ph<const IMM8: i32>(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
10971 unsafe {
10972 static_assert_uimm_bits!(IMM8, 8);
10973 vreduceph_256(a, IMM8, src, k)
10974 }
10975}
10976
10977/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10978/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
10979/// out when the corresponding mask bit is not set).
10980///
10981/// Rounding is done according to the imm8 parameter, which can be one of:
10982///
10983/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10984/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10985/// * [`_MM_FROUND_TO_POS_INF`] : round up
10986/// * [`_MM_FROUND_TO_ZERO`] : truncate
10987/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10988///
10989/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_reduce_ph)
10990#[inline]
10991#[target_feature(enable = "avx512fp16,avx512vl")]
10992#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10993#[rustc_legacy_const_generics(2)]
10994#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
10995pub fn _mm256_maskz_reduce_ph<const IMM8: i32>(k: __mmask16, a: __m256h) -> __m256h {
10996 static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_reduce_ph::<IMM8>(_mm256_setzero_ph(), k, a)
10998}
10999
11000/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
11001/// number of bits specified by imm8, and store the results in dst.
11002///
11003/// Rounding is done according to the imm8 parameter, which can be one of:
11004///
11005/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
11006/// * [`_MM_FROUND_TO_NEG_INF`] : round down
11007/// * [`_MM_FROUND_TO_POS_INF`] : round up
11008/// * [`_MM_FROUND_TO_ZERO`] : truncate
11009/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11010///
11011/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_ph)
11012#[inline]
11013#[target_feature(enable = "avx512fp16")]
11014#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
11015#[rustc_legacy_const_generics(1)]
11016#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11017pub fn _mm512_reduce_ph<const IMM8: i32>(a: __m512h) -> __m512h {
11018 static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_reduce_ph::<IMM8>(_mm512_undefined_ph(), 0xffffffff, a)
11020}
11021
11022/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
11023/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
11024/// from src when the corresponding mask bit is not set).
11025///
11026/// Rounding is done according to the imm8 parameter, which can be one of:
11027///
11028/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
11029/// * [`_MM_FROUND_TO_NEG_INF`] : round down
11030/// * [`_MM_FROUND_TO_POS_INF`] : round up
11031/// * [`_MM_FROUND_TO_ZERO`] : truncate
11032/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11033///
11034/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_ph)
11035#[inline]
11036#[target_feature(enable = "avx512fp16")]
11037#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
11038#[rustc_legacy_const_generics(3)]
11039#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11040pub fn _mm512_mask_reduce_ph<const IMM8: i32>(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
11041 static_assert_uimm_bits!(IMM8, 8);
11042 _mm512_mask_reduce_round_ph::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a)
11043}
11044
11045/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
11046/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
11047/// out when the corresponding mask bit is not set).
11048///
11049/// Rounding is done according to the imm8 parameter, which can be one of:
11050///
11051/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
11052/// * [`_MM_FROUND_TO_NEG_INF`] : round down
11053/// * [`_MM_FROUND_TO_POS_INF`] : round up
11054/// * [`_MM_FROUND_TO_ZERO`] : truncate
11055/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11056///
11057/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_ph)
11058#[inline]
11059#[target_feature(enable = "avx512fp16")]
11060#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
11061#[rustc_legacy_const_generics(2)]
11062#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11063pub fn _mm512_maskz_reduce_ph<const IMM8: i32>(k: __mmask32, a: __m512h) -> __m512h {
11064 static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_reduce_ph::<IMM8>(_mm512_setzero_ph(), k, a)
11066}
11067
11068/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
11069/// number of bits specified by imm8, and store the results in dst.
11070///
11071/// Rounding is done according to the imm8 parameter, which can be one of:
11072///
11073/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
11074/// * [`_MM_FROUND_TO_NEG_INF`] : round down
11075/// * [`_MM_FROUND_TO_POS_INF`] : round up
11076/// * [`_MM_FROUND_TO_ZERO`] : truncate
11077/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11078///
11079/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
11080///
11081/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_round_ph)
11082#[inline]
11083#[target_feature(enable = "avx512fp16")]
11084#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))]
11085#[rustc_legacy_const_generics(1, 2)]
11086#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11087pub fn _mm512_reduce_round_ph<const IMM8: i32, const SAE: i32>(a: __m512h) -> __m512h {
11088 static_assert_uimm_bits!(IMM8, 8);
11089 static_assert_sae!(SAE);
    _mm512_mask_reduce_round_ph::<IMM8, SAE>(_mm512_undefined_ph(), 0xffffffff, a)
11091}
11092
11093/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
11094/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
11095/// from src when the corresponding mask bit is not set).
11096///
11097/// Rounding is done according to the imm8 parameter, which can be one of:
11098///
11099/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
11100/// * [`_MM_FROUND_TO_NEG_INF`] : round down
11101/// * [`_MM_FROUND_TO_POS_INF`] : round up
11102/// * [`_MM_FROUND_TO_ZERO`] : truncate
11103/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11104///
11105/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
11106///
11107/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_round_ph)
11108#[inline]
11109#[target_feature(enable = "avx512fp16")]
11110#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))]
11111#[rustc_legacy_const_generics(3, 4)]
11112#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11113pub fn _mm512_mask_reduce_round_ph<const IMM8: i32, const SAE: i32>(
11114 src: __m512h,
11115 k: __mmask32,
11116 a: __m512h,
11117) -> __m512h {
11118 unsafe {
11119 static_assert_uimm_bits!(IMM8, 8);
11120 static_assert_sae!(SAE);
11121 vreduceph_512(a, IMM8, src, k, SAE)
11122 }
11123}
11124
11125/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
11126/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
11127/// out when the corresponding mask bit is not set).
11128///
11129/// Rounding is done according to the imm8 parameter, which can be one of:
11130///
11131/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
11132/// * [`_MM_FROUND_TO_NEG_INF`] : round down
11133/// * [`_MM_FROUND_TO_POS_INF`] : round up
11134/// * [`_MM_FROUND_TO_ZERO`] : truncate
11135/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11136///
11137/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
11138///
11139/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_round_ph)
11140#[inline]
11141#[target_feature(enable = "avx512fp16")]
11142#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))]
11143#[rustc_legacy_const_generics(2, 3)]
11144#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11145pub fn _mm512_maskz_reduce_round_ph<const IMM8: i32, const SAE: i32>(
11146 k: __mmask32,
11147 a: __m512h,
11148) -> __m512h {
11149 static_assert_uimm_bits!(IMM8, 8);
11150 static_assert_sae!(SAE);
    _mm512_mask_reduce_round_ph::<IMM8, SAE>(_mm512_setzero_ph(), k, a)
11152}
11153
11154/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
11155/// the number of bits specified by imm8, store the result in the lower element of dst, and copy the
11156/// upper 7 packed elements from a to the upper elements of dst.
11157///
11158/// Rounding is done according to the imm8 parameter, which can be one of:
11159///
11160/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
11161/// * [`_MM_FROUND_TO_NEG_INF`] : round down
11162/// * [`_MM_FROUND_TO_POS_INF`] : round up
11163/// * [`_MM_FROUND_TO_ZERO`] : truncate
11164/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11165///
11166/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_sh)
11167#[inline]
11168#[target_feature(enable = "avx512fp16")]
11169#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))]
11170#[rustc_legacy_const_generics(2)]
11171#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11172pub fn _mm_reduce_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> __m128h {
11173 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_sh::<IMM8>(f16x8::ZERO.as_m128h(), 0xff, a, b)
11175}
11176
11177/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
11178/// the number of bits specified by imm8, store the result in the lower element of dst using writemask k
11179/// (the element is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from
11180/// a to the upper elements of dst.
11181///
11182/// Rounding is done according to the imm8 parameter, which can be one of:
11183///
11184/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
11185/// * [`_MM_FROUND_TO_NEG_INF`] : round down
11186/// * [`_MM_FROUND_TO_POS_INF`] : round up
11187/// * [`_MM_FROUND_TO_ZERO`] : truncate
11188/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11189///
11190/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_sh)
11191#[inline]
11192#[target_feature(enable = "avx512fp16")]
11193#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))]
11194#[rustc_legacy_const_generics(4)]
11195#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11196pub fn _mm_mask_reduce_sh<const IMM8: i32>(
11197 src: __m128h,
11198 k: __mmask8,
11199 a: __m128h,
11200 b: __m128h,
11201) -> __m128h {
11202 static_assert_uimm_bits!(IMM8, 8);
11203 _mm_mask_reduce_round_sh::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a, b)
11204}
11205
11206/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
11207/// the number of bits specified by imm8, store the result in the lower element of dst using zeromask k
11208/// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a
11209/// to the upper elements of dst.
11210///
11211/// Rounding is done according to the imm8 parameter, which can be one of:
11212///
11213/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
11214/// * [`_MM_FROUND_TO_NEG_INF`] : round down
11215/// * [`_MM_FROUND_TO_POS_INF`] : round up
11216/// * [`_MM_FROUND_TO_ZERO`] : truncate
11217/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11218///
11219/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_sh)
11220#[inline]
11221#[target_feature(enable = "avx512fp16")]
11222#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))]
11223#[rustc_legacy_const_generics(3)]
11224#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11225pub fn _mm_maskz_reduce_sh<const IMM8: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
11226 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_sh::<IMM8>(f16x8::ZERO.as_m128h(), k, a, b)
11228}
11229
11230/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
11231/// the number of bits specified by imm8, store the result in the lower element of dst, and copy the upper
11232/// 7 packed elements from a to the upper elements of dst.
11233///
11234/// Rounding is done according to the imm8 parameter, which can be one of:
11235///
11236/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
11237/// * [`_MM_FROUND_TO_NEG_INF`] : round down
11238/// * [`_MM_FROUND_TO_POS_INF`] : round up
11239/// * [`_MM_FROUND_TO_ZERO`] : truncate
11240/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11241///
11242/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
11243///
11244/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_round_sh)
11245#[inline]
11246#[target_feature(enable = "avx512fp16")]
11247#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))]
11248#[rustc_legacy_const_generics(2, 3)]
11249#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11250pub fn _mm_reduce_round_sh<const IMM8: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
11251 static_assert_uimm_bits!(IMM8, 8);
11252 static_assert_sae!(SAE);
    _mm_mask_reduce_round_sh::<IMM8, SAE>(f16x8::ZERO.as_m128h(), 0xff, a, b)
11254}
11255
11256/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
11257/// the number of bits specified by imm8, store the result in the lower element of dst using writemask k
11258/// (the element is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a
11259/// to the upper elements of dst.
11260///
11261/// Rounding is done according to the imm8 parameter, which can be one of:
11262///
11263/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
11264/// * [`_MM_FROUND_TO_NEG_INF`] : round down
11265/// * [`_MM_FROUND_TO_POS_INF`] : round up
11266/// * [`_MM_FROUND_TO_ZERO`] : truncate
11267/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11268///
11269/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
11270///
11271/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_round_sh)
11272#[inline]
11273#[target_feature(enable = "avx512fp16")]
11274#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))]
11275#[rustc_legacy_const_generics(4, 5)]
11276#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11277pub fn _mm_mask_reduce_round_sh<const IMM8: i32, const SAE: i32>(
11278 src: __m128h,
11279 k: __mmask8,
11280 a: __m128h,
11281 b: __m128h,
11282) -> __m128h {
11283 unsafe {
11284 static_assert_uimm_bits!(IMM8, 8);
11285 static_assert_sae!(SAE);
11286 vreducesh(a, b, src, k, IMM8, SAE)
11287 }
11288}
11289
11290/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
11291/// the number of bits specified by imm8, store the result in the lower element of dst using zeromask k
11292/// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a
11293/// to the upper elements of dst.
11294///
11295/// Rounding is done according to the imm8 parameter, which can be one of:
11296///
11297/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
11298/// * [`_MM_FROUND_TO_NEG_INF`] : round down
11299/// * [`_MM_FROUND_TO_POS_INF`] : round up
11300/// * [`_MM_FROUND_TO_ZERO`] : truncate
11301/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11302///
11303/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
11304///
11305/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_round_sh)
11306#[inline]
11307#[target_feature(enable = "avx512fp16")]
11308#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))]
11309#[rustc_legacy_const_generics(3, 4)]
11310#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11311pub fn _mm_maskz_reduce_round_sh<const IMM8: i32, const SAE: i32>(
11312 k: __mmask8,
11313 a: __m128h,
11314 b: __m128h,
11315) -> __m128h {
11316 static_assert_uimm_bits!(IMM8, 8);
11317 static_assert_sae!(SAE);
    _mm_mask_reduce_round_sh::<IMM8, SAE>(f16x8::ZERO.as_m128h(), k, a, b)
11319}
11320
11321/// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the
11322/// sum of all elements in a.
11323///
11324/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_add_ph)
11325#[inline]
11326#[target_feature(enable = "avx512fp16,avx512vl")]
11327#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11328#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11329pub const fn _mm_reduce_add_ph(a: __m128h) -> f16 {
11330 unsafe {
11331 let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
11332 let a: __m128h = _mm_add_ph(a, b);
11333 let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
11334 let a: __m128h = _mm_add_ph(a, b);
11335 simd_extract!(a, 0, f16) + simd_extract!(a, 1, f16)
11336 }
11337}
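
// The shuffles above implement a log2(n) tree reduction. A scalar equivalent of the same
// folding pattern (a sketch only, written with `f32` for readability) would be:
//
//     fn reduce_add_8(x: [f32; 8]) -> f32 {
//         // Fold the upper half onto the lower half, halve again, then add the last pair.
//         let y = [x[0] + x[4], x[1] + x[5], x[2] + x[6], x[3] + x[7]];
//         let z = [y[0] + y[2], y[1] + y[3]];
//         z[0] + z[1]
//     }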
11338
11339/// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the
11340/// sum of all elements in a.
11341///
11342/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_add_ph)
11343#[inline]
11344#[target_feature(enable = "avx512fp16,avx512vl")]
11345#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11346#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11347pub const fn _mm256_reduce_add_ph(a: __m256h) -> f16 {
11348 unsafe {
11349 let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11350 let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
        _mm_reduce_add_ph(_mm_add_ph(p, q))
11352 }
11353}
11354
11355/// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the
11356/// sum of all elements in a.
11357///
11358/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_add_ph)
11359#[inline]
11360#[target_feature(enable = "avx512fp16")]
11361#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11362#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11363pub const fn _mm512_reduce_add_ph(a: __m512h) -> f16 {
11364 unsafe {
11365 let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
11366 let q: __m256h = simd_shuffle!(
11367 a,
11368 a,
11369 [
11370 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
11371 ]
11372 );
        _mm256_reduce_add_ph(_mm256_add_ph(p, q))
11374 }
11375}
11376
11377/// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns
11378/// the product of all elements in a.
11379///
11380/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_mul_ph)
11381#[inline]
11382#[target_feature(enable = "avx512fp16,avx512vl")]
11383#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11384#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11385pub const fn _mm_reduce_mul_ph(a: __m128h) -> f16 {
11386 unsafe {
11387 let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
11388 let a: __m128h = _mm_mul_ph(a, b);
11389 let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
11390 let a: __m128h = _mm_mul_ph(a, b);
11391 simd_extract!(a, 0, f16) * simd_extract!(a, 1, f16)
11392 }
11393}
11394
11395/// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns
11396/// the product of all elements in a.
11397///
11398/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_mul_ph)
11399#[inline]
11400#[target_feature(enable = "avx512fp16,avx512vl")]
11401#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11402#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11403pub const fn _mm256_reduce_mul_ph(a: __m256h) -> f16 {
11404 unsafe {
11405 let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11406 let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
        _mm_reduce_mul_ph(_mm_mul_ph(p, q))
11408 }
11409}
11410
11411/// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns
11412/// the product of all elements in a.
11413///
11414/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_mul_ph)
11415#[inline]
11416#[target_feature(enable = "avx512fp16")]
11417#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11418#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11419pub const fn _mm512_reduce_mul_ph(a: __m512h) -> f16 {
11420 unsafe {
11421 let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
11422 let q: __m256h = simd_shuffle!(
11423 a,
11424 a,
11425 [
11426 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
11427 ]
11428 );
        _mm256_reduce_mul_ph(_mm256_mul_ph(p, q))
11430 }
11431}
11432
11433/// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the
11434/// minimum of all elements in a.
11435///
11436/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_min_ph)
11437#[inline]
11438#[target_feature(enable = "avx512fp16,avx512vl")]
11439#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11440pub fn _mm_reduce_min_ph(a: __m128h) -> f16 {
11441 unsafe {
11442 let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
11443 let a: __m128h = _mm_min_ph(a, b);
11444 let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
11445 let a: __m128h = _mm_min_ph(a, b);
11446 let b: __m128h = simd_shuffle!(a, a, [1, 0, 2, 3, 4, 5, 6, 7]);
11447 simd_extract!(_mm_min_sh(a, b), 0)
11448 }
11449}
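
// Usage sketch (hypothetical values). The final step goes through `_mm_min_sh`, presumably so
// that NaN and signed-zero handling matches the vminsh instruction rather than a Rust-level min:
//
//     let v = _mm_set_ph(4.0, -2.0, 9.0, 1.5, 0.25, 7.0, 3.0, 8.0);
//     assert_eq!(_mm_reduce_min_ph(v), -2.0);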
11450
11451/// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the
11452/// minimum of all elements in a.
11453///
11454/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_min_ph)
11455#[inline]
11456#[target_feature(enable = "avx512fp16,avx512vl")]
11457#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11458pub fn _mm256_reduce_min_ph(a: __m256h) -> f16 {
11459 unsafe {
11460 let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11461 let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
        _mm_reduce_min_ph(_mm_min_ph(p, q))
11463 }
11464}
11465
11466/// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the
11467/// minimum of all elements in a.
11468///
11469/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_ph)
11470#[inline]
11471#[target_feature(enable = "avx512fp16")]
11472#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11473pub fn _mm512_reduce_min_ph(a: __m512h) -> f16 {
11474 unsafe {
11475 let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
11476 let q: __m256h = simd_shuffle!(
11477 a,
11478 a,
11479 [
11480 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
11481 ]
11482 );
        _mm256_reduce_min_ph(_mm256_min_ph(p, q))
11484 }
11485}
11486
11487/// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the
11488/// maximum of all elements in a.
11489///
11490/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_max_ph)
11491#[inline]
11492#[target_feature(enable = "avx512fp16,avx512vl")]
11493#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11494pub fn _mm_reduce_max_ph(a: __m128h) -> f16 {
11495 unsafe {
11496 let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
11497 let a: __m128h = _mm_max_ph(a, b);
11498 let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
11499 let a: __m128h = _mm_max_ph(a, b);
11500 let b: __m128h = simd_shuffle!(a, a, [1, 0, 2, 3, 4, 5, 6, 7]);
11501 simd_extract!(_mm_max_sh(a, b), 0)
11502 }
11503}
11504
11505/// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the
11506/// maximum of all elements in a.
11507///
11508/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_max_ph)
11509#[inline]
11510#[target_feature(enable = "avx512fp16,avx512vl")]
11511#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11512pub fn _mm256_reduce_max_ph(a: __m256h) -> f16 {
11513 unsafe {
11514 let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11515 let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
        _mm_reduce_max_ph(_mm_max_ph(p, q))
11517 }
11518}
11519
11520/// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the
11521/// maximum of all elements in a.
11522///
11523/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_ph)
11524#[inline]
11525#[target_feature(enable = "avx512fp16")]
11526#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11527pub fn _mm512_reduce_max_ph(a: __m512h) -> f16 {
11528 unsafe {
11529 let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
11530 let q: __m256h = simd_shuffle!(
11531 a,
11532 a,
11533 [
11534 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
11535 ]
11536 );
        _mm256_reduce_max_ph(_mm256_max_ph(p, q))
11538 }
11539}
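
// Usage sketch for the wider forms (hypothetical values); the 256- and 512-bit reductions
// fold the two halves together and defer to the narrower helper:
//
//     let v = _mm256_set_ph(
//         1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
//         9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, -16.0,
//     );
//     assert_eq!(_mm256_reduce_max_ph(v), 15.0);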
11540
11541macro_rules! fpclass_asm { // FIXME: use LLVM intrinsics
11542 ($mask_type: ty, $reg: ident, $a: expr) => {{
11543 let dst: $mask_type;
11544 asm!(
11545 "vfpclassph {k}, {src}, {imm8}",
11546 k = lateout(kreg) dst,
11547 src = in($reg) $a,
11548 imm8 = const IMM8,
11549 options(pure, nomem, nostack)
11550 );
11551 dst
11552 }};
11553 ($mask_type: ty, $mask: expr, $reg: ident, $a: expr) => {{
11554 let dst: $mask_type;
11555 asm!(
11556 "vfpclassph {k} {{ {mask} }}, {src}, {imm8}",
11557 k = lateout(kreg) dst,
11558 mask = in(kreg) $mask,
11559 src = in($reg) $a,
11560 imm8 = const IMM8,
11561 options(pure, nomem, nostack)
11562 );
11563 dst
11564 }};
11565}
11566
11567/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
11568/// by imm8, and store the results in mask vector k.
11569/// imm can be a combination of:
11570///
11571/// 0x01 // QNaN
11572/// 0x02 // Positive Zero
11573/// 0x04 // Negative Zero
11574/// 0x08 // Positive Infinity
11575/// 0x10 // Negative Infinity
11576/// 0x20 // Denormal
11577/// 0x40 // Negative
11578/// 0x80 // SNaN
11579///
11580/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_ph_mask)
11581#[inline]
11582#[target_feature(enable = "avx512fp16,avx512vl")]
11583#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
11584#[rustc_legacy_const_generics(1)]
11585#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11586pub fn _mm_fpclass_ph_mask<const IMM8: i32>(a: __m128h) -> __mmask8 {
11587 unsafe {
11588 static_assert_uimm_bits!(IMM8, 8);
11589 fpclass_asm!(__mmask8, xmm_reg, a)
11590 }
11591}
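
// Usage sketch (hypothetical values): the category bits can be OR-ed together, e.g.
// 0x01 | 0x80 selects both quiet and signalling NaNs:
//
//     let v = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, f16::NAN, 8.0);
//     let k = _mm_fpclass_ph_mask::<0x81>(v);
//     // only lane 1 holds a NaN, so `k` is expected to be 0b0000_0010.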
11592
11593/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
11594/// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the
11595/// corresponding mask bit is not set).
11596/// imm can be a combination of:
11597///
11598/// 0x01 // QNaN
11599/// 0x02 // Positive Zero
11600/// 0x04 // Negative Zero
11601/// 0x08 // Positive Infinity
11602/// 0x10 // Negative Infinity
11603/// 0x20 // Denormal
11604/// 0x40 // Negative
11605/// 0x80 // SNaN
11606///
11607/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_ph_mask)
11608#[inline]
11609#[target_feature(enable = "avx512fp16,avx512vl")]
11610#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
11611#[rustc_legacy_const_generics(2)]
11612#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11613pub fn _mm_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask8, a: __m128h) -> __mmask8 {
11614 unsafe {
11615 static_assert_uimm_bits!(IMM8, 8);
11616 fpclass_asm!(__mmask8, k1, xmm_reg, a)
11617 }
11618}
11619
11620/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
11621/// by imm8, and store the results in mask vector k.
11622/// imm can be a combination of:
11623///
11624/// 0x01 // QNaN
11625/// 0x02 // Positive Zero
11626/// 0x04 // Negative Zero
11627/// 0x08 // Positive Infinity
11628/// 0x10 // Negative Infinity
11629/// 0x20 // Denormal
11630/// 0x40 // Negative
11631/// 0x80 // SNaN
11632///
11633/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fpclass_ph_mask)
11634#[inline]
11635#[target_feature(enable = "avx512fp16,avx512vl")]
11636#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
11637#[rustc_legacy_const_generics(1)]
11638#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11639pub fn _mm256_fpclass_ph_mask<const IMM8: i32>(a: __m256h) -> __mmask16 {
11640 unsafe {
11641 static_assert_uimm_bits!(IMM8, 8);
11642 fpclass_asm!(__mmask16, ymm_reg, a)
11643 }
11644}
11645
11646/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
11647/// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the
11648/// corresponding mask bit is not set).
11649/// imm can be a combination of:
11650///
11651/// 0x01 // QNaN
11652/// 0x02 // Positive Zero
11653/// 0x04 // Negative Zero
11654/// 0x08 // Positive Infinity
11655/// 0x10 // Negative Infinity
11656/// 0x20 // Denormal
11657/// 0x40 // Negative
11658/// 0x80 // SNaN
11659///
11660/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fpclass_ph_mask)
11661#[inline]
11662#[target_feature(enable = "avx512fp16,avx512vl")]
11663#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
11664#[rustc_legacy_const_generics(2)]
11665#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11666pub fn _mm256_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask16, a: __m256h) -> __mmask16 {
11667 unsafe {
11668 static_assert_uimm_bits!(IMM8, 8);
11669 fpclass_asm!(__mmask16, k1, ymm_reg, a)
11670 }
11671}
11672
11673/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
11674/// by imm8, and store the results in mask vector k.
11675/// imm can be a combination of:
11676///
11677/// 0x01 // QNaN
11678/// 0x02 // Positive Zero
11679/// 0x04 // Negative Zero
11680/// 0x08 // Positive Infinity
11681/// 0x10 // Negative Infinity
11682/// 0x20 // Denormal
11683/// 0x40 // Negative
11684/// 0x80 // SNaN
11685///
11686/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fpclass_ph_mask)
11687#[inline]
11688#[target_feature(enable = "avx512fp16")]
11689#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
11690#[rustc_legacy_const_generics(1)]
11691#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11692pub fn _mm512_fpclass_ph_mask<const IMM8: i32>(a: __m512h) -> __mmask32 {
11693 unsafe {
11694 static_assert_uimm_bits!(IMM8, 8);
11695 fpclass_asm!(__mmask32, zmm_reg, a)
11696 }
11697}
11698
11699/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
11700/// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the
11701/// corresponding mask bit is not set).
/// imm8 can be a combination of:
11703///
11704/// 0x01 // QNaN
11705/// 0x02 // Positive Zero
11706/// 0x04 // Negative Zero
11707/// 0x08 // Positive Infinity
11708/// 0x10 // Negative Infinity
11709/// 0x20 // Denormal
11710/// 0x40 // Negative
11711/// 0x80 // SNaN
11712///
11713/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fpclass_ph_mask)
11714#[inline]
11715#[target_feature(enable = "avx512fp16")]
11716#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
11717#[rustc_legacy_const_generics(2)]
11718#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11719pub fn _mm512_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask32, a: __m512h) -> __mmask32 {
11720 unsafe {
11721 static_assert_uimm_bits!(IMM8, 8);
11722 fpclass_asm!(__mmask32, k1, zmm_reg, a)
11723 }
11724}
11725
11726/// Test the lower half-precision (16-bit) floating-point element in a for special categories specified
11727/// by imm8, and store the result in mask vector k.
/// imm8 can be a combination of:
11729///
11730/// 0x01 // QNaN
11731/// 0x02 // Positive Zero
11732/// 0x04 // Negative Zero
11733/// 0x08 // Positive Infinity
11734/// 0x10 // Negative Infinity
11735/// 0x20 // Denormal
11736/// 0x40 // Negative
11737/// 0x80 // SNaN
11738///
11739/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_sh_mask)
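///
/// An illustrative sketch (assumes nightly `f16`/`stdarch_x86_avx512_f16` features and
/// runtime `avx512fp16` support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set_sh(f16::NEG_INFINITY);
///     // 0x10 tests for negative infinity.
///     let k = _mm_fpclass_sh_mask::<0x10>(a);
///     assert_eq!(k, 1);
/// }
/// ```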
11740#[inline]
11741#[target_feature(enable = "avx512fp16")]
11742#[cfg_attr(test, assert_instr(vfpclasssh, IMM8 = 0))]
11743#[rustc_legacy_const_generics(1)]
11744#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11745pub fn _mm_fpclass_sh_mask<const IMM8: i32>(a: __m128h) -> __mmask8 {
    _mm_mask_fpclass_sh_mask::<IMM8>(0xff, a)
11747}
11748
11749/// Test the lower half-precision (16-bit) floating-point element in a for special categories specified
11750/// by imm8, and store the result in mask vector k using zeromask k (elements are zeroed out when the
11751/// corresponding mask bit is not set).
/// imm8 can be a combination of:
11753///
11754/// 0x01 // QNaN
11755/// 0x02 // Positive Zero
11756/// 0x04 // Negative Zero
11757/// 0x08 // Positive Infinity
11758/// 0x10 // Negative Infinity
11759/// 0x20 // Denormal
11760/// 0x40 // Negative
11761/// 0x80 // SNaN
11762///
11763/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_sh_mask)
11764#[inline]
11765#[target_feature(enable = "avx512fp16")]
11766#[cfg_attr(test, assert_instr(vfpclasssh, IMM8 = 0))]
11767#[rustc_legacy_const_generics(2)]
11768#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11769pub fn _mm_mask_fpclass_sh_mask<const IMM8: i32>(k1: __mmask8, a: __m128h) -> __mmask8 {
11770 unsafe {
11771 static_assert_uimm_bits!(IMM8, 8);
        vfpclasssh(a, IMM8, k1)
11773 }
11774}
11775
11776/// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k,
11777/// and store the results in dst.
11778///
11779/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_blend_ph)
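///
/// An illustrative sketch (assumes nightly `f16`/`stdarch_x86_avx512_f16` features and
/// runtime `avx512fp16`/`avx512vl` support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set1_ph(1.0);
///     let b = _mm_set1_ph(2.0);
///     // A set bit in k selects the lane from b, a clear bit selects it from a.
///     let r = _mm_mask_blend_ph(0b0000_1111, a, b);
///     // Lanes 0..=3 of r are 2.0 (from b), lanes 4..=7 are 1.0 (from a).
/// }
/// ```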
11780#[inline]
11781#[target_feature(enable = "avx512fp16,avx512vl")]
11782#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11783#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11784pub const fn _mm_mask_blend_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, b, a) }
11786}
11787
11788/// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k,
11789/// and store the results in dst.
11790///
11791/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_blend_ph)
11792#[inline]
11793#[target_feature(enable = "avx512fp16,avx512vl")]
11794#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11795#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11796pub const fn _mm256_mask_blend_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, b, a) }
11798}
11799
11800/// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k,
11801/// and store the results in dst.
11802///
11803/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_ph)
11804#[inline]
11805#[target_feature(enable = "avx512fp16")]
11806#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11807#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11808pub const fn _mm512_mask_blend_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, b, a) }
11810}
11811
11812/// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector
11813/// and index in idx, and store the results in dst.
11814///
11815/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutex2var_ph)
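///
/// An illustrative sketch (assumes nightly `f16`/`stdarch_x86_avx512_f16` features and
/// runtime `avx512fp16`/`avx512vl` support). For 128-bit vectors, bits 2:0 of each index
/// select an element and bit 3 selects between a (0) and b (1):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set_ph(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0); // a[i] == i
///     let b = _mm_set_ph(15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0); // b[i] == i + 8
///     let idx = _mm_set_epi16(8, 0, 9, 1, 10, 2, 11, 3);
///     let r = _mm_permutex2var_ph(a, idx, b);
///     // r (element 0 first) = [3.0, 11.0, 2.0, 10.0, 1.0, 9.0, 0.0, 8.0]
/// }
/// ```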
11816#[inline]
11817#[target_feature(enable = "avx512fp16,avx512vl")]
11818#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11819pub fn _mm_permutex2var_ph(a: __m128h, idx: __m128i, b: __m128h) -> __m128h {
    _mm_castsi128_ph(_mm_permutex2var_epi16(
        _mm_castph_si128(a),
        idx,
        _mm_castph_si128(b),
    ))
11825}
11826
11827/// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector
11828/// and index in idx, and store the results in dst.
11829///
11830/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutex2var_ph)
11831#[inline]
11832#[target_feature(enable = "avx512fp16,avx512vl")]
11833#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11834pub fn _mm256_permutex2var_ph(a: __m256h, idx: __m256i, b: __m256h) -> __m256h {
    _mm256_castsi256_ph(_mm256_permutex2var_epi16(
        _mm256_castph_si256(a),
        idx,
        _mm256_castph_si256(b),
    ))
11840}
11841
11842/// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector
11843/// and index in idx, and store the results in dst.
11844///
11845/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_ph)
11846#[inline]
11847#[target_feature(enable = "avx512fp16")]
11848#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11849pub fn _mm512_permutex2var_ph(a: __m512h, idx: __m512i, b: __m512h) -> __m512h {
    _mm512_castsi512_ph(_mm512_permutex2var_epi16(
        _mm512_castph_si512(a),
        idx,
        _mm512_castph_si512(b),
    ))
11855}
11856
11857/// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx,
11858/// and store the results in dst.
11859///
11860/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutexvar_ph)
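///
/// An illustrative sketch (assumes nightly `f16`/`stdarch_x86_avx512_f16` features and
/// runtime `avx512fp16`/`avx512vl` support); here idx reverses the lane order, i.e.
/// `dst[i] = a[idx[i]]`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set_ph(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0); // a[i] == i
///     let idx = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
///     let r = _mm_permutexvar_ph(idx, a);
///     // r (element 0 first) = [7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0]
/// }
/// ```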
11861#[inline]
11862#[target_feature(enable = "avx512fp16,avx512vl")]
11863#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11864pub fn _mm_permutexvar_ph(idx: __m128i, a: __m128h) -> __m128h {
    _mm_castsi128_ph(_mm_permutexvar_epi16(idx, _mm_castph_si128(a)))
11866}
11867
11868/// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx,
11869/// and store the results in dst.
11870///
11871/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutexvar_ph)
11872#[inline]
11873#[target_feature(enable = "avx512fp16,avx512vl")]
11874#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11875pub fn _mm256_permutexvar_ph(idx: __m256i, a: __m256h) -> __m256h {
    _mm256_castsi256_ph(_mm256_permutexvar_epi16(idx, _mm256_castph_si256(a)))
11877}
11878
11879/// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx,
11880/// and store the results in dst.
11881///
11882/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_ph)
11883#[inline]
11884#[target_feature(enable = "avx512fp16")]
11885#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11886pub fn _mm512_permutexvar_ph(idx: __m512i, a: __m512h) -> __m512h {
    _mm512_castsi512_ph(_mm512_permutexvar_epi16(idx, _mm512_castph_si512(a)))
11888}
11889
11890/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11891/// and store the results in dst.
11892///
11893/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_ph)
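///
/// An illustrative sketch (assumes nightly `f16`/`stdarch_x86_avx512_f16` features and
/// runtime `avx512fp16`/`avx512vl` support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set_epi16(8, 7, 6, 5, 4, -3, 2, -1);
///     let r = _mm_cvtepi16_ph(a);
///     // r (element 0 first) = [-1.0, 2.0, -3.0, 4.0, 5.0, 6.0, 7.0, 8.0]
/// }
/// ```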
11894#[inline]
11895#[target_feature(enable = "avx512fp16,avx512vl")]
11896#[cfg_attr(test, assert_instr(vcvtw2ph))]
11897#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11898pub fn _mm_cvtepi16_ph(a: __m128i) -> __m128h {
11899 unsafe { vcvtw2ph_128(a.as_i16x8(), _MM_FROUND_CUR_DIRECTION) }
11900}
11901
11902/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11903/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11904/// mask bit is not set).
11905///
11906/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi16_ph)
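///
/// An illustrative sketch of the writemask behaviour (same nightly-feature and runtime
/// assumptions as the unmasked variant):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let src = _mm_set1_ph(-1.0);
///     let a = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
///     // Only lanes 0 and 1 are converted; all other lanes keep the values from src.
///     let r = _mm_mask_cvtepi16_ph(src, 0b0000_0011, a);
///     // r (element 0 first) = [0.0, 1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0]
/// }
/// ```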
11907#[inline]
11908#[target_feature(enable = "avx512fp16,avx512vl")]
11909#[cfg_attr(test, assert_instr(vcvtw2ph))]
11910#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11911pub fn _mm_mask_cvtepi16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_cvtepi16_ph(a), src) }
11913}
11914
11915/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11916/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11917///
11918/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi16_ph)
11919#[inline]
11920#[target_feature(enable = "avx512fp16,avx512vl")]
11921#[cfg_attr(test, assert_instr(vcvtw2ph))]
11922#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11923pub fn _mm_maskz_cvtepi16_ph(k: __mmask8, a: __m128i) -> __m128h {
    _mm_mask_cvtepi16_ph(_mm_setzero_ph(), k, a)
11925}
11926
11927/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11928/// and store the results in dst.
11929///
11930/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_ph)
11931#[inline]
11932#[target_feature(enable = "avx512fp16,avx512vl")]
11933#[cfg_attr(test, assert_instr(vcvtw2ph))]
11934#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11935pub fn _mm256_cvtepi16_ph(a: __m256i) -> __m256h {
11936 unsafe { vcvtw2ph_256(a.as_i16x16(), _MM_FROUND_CUR_DIRECTION) }
11937}
11938
11939/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11940/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11941/// mask bit is not set).
11942///
11943/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi16_ph)
11944#[inline]
11945#[target_feature(enable = "avx512fp16,avx512vl")]
11946#[cfg_attr(test, assert_instr(vcvtw2ph))]
11947#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11948pub fn _mm256_mask_cvtepi16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_cvtepi16_ph(a), src) }
11950}
11951
11952/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11953/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11954///
11955/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi16_ph)
11956#[inline]
11957#[target_feature(enable = "avx512fp16,avx512vl")]
11958#[cfg_attr(test, assert_instr(vcvtw2ph))]
11959#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11960pub fn _mm256_maskz_cvtepi16_ph(k: __mmask16, a: __m256i) -> __m256h {
    _mm256_mask_cvtepi16_ph(_mm256_setzero_ph(), k, a)
11962}
11963
11964/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11965/// and store the results in dst.
11966///
11967/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi16_ph)
11968#[inline]
11969#[target_feature(enable = "avx512fp16")]
11970#[cfg_attr(test, assert_instr(vcvtw2ph))]
11971#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11972pub fn _mm512_cvtepi16_ph(a: __m512i) -> __m512h {
11973 unsafe { vcvtw2ph_512(a.as_i16x32(), _MM_FROUND_CUR_DIRECTION) }
11974}
11975
11976/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11977/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11978/// mask bit is not set).
11979///
11980/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi16_ph)
11981#[inline]
11982#[target_feature(enable = "avx512fp16")]
11983#[cfg_attr(test, assert_instr(vcvtw2ph))]
11984#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11985pub fn _mm512_mask_cvtepi16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_cvtepi16_ph(a), src) }
11987}
11988
11989/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11990/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11991///
11992/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi16_ph)
11993#[inline]
11994#[target_feature(enable = "avx512fp16")]
11995#[cfg_attr(test, assert_instr(vcvtw2ph))]
11996#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
11997pub fn _mm512_maskz_cvtepi16_ph(k: __mmask32, a: __m512i) -> __m512h {
    _mm512_mask_cvtepi16_ph(_mm512_setzero_ph(), k, a)
11999}
12000
12001/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
12002/// and store the results in dst.
12003///
12004/// Rounding is done according to the rounding parameter, which can be one of:
12005///
12006/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12007/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12008/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12009/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12010/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12011///
12012/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi16_ph)
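///
/// An illustrative sketch of why the rounding parameter matters (assumes nightly
/// `f16`/`stdarch_x86_avx512_f16` features and runtime `avx512fp16` support): 2049 has no
/// exact f16 representation (the spacing is 2 in `[2048, 4096)`), so the mode picks the result.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(2049);
///     let up = _mm512_cvt_roundepi16_ph::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
///     let down = _mm512_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
///     // Every lane of `up` is 2050.0 and every lane of `down` is 2048.0.
/// }
/// ```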
12013#[inline]
12014#[target_feature(enable = "avx512fp16")]
12015#[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = 8))]
12016#[rustc_legacy_const_generics(1)]
12017#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12018pub fn _mm512_cvt_roundepi16_ph<const ROUNDING: i32>(a: __m512i) -> __m512h {
12019 unsafe {
12020 static_assert_rounding!(ROUNDING);
12021 vcvtw2ph_512(a.as_i16x32(), ROUNDING)
12022 }
12023}
12024
12025/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
12026/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12027/// mask bit is not set).
12028///
12029/// Rounding is done according to the rounding parameter, which can be one of:
12030///
12031/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12032/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12033/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12034/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12035/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12036///
12037/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi16_ph)
12038#[inline]
12039#[target_feature(enable = "avx512fp16")]
12040#[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = 8))]
12041#[rustc_legacy_const_generics(3)]
12042#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12043pub fn _mm512_mask_cvt_roundepi16_ph<const ROUNDING: i32>(
12044 src: __m512h,
12045 k: __mmask32,
12046 a: __m512i,
12047) -> __m512h {
12048 unsafe {
12049 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_cvt_roundepi16_ph::<ROUNDING>(a), src)
12051 }
12052}
12053
12054/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
12055/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12056///
12057/// Rounding is done according to the rounding parameter, which can be one of:
12058///
12059/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12060/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12061/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12062/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12063/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12064///
12065/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi16_ph)
12066#[inline]
12067#[target_feature(enable = "avx512fp16")]
12068#[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = 8))]
12069#[rustc_legacy_const_generics(2)]
12070#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12071pub fn _mm512_maskz_cvt_roundepi16_ph<const ROUNDING: i32>(k: __mmask32, a: __m512i) -> __m512h {
12072 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundepi16_ph::<ROUNDING>(_mm512_setzero_ph(), k, a)
12074}
12075
12076/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
12077/// and store the results in dst.
12078///
12079/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_ph)
12080#[inline]
12081#[target_feature(enable = "avx512fp16,avx512vl")]
12082#[cfg_attr(test, assert_instr(vcvtuw2ph))]
12083#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12084pub fn _mm_cvtepu16_ph(a: __m128i) -> __m128h {
12085 unsafe { vcvtuw2ph_128(a.as_u16x8(), _MM_FROUND_CUR_DIRECTION) }
12086}
12087
12088/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
12089/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12090/// mask bit is not set).
12091///
12092/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu16_ph)
12093#[inline]
12094#[target_feature(enable = "avx512fp16,avx512vl")]
12095#[cfg_attr(test, assert_instr(vcvtuw2ph))]
12096#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12097pub fn _mm_mask_cvtepu16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_cvtepu16_ph(a), src) }
12099}
12100
12101/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
12102/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12103///
12104/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu16_ph)
12105#[inline]
12106#[target_feature(enable = "avx512fp16,avx512vl")]
12107#[cfg_attr(test, assert_instr(vcvtuw2ph))]
12108#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12109pub fn _mm_maskz_cvtepu16_ph(k: __mmask8, a: __m128i) -> __m128h {
    _mm_mask_cvtepu16_ph(_mm_setzero_ph(), k, a)
12111}
12112
12113/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
12114/// and store the results in dst.
12115///
12116/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu16_ph)
12117#[inline]
12118#[target_feature(enable = "avx512fp16,avx512vl")]
12119#[cfg_attr(test, assert_instr(vcvtuw2ph))]
12120#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12121pub fn _mm256_cvtepu16_ph(a: __m256i) -> __m256h {
12122 unsafe { vcvtuw2ph_256(a.as_u16x16(), _MM_FROUND_CUR_DIRECTION) }
12123}
12124
12125/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
12126/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12127/// mask bit is not set).
12128///
12129/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu16_ph)
12130#[inline]
12131#[target_feature(enable = "avx512fp16,avx512vl")]
12132#[cfg_attr(test, assert_instr(vcvtuw2ph))]
12133#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12134pub fn _mm256_mask_cvtepu16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_cvtepu16_ph(a), src) }
12136}
12137
12138/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
12139/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12140///
12141/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu16_ph)
12142#[inline]
12143#[target_feature(enable = "avx512fp16,avx512vl")]
12144#[cfg_attr(test, assert_instr(vcvtuw2ph))]
12145#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12146pub fn _mm256_maskz_cvtepu16_ph(k: __mmask16, a: __m256i) -> __m256h {
    _mm256_mask_cvtepu16_ph(_mm256_setzero_ph(), k, a)
12148}
12149
12150/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
12151/// and store the results in dst.
12152///
12153/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu16_ph)
12154#[inline]
12155#[target_feature(enable = "avx512fp16")]
12156#[cfg_attr(test, assert_instr(vcvtuw2ph))]
12157#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12158pub fn _mm512_cvtepu16_ph(a: __m512i) -> __m512h {
12159 unsafe { vcvtuw2ph_512(a.as_u16x32(), _MM_FROUND_CUR_DIRECTION) }
12160}
12161
12162/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
12163/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12164/// mask bit is not set).
12165///
12166/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu16_ph)
12167#[inline]
12168#[target_feature(enable = "avx512fp16")]
12169#[cfg_attr(test, assert_instr(vcvtuw2ph))]
12170#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12171pub fn _mm512_mask_cvtepu16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_cvtepu16_ph(a), src) }
12173}
12174
12175/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
12176/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12177///
12178/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu16_ph)
12179#[inline]
12180#[target_feature(enable = "avx512fp16")]
12181#[cfg_attr(test, assert_instr(vcvtuw2ph))]
12182#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12183pub fn _mm512_maskz_cvtepu16_ph(k: __mmask32, a: __m512i) -> __m512h {
    _mm512_mask_cvtepu16_ph(_mm512_setzero_ph(), k, a)
12185}
12186
12187/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
12188/// and store the results in dst.
12189///
12190/// Rounding is done according to the rounding parameter, which can be one of:
12191///
12192/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12193/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12194/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12195/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12196/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12197///
12198/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu16_ph)
12199#[inline]
12200#[target_feature(enable = "avx512fp16")]
12201#[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = 8))]
12202#[rustc_legacy_const_generics(1)]
12203#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12204pub fn _mm512_cvt_roundepu16_ph<const ROUNDING: i32>(a: __m512i) -> __m512h {
12205 unsafe {
12206 static_assert_rounding!(ROUNDING);
12207 vcvtuw2ph_512(a.as_u16x32(), ROUNDING)
12208 }
12209}
12210
12211/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
12212/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12213/// mask bit is not set).
12214///
12215/// Rounding is done according to the rounding parameter, which can be one of:
12216///
12217/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12218/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12219/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12220/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12221/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12222///
12223/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu16_ph)
12224#[inline]
12225#[target_feature(enable = "avx512fp16")]
12226#[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = 8))]
12227#[rustc_legacy_const_generics(3)]
12228#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12229pub fn _mm512_mask_cvt_roundepu16_ph<const ROUNDING: i32>(
12230 src: __m512h,
12231 k: __mmask32,
12232 a: __m512i,
12233) -> __m512h {
12234 unsafe {
12235 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_cvt_roundepu16_ph::<ROUNDING>(a), src)
12237 }
12238}
12239
12240/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
12241/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12242///
12243/// Rounding is done according to the rounding parameter, which can be one of:
12244///
12245/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12246/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12247/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12248/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12249/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12250///
12251/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu16_ph)
12252#[inline]
12253#[target_feature(enable = "avx512fp16")]
12254#[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = 8))]
12255#[rustc_legacy_const_generics(2)]
12256#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12257pub fn _mm512_maskz_cvt_roundepu16_ph<const ROUNDING: i32>(k: __mmask32, a: __m512i) -> __m512h {
12258 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundepu16_ph::<ROUNDING>(_mm512_setzero_ph(), k, a)
12260}
12261
12262/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12263/// and store the results in dst. The upper 64 bits of dst are zeroed out.
12264///
12265/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ph)
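///
/// An illustrative sketch (assumes nightly `f16`/`stdarch_x86_avx512_f16` features and
/// runtime `avx512fp16`/`avx512vl` support); the four source integers only fill the lower
/// half of the result:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set_epi32(40, 30, 20, 10);
///     let r = _mm_cvtepi32_ph(a);
///     // r (element 0 first) = [10.0, 20.0, 30.0, 40.0, 0.0, 0.0, 0.0, 0.0]
/// }
/// ```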
12266#[inline]
12267#[target_feature(enable = "avx512fp16,avx512vl")]
12268#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12269#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12270pub fn _mm_cvtepi32_ph(a: __m128i) -> __m128h {
    _mm_mask_cvtepi32_ph(_mm_setzero_ph(), 0xff, a)
12272}
12273
12274/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12275/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12276/// mask bit is not set). The upper 64 bits of dst are zeroed out.
12277///
12278/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi32_ph)
12279#[inline]
12280#[target_feature(enable = "avx512fp16,avx512vl")]
12281#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12282#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12283pub fn _mm_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
12284 unsafe { vcvtdq2ph_128(a.as_i32x4(), src, k) }
12285}
12286
12287/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12288/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12289/// The upper 64 bits of dst are zeroed out.
12290///
12291/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi32_ph)
12292#[inline]
12293#[target_feature(enable = "avx512fp16,avx512vl")]
12294#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12295#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12296pub fn _mm_maskz_cvtepi32_ph(k: __mmask8, a: __m128i) -> __m128h {
    _mm_mask_cvtepi32_ph(_mm_setzero_ph(), k, a)
12298}
12299
12300/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12301/// and store the results in dst.
12302///
12303/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_ph)
12304#[inline]
12305#[target_feature(enable = "avx512fp16,avx512vl")]
12306#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12307#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12308pub fn _mm256_cvtepi32_ph(a: __m256i) -> __m128h {
12309 unsafe { vcvtdq2ph_256(a.as_i32x8(), _MM_FROUND_CUR_DIRECTION) }
12310}
12311
12312/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12313/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12314/// mask bit is not set).
12315///
12316/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi32_ph)
12317#[inline]
12318#[target_feature(enable = "avx512fp16,avx512vl")]
12319#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12320#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12321pub fn _mm256_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm256_cvtepi32_ph(a), src) }
12323}
12324
12325/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12326/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12327///
12328/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi32_ph)
12329#[inline]
12330#[target_feature(enable = "avx512fp16,avx512vl")]
12331#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12332#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12333pub fn _mm256_maskz_cvtepi32_ph(k: __mmask8, a: __m256i) -> __m128h {
    _mm256_mask_cvtepi32_ph(_mm_setzero_ph(), k, a)
12335}
12336
12337/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12338/// and store the results in dst.
12339///
12340/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi32_ph)
12341#[inline]
12342#[target_feature(enable = "avx512fp16")]
12343#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12344#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12345pub fn _mm512_cvtepi32_ph(a: __m512i) -> __m256h {
12346 unsafe { vcvtdq2ph_512(a.as_i32x16(), _MM_FROUND_CUR_DIRECTION) }
12347}
12348
12349/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12350/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12351/// mask bit is not set).
12352///
12353/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi32_ph)
12354#[inline]
12355#[target_feature(enable = "avx512fp16")]
12356#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12357#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12358pub fn _mm512_mask_cvtepi32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm512_cvtepi32_ph(a), src) }
12360}
12361
12362/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12363/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12364///
12365/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi32_ph)
12366#[inline]
12367#[target_feature(enable = "avx512fp16")]
12368#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12369#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12370pub fn _mm512_maskz_cvtepi32_ph(k: __mmask16, a: __m512i) -> __m256h {
    _mm512_mask_cvtepi32_ph(f16x16::ZERO.as_m256h(), k, a)
12372}
12373
12374/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12375/// and store the results in dst.
12376///
12377/// Rounding is done according to the rounding parameter, which can be one of:
12378///
12379/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12380/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12381/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12382/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12383/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12384///
12385/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi32_ph)
12386#[inline]
12387#[target_feature(enable = "avx512fp16")]
12388#[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = 8))]
12389#[rustc_legacy_const_generics(1)]
12390#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12391pub fn _mm512_cvt_roundepi32_ph<const ROUNDING: i32>(a: __m512i) -> __m256h {
12392 unsafe {
12393 static_assert_rounding!(ROUNDING);
12394 vcvtdq2ph_512(a.as_i32x16(), ROUNDING)
12395 }
12396}
12397
12398/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12399/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12400/// mask bit is not set).
12401///
12402/// Rounding is done according to the rounding parameter, which can be one of:
12403///
12404/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12405/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12406/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12407/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12408/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12409///
12410/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi32_ph)
12411#[inline]
12412#[target_feature(enable = "avx512fp16")]
12413#[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = 8))]
12414#[rustc_legacy_const_generics(3)]
12415#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12416pub fn _mm512_mask_cvt_roundepi32_ph<const ROUNDING: i32>(
12417 src: __m256h,
12418 k: __mmask16,
12419 a: __m512i,
12420) -> __m256h {
12421 unsafe {
12422 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_cvt_roundepi32_ph::<ROUNDING>(a), src)
12424 }
12425}
12426
12427/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12428/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12429///
12430/// Rounding is done according to the rounding parameter, which can be one of:
12431///
12432/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12433/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12434/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12435/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12436/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12437///
12438/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi32_ph)
12439#[inline]
12440#[target_feature(enable = "avx512fp16")]
12441#[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = 8))]
12442#[rustc_legacy_const_generics(2)]
12443#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12444pub fn _mm512_maskz_cvt_roundepi32_ph<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m256h {
12445 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundepi32_ph::<ROUNDING>(f16x16::ZERO.as_m256h(), k, a)
12447}
12448
12449/// Convert the signed 32-bit integer b to a half-precision (16-bit) floating-point element, store the
12450/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
12451/// of dst.
12452///
12453/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti32_sh)
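///
/// An illustrative sketch (assumes nightly `f16`/`stdarch_x86_avx512_f16` features and
/// runtime `avx512fp16` support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set1_ph(5.0);
///     let r = _mm_cvti32_sh(a, -7);
///     // r (element 0 first) = [-7.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0]
/// }
/// ```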
12454#[inline]
12455#[target_feature(enable = "avx512fp16")]
12456#[cfg_attr(test, assert_instr(vcvtsi2sh))]
12457#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12458pub fn _mm_cvti32_sh(a: __m128h, b: i32) -> __m128h {
12459 unsafe { vcvtsi2sh(a, b, _MM_FROUND_CUR_DIRECTION) }
12460}
12461
12462/// Convert the signed 32-bit integer b to a half-precision (16-bit) floating-point element, store the
12463/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
12464/// of dst.
12465///
12466/// Rounding is done according to the rounding parameter, which can be one of:
12467///
12468/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12469/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12470/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12471/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12472/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12473///
12474/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi32_sh)
12475#[inline]
12476#[target_feature(enable = "avx512fp16")]
12477#[cfg_attr(test, assert_instr(vcvtsi2sh, ROUNDING = 8))]
12478#[rustc_legacy_const_generics(2)]
12479#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12480pub fn _mm_cvt_roundi32_sh<const ROUNDING: i32>(a: __m128h, b: i32) -> __m128h {
12481 unsafe {
12482 static_assert_rounding!(ROUNDING);
12483 vcvtsi2sh(a, b, ROUNDING)
12484 }
12485}
12486
12487/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12488/// and store the results in dst. The upper 64 bits of dst are zeroed out.
12489///
12490/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu32_ph)
12491#[inline]
12492#[target_feature(enable = "avx512fp16,avx512vl")]
12493#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12494#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12495pub fn _mm_cvtepu32_ph(a: __m128i) -> __m128h {
    _mm_mask_cvtepu32_ph(_mm_setzero_ph(), 0xff, a)
12497}
12498
12499/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12500/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12501/// mask bit is not set). The upper 64 bits of dst are zeroed out.
12502///
12503/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu32_ph)
12504#[inline]
12505#[target_feature(enable = "avx512fp16,avx512vl")]
12506#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12507#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12508pub fn _mm_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
12509 unsafe { vcvtudq2ph_128(a.as_u32x4(), src, k) }
12510}
12511
12512/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12513/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12514/// The upper 64 bits of dst are zeroed out.
12515///
12516/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu32_ph)
12517#[inline]
12518#[target_feature(enable = "avx512fp16,avx512vl")]
12519#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12520#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12521pub fn _mm_maskz_cvtepu32_ph(k: __mmask8, a: __m128i) -> __m128h {
    _mm_mask_cvtepu32_ph(_mm_setzero_ph(), k, a)
12523}
12524
12525/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12526/// and store the results in dst.
12527///
12528/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu32_ph)
12529#[inline]
12530#[target_feature(enable = "avx512fp16,avx512vl")]
12531#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12532#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12533pub fn _mm256_cvtepu32_ph(a: __m256i) -> __m128h {
12534 unsafe { vcvtudq2ph_256(a.as_u32x8(), _MM_FROUND_CUR_DIRECTION) }
12535}
12536
12537/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12538/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12539/// mask bit is not set).
12540///
12541/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu32_ph)
12542#[inline]
12543#[target_feature(enable = "avx512fp16,avx512vl")]
12544#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12545#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12546pub fn _mm256_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm256_cvtepu32_ph(a), src) }
12548}
12549
12550/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12551/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12552///
12553/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu32_ph)
12554#[inline]
12555#[target_feature(enable = "avx512fp16,avx512vl")]
12556#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12557#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12558pub fn _mm256_maskz_cvtepu32_ph(k: __mmask8, a: __m256i) -> __m128h {
    _mm256_mask_cvtepu32_ph(_mm_setzero_ph(), k, a)
12560}
12561
12562/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12563/// and store the results in dst.
12564///
12565/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu32_ph)
12566#[inline]
12567#[target_feature(enable = "avx512fp16")]
12568#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12569#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12570pub fn _mm512_cvtepu32_ph(a: __m512i) -> __m256h {
12571 unsafe { vcvtudq2ph_512(a.as_u32x16(), _MM_FROUND_CUR_DIRECTION) }
12572}
12573
12574/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12575/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12576/// mask bit is not set).
12577///
12578/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu32_ph)
12579#[inline]
12580#[target_feature(enable = "avx512fp16")]
12581#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12582#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12583pub fn _mm512_mask_cvtepu32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm512_cvtepu32_ph(a), src) }
12585}
12586
12587/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12588/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12589///
12590/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu32_ph)
12591#[inline]
12592#[target_feature(enable = "avx512fp16")]
12593#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12594#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12595pub fn _mm512_maskz_cvtepu32_ph(k: __mmask16, a: __m512i) -> __m256h {
    _mm512_mask_cvtepu32_ph(f16x16::ZERO.as_m256h(), k, a)
12597}
12598
12599/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12600/// and store the results in dst.
12601///
12602/// Rounding is done according to the rounding parameter, which can be one of:
12603///
12604/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12605/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12606/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12607/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12608/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12609///
12610/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu32_ph)
12611#[inline]
12612#[target_feature(enable = "avx512fp16")]
12613#[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = 8))]
12614#[rustc_legacy_const_generics(1)]
12615#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12616pub fn _mm512_cvt_roundepu32_ph<const ROUNDING: i32>(a: __m512i) -> __m256h {
12617 unsafe {
12618 static_assert_rounding!(ROUNDING);
12619 vcvtudq2ph_512(a.as_u32x16(), ROUNDING)
12620 }
12621}
12622
12623/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12624/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12625/// mask bit is not set).
12626///
12627/// Rounding is done according to the rounding parameter, which can be one of:
12628///
12629/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12630/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12631/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12632/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12633/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12634///
12635/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu32_ph)
12636#[inline]
12637#[target_feature(enable = "avx512fp16")]
12638#[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = 8))]
12639#[rustc_legacy_const_generics(3)]
12640#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12641pub fn _mm512_mask_cvt_roundepu32_ph<const ROUNDING: i32>(
12642 src: __m256h,
12643 k: __mmask16,
12644 a: __m512i,
12645) -> __m256h {
12646 unsafe {
12647 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_cvt_roundepu32_ph::<ROUNDING>(a), src)
12649 }
12650}
12651
12652/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12653/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12654///
12655/// Rounding is done according to the rounding parameter, which can be one of:
12656///
12657/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12658/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12659/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12660/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12661/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12662///
12663/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu32_ph)
12664#[inline]
12665#[target_feature(enable = "avx512fp16")]
12666#[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = 8))]
12667#[rustc_legacy_const_generics(2)]
12668#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12669pub fn _mm512_maskz_cvt_roundepu32_ph<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m256h {
12670 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundepu32_ph::<ROUNDING>(f16x16::ZERO.as_m256h(), k, a)
12672}
12673
12674/// Convert the unsigned 32-bit integer b to a half-precision (16-bit) floating-point element, store the
12675/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
12676/// of dst.
12677///
12678/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu32_sh)
12679#[inline]
12680#[target_feature(enable = "avx512fp16")]
12681#[cfg_attr(test, assert_instr(vcvtusi2sh))]
12682#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12683pub fn _mm_cvtu32_sh(a: __m128h, b: u32) -> __m128h {
12684 unsafe { vcvtusi2sh(a, b, _MM_FROUND_CUR_DIRECTION) }
12685}
12686
12687/// Convert the unsigned 32-bit integer b to a half-precision (16-bit) floating-point element, store the
12688/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
12689/// of dst.
12690///
12691/// Rounding is done according to the rounding parameter, which can be one of:
12692///
12693/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12694/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12695/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12696/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12697/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12698///
12699/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu32_sh)
12700#[inline]
12701#[target_feature(enable = "avx512fp16")]
12702#[cfg_attr(test, assert_instr(vcvtusi2sh, ROUNDING = 8))]
12703#[rustc_legacy_const_generics(2)]
12704#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12705pub fn _mm_cvt_roundu32_sh<const ROUNDING: i32>(a: __m128h, b: u32) -> __m128h {
12706 unsafe {
12707 static_assert_rounding!(ROUNDING);
12708 vcvtusi2sh(a, b, ROUNDING)
12709 }
12710}
12711
12712/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12713/// and store the results in dst. The upper 96 bits of dst are zeroed out.
12714///
12715/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi64_ph)
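///
/// An illustrative sketch (assumes nightly `f16`/`stdarch_x86_avx512_f16` features and
/// runtime `avx512fp16`/`avx512vl` support); only two results are produced and the other
/// six lanes are zeroed:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set_epi64x(-2, 1);
///     let r = _mm_cvtepi64_ph(a);
///     // r (element 0 first) = [1.0, -2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
/// }
/// ```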
12716#[inline]
12717#[target_feature(enable = "avx512fp16,avx512vl")]
12718#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12719#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12720pub fn _mm_cvtepi64_ph(a: __m128i) -> __m128h {
    _mm_mask_cvtepi64_ph(_mm_setzero_ph(), 0xff, a)
12722}
12723
12724/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12725/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12726/// mask bit is not set). The upper 96 bits of dst are zeroed out.
12727///
12728/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi64_ph)
12729#[inline]
12730#[target_feature(enable = "avx512fp16,avx512vl")]
12731#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12732#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12733pub fn _mm_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
12734 unsafe { vcvtqq2ph_128(a.as_i64x2(), src, k) }
12735}
12736
12737/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12738/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12739/// The upper 96 bits of dst are zeroed out.
12740///
12741/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi64_ph)
12742#[inline]
12743#[target_feature(enable = "avx512fp16,avx512vl")]
12744#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12745#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12746pub fn _mm_maskz_cvtepi64_ph(k: __mmask8, a: __m128i) -> __m128h {
    _mm_mask_cvtepi64_ph(_mm_setzero_ph(), k, a)
12748}
12749
12750/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12751/// and store the results in dst. The upper 64 bits of dst are zeroed out.
12752///
12753/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi64_ph)
12754#[inline]
12755#[target_feature(enable = "avx512fp16,avx512vl")]
12756#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12757#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12758pub fn _mm256_cvtepi64_ph(a: __m256i) -> __m128h {
    _mm256_mask_cvtepi64_ph(_mm_setzero_ph(), 0xff, a)
12760}
12761
12762/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12763/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12764/// mask bit is not set). The upper 64 bits of dst are zeroed out.
12765///
12766/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi64_ph)
12767#[inline]
12768#[target_feature(enable = "avx512fp16,avx512vl")]
12769#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12770#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12771pub fn _mm256_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
12772 unsafe { vcvtqq2ph_256(a.as_i64x4(), src, k) }
12773}
12774
12775/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12776/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12777/// The upper 64 bits of dst are zeroed out.
12778///
12779/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi64_ph)
12780#[inline]
12781#[target_feature(enable = "avx512fp16,avx512vl")]
12782#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12783#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12784pub fn _mm256_maskz_cvtepi64_ph(k: __mmask8, a: __m256i) -> __m128h {
    _mm256_mask_cvtepi64_ph(_mm_setzero_ph(), k, a)
12786}
12787
12788/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12789/// and store the results in dst.
12790///
12791/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi64_ph)
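///
/// A minimal usage sketch (illustrative only; the `demo` wrapper is hypothetical and a nightly
/// AVX512-FP16 + AVX512F capable target is assumed):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512f")]
/// fn demo() -> __m128h {
///     // Eight signed 64-bit lanes, each holding -3.
///     let a = _mm512_set1_epi64(-3);
///     // Each lane converts to the f16 value -3.0; the packed result fits in a 128-bit vector.
///     _mm512_cvtepi64_ph(a)
/// }
/// ```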
12792#[inline]
12793#[target_feature(enable = "avx512fp16")]
12794#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12795#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12796pub fn _mm512_cvtepi64_ph(a: __m512i) -> __m128h {
12797 unsafe { vcvtqq2ph_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION) }
12798}
12799
12800/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12801/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12802/// mask bit is not set).
12803///
12804/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi64_ph)
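///
/// A minimal masking sketch (illustrative only; the `demo` wrapper and the chosen mask value are
/// assumptions, and a nightly AVX512-FP16 + AVX512F capable target with the unstable `f16` type
/// is assumed):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512f")]
/// fn demo() -> __m128h {
///     let src = _mm_set1_ph(1.0);
///     let a = _mm512_set1_epi64(7);
///     // Only lanes 0 and 2 receive the converted value 7.0; the other lanes keep 1.0 from src.
///     _mm512_mask_cvtepi64_ph(src, 0b0000_0101, a)
/// }
/// ```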
12805#[inline]
12806#[target_feature(enable = "avx512fp16")]
12807#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12808#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12809pub fn _mm512_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm512_cvtepi64_ph(a), src) }
12811}
12812
12813/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12814/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12815///
12816/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi64_ph)
12817#[inline]
12818#[target_feature(enable = "avx512fp16")]
12819#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12820#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12821pub fn _mm512_maskz_cvtepi64_ph(k: __mmask8, a: __m512i) -> __m128h {
    _mm512_mask_cvtepi64_ph(f16x8::ZERO.as_m128h(), k, a)
12823}
12824
12825/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12826/// and store the results in dst.
12827///
12828/// Rounding is done according to the rounding parameter, which can be one of:
12829///
12830/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12831/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12832/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12833/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12834/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12835///
12836/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi64_ph)
12837#[inline]
12838#[target_feature(enable = "avx512fp16")]
12839#[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = 8))]
12840#[rustc_legacy_const_generics(1)]
12841#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12842pub fn _mm512_cvt_roundepi64_ph<const ROUNDING: i32>(a: __m512i) -> __m128h {
12843 unsafe {
12844 static_assert_rounding!(ROUNDING);
12845 vcvtqq2ph_512(a.as_i64x8(), ROUNDING)
12846 }
12847}
12848
12849/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12850/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12851/// mask bit is not set).
12852///
12853/// Rounding is done according to the rounding parameter, which can be one of:
12854///
12855/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12856/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12857/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12858/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12859/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12860///
12861/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi64_ph)
12862#[inline]
12863#[target_feature(enable = "avx512fp16")]
12864#[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = 8))]
12865#[rustc_legacy_const_generics(3)]
12866#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12867pub fn _mm512_mask_cvt_roundepi64_ph<const ROUNDING: i32>(
12868 src: __m128h,
12869 k: __mmask8,
12870 a: __m512i,
12871) -> __m128h {
12872 unsafe {
12873 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_cvt_roundepi64_ph::<ROUNDING>(a), src)
12875 }
12876}
12877
12878/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12879/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12880///
12881/// Rounding is done according to the rounding parameter, which can be one of:
12882///
12883/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12884/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12885/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12886/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12887/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12888///
12889/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi64_ph)
12890#[inline]
12891#[target_feature(enable = "avx512fp16")]
12892#[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = 8))]
12893#[rustc_legacy_const_generics(2)]
12894#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12895pub fn _mm512_maskz_cvt_roundepi64_ph<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m128h {
12896 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundepi64_ph::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a)
12898}
12899
12900/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12901/// and store the results in dst. The upper 96 bits of dst are zeroed out.
12902///
12903/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu64_ph)
12904#[inline]
12905#[target_feature(enable = "avx512fp16,avx512vl")]
12906#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12907#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12908pub fn _mm_cvtepu64_ph(a: __m128i) -> __m128h {
    _mm_mask_cvtepu64_ph(_mm_setzero_ph(), 0xff, a)
12910}
12911
12912/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12913/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12914/// mask bit is not set). The upper 96 bits of dst are zeroed out.
12915///
12916/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu64_ph)
12917#[inline]
12918#[target_feature(enable = "avx512fp16,avx512vl")]
12919#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12920#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12921pub fn _mm_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
12922 unsafe { vcvtuqq2ph_128(a.as_u64x2(), src, k) }
12923}
12924
12925/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12926/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12927/// The upper 96 bits of dst are zeroed out.
12928///
12929/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu64_ph)
12930#[inline]
12931#[target_feature(enable = "avx512fp16,avx512vl")]
12932#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12933#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12934pub fn _mm_maskz_cvtepu64_ph(k: __mmask8, a: __m128i) -> __m128h {
    _mm_mask_cvtepu64_ph(_mm_setzero_ph(), k, a)
12936}
12937
12938/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12939/// and store the results in dst. The upper 64 bits of dst are zeroed out.
12940///
12941/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu64_ph)
12942#[inline]
12943#[target_feature(enable = "avx512fp16,avx512vl")]
12944#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12945#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12946pub fn _mm256_cvtepu64_ph(a: __m256i) -> __m128h {
    _mm256_mask_cvtepu64_ph(_mm_setzero_ph(), 0xff, a)
12948}
12949
12950/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12951/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12952/// mask bit is not set). The upper 64 bits of dst are zeroed out.
12953///
12954/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu64_ph)
12955#[inline]
12956#[target_feature(enable = "avx512fp16,avx512vl")]
12957#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12958#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12959pub fn _mm256_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
12960 unsafe { vcvtuqq2ph_256(a.as_u64x4(), src, k) }
12961}
12962
12963/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12964/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12965/// The upper 64 bits of dst are zeroed out.
12966///
12967/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu64_ph)
12968#[inline]
12969#[target_feature(enable = "avx512fp16,avx512vl")]
12970#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12971#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12972pub fn _mm256_maskz_cvtepu64_ph(k: __mmask8, a: __m256i) -> __m128h {
    _mm256_mask_cvtepu64_ph(_mm_setzero_ph(), k, a)
12974}
12975
12976/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12977/// and store the results in dst.
12978///
12979/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu64_ph)
12980#[inline]
12981#[target_feature(enable = "avx512fp16")]
12982#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12983#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12984pub fn _mm512_cvtepu64_ph(a: __m512i) -> __m128h {
12985 unsafe { vcvtuqq2ph_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION) }
12986}
12987
12988/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12989/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12990/// mask bit is not set).
12991///
12992/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu64_ph)
12993#[inline]
12994#[target_feature(enable = "avx512fp16")]
12995#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12996#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
12997pub fn _mm512_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm512_cvtepu64_ph(a), src) }
12999}
13000
13001/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
13002/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13003///
13004/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu64_ph)
13005#[inline]
13006#[target_feature(enable = "avx512fp16")]
13007#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
13008#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13009pub fn _mm512_maskz_cvtepu64_ph(k: __mmask8, a: __m512i) -> __m128h {
    _mm512_mask_cvtepu64_ph(f16x8::ZERO.as_m128h(), k, a)
13011}
13012
13013/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
13014/// and store the results in dst.
13015///
13016/// Rounding is done according to the rounding parameter, which can be one of:
13017///
13018/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13019/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13020/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13021/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13022/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13023///
13024/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu64_ph)
13025#[inline]
13026#[target_feature(enable = "avx512fp16")]
13027#[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = 8))]
13028#[rustc_legacy_const_generics(1)]
13029#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13030pub fn _mm512_cvt_roundepu64_ph<const ROUNDING: i32>(a: __m512i) -> __m128h {
13031 unsafe {
13032 static_assert_rounding!(ROUNDING);
13033 vcvtuqq2ph_512(a.as_u64x8(), ROUNDING)
13034 }
13035}
13036
13037/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
13038/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
13039/// mask bit is not set).
13040///
13041/// Rounding is done according to the rounding parameter, which can be one of:
13042///
13043/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13044/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13045/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13046/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13047/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13048///
13049/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu64_ph)
13050#[inline]
13051#[target_feature(enable = "avx512fp16")]
13052#[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = 8))]
13053#[rustc_legacy_const_generics(3)]
13054#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13055pub fn _mm512_mask_cvt_roundepu64_ph<const ROUNDING: i32>(
13056 src: __m128h,
13057 k: __mmask8,
13058 a: __m512i,
13059) -> __m128h {
13060 unsafe {
13061 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_cvt_roundepu64_ph::<ROUNDING>(a), src)
13063 }
13064}
13065
13066/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
13067/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13068///
13069/// Rounding is done according to the rounding parameter, which can be one of:
13070///
13071/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13072/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13073/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13074/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13075/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13076///
13077/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu64_ph)
13078#[inline]
13079#[target_feature(enable = "avx512fp16")]
13080#[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = 8))]
13081#[rustc_legacy_const_generics(2)]
13082#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13083pub fn _mm512_maskz_cvt_roundepu64_ph<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m128h {
13084 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundepu64_ph::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a)
13086}
13087
13088/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
13089/// floating-point elements, and store the results in dst.
13090///
13091/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxps_ph)
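///
/// A minimal usage sketch (illustrative only; the `demo` wrapper is hypothetical and a nightly
/// AVX512-FP16 + AVX512VL capable target is assumed):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() -> __m128h {
///     // Four f32 lanes: 1.0, 2.0, 3.0, 4.0 (element 0 is the last argument).
///     let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
///     // The four converted f16 values occupy the low 64 bits of the result.
///     _mm_cvtxps_ph(a)
/// }
/// ```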
13092#[inline]
13093#[target_feature(enable = "avx512fp16,avx512vl")]
13094#[cfg_attr(test, assert_instr(vcvtps2phx))]
13095#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13096pub fn _mm_cvtxps_ph(a: __m128) -> __m128h {
    _mm_mask_cvtxps_ph(_mm_setzero_ph(), 0xff, a)
13098}
13099
13100/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
13101/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
13102/// when the corresponding mask bit is not set). The upper 64 bits of dst are zeroed out.
13103///
13104/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxps_ph)
13105#[inline]
13106#[target_feature(enable = "avx512fp16,avx512vl")]
13107#[cfg_attr(test, assert_instr(vcvtps2phx))]
13108#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13109pub fn _mm_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m128) -> __m128h {
13110 unsafe { vcvtps2phx_128(a, src, k) }
13111}
13112
13113/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
13114/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
13115/// corresponding mask bit is not set). The upper 64 bits of dst are zeroed out.
13116///
13117/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxps_ph)
13118#[inline]
13119#[target_feature(enable = "avx512fp16,avx512vl")]
13120#[cfg_attr(test, assert_instr(vcvtps2phx))]
13121#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13122pub fn _mm_maskz_cvtxps_ph(k: __mmask8, a: __m128) -> __m128h {
    _mm_mask_cvtxps_ph(_mm_setzero_ph(), k, a)
13124}
13125
13126/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
13127/// floating-point elements, and store the results in dst.
13128///
13129/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxps_ph)
13130#[inline]
13131#[target_feature(enable = "avx512fp16,avx512vl")]
13132#[cfg_attr(test, assert_instr(vcvtps2phx))]
13133#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13134pub fn _mm256_cvtxps_ph(a: __m256) -> __m128h {
    _mm256_mask_cvtxps_ph(_mm_setzero_ph(), 0xff, a)
13136}
13137
13138/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
13139/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
13140/// when the corresponding mask bit is not set).
13141///
13142/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxps_ph)
13143#[inline]
13144#[target_feature(enable = "avx512fp16,avx512vl")]
13145#[cfg_attr(test, assert_instr(vcvtps2phx))]
13146#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13147pub fn _mm256_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m256) -> __m128h {
13148 unsafe { vcvtps2phx_256(a, src, k) }
13149}
13150
13151/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
13152/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
13153/// corresponding mask bit is not set).
13154///
13155/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxps_ph)
13156#[inline]
13157#[target_feature(enable = "avx512fp16,avx512vl")]
13158#[cfg_attr(test, assert_instr(vcvtps2phx))]
13159#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13160pub fn _mm256_maskz_cvtxps_ph(k: __mmask8, a: __m256) -> __m128h {
    _mm256_mask_cvtxps_ph(_mm_setzero_ph(), k, a)
13162}
13163
13164/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
13165/// floating-point elements, and store the results in dst.
13166///
13167/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxps_ph)
13168#[inline]
13169#[target_feature(enable = "avx512fp16")]
13170#[cfg_attr(test, assert_instr(vcvtps2phx))]
13171#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13172pub fn _mm512_cvtxps_ph(a: __m512) -> __m256h {
    _mm512_mask_cvtxps_ph(f16x16::ZERO.as_m256h(), 0xffff, a)
13174}
13175
13176/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
13177/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
13178/// when the corresponding mask bit is not set).
13179///
13180/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxps_ph)
13181#[inline]
13182#[target_feature(enable = "avx512fp16")]
13183#[cfg_attr(test, assert_instr(vcvtps2phx))]
13184#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13185pub fn _mm512_mask_cvtxps_ph(src: __m256h, k: __mmask16, a: __m512) -> __m256h {
13186 unsafe { vcvtps2phx_512(a, src, k, _MM_FROUND_CUR_DIRECTION) }
13187}
13188
13189/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
13190/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
13191/// corresponding mask bit is not set).
13192///
13193/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxps_ph)
13194#[inline]
13195#[target_feature(enable = "avx512fp16")]
13196#[cfg_attr(test, assert_instr(vcvtps2phx))]
13197#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13198pub fn _mm512_maskz_cvtxps_ph(k: __mmask16, a: __m512) -> __m256h {
    _mm512_mask_cvtxps_ph(f16x16::ZERO.as_m256h(), k, a)
13200}
13201
13202/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
13203/// floating-point elements, and store the results in dst.
13204///
13205/// Rounding is done according to the rounding parameter, which can be one of:
13206///
13207/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13208/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13209/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13210/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13211/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13212///
13213/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundps_ph)
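///
/// A minimal rounding-mode sketch (illustrative only; the `demo` wrapper is hypothetical and a
/// nightly AVX512-FP16 + AVX512F capable target is assumed):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512f")]
/// fn demo() -> __m256h {
///     let a = _mm512_set1_ps(0.1);
///     // 0.1 is not exactly representable in f16, so rounding toward negative infinity and
///     // rounding toward positive infinity produce adjacent f16 values.
///     _mm512_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a)
/// }
/// ```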
13214#[inline]
13215#[target_feature(enable = "avx512fp16")]
13216#[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = 8))]
13217#[rustc_legacy_const_generics(1)]
13218#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13219pub fn _mm512_cvtx_roundps_ph<const ROUNDING: i32>(a: __m512) -> __m256h {
13220 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvtx_roundps_ph::<ROUNDING>(f16x16::ZERO.as_m256h(), 0xffff, a)
13222}
13223
13224/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
13225/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
13226/// when the corresponding mask bit is not set).
13227///
13228/// Rounding is done according to the rounding parameter, which can be one of:
13229///
13230/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13231/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13232/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13233/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13234/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13235///
13236/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundps_ph)
13237#[inline]
13238#[target_feature(enable = "avx512fp16")]
13239#[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = 8))]
13240#[rustc_legacy_const_generics(3)]
13241#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13242pub fn _mm512_mask_cvtx_roundps_ph<const ROUNDING: i32>(
13243 src: __m256h,
13244 k: __mmask16,
13245 a: __m512,
13246) -> __m256h {
13247 unsafe {
13248 static_assert_rounding!(ROUNDING);
13249 vcvtps2phx_512(a, src, k, ROUNDING)
13250 }
13251}
13252
13253/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
13254/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
13255/// corresponding mask bit is not set).
13256///
13257/// Rounding is done according to the rounding parameter, which can be one of:
13258///
13259/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13260/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13261/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13262/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13263/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13264///
13265/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundps_ph)
13266#[inline]
13267#[target_feature(enable = "avx512fp16")]
13268#[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = 8))]
13269#[rustc_legacy_const_generics(2)]
13270#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13271pub fn _mm512_maskz_cvtx_roundps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256h {
13272 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvtx_roundps_ph::<ROUNDING>(f16x16::ZERO.as_m256h(), k, a)
13274}
13275
13276/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed
13278/// elements from a to the upper elements of dst.
13279///
13280/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sh)
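///
/// A minimal usage sketch (illustrative only; the `demo` wrapper is hypothetical and a nightly
/// AVX512-FP16 capable target with the unstable `f16` type is assumed):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() -> __m128h {
///     let a = _mm_set1_ph(2.0);
///     let b = _mm_set_ss(1.25);
///     // Lane 0 becomes 1.25 converted to f16; lanes 1..=7 are copied from a (2.0).
///     _mm_cvtss_sh(a, b)
/// }
/// ```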
13281#[inline]
13282#[target_feature(enable = "avx512fp16")]
13283#[cfg_attr(test, assert_instr(vcvtss2sh))]
13284#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13285pub fn _mm_cvtss_sh(a: __m128h, b: __m128) -> __m128h {
    _mm_mask_cvtss_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
13287}
13288
13289/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using writemask k (the element
/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
13292/// upper elements of dst.
13293///
13294/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtss_sh)
13295#[inline]
13296#[target_feature(enable = "avx512fp16")]
13297#[cfg_attr(test, assert_instr(vcvtss2sh))]
13298#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13299pub fn _mm_mask_cvtss_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128) -> __m128h {
13300 unsafe { vcvtss2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
13301}
13302
13303/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using zeromask k (the element
13305/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
13306/// elements of dst.
13307///
13308/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtss_sh)
13309#[inline]
13310#[target_feature(enable = "avx512fp16")]
13311#[cfg_attr(test, assert_instr(vcvtss2sh))]
13312#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13313pub fn _mm_maskz_cvtss_sh(k: __mmask8, a: __m128h, b: __m128) -> __m128h {
    _mm_mask_cvtss_sh(f16x8::ZERO.as_m128h(), k, a, b)
13315}
13316
13317/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed
13319/// elements from a to the upper elements of dst.
13320///
13321/// Rounding is done according to the rounding parameter, which can be one of:
13322///
13323/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13324/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13325/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13326/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13327/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13328///
13329/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_sh)
13330#[inline]
13331#[target_feature(enable = "avx512fp16")]
13332#[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = 8))]
13333#[rustc_legacy_const_generics(2)]
13334#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13335pub fn _mm_cvt_roundss_sh<const ROUNDING: i32>(a: __m128h, b: __m128) -> __m128h {
13336 static_assert_rounding!(ROUNDING);
    _mm_mask_cvt_roundss_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
13338}
13339
13340/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using writemask k (the element
/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
13343/// upper elements of dst.
13344///
13345/// Rounding is done according to the rounding parameter, which can be one of:
13346///
13347/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13348/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13349/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13350/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13351/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13352///
13353/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundss_sh)
13354#[inline]
13355#[target_feature(enable = "avx512fp16")]
13356#[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = 8))]
13357#[rustc_legacy_const_generics(4)]
13358#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13359pub fn _mm_mask_cvt_roundss_sh<const ROUNDING: i32>(
13360 src: __m128h,
13361 k: __mmask8,
13362 a: __m128h,
13363 b: __m128,
13364) -> __m128h {
13365 unsafe {
13366 static_assert_rounding!(ROUNDING);
13367 vcvtss2sh(a, b, src, k, ROUNDING)
13368 }
13369}
13370
13371/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using zeromask k (the element
13373/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
13374/// elements of dst.
13375///
13376/// Rounding is done according to the rounding parameter, which can be one of:
13377///
13378/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13379/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13380/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13381/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13382/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13383///
13384/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundss_sh)
13385#[inline]
13386#[target_feature(enable = "avx512fp16")]
13387#[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = 8))]
13388#[rustc_legacy_const_generics(3)]
13389#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13390pub fn _mm_maskz_cvt_roundss_sh<const ROUNDING: i32>(
13391 k: __mmask8,
13392 a: __m128h,
13393 b: __m128,
13394) -> __m128h {
13395 static_assert_rounding!(ROUNDING);
    _mm_mask_cvt_roundss_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
13397}
13398
13399/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13400/// floating-point elements, and store the results in dst. The upper 96 bits of dst are zeroed out.
13401///
13402/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ph)
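///
/// A minimal usage sketch (illustrative only; the `demo` wrapper is hypothetical and a nightly
/// AVX512-FP16 + AVX512VL capable target is assumed):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() -> __m128h {
///     // Two f64 lanes: element 0 is -0.75, element 1 is 2.5.
///     let a = _mm_set_pd(2.5, -0.75);
///     // The two converted f16 values land in lanes 0 and 1; the upper 96 bits are zero.
///     _mm_cvtpd_ph(a)
/// }
/// ```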
13403#[inline]
13404#[target_feature(enable = "avx512fp16,avx512vl")]
13405#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13406#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13407pub fn _mm_cvtpd_ph(a: __m128d) -> __m128h {
    _mm_mask_cvtpd_ph(_mm_setzero_ph(), 0xff, a)
13409}
13410
13411/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13412/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
13413/// when the corresponding mask bit is not set). The upper 96 bits of dst are zeroed out.
13414///
13415/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtpd_ph)
13416#[inline]
13417#[target_feature(enable = "avx512fp16,avx512vl")]
13418#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13419#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13420pub fn _mm_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m128d) -> __m128h {
13421 unsafe { vcvtpd2ph_128(a, src, k) }
13422}
13423
13424/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13425/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
13426/// corresponding mask bit is not set). The upper 96 bits of dst are zeroed out.
13427///
13428/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtpd_ph)
13429#[inline]
13430#[target_feature(enable = "avx512fp16,avx512vl")]
13431#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13432#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13433pub fn _mm_maskz_cvtpd_ph(k: __mmask8, a: __m128d) -> __m128h {
    _mm_mask_cvtpd_ph(_mm_setzero_ph(), k, a)
13435}
13436
13437/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13438/// floating-point elements, and store the results in dst. The upper 64 bits of dst are zeroed out.
13439///
13440/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtpd_ph)
13441#[inline]
13442#[target_feature(enable = "avx512fp16,avx512vl")]
13443#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13444#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13445pub fn _mm256_cvtpd_ph(a: __m256d) -> __m128h {
    _mm256_mask_cvtpd_ph(_mm_setzero_ph(), 0xff, a)
13447}
13448
13449/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13450/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
13451/// when the corresponding mask bit is not set). The upper 64 bits of dst are zeroed out.
13452///
13453/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtpd_ph)
13454#[inline]
13455#[target_feature(enable = "avx512fp16,avx512vl")]
13456#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13457#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13458pub fn _mm256_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m256d) -> __m128h {
13459 unsafe { vcvtpd2ph_256(a, src, k) }
13460}
13461
13462/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13463/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
13464/// corresponding mask bit is not set). The upper 64 bits of dst are zeroed out.
13465///
13466/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtpd_ph)
13467#[inline]
13468#[target_feature(enable = "avx512fp16,avx512vl")]
13469#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13470#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13471pub fn _mm256_maskz_cvtpd_ph(k: __mmask8, a: __m256d) -> __m128h {
    _mm256_mask_cvtpd_ph(_mm_setzero_ph(), k, a)
13473}
13474
13475/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13476/// floating-point elements, and store the results in dst.
13477///
13478/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpd_ph)
13479#[inline]
13480#[target_feature(enable = "avx512fp16")]
13481#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13482#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13483pub fn _mm512_cvtpd_ph(a: __m512d) -> __m128h {
    _mm512_mask_cvtpd_ph(f16x8::ZERO.as_m128h(), 0xff, a)
13485}
13486
13487/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13488/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
13489/// when the corresponding mask bit is not set).
13490///
13491/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtpd_ph)
13492#[inline]
13493#[target_feature(enable = "avx512fp16")]
13494#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13495#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13496pub fn _mm512_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m512d) -> __m128h {
13497 unsafe { vcvtpd2ph_512(a, src, k, _MM_FROUND_CUR_DIRECTION) }
13498}
13499
13500/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13501/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
13502/// corresponding mask bit is not set).
13503///
13504/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtpd_ph)
13505#[inline]
13506#[target_feature(enable = "avx512fp16")]
13507#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13508#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13509pub fn _mm512_maskz_cvtpd_ph(k: __mmask8, a: __m512d) -> __m128h {
    _mm512_mask_cvtpd_ph(f16x8::ZERO.as_m128h(), k, a)
13511}
13512
13513/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13514/// floating-point elements, and store the results in dst.
13515///
13516/// Rounding is done according to the rounding parameter, which can be one of:
13517///
13518/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13519/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13520/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13521/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13522/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13523///
13524/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundpd_ph)
13525#[inline]
13526#[target_feature(enable = "avx512fp16")]
13527#[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = 8))]
13528#[rustc_legacy_const_generics(1)]
13529#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13530pub fn _mm512_cvt_roundpd_ph<const ROUNDING: i32>(a: __m512d) -> __m128h {
13531 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundpd_ph::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a)
13533}
13534
13535/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13536/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
13537/// when the corresponding mask bit is not set).
13538///
13539/// Rounding is done according to the rounding parameter, which can be one of:
13540///
13541/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13542/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13543/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13544/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13545/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13546///
13547/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundpd_ph)
13548#[inline]
13549#[target_feature(enable = "avx512fp16")]
13550#[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = 8))]
13551#[rustc_legacy_const_generics(3)]
13552#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13553pub fn _mm512_mask_cvt_roundpd_ph<const ROUNDING: i32>(
13554 src: __m128h,
13555 k: __mmask8,
13556 a: __m512d,
13557) -> __m128h {
13558 unsafe {
13559 static_assert_rounding!(ROUNDING);
13560 vcvtpd2ph_512(a, src, k, ROUNDING)
13561 }
13562}
13563
13564/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13565/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
13566/// corresponding mask bit is not set).
13567///
13568/// Rounding is done according to the rounding parameter, which can be one of:
13569///
13570/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13571/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13572/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13573/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13574/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13575///
13576/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundpd_ph)
13577#[inline]
13578#[target_feature(enable = "avx512fp16")]
13579#[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = 8))]
13580#[rustc_legacy_const_generics(2)]
13581#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13582pub fn _mm512_maskz_cvt_roundpd_ph<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m128h {
13583 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundpd_ph::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a)
13585}
13586
13587/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed
13589/// elements from a to the upper elements of dst.
13590///
13591/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_sh)
13592#[inline]
13593#[target_feature(enable = "avx512fp16")]
13594#[cfg_attr(test, assert_instr(vcvtsd2sh))]
13595#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13596pub fn _mm_cvtsd_sh(a: __m128h, b: __m128d) -> __m128h {
    _mm_mask_cvtsd_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
13598}
13599
13600/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using writemask k (the element
/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
13603/// upper elements of dst.
13604///
13605/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsd_sh)
13606#[inline]
13607#[target_feature(enable = "avx512fp16")]
13608#[cfg_attr(test, assert_instr(vcvtsd2sh))]
13609#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13610pub fn _mm_mask_cvtsd_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128d) -> __m128h {
13611 unsafe { vcvtsd2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
13612}
13613
13614/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using zeromask k (the element
13616/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
13617/// elements of dst.
13618///
13619/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsd_sh)
13620#[inline]
13621#[target_feature(enable = "avx512fp16")]
13622#[cfg_attr(test, assert_instr(vcvtsd2sh))]
13623#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13624pub fn _mm_maskz_cvtsd_sh(k: __mmask8, a: __m128h, b: __m128d) -> __m128h {
    _mm_mask_cvtsd_sh(f16x8::ZERO.as_m128h(), k, a, b)
13626}
13627
13628/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed
13630/// elements from a to the upper elements of dst.
13631///
13632/// Rounding is done according to the rounding parameter, which can be one of:
13633///
13634/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13635/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13636/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13637/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13638/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13639///
13640/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_sh)
13641#[inline]
13642#[target_feature(enable = "avx512fp16")]
13643#[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = 8))]
13644#[rustc_legacy_const_generics(2)]
13645#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13646pub fn _mm_cvt_roundsd_sh<const ROUNDING: i32>(a: __m128h, b: __m128d) -> __m128h {
13647 static_assert_rounding!(ROUNDING);
    _mm_mask_cvt_roundsd_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
13649}
13650
13651/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using writemask k (the element
/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
13654/// upper elements of dst.
13655///
13656/// Rounding is done according to the rounding parameter, which can be one of:
13657///
13658/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13659/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13660/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13661/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13662/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13663///
13664/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsd_sh)
13665#[inline]
13666#[target_feature(enable = "avx512fp16")]
13667#[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = 8))]
13668#[rustc_legacy_const_generics(4)]
13669#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13670pub fn _mm_mask_cvt_roundsd_sh<const ROUNDING: i32>(
13671 src: __m128h,
13672 k: __mmask8,
13673 a: __m128h,
13674 b: __m128d,
13675) -> __m128h {
13676 unsafe {
13677 static_assert_rounding!(ROUNDING);
13678 vcvtsd2sh(a, b, src, k, ROUNDING)
13679 }
13680}
13681
13682/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using zeromask k (the element
13684/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
13685/// elements of dst.
13686///
13687/// Rounding is done according to the rounding parameter, which can be one of:
13688///
13689/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13690/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13691/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13692/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13693/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13694///
13695/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsd_sh)
13696#[inline]
13697#[target_feature(enable = "avx512fp16")]
13698#[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = 8))]
13699#[rustc_legacy_const_generics(3)]
13700#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13701pub fn _mm_maskz_cvt_roundsd_sh<const ROUNDING: i32>(
13702 k: __mmask8,
13703 a: __m128h,
13704 b: __m128d,
13705) -> __m128h {
13706 static_assert_rounding!(ROUNDING);
    _mm_mask_cvt_roundsd_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
13708}
13709
13710/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13711/// store the results in dst.
13712///
13713/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi16)
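///
/// A minimal usage sketch (illustrative only; the `demo` wrapper is hypothetical and a nightly
/// AVX512-FP16 + AVX512VL capable target with the unstable `f16` type is assumed):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() -> i32 {
///     let a = _mm_set1_ph(3.0);
///     let r = _mm_cvtph_epi16(a);
///     // Each 16-bit lane should now hold the integer 3; extract lane 0 for inspection.
///     _mm_extract_epi16::<0>(r)
/// }
/// ```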
13714#[inline]
13715#[target_feature(enable = "avx512fp16,avx512vl")]
13716#[cfg_attr(test, assert_instr(vcvtph2w))]
13717#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13718pub fn _mm_cvtph_epi16(a: __m128h) -> __m128i {
    _mm_mask_cvtph_epi16(_mm_undefined_si128(), 0xff, a)
13720}
13721
13722/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13723/// store the results in dst using writemask k (elements are copied from src when the corresponding
13724/// mask bit is not set).
13725///
13726/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi16)
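///
/// # Example
///
/// A minimal sketch of how the writemask behaves (`ignore`d for the same nightly and CPU
/// requirements as the other examples in this module):
///
/// ```ignore
/// let src = _mm_set1_epi16(-1);
/// let a = _mm_set1_ph(5.0);
/// // Only the low four mask bits are set, so lanes 0..4 receive the converted value 5
/// // and lanes 4..8 keep -1 from `src`.
/// let r = _mm_mask_cvtph_epi16(src, 0b0000_1111, a);
/// ```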
13727#[inline]
13728#[target_feature(enable = "avx512fp16,avx512vl")]
13729#[cfg_attr(test, assert_instr(vcvtph2w))]
13730#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13731pub fn _mm_mask_cvtph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
    unsafe { transmute(vcvtph2w_128(a, src.as_i16x8(), k)) }
13733}
13734
13735/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13736/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13737///
13738/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi16)
13739#[inline]
13740#[target_feature(enable = "avx512fp16,avx512vl")]
13741#[cfg_attr(test, assert_instr(vcvtph2w))]
13742#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13743pub fn _mm_maskz_cvtph_epi16(k: __mmask8, a: __m128h) -> __m128i {
    _mm_mask_cvtph_epi16(_mm_setzero_si128(), k, a)
13745}
13746
13747/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13748/// store the results in dst.
13749///
13750/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi16)
13751#[inline]
13752#[target_feature(enable = "avx512fp16,avx512vl")]
13753#[cfg_attr(test, assert_instr(vcvtph2w))]
13754#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13755pub fn _mm256_cvtph_epi16(a: __m256h) -> __m256i {
    _mm256_mask_cvtph_epi16(_mm256_undefined_si256(), 0xffff, a)
13757}
13758
13759/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13760/// store the results in dst using writemask k (elements are copied from src when the corresponding
13761/// mask bit is not set).
13762///
13763/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi16)
13764#[inline]
13765#[target_feature(enable = "avx512fp16,avx512vl")]
13766#[cfg_attr(test, assert_instr(vcvtph2w))]
13767#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13768pub fn _mm256_mask_cvtph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
    unsafe { transmute(vcvtph2w_256(a, src.as_i16x16(), k)) }
13770}
13771
13772/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13773/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13774///
13775/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi16)
13776#[inline]
13777#[target_feature(enable = "avx512fp16,avx512vl")]
13778#[cfg_attr(test, assert_instr(vcvtph2w))]
13779#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13780pub fn _mm256_maskz_cvtph_epi16(k: __mmask16, a: __m256h) -> __m256i {
    _mm256_mask_cvtph_epi16(_mm256_setzero_si256(), k, a)
13782}
13783
13784/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13785/// store the results in dst.
13786///
13787/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi16)
13788#[inline]
13789#[target_feature(enable = "avx512fp16")]
13790#[cfg_attr(test, assert_instr(vcvtph2w))]
13791#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13792pub fn _mm512_cvtph_epi16(a: __m512h) -> __m512i {
    _mm512_mask_cvtph_epi16(_mm512_undefined_epi32(), 0xffffffff, a)
13794}
13795
13796/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13797/// store the results in dst using writemask k (elements are copied from src when the corresponding
13798/// mask bit is not set).
13799///
13800/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi16)
13801#[inline]
13802#[target_feature(enable = "avx512fp16")]
13803#[cfg_attr(test, assert_instr(vcvtph2w))]
13804#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13805pub fn _mm512_mask_cvtph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
13806 unsafe {
        transmute(vcvtph2w_512(
13808 a,
13809 src.as_i16x32(),
13810 k,
13811 _MM_FROUND_CUR_DIRECTION,
13812 ))
13813 }
13814}
13815
13816/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13817/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13818///
13819/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi16)
13820#[inline]
13821#[target_feature(enable = "avx512fp16")]
13822#[cfg_attr(test, assert_instr(vcvtph2w))]
13823#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13824pub fn _mm512_maskz_cvtph_epi16(k: __mmask32, a: __m512h) -> __m512i {
    _mm512_mask_cvtph_epi16(_mm512_setzero_si512(), k, a)
13826}
13827
13828/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13829/// store the results in dst.
13830///
13831/// Rounding is done according to the rounding parameter, which can be one of:
13832///
13833/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13834/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13835/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13836/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13837/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13838///
13839/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi16)
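///
/// # Example
///
/// A minimal sketch of selecting an explicit rounding mode (it assumes `_mm512_set1_ph` is
/// available, plus the usual nightly `f16` and AVX512-FP16 requirements; `ignore`d for that
/// reason):
///
/// ```ignore
/// let a = _mm512_set1_ph(2.5);
/// // Round toward +infinity, so every lane becomes 3 rather than the
/// // round-to-nearest-even result of 2.
/// let r = _mm512_cvt_roundph_epi16::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
/// ```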
13840#[inline]
13841#[target_feature(enable = "avx512fp16")]
13842#[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))]
13843#[rustc_legacy_const_generics(1)]
13844#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13845pub fn _mm512_cvt_roundph_epi16<const ROUNDING: i32>(a: __m512h) -> __m512i {
13846 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundph_epi16::<ROUNDING>(_mm512_undefined_epi32(), 0xffffffff, a)
13848}
13849
13850/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13851/// store the results in dst using writemask k (elements are copied from src when the corresponding
13852/// mask bit is not set).
13853///
13854/// Rounding is done according to the rounding parameter, which can be one of:
13855///
13856/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13857/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13858/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13859/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13860/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13861///
13862/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi16)
13863#[inline]
13864#[target_feature(enable = "avx512fp16")]
13865#[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))]
13866#[rustc_legacy_const_generics(3)]
13867#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13868pub fn _mm512_mask_cvt_roundph_epi16<const ROUNDING: i32>(
13869 src: __m512i,
13870 k: __mmask32,
13871 a: __m512h,
13872) -> __m512i {
13873 unsafe {
13874 static_assert_rounding!(ROUNDING);
        transmute(vcvtph2w_512(a, src.as_i16x32(), k, ROUNDING))
13876 }
13877}
13878
13879/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13880/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13881///
13882/// Rounding is done according to the rounding parameter, which can be one of:
13883///
13884/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13885/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13886/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13887/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13888/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13889///
13890/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi16)
13891#[inline]
13892#[target_feature(enable = "avx512fp16")]
13893#[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))]
13894#[rustc_legacy_const_generics(2)]
13895#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13896pub fn _mm512_maskz_cvt_roundph_epi16<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512i {
13897 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundph_epi16::<ROUNDING>(_mm512_setzero_si512(), k, a)
13899}
13900
13901/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13902/// and store the results in dst.
13903///
13904/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu16)
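///
/// # Example
///
/// A minimal usage sketch (`ignore`d; needs nightly `f16` support and an AVX512-FP16 and
/// AVX512-VL capable CPU):
///
/// ```ignore
/// let a = _mm_set1_ph(3.5);
/// let r = _mm_cvtph_epu16(a);
/// // Under the default round-to-nearest-even mode every lane holds the unsigned value 4.
/// ```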
13905#[inline]
13906#[target_feature(enable = "avx512fp16,avx512vl")]
13907#[cfg_attr(test, assert_instr(vcvtph2uw))]
13908#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13909pub fn _mm_cvtph_epu16(a: __m128h) -> __m128i {
    _mm_mask_cvtph_epu16(_mm_undefined_si128(), 0xff, a)
13911}
13912
13913/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13914/// and store the results in dst using writemask k (elements are copied from src when the corresponding
13915/// mask bit is not set).
13916///
13917/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu16)
13918#[inline]
13919#[target_feature(enable = "avx512fp16,avx512vl")]
13920#[cfg_attr(test, assert_instr(vcvtph2uw))]
13921#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13922pub fn _mm_mask_cvtph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
    unsafe { transmute(vcvtph2uw_128(a, src.as_u16x8(), k)) }
13924}
13925
13926/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13927/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13928///
13929/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu16)
13930#[inline]
13931#[target_feature(enable = "avx512fp16,avx512vl")]
13932#[cfg_attr(test, assert_instr(vcvtph2uw))]
13933#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13934pub fn _mm_maskz_cvtph_epu16(k: __mmask8, a: __m128h) -> __m128i {
    _mm_mask_cvtph_epu16(_mm_setzero_si128(), k, a)
13936}
13937
13938/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13939/// and store the results in dst.
13940///
13941/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu16)
13942#[inline]
13943#[target_feature(enable = "avx512fp16,avx512vl")]
13944#[cfg_attr(test, assert_instr(vcvtph2uw))]
13945#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13946pub fn _mm256_cvtph_epu16(a: __m256h) -> __m256i {
    _mm256_mask_cvtph_epu16(_mm256_undefined_si256(), 0xffff, a)
13948}
13949
13950/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13951/// and store the results in dst using writemask k (elements are copied from src when the corresponding
13952/// mask bit is not set).
13953///
13954/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu16)
13955#[inline]
13956#[target_feature(enable = "avx512fp16,avx512vl")]
13957#[cfg_attr(test, assert_instr(vcvtph2uw))]
13958#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13959pub fn _mm256_mask_cvtph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
    unsafe { transmute(vcvtph2uw_256(a, src.as_u16x16(), k)) }
13961}
13962
13963/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13964/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13965///
13966/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu16)
13967#[inline]
13968#[target_feature(enable = "avx512fp16,avx512vl")]
13969#[cfg_attr(test, assert_instr(vcvtph2uw))]
13970#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13971pub fn _mm256_maskz_cvtph_epu16(k: __mmask16, a: __m256h) -> __m256i {
    _mm256_mask_cvtph_epu16(_mm256_setzero_si256(), k, a)
13973}
13974
13975/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13976/// and store the results in dst.
13977///
13978/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu16)
13979#[inline]
13980#[target_feature(enable = "avx512fp16")]
13981#[cfg_attr(test, assert_instr(vcvtph2uw))]
13982#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13983pub fn _mm512_cvtph_epu16(a: __m512h) -> __m512i {
    _mm512_mask_cvtph_epu16(_mm512_undefined_epi32(), 0xffffffff, a)
13985}
13986
13987/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13988/// and store the results in dst using writemask k (elements are copied from src when the corresponding
13989/// mask bit is not set).
13990///
13991/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu16)
13992#[inline]
13993#[target_feature(enable = "avx512fp16")]
13994#[cfg_attr(test, assert_instr(vcvtph2uw))]
13995#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
13996pub fn _mm512_mask_cvtph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
13997 unsafe {
        transmute(vcvtph2uw_512(
13999 a,
14000 src.as_u16x32(),
14001 k,
14002 _MM_FROUND_CUR_DIRECTION,
14003 ))
14004 }
14005}
14006
14007/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
14008/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14009///
14010/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu16)
14011#[inline]
14012#[target_feature(enable = "avx512fp16")]
14013#[cfg_attr(test, assert_instr(vcvtph2uw))]
14014#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14015pub fn _mm512_maskz_cvtph_epu16(k: __mmask32, a: __m512h) -> __m512i {
    _mm512_mask_cvtph_epu16(_mm512_setzero_si512(), k, a)
14017}
14018
14019/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
14020/// and store the results in dst.
14021///
14022/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
14023///
14024/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu16)
14025#[inline]
14026#[target_feature(enable = "avx512fp16")]
14027#[cfg_attr(test, assert_instr(vcvtph2uw, SAE = 8))]
14028#[rustc_legacy_const_generics(1)]
14029#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14030pub fn _mm512_cvt_roundph_epu16<const SAE: i32>(a: __m512h) -> __m512i {
14031 static_assert_sae!(SAE);
    _mm512_mask_cvt_roundph_epu16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a)
14033}
14034
14035/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
14036/// and store the results in dst using writemask k (elements are copied from src when the corresponding
14037/// mask bit is not set).
14038///
14039/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
14040///
14041/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu16)
14042#[inline]
14043#[target_feature(enable = "avx512fp16")]
14044#[cfg_attr(test, assert_instr(vcvtph2uw, SAE = 8))]
14045#[rustc_legacy_const_generics(3)]
14046#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14047pub fn _mm512_mask_cvt_roundph_epu16<const SAE: i32>(
14048 src: __m512i,
14049 k: __mmask32,
14050 a: __m512h,
14051) -> __m512i {
14052 unsafe {
14053 static_assert_sae!(SAE);
        transmute(vcvtph2uw_512(a, src.as_u16x32(), k, SAE))
14055 }
14056}
14057
14058/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
14059/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14060///
14061/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
14062///
14063/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu16)
14064#[inline]
14065#[target_feature(enable = "avx512fp16")]
14066#[cfg_attr(test, assert_instr(vcvtph2uw, SAE = 8))]
14067#[rustc_legacy_const_generics(2)]
14068#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14069pub fn _mm512_maskz_cvt_roundph_epu16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i {
14070 static_assert_sae!(SAE);
    _mm512_mask_cvt_roundph_epu16::<SAE>(_mm512_setzero_si512(), k, a)
14072}
14073
14074/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
14075/// truncation, and store the results in dst.
14076///
14077/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi16)
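///
/// # Example
///
/// A minimal sketch contrasting truncation with the rounding conversions above (`ignore`d;
/// same nightly and CPU requirements):
///
/// ```ignore
/// let a = _mm_set1_ph(-1.9);
/// let r = _mm_cvttph_epi16(a);
/// // Truncation is toward zero, so every lane holds -1 regardless of MXCSR.RC.
/// ```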
14078#[inline]
14079#[target_feature(enable = "avx512fp16,avx512vl")]
14080#[cfg_attr(test, assert_instr(vcvttph2w))]
14081#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14082pub fn _mm_cvttph_epi16(a: __m128h) -> __m128i {
    _mm_mask_cvttph_epi16(_mm_undefined_si128(), 0xff, a)
14084}
14085
14086/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
14087/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
14088/// mask bit is not set).
14089///
14090/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi16)
14091#[inline]
14092#[target_feature(enable = "avx512fp16,avx512vl")]
14093#[cfg_attr(test, assert_instr(vcvttph2w))]
14094#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14095pub fn _mm_mask_cvttph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
    unsafe { transmute(vcvttph2w_128(a, src.as_i16x8(), k)) }
14097}
14098
14099/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
14100/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
14101/// mask bit is not set).
14102///
14103/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi16)
14104#[inline]
14105#[target_feature(enable = "avx512fp16,avx512vl")]
14106#[cfg_attr(test, assert_instr(vcvttph2w))]
14107#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14108pub fn _mm_maskz_cvttph_epi16(k: __mmask8, a: __m128h) -> __m128i {
    _mm_mask_cvttph_epi16(_mm_setzero_si128(), k, a)
14110}
14111
14112/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
14113/// truncation, and store the results in dst.
14114///
14115/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi16)
14116#[inline]
14117#[target_feature(enable = "avx512fp16,avx512vl")]
14118#[cfg_attr(test, assert_instr(vcvttph2w))]
14119#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14120pub fn _mm256_cvttph_epi16(a: __m256h) -> __m256i {
    _mm256_mask_cvttph_epi16(_mm256_undefined_si256(), 0xffff, a)
14122}
14123
14124/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
14125/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
14126/// mask bit is not set).
14127///
14128/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi16)
14129#[inline]
14130#[target_feature(enable = "avx512fp16,avx512vl")]
14131#[cfg_attr(test, assert_instr(vcvttph2w))]
14132#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14133pub fn _mm256_mask_cvttph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
    unsafe { transmute(vcvttph2w_256(a, src.as_i16x16(), k)) }
14135}
14136
14137/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
14138/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
14139/// mask bit is not set).
14140///
14141/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi16)
14142#[inline]
14143#[target_feature(enable = "avx512fp16,avx512vl")]
14144#[cfg_attr(test, assert_instr(vcvttph2w))]
14145#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14146pub fn _mm256_maskz_cvttph_epi16(k: __mmask16, a: __m256h) -> __m256i {
    _mm256_mask_cvttph_epi16(_mm256_setzero_si256(), k, a)
14148}
14149
14150/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
14151/// truncation, and store the results in dst.
14152///
14153/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi16)
14154#[inline]
14155#[target_feature(enable = "avx512fp16")]
14156#[cfg_attr(test, assert_instr(vcvttph2w))]
14157#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14158pub fn _mm512_cvttph_epi16(a: __m512h) -> __m512i {
    _mm512_mask_cvttph_epi16(_mm512_undefined_epi32(), 0xffffffff, a)
14160}
14161
14162/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
14163/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
14164/// mask bit is not set).
14165///
14166/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi16)
14167#[inline]
14168#[target_feature(enable = "avx512fp16")]
14169#[cfg_attr(test, assert_instr(vcvttph2w))]
14170#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14171pub fn _mm512_mask_cvttph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
14172 unsafe {
        transmute(vcvttph2w_512(
14174 a,
14175 src.as_i16x32(),
14176 k,
14177 _MM_FROUND_CUR_DIRECTION,
14178 ))
14179 }
14180}
14181
14182/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
14183/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
14184/// mask bit is not set).
14185///
14186/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi16)
14187#[inline]
14188#[target_feature(enable = "avx512fp16")]
14189#[cfg_attr(test, assert_instr(vcvttph2w))]
14190#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14191pub fn _mm512_maskz_cvttph_epi16(k: __mmask32, a: __m512h) -> __m512i {
    _mm512_mask_cvttph_epi16(_mm512_setzero_si512(), k, a)
14193}
14194
14195/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
14196/// truncation, and store the results in dst.
14197///
/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
14199///
14200/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi16)
14201#[inline]
14202#[target_feature(enable = "avx512fp16")]
14203#[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))]
14204#[rustc_legacy_const_generics(1)]
14205#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14206pub fn _mm512_cvtt_roundph_epi16<const SAE: i32>(a: __m512h) -> __m512i {
14207 static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundph_epi16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a)
14209}
14210
14211/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
14212/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
14213/// mask bit is not set).
14214///
/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
14216///
14217/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi16)
14218#[inline]
14219#[target_feature(enable = "avx512fp16")]
14220#[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))]
14221#[rustc_legacy_const_generics(3)]
14222#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14223pub fn _mm512_mask_cvtt_roundph_epi16<const SAE: i32>(
14224 src: __m512i,
14225 k: __mmask32,
14226 a: __m512h,
14227) -> __m512i {
14228 unsafe {
14229 static_assert_sae!(SAE);
        transmute(vcvttph2w_512(a, src.as_i16x32(), k, SAE))
14231 }
14232}
14233
14234/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
14235/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
14236/// mask bit is not set).
14237///
/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
14239///
14240/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi16)
14241#[inline]
14242#[target_feature(enable = "avx512fp16")]
14243#[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))]
14244#[rustc_legacy_const_generics(2)]
14245#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14246pub fn _mm512_maskz_cvtt_roundph_epi16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i {
14247 static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundph_epi16::<SAE>(_mm512_setzero_si512(), k, a)
14249}
14250
14251/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14252/// truncation, and store the results in dst.
14253///
14254/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu16)
14255#[inline]
14256#[target_feature(enable = "avx512fp16,avx512vl")]
14257#[cfg_attr(test, assert_instr(vcvttph2uw))]
14258#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14259pub fn _mm_cvttph_epu16(a: __m128h) -> __m128i {
    _mm_mask_cvttph_epu16(_mm_undefined_si128(), 0xff, a)
14261}
14262
14263/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14264/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
14265/// mask bit is not set).
14266///
14267/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu16)
14268#[inline]
14269#[target_feature(enable = "avx512fp16,avx512vl")]
14270#[cfg_attr(test, assert_instr(vcvttph2uw))]
14271#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14272pub fn _mm_mask_cvttph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
    unsafe { transmute(vcvttph2uw_128(a, src.as_u16x8(), k)) }
14274}
14275
14276/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14277/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
14278/// mask bit is not set).
14279///
14280/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu16)
14281#[inline]
14282#[target_feature(enable = "avx512fp16,avx512vl")]
14283#[cfg_attr(test, assert_instr(vcvttph2uw))]
14284#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14285pub fn _mm_maskz_cvttph_epu16(k: __mmask8, a: __m128h) -> __m128i {
    _mm_mask_cvttph_epu16(_mm_setzero_si128(), k, a)
14287}
14288
14289/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14290/// truncation, and store the results in dst.
14291///
14292/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu16)
14293#[inline]
14294#[target_feature(enable = "avx512fp16,avx512vl")]
14295#[cfg_attr(test, assert_instr(vcvttph2uw))]
14296#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14297pub fn _mm256_cvttph_epu16(a: __m256h) -> __m256i {
    _mm256_mask_cvttph_epu16(_mm256_undefined_si256(), 0xffff, a)
14299}
14300
14301/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14302/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
14303/// mask bit is not set).
14304///
14305/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu16)
14306#[inline]
14307#[target_feature(enable = "avx512fp16,avx512vl")]
14308#[cfg_attr(test, assert_instr(vcvttph2uw))]
14309#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14310pub fn _mm256_mask_cvttph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
    unsafe { transmute(vcvttph2uw_256(a, src.as_u16x16(), k)) }
14312}
14313
14314/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14315/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
14316/// mask bit is not set).
14317///
14318/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu16)
14319#[inline]
14320#[target_feature(enable = "avx512fp16,avx512vl")]
14321#[cfg_attr(test, assert_instr(vcvttph2uw))]
14322#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14323pub fn _mm256_maskz_cvttph_epu16(k: __mmask16, a: __m256h) -> __m256i {
    _mm256_mask_cvttph_epu16(_mm256_setzero_si256(), k, a)
14325}
14326
14327/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14328/// truncation, and store the results in dst.
14329///
14330/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu16)
14331#[inline]
14332#[target_feature(enable = "avx512fp16")]
14333#[cfg_attr(test, assert_instr(vcvttph2uw))]
14334#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14335pub fn _mm512_cvttph_epu16(a: __m512h) -> __m512i {
    _mm512_mask_cvttph_epu16(_mm512_undefined_epi32(), 0xffffffff, a)
14337}
14338
14339/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14340/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
14341/// mask bit is not set).
14342///
14343/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu16)
14344#[inline]
14345#[target_feature(enable = "avx512fp16")]
14346#[cfg_attr(test, assert_instr(vcvttph2uw))]
14347#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14348pub fn _mm512_mask_cvttph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
14349 unsafe {
        transmute(vcvttph2uw_512(
14351 a,
14352 src.as_u16x32(),
14353 k,
14354 _MM_FROUND_CUR_DIRECTION,
14355 ))
14356 }
14357}
14358
14359/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14360/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
14361/// mask bit is not set).
14362///
14363/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu16)
14364#[inline]
14365#[target_feature(enable = "avx512fp16")]
14366#[cfg_attr(test, assert_instr(vcvttph2uw))]
14367#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14368pub fn _mm512_maskz_cvttph_epu16(k: __mmask32, a: __m512h) -> __m512i {
    _mm512_mask_cvttph_epu16(_mm512_setzero_si512(), k, a)
14370}
14371
14372/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14373/// truncation, and store the results in dst.
14374///
/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
14376///
14377/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu16)
14378#[inline]
14379#[target_feature(enable = "avx512fp16")]
14380#[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))]
14381#[rustc_legacy_const_generics(1)]
14382#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14383pub fn _mm512_cvtt_roundph_epu16<const SAE: i32>(a: __m512h) -> __m512i {
14384 static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundph_epu16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a)
14386}
14387
14388/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14389/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
14390/// mask bit is not set).
14391///
/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
14393///
14394/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu16)
14395#[inline]
14396#[target_feature(enable = "avx512fp16")]
14397#[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))]
14398#[rustc_legacy_const_generics(3)]
14399#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14400pub fn _mm512_mask_cvtt_roundph_epu16<const SAE: i32>(
14401 src: __m512i,
14402 k: __mmask32,
14403 a: __m512h,
14404) -> __m512i {
14405 unsafe {
14406 static_assert_sae!(SAE);
        transmute(vcvttph2uw_512(a, src.as_u16x32(), k, SAE))
14408 }
14409}
14410
14411/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14412/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
14413/// mask bit is not set).
14414///
/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
14416///
14417/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu16)
14418#[inline]
14419#[target_feature(enable = "avx512fp16")]
14420#[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))]
14421#[rustc_legacy_const_generics(2)]
14422#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14423pub fn _mm512_maskz_cvtt_roundph_epu16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i {
14424 static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundph_epu16::<SAE>(_mm512_setzero_si512(), k, a)
14426}
14427
14428/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14429/// results in dst.
14430///
14431/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi32)
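///
/// # Example
///
/// A minimal sketch of the widening conversion (`ignore`d; same nightly and CPU requirements).
/// Only the lower four f16 lanes of the source participate:
///
/// ```ignore
/// let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 4.0, 3.0, 2.0, 1.0);
/// let r = _mm_cvtph_epi32(a);
/// // r holds the packed 32-bit integers [1, 2, 3, 4].
/// ```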
14432#[inline]
14433#[target_feature(enable = "avx512fp16,avx512vl")]
14434#[cfg_attr(test, assert_instr(vcvtph2dq))]
14435#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14436pub fn _mm_cvtph_epi32(a: __m128h) -> __m128i {
    _mm_mask_cvtph_epi32(_mm_undefined_si128(), 0xff, a)
14438}
14439
14440/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14441/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14442///
14443/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi32)
14444#[inline]
14445#[target_feature(enable = "avx512fp16,avx512vl")]
14446#[cfg_attr(test, assert_instr(vcvtph2dq))]
14447#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14448pub fn _mm_mask_cvtph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
    unsafe { transmute(vcvtph2dq_128(a, src.as_i32x4(), k)) }
14450}
14451
14452/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14453/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14454///
14455/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi32)
14456#[inline]
14457#[target_feature(enable = "avx512fp16,avx512vl")]
14458#[cfg_attr(test, assert_instr(vcvtph2dq))]
14459#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14460pub fn _mm_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m128i {
    _mm_mask_cvtph_epi32(_mm_setzero_si128(), k, a)
14462}
14463
14464/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14465/// results in dst.
14466///
14467/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi32)
14468#[inline]
14469#[target_feature(enable = "avx512fp16,avx512vl")]
14470#[cfg_attr(test, assert_instr(vcvtph2dq))]
14471#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14472pub fn _mm256_cvtph_epi32(a: __m128h) -> __m256i {
    _mm256_mask_cvtph_epi32(_mm256_undefined_si256(), 0xff, a)
14474}
14475
14476/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14477/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14478///
14479/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi32)
14480#[inline]
14481#[target_feature(enable = "avx512fp16,avx512vl")]
14482#[cfg_attr(test, assert_instr(vcvtph2dq))]
14483#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14484pub fn _mm256_mask_cvtph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
    unsafe { transmute(vcvtph2dq_256(a, src.as_i32x8(), k)) }
14486}
14487
14488/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14489/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14490///
14491/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi32)
14492#[inline]
14493#[target_feature(enable = "avx512fp16,avx512vl")]
14494#[cfg_attr(test, assert_instr(vcvtph2dq))]
14495#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14496pub fn _mm256_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m256i {
    _mm256_mask_cvtph_epi32(_mm256_setzero_si256(), k, a)
14498}
14499
14500/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14501/// results in dst.
14502///
14503/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi32)
14504#[inline]
14505#[target_feature(enable = "avx512fp16")]
14506#[cfg_attr(test, assert_instr(vcvtph2dq))]
14507#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14508pub fn _mm512_cvtph_epi32(a: __m256h) -> __m512i {
    _mm512_mask_cvtph_epi32(_mm512_undefined_epi32(), 0xffff, a)
14510}
14511
14512/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14513/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14514///
14515/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi32)
14516#[inline]
14517#[target_feature(enable = "avx512fp16")]
14518#[cfg_attr(test, assert_instr(vcvtph2dq))]
14519#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14520pub fn _mm512_mask_cvtph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
14521 unsafe {
        transmute(vcvtph2dq_512(
14523 a,
14524 src.as_i32x16(),
14525 k,
14526 _MM_FROUND_CUR_DIRECTION,
14527 ))
14528 }
14529}
14530
14531/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14532/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14533///
14534/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi32)
14535#[inline]
14536#[target_feature(enable = "avx512fp16")]
14537#[cfg_attr(test, assert_instr(vcvtph2dq))]
14538#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14539pub fn _mm512_maskz_cvtph_epi32(k: __mmask16, a: __m256h) -> __m512i {
    _mm512_mask_cvtph_epi32(_mm512_setzero_si512(), k, a)
14541}
14542
14543/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14544/// results in dst.
14545///
14546/// Rounding is done according to the rounding parameter, which can be one of:
14547///
14548/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14549/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14550/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14551/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14552/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14553///
14554/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi32)
14555#[inline]
14556#[target_feature(enable = "avx512fp16")]
14557#[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = 8))]
14558#[rustc_legacy_const_generics(1)]
14559#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14560pub fn _mm512_cvt_roundph_epi32<const ROUNDING: i32>(a: __m256h) -> __m512i {
14561 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundph_epi32::<ROUNDING>(_mm512_undefined_epi32(), 0xffff, a)
14563}
14564
14565/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14566/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14567///
14568/// Rounding is done according to the rounding parameter, which can be one of:
14569///
14570/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14571/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14572/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14573/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14574/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14575///
14576/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi32)
14577#[inline]
14578#[target_feature(enable = "avx512fp16")]
14579#[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = 8))]
14580#[rustc_legacy_const_generics(3)]
14581#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14582pub fn _mm512_mask_cvt_roundph_epi32<const ROUNDING: i32>(
14583 src: __m512i,
14584 k: __mmask16,
14585 a: __m256h,
14586) -> __m512i {
14587 unsafe {
14588 static_assert_rounding!(ROUNDING);
        transmute(vcvtph2dq_512(a, src.as_i32x16(), k, ROUNDING))
14590 }
14591}
14592
14593/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14594/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14595///
14596/// Rounding is done according to the rounding parameter, which can be one of:
14597///
14598/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14599/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14600/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14601/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14602/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14603///
14604/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi32)
14605#[inline]
14606#[target_feature(enable = "avx512fp16")]
14607#[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = 8))]
14608#[rustc_legacy_const_generics(2)]
14609#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14610pub fn _mm512_maskz_cvt_roundph_epi32<const ROUNDING: i32>(k: __mmask16, a: __m256h) -> __m512i {
14611 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundph_epi32::<ROUNDING>(_mm512_setzero_si512(), k, a)
14613}
14614
14615/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer, and store
14616/// the result in dst.
14617///
14618/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_i32)
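///
/// # Example
///
/// A minimal scalar sketch (`ignore`d; same nightly and CPU requirements):
///
/// ```ignore
/// let a = _mm_set_sh(7.5);
/// let r: i32 = _mm_cvtsh_i32(a);
/// // Under the default round-to-nearest-even mode, r == 8.
/// ```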
14619#[inline]
14620#[target_feature(enable = "avx512fp16")]
14621#[cfg_attr(test, assert_instr(vcvtsh2si))]
14622#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14623pub fn _mm_cvtsh_i32(a: __m128h) -> i32 {
14624 unsafe { vcvtsh2si32(a, _MM_FROUND_CUR_DIRECTION) }
14625}
14626
14627/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer, and store
14628/// the result in dst.
14629///
14630/// Rounding is done according to the rounding parameter, which can be one of:
14631///
14632/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14633/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14634/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14635/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14636/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14637///
14638/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_i32)
14639#[inline]
14640#[target_feature(enable = "avx512fp16")]
14641#[cfg_attr(test, assert_instr(vcvtsh2si, ROUNDING = 8))]
14642#[rustc_legacy_const_generics(1)]
14643#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14644pub fn _mm_cvt_roundsh_i32<const ROUNDING: i32>(a: __m128h) -> i32 {
14645 unsafe {
14646 static_assert_rounding!(ROUNDING);
14647 vcvtsh2si32(a, ROUNDING)
14648 }
14649}
14650
/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store the
14652/// results in dst.
14653///
14654/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu32)
14655#[inline]
14656#[target_feature(enable = "avx512fp16,avx512vl")]
14657#[cfg_attr(test, assert_instr(vcvtph2udq))]
14658#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14659pub fn _mm_cvtph_epu32(a: __m128h) -> __m128i {
    _mm_mask_cvtph_epu32(_mm_undefined_si128(), 0xff, a)
14661}
14662
14663/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14664/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14665///
14666/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu32)
14667#[inline]
14668#[target_feature(enable = "avx512fp16,avx512vl")]
14669#[cfg_attr(test, assert_instr(vcvtph2udq))]
14670#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14671pub fn _mm_mask_cvtph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
    unsafe { transmute(vcvtph2udq_128(a, src.as_u32x4(), k)) }
14673}
14674
14675/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14676/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14677///
14678/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu32)
14679#[inline]
14680#[target_feature(enable = "avx512fp16,avx512vl")]
14681#[cfg_attr(test, assert_instr(vcvtph2udq))]
14682#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14683pub fn _mm_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m128i {
    _mm_mask_cvtph_epu32(_mm_setzero_si128(), k, a)
14685}
14686
14687/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14688/// the results in dst.
14689///
14690/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu32)
14691#[inline]
14692#[target_feature(enable = "avx512fp16,avx512vl")]
14693#[cfg_attr(test, assert_instr(vcvtph2udq))]
14694#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14695pub fn _mm256_cvtph_epu32(a: __m128h) -> __m256i {
    _mm256_mask_cvtph_epu32(_mm256_undefined_si256(), 0xff, a)
14697}
14698
14699/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14700/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14701///
14702/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu32)
14703#[inline]
14704#[target_feature(enable = "avx512fp16,avx512vl")]
14705#[cfg_attr(test, assert_instr(vcvtph2udq))]
14706#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14707pub fn _mm256_mask_cvtph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
    unsafe { transmute(vcvtph2udq_256(a, src.as_u32x8(), k)) }
14709}
14710
14711/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14712/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14713///
14714/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu32)
14715#[inline]
14716#[target_feature(enable = "avx512fp16,avx512vl")]
14717#[cfg_attr(test, assert_instr(vcvtph2udq))]
14718#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14719pub fn _mm256_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m256i {
    _mm256_mask_cvtph_epu32(_mm256_setzero_si256(), k, a)
14721}
14722
14723/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14724/// the results in dst.
14725///
14726/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu32)
14727#[inline]
14728#[target_feature(enable = "avx512fp16")]
14729#[cfg_attr(test, assert_instr(vcvtph2udq))]
14730#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14731pub fn _mm512_cvtph_epu32(a: __m256h) -> __m512i {
    _mm512_mask_cvtph_epu32(_mm512_undefined_epi32(), 0xffff, a)
14733}
14734
14735/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14736/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14737///
14738/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu32)
14739#[inline]
14740#[target_feature(enable = "avx512fp16")]
14741#[cfg_attr(test, assert_instr(vcvtph2udq))]
14742#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14743pub fn _mm512_mask_cvtph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
14744 unsafe {
        transmute(vcvtph2udq_512(
14746 a,
14747 src.as_u32x16(),
14748 k,
14749 _MM_FROUND_CUR_DIRECTION,
14750 ))
14751 }
14752}
14753
14754/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14755/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14756///
14757/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu32)
14758#[inline]
14759#[target_feature(enable = "avx512fp16")]
14760#[cfg_attr(test, assert_instr(vcvtph2udq))]
14761#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14762pub fn _mm512_maskz_cvtph_epu32(k: __mmask16, a: __m256h) -> __m512i {
    _mm512_mask_cvtph_epu32(_mm512_setzero_si512(), k, a)
14764}
14765
/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
/// the results in dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_cvt_roundph_epu32<const ROUNDING: i32>(a: __m256h) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundph_epu32::<ROUNDING>(_mm512_undefined_epi32(), 0xffff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_mask_cvt_roundph_epu32<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m256h,
) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtph2udq_512(a, src.as_u32x16(), k, ROUNDING))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_maskz_cvt_roundph_epu32<const ROUNDING: i32>(k: __mmask16, a: __m256h) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundph_epu32::<ROUNDING>(_mm512_setzero_si512(), k, a)
}

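// Illustrative sketch of the rounding-controlled variants above (added for
// clarity; `example_cvt_roundph_epu32` is a hypothetical helper, not part of
// the original source). The rounding mode is a const argument and must be one
// of the combinations listed in the documentation.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
unsafe fn example_cvt_roundph_epu32() {
    let a = _mm256_set1_ph(2.5);
    // Truncate toward zero and suppress exceptions: every lane becomes 2.
    let toward_zero =
        _mm512_cvt_roundph_epu32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
    // Round to nearest (ties to even) and suppress exceptions: 2.5 ties to the
    // even value 2, so every lane is 2 here as well.
    let nearest =
        _mm512_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
    let _ = (toward_zero, nearest);
}
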
/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer, and store
/// the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_u32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2usi))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_cvtsh_u32(a: __m128h) -> u32 {
    unsafe { vcvtsh2usi32(a, _MM_FROUND_CUR_DIRECTION) }
}

/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer, and store
/// the result in dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_u32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2usi, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_cvt_roundsh_u32<const ROUNDING: i32>(a: __m128h) -> u32 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vcvtsh2usi32(a, ROUNDING)
    }
}

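// Illustrative sketch for the scalar conversions above (added for clarity;
// `example_cvtsh_u32` is a hypothetical helper, not part of the original
// source). Only the lowest f16 lane of the vector participates.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
unsafe fn example_cvtsh_u32() {
    let a = _mm_set_sh(1.5);
    // Uses the current MXCSR rounding mode (round to nearest by default).
    let by_mxcsr = _mm_cvtsh_u32(a);
    // Forces truncation toward zero regardless of MXCSR, so this yields 1.
    let truncated = _mm_cvt_roundsh_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
    let _ = (by_mxcsr, truncated);
}
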
/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
/// store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2dq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_cvttph_epi32(a: __m128h) -> __m128i {
    _mm_mask_cvttph_epi32(_mm_undefined_si128(), 0xff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2dq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_mask_cvttph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
    unsafe { transmute(vcvttph2dq_128(a, src.as_i32x4(), k)) }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2dq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m128i {
    _mm_mask_cvttph_epi32(_mm_setzero_si128(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
/// store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2dq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_cvttph_epi32(a: __m128h) -> __m256i {
    _mm256_mask_cvttph_epi32(_mm256_undefined_si256(), 0xff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2dq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_mask_cvttph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
    unsafe { transmute(vcvttph2dq_256(a, src.as_i32x8(), k)) }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2dq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m256i {
    _mm256_mask_cvttph_epi32(_mm256_setzero_si256(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
/// store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2dq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_cvttph_epi32(a: __m256h) -> __m512i {
    _mm512_mask_cvttph_epi32(_mm512_undefined_epi32(), 0xffff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2dq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_mask_cvttph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
    unsafe {
        transmute(vcvttph2dq_512(
            a,
            src.as_i32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2dq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_maskz_cvttph_epi32(k: __mmask16, a: __m256h) -> __m512i {
    _mm512_mask_cvttph_epi32(_mm512_setzero_si512(), k, a)
}

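// Illustrative sketch contrasting the truncating conversions above with the
// rounding forms (added for clarity; `example_cvttph_epi32` is a hypothetical
// helper, not part of the original source).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
unsafe fn example_cvttph_epi32() {
    let a = _mm_set1_ph(-1.75);
    // `cvtt*` always truncates toward zero: each lane becomes -1.
    let truncated = _mm_cvttph_epi32(a);
    // The non-truncating form honors MXCSR.RC: -2 under the default round-to-nearest.
    let rounded = _mm_cvtph_epi32(a);
    let _ = (truncated, rounded);
}
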
/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
/// store the results in dst.
///
/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2dq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_cvtt_roundph_epi32<const SAE: i32>(a: __m256h) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundph_epi32::<SAE>(_mm512_undefined_epi32(), 0xffff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2dq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_mask_cvtt_roundph_epi32<const SAE: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m256h,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        transmute(vcvttph2dq_512(a, src.as_i32x16(), k, SAE))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2dq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_maskz_cvtt_roundph_epi32<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundph_epi32::<SAE>(_mm512_setzero_si512(), k, a)
}

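// Illustrative sketch for the SAE-controlled truncating conversions above
// (added for clarity; `example_cvtt_roundph_epi32` is a hypothetical helper,
// not part of the original source). The `cvtt_round*` forms always truncate;
// the const argument only selects whether exceptions are suppressed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
unsafe fn example_cvtt_roundph_epi32() {
    let a = _mm256_set1_ph(3.9);
    // Truncation toward zero with floating-point exceptions suppressed: each lane becomes 3.
    let quiet = _mm512_cvtt_roundph_epi32::<{ _MM_FROUND_NO_EXC }>(a);
    let _ = quiet;
}
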
/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer with truncation, and store
/// the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_i32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttsh2si))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_cvttsh_i32(a: __m128h) -> i32 {
    unsafe { vcvttsh2si32(a, _MM_FROUND_CUR_DIRECTION) }
}

/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer with truncation, and store
/// the result in dst.
///
/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_i32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttsh2si, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_cvtt_roundsh_i32<const SAE: i32>(a: __m128h) -> i32 {
    unsafe {
        static_assert_sae!(SAE);
        vcvttsh2si32(a, SAE)
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
/// store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2udq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_cvttph_epu32(a: __m128h) -> __m128i {
    _mm_mask_cvttph_epu32(_mm_undefined_si128(), 0xff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2udq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_mask_cvttph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
    unsafe { transmute(vcvttph2udq_128(a, src.as_u32x4(), k)) }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2udq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m128i {
    _mm_mask_cvttph_epu32(_mm_setzero_si128(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
/// store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2udq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_cvttph_epu32(a: __m128h) -> __m256i {
    _mm256_mask_cvttph_epu32(_mm256_undefined_si256(), 0xff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2udq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_mask_cvttph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
    unsafe { transmute(vcvttph2udq_256(a, src.as_u32x8(), k)) }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2udq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m256i {
    _mm256_mask_cvttph_epu32(_mm256_setzero_si256(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
/// store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2udq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_cvttph_epu32(a: __m256h) -> __m512i {
    _mm512_mask_cvttph_epu32(_mm512_undefined_epi32(), 0xffff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2udq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_mask_cvttph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
    unsafe {
        transmute(vcvttph2udq_512(
            a,
            src.as_u32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2udq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_maskz_cvttph_epu32(k: __mmask16, a: __m256h) -> __m512i {
    _mm512_mask_cvttph_epu32(_mm512_setzero_si512(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
/// store the results in dst.
///
/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2udq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_cvtt_roundph_epu32<const SAE: i32>(a: __m256h) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundph_epu32::<SAE>(_mm512_undefined_epi32(), 0xffff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2udq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_mask_cvtt_roundph_epu32<const SAE: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m256h,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        transmute(vcvttph2udq_512(a, src.as_u32x16(), k, SAE))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2udq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_maskz_cvtt_roundph_epu32<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundph_epu32::<SAE>(_mm512_setzero_si512(), k, a)
}

/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer with truncation, and store
/// the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_u32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttsh2usi))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_cvttsh_u32(a: __m128h) -> u32 {
    unsafe { vcvttsh2usi32(a, _MM_FROUND_CUR_DIRECTION) }
}

/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer with truncation, and store
/// the result in dst.
///
/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_u32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttsh2usi, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_cvtt_roundsh_u32<const SAE: i32>(a: __m128h) -> u32 {
    unsafe {
        static_assert_sae!(SAE);
        vcvttsh2usi32(a, SAE)
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
/// store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi64)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2qq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_cvtph_epi64(a: __m128h) -> __m128i {
    _mm_mask_cvtph_epi64(_mm_undefined_si128(), 0xff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi64)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2qq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_mask_cvtph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
    unsafe { transmute(vcvtph2qq_128(a, src.as_i64x2(), k)) }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi64)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2qq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m128i {
    _mm_mask_cvtph_epi64(_mm_setzero_si128(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
/// store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi64)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2qq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_cvtph_epi64(a: __m128h) -> __m256i {
    _mm256_mask_cvtph_epi64(_mm256_undefined_si256(), 0xff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi64)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2qq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_mask_cvtph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
    unsafe { transmute(vcvtph2qq_256(a, src.as_i64x4(), k)) }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi64)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2qq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m256i {
    _mm256_mask_cvtph_epi64(_mm256_setzero_si256(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
/// store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi64)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2qq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_cvtph_epi64(a: __m128h) -> __m512i {
    _mm512_mask_cvtph_epi64(_mm512_undefined_epi32(), 0xff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi64)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2qq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_mask_cvtph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
    unsafe {
        transmute(vcvtph2qq_512(
            a,
            src.as_i64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi64)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2qq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m512i {
    _mm512_mask_cvtph_epi64(_mm512_setzero_si512(), k, a)
}

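// Illustrative sketch for the 64-bit destinations above (added for clarity;
// `example_cvtph_epi64` is a hypothetical helper, not part of the original
// source). Only the low f16 lanes of `a` participate: two, four, or eight of
// them depending on the width of the destination vector.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
unsafe fn example_cvtph_epi64() {
    let a = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
    let two = _mm_cvtph_epi64(a); // converts elements 0 and 1
    let four = _mm256_cvtph_epi64(a); // converts elements 0 through 3
    let eight = _mm512_cvtph_epi64(a); // converts all eight elements
    let _ = (two, four, eight);
}
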
15381/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15382/// store the results in dst.
15383///
15384/// Rounding is done according to the rounding parameter, which can be one of:
15385///
15386/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15387/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15388/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15389/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15390/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15391///
15392/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi64)
15393#[inline]
15394#[target_feature(enable = "avx512fp16")]
15395#[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = 8))]
15396#[rustc_legacy_const_generics(1)]
15397#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
15398pub fn _mm512_cvt_roundph_epi64<const ROUNDING: i32>(a: __m128h) -> __m512i {
15399 static_assert_rounding!(ROUNDING);
15400 _mm512_mask_cvt_roundph_epi64::<ROUNDING>(src:_mm512_undefined_epi32(), k:0xff, a)
15401}
15402
15403/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15404/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15405///
15406/// Rounding is done according to the rounding parameter, which can be one of:
15407///
15408/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15409/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15410/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15411/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15412/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15413///
15414/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi64)
15415#[inline]
15416#[target_feature(enable = "avx512fp16")]
15417#[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = 8))]
15418#[rustc_legacy_const_generics(3)]
15419#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
15420pub fn _mm512_mask_cvt_roundph_epi64<const ROUNDING: i32>(
15421 src: __m512i,
15422 k: __mmask8,
15423 a: __m128h,
15424) -> __m512i {
15425 unsafe {
15426 static_assert_rounding!(ROUNDING);
15427 transmute(src:vcvtph2qq_512(a, src.as_i64x8(), k, ROUNDING))
15428 }
15429}
15430
15431/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15432/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15433///
15434/// Rounding is done according to the rounding parameter, which can be one of:
15435///
15436/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15437/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15438/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15439/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15440/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15441///
15442/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi64)
15443#[inline]
15444#[target_feature(enable = "avx512fp16")]
15445#[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = 8))]
15446#[rustc_legacy_const_generics(2)]
15447#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
15448pub fn _mm512_maskz_cvt_roundph_epi64<const ROUNDING: i32>(k: __mmask8, a: __m128h) -> __m512i {
15449 static_assert_rounding!(ROUNDING);
15450 _mm512_mask_cvt_roundph_epi64::<ROUNDING>(src:_mm512_setzero_si512(), k, a)
15451}
15452
15453/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15454/// store the results in dst.
15455///
15456/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu64)
15457#[inline]
15458#[target_feature(enable = "avx512fp16,avx512vl")]
15459#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15460#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
15461pub fn _mm_cvtph_epu64(a: __m128h) -> __m128i {
15462 _mm_mask_cvtph_epu64(src:_mm_undefined_si128(), k:0xff, a)
15463}
15464
15465/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15466/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15467///
15468/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu64)
15469#[inline]
15470#[target_feature(enable = "avx512fp16,avx512vl")]
15471#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15472#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
15473pub fn _mm_mask_cvtph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
15474 unsafe { transmute(src:vcvtph2uqq_128(a, src.as_u64x2(), k)) }
15475}
15476
15477/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15478/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15479///
15480/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu64)
15481#[inline]
15482#[target_feature(enable = "avx512fp16,avx512vl")]
15483#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15484#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
15485pub fn _mm_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m128i {
15486 _mm_mask_cvtph_epu64(src:_mm_setzero_si128(), k, a)
15487}
15488
15489/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15490/// store the results in dst.
15491///
15492/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu64)
15493#[inline]
15494#[target_feature(enable = "avx512fp16,avx512vl")]
15495#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15496#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
15497pub fn _mm256_cvtph_epu64(a: __m128h) -> __m256i {
15498 _mm256_mask_cvtph_epu64(src:_mm256_undefined_si256(), k:0xff, a)
15499}
15500
15501/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15502/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15503///
15504/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu64)
15505#[inline]
15506#[target_feature(enable = "avx512fp16,avx512vl")]
15507#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15508#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
15509pub fn _mm256_mask_cvtph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
15510 unsafe { transmute(src:vcvtph2uqq_256(a, src.as_u64x4(), k)) }
15511}
15512
15513/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15514/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15515///
15516/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu64)
15517#[inline]
15518#[target_feature(enable = "avx512fp16,avx512vl")]
15519#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15520#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
15521pub fn _mm256_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m256i {
15522 _mm256_mask_cvtph_epu64(src:_mm256_setzero_si256(), k, a)
15523}
15524
15525/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15526/// store the results in dst.
15527///
15528/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu64)
15529#[inline]
15530#[target_feature(enable = "avx512fp16")]
15531#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15532#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
15533pub fn _mm512_cvtph_epu64(a: __m128h) -> __m512i {
15534 _mm512_mask_cvtph_epu64(src:_mm512_undefined_epi32(), k:0xff, a)
15535}
15536
15537/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15538/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15539///
15540/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu64)
15541#[inline]
15542#[target_feature(enable = "avx512fp16")]
15543#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15544#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
15545pub fn _mm512_mask_cvtph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
15546 unsafe {
15547 transmute(src:vcvtph2uqq_512(
15548 a,
15549 src.as_u64x8(),
15550 k,
15551 _MM_FROUND_CUR_DIRECTION,
15552 ))
15553 }
15554}
15555
15556/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15557/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15558///
15559/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu64)
15560#[inline]
15561#[target_feature(enable = "avx512fp16")]
15562#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15563#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
15564pub fn _mm512_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m512i {
15565 _mm512_mask_cvtph_epu64(src:_mm512_setzero_si512(), k, a)
15566}
15567
15568/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15569/// store the results in dst.
15570///
15571/// Rounding is done according to the rounding parameter, which can be one of:
15572///
15573/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15574/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15575/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15576/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15577/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15578///
15579/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu64)
15580#[inline]
15581#[target_feature(enable = "avx512fp16")]
15582#[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = 8))]
15583#[rustc_legacy_const_generics(1)]
15584#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
15585pub fn _mm512_cvt_roundph_epu64<const ROUNDING: i32>(a: __m128h) -> __m512i {
15586 static_assert_rounding!(ROUNDING);
15587 _mm512_mask_cvt_roundph_epu64::<ROUNDING>(src:_mm512_undefined_epi32(), k:0xff, a)
15588}
15589
15590/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15591/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15592///
15593/// Rounding is done according to the rounding parameter, which can be one of:
15594///
15595/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15596/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15597/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15598/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15599/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15600///
15601/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu64)
15602#[inline]
15603#[target_feature(enable = "avx512fp16")]
15604#[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = 8))]
15605#[rustc_legacy_const_generics(3)]
15606#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
15607pub fn _mm512_mask_cvt_roundph_epu64<const ROUNDING: i32>(
15608 src: __m512i,
15609 k: __mmask8,
15610 a: __m128h,
15611) -> __m512i {
15612 unsafe {
15613 static_assert_rounding!(ROUNDING);
15614 transmute(src:vcvtph2uqq_512(a, src.as_u64x8(), k, ROUNDING))
15615 }
15616}
15617
15618/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15619/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15620///
15621/// Rounding is done according to the rounding parameter, which can be one of:
15622///
15623/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15624/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15625/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15626/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15627/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15628///
15629/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu64)
15630#[inline]
15631#[target_feature(enable = "avx512fp16")]
15632#[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = 8))]
15633#[rustc_legacy_const_generics(2)]
15634#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
15635pub fn _mm512_maskz_cvt_roundph_epu64<const ROUNDING: i32>(k: __mmask8, a: __m128h) -> __m512i {
15636 static_assert_rounding!(ROUNDING);
15637 _mm512_mask_cvt_roundph_epu64::<ROUNDING>(src:_mm512_setzero_si512(), k, a)
15638}
15639
15640/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15641/// store the results in dst.
15642///
15643/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi64)
15644#[inline]
15645#[target_feature(enable = "avx512fp16,avx512vl")]
15646#[cfg_attr(test, assert_instr(vcvttph2qq))]
15647#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
15648pub fn _mm_cvttph_epi64(a: __m128h) -> __m128i {
15649 _mm_mask_cvttph_epi64(src:_mm_undefined_si128(), k:0xff, a)
15650}
15651
15652/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15653/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15654///
15655/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi64)
15656#[inline]
15657#[target_feature(enable = "avx512fp16,avx512vl")]
15658#[cfg_attr(test, assert_instr(vcvttph2qq))]
15659#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
15660pub fn _mm_mask_cvttph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
15661 unsafe { transmute(src:vcvttph2qq_128(a, src.as_i64x2(), k)) }
15662}
15663
15664/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15665/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15666///
15667/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi64)
15668#[inline]
15669#[target_feature(enable = "avx512fp16,avx512vl")]
15670#[cfg_attr(test, assert_instr(vcvttph2qq))]
15671#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
15672pub fn _mm_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m128i {
15673 _mm_mask_cvttph_epi64(src:_mm_setzero_si128(), k, a)
15674}
15675
15676/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15677/// store the results in dst.
15678///
15679/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi64)
15680#[inline]
15681#[target_feature(enable = "avx512fp16,avx512vl")]
15682#[cfg_attr(test, assert_instr(vcvttph2qq))]
15683#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
15684pub fn _mm256_cvttph_epi64(a: __m128h) -> __m256i {
15685 _mm256_mask_cvttph_epi64(src:_mm256_undefined_si256(), k:0xff, a)
15686}
15687
15688/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15689/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15690///
15691/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi64)
15692#[inline]
15693#[target_feature(enable = "avx512fp16,avx512vl")]
15694#[cfg_attr(test, assert_instr(vcvttph2qq))]
15695#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
15696pub fn _mm256_mask_cvttph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
15697 unsafe { transmute(src:vcvttph2qq_256(a, src.as_i64x4(), k)) }
15698}
15699
15700/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15701/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15702///
15703/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi64)
15704#[inline]
15705#[target_feature(enable = "avx512fp16,avx512vl")]
15706#[cfg_attr(test, assert_instr(vcvttph2qq))]
15707#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
15708pub fn _mm256_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m256i {
15709 _mm256_mask_cvttph_epi64(src:_mm256_setzero_si256(), k, a)
15710}
15711
15712/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15713/// store the results in dst.
15714///
15715/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi64)
15716#[inline]
15717#[target_feature(enable = "avx512fp16")]
15718#[cfg_attr(test, assert_instr(vcvttph2qq))]
15719#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
15720pub fn _mm512_cvttph_epi64(a: __m128h) -> __m512i {
15721 _mm512_mask_cvttph_epi64(src:_mm512_undefined_epi32(), k:0xff, a)
15722}
15723
15724/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15725/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15726///
15727/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi64)
15728#[inline]
15729#[target_feature(enable = "avx512fp16")]
15730#[cfg_attr(test, assert_instr(vcvttph2qq))]
15731#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
15732pub fn _mm512_mask_cvttph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
15733 unsafe {
15734 transmute(src:vcvttph2qq_512(
15735 a,
15736 src.as_i64x8(),
15737 k,
15738 _MM_FROUND_CUR_DIRECTION,
15739 ))
15740 }
15741}
15742
15743/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15744/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15745///
15746/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi64)
15747#[inline]
15748#[target_feature(enable = "avx512fp16")]
15749#[cfg_attr(test, assert_instr(vcvttph2qq))]
15750#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
15751pub fn _mm512_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m512i {
15752 _mm512_mask_cvttph_epi64(src:_mm512_setzero_si512(), k, a)
15753}
15754
15755/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15756/// store the results in dst.
15757///
15758/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15759///
15760/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi64)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2qq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_cvtt_roundph_epi64<const SAE: i32>(a: __m128h) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundph_epi64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi64)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2qq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_mask_cvtt_roundph_epi64<const SAE: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m128h,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        transmute(vcvttph2qq_512(a, src.as_i64x8(), k, SAE))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi64)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2qq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_maskz_cvtt_roundph_epi64<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundph_epi64::<SAE>(_mm512_setzero_si512(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
/// store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu64)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2uqq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_cvttph_epu64(a: __m128h) -> __m128i {
    _mm_mask_cvttph_epu64(_mm_undefined_si128(), 0xff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu64)
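///
/// A minimal usage sketch (illustrative only, not taken from the upstream documentation); it
/// assumes the calling code is itself compiled with the `avx512fp16` and `avx512vl` target
/// features:
///
/// ```ignore
/// // Only the two lowest f16 lanes of `a` are converted; unselected lanes come from `src`.
/// let src = _mm_set_epi64x(99, 99);
/// let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.9, 3.5);
/// let r = _mm_mask_cvttph_epu64(src, 0b01, a);
/// // Lane 0 is 3 (truncated from 3.5); lane 1 is 99, copied from `src`.
/// ```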
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2uqq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_mask_cvttph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
    unsafe { transmute(vcvttph2uqq_128(a, src.as_u64x2(), k)) }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu64)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2uqq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m128i {
    _mm_mask_cvttph_epu64(_mm_setzero_si128(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
/// store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu64)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2uqq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_cvttph_epu64(a: __m128h) -> __m256i {
    _mm256_mask_cvttph_epu64(_mm256_undefined_si256(), 0xff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu64)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2uqq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_mask_cvttph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
    unsafe { transmute(vcvttph2uqq_256(a, src.as_u64x4(), k)) }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu64)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2uqq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m256i {
    _mm256_mask_cvttph_epu64(_mm256_setzero_si256(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
/// store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu64)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2uqq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_cvttph_epu64(a: __m128h) -> __m512i {
    _mm512_mask_cvttph_epu64(_mm512_undefined_epi32(), 0xff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu64)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2uqq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_mask_cvttph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
    unsafe {
        transmute(vcvttph2uqq_512(
            a,
            src.as_u64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu64)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2uqq))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m512i {
    _mm512_mask_cvttph_epu64(_mm512_setzero_si512(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
/// store the results in dst.
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu64)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_cvtt_roundph_epu64<const SAE: i32>(a: __m128h) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundph_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu64)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_mask_cvtt_roundph_epu64<const SAE: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m128h,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        transmute(vcvttph2uqq_512(a, src.as_u64x8(), k, SAE))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu64)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_maskz_cvtt_roundph_epu64<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundph_epu64::<SAE>(_mm512_setzero_si512(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
/// floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxph_ps)
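///
/// A minimal usage sketch (illustrative only, not taken from the upstream documentation); it
/// assumes the calling code is itself compiled with the `avx512fp16` and `avx512vl` target
/// features:
///
/// ```ignore
/// // Widen the four lowest f16 lanes of `a` to f32.
/// let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 4.0, 3.0, 2.0, 1.0);
/// let r = _mm_cvtxph_ps(a);
/// // r holds 1.0, 2.0, 3.0, 4.0 (element 0 first).
/// ```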
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2psx))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_cvtxph_ps(a: __m128h) -> __m128 {
    _mm_mask_cvtxph_ps(_mm_setzero_ps(), 0xff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
/// dst when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxph_ps)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2psx))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_mask_cvtxph_ps(src: __m128, k: __mmask8, a: __m128h) -> __m128 {
    unsafe { vcvtph2psx_128(a, src, k) }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
/// corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxph_ps)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2psx))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m128 {
    _mm_mask_cvtxph_ps(_mm_setzero_ps(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
/// floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxph_ps)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2psx))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_cvtxph_ps(a: __m128h) -> __m256 {
    _mm256_mask_cvtxph_ps(_mm256_setzero_ps(), 0xff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
/// dst when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxph_ps)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2psx))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_mask_cvtxph_ps(src: __m256, k: __mmask8, a: __m128h) -> __m256 {
    unsafe { vcvtph2psx_256(a, src, k) }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
/// corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxph_ps)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2psx))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m256 {
    _mm256_mask_cvtxph_ps(_mm256_setzero_ps(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
/// floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxph_ps)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2psx))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_cvtxph_ps(a: __m256h) -> __m512 {
    _mm512_mask_cvtxph_ps(_mm512_setzero_ps(), 0xffff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
/// dst when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxph_ps)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2psx))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_mask_cvtxph_ps(src: __m512, k: __mmask16, a: __m256h) -> __m512 {
    unsafe { vcvtph2psx_512(a, src, k, _MM_FROUND_CUR_DIRECTION) }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
/// corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxph_ps)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2psx))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_maskz_cvtxph_ps(k: __mmask16, a: __m256h) -> __m512 {
    _mm512_mask_cvtxph_ps(_mm512_setzero_ps(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
/// floating-point elements, and store the results in dst.
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundph_ps)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2psx, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_cvtx_roundph_ps<const SAE: i32>(a: __m256h) -> __m512 {
    static_assert_sae!(SAE);
    _mm512_mask_cvtx_roundph_ps::<SAE>(_mm512_setzero_ps(), 0xffff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
/// dst when the corresponding mask bit is not set).
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundph_ps)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2psx, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_mask_cvtx_roundph_ps<const SAE: i32>(
    src: __m512,
    k: __mmask16,
    a: __m256h,
) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        vcvtph2psx_512(a, src, k, SAE)
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
/// corresponding mask bit is not set).
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundph_ps)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2psx, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_maskz_cvtx_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512 {
    static_assert_sae!(SAE);
    _mm512_mask_cvtx_roundph_ps::<SAE>(_mm512_setzero_ps(), k, a)
}

/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
/// floating-point element, store the result in the lower element of dst, and copy the upper 3 packed
/// elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_ss)
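///
/// A minimal usage sketch (illustrative only, not taken from the upstream documentation); it
/// assumes the calling code is itself compiled with the `avx512fp16` target feature:
///
/// ```ignore
/// // The lowest f16 lane of `b` becomes the lowest f32 lane of the result; the upper
/// // three f32 lanes are taken from `a`.
/// let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
/// let b = _mm_set_sh(0.5);
/// let r = _mm_cvtsh_ss(a, b);
/// // r holds 0.5, 2.0, 3.0, 4.0 (element 0 first).
/// ```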
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2ss))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_cvtsh_ss(a: __m128, b: __m128h) -> __m128 {
    _mm_mask_cvtsh_ss(a, 0xff, a, b)
}

/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
/// floating-point element, store the result in the lower element of dst using writemask k (the element is
/// copied from src to dst when mask bit 0 is not set), and copy the upper 3 packed elements from a to the
/// upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_ss)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2ss))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_mask_cvtsh_ss(src: __m128, k: __mmask8, a: __m128, b: __m128h) -> __m128 {
    unsafe { vcvtsh2ss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
}

/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
/// floating-point element, store the result in the lower element of dst using zeromask k (the element is
/// zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements
/// of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_ss)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2ss))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_maskz_cvtsh_ss(k: __mmask8, a: __m128, b: __m128h) -> __m128 {
    _mm_mask_cvtsh_ss(_mm_set_ss(0.0), k, a, b)
}

/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
/// floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements
/// from a to the upper elements of dst.
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_ss)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_cvt_roundsh_ss<const SAE: i32>(a: __m128, b: __m128h) -> __m128 {
    static_assert_sae!(SAE);
    _mm_mask_cvt_roundsh_ss::<SAE>(_mm_undefined_ps(), 0xff, a, b)
}

/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
/// floating-point element, store the result in the lower element of dst using writemask k (the element is
/// copied from src to dst when mask bit 0 is not set), and copy the upper 3 packed elements from a to the
/// upper elements of dst.
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_ss)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = 8))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_mask_cvt_roundsh_ss<const SAE: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128h,
) -> __m128 {
    unsafe {
        static_assert_sae!(SAE);
        vcvtsh2ss(a, b, src, k, SAE)
    }
}

/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
/// floating-point element, store the result in the lower element of dst using zeromask k (the element is
/// zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements
/// of dst.
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_ss)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_maskz_cvt_roundsh_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128h) -> __m128 {
    static_assert_sae!(SAE);
    _mm_mask_cvt_roundsh_ss::<SAE>(_mm_set_ss(0.0), k, a, b)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
/// floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_pd)
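///
/// A minimal usage sketch (illustrative only, not taken from the upstream documentation); it
/// assumes the calling code is itself compiled with the `avx512fp16` and `avx512vl` target
/// features:
///
/// ```ignore
/// // Widen the two lowest f16 lanes of `a` to f64.
/// let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.5, 1.5);
/// let r = _mm_cvtph_pd(a);
/// // r holds 1.5, 2.5 (element 0 first).
/// ```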
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2pd))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_cvtph_pd(a: __m128h) -> __m128d {
    _mm_mask_cvtph_pd(_mm_setzero_pd(), 0xff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
/// dst when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_pd)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2pd))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_mask_cvtph_pd(src: __m128d, k: __mmask8, a: __m128h) -> __m128d {
    unsafe { vcvtph2pd_128(a, src, k) }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
/// corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_pd)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2pd))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m128d {
    _mm_mask_cvtph_pd(_mm_setzero_pd(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
/// floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_pd)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2pd))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_cvtph_pd(a: __m128h) -> __m256d {
    _mm256_mask_cvtph_pd(_mm256_setzero_pd(), 0xff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
/// dst when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_pd)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2pd))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_mask_cvtph_pd(src: __m256d, k: __mmask8, a: __m128h) -> __m256d {
    unsafe { vcvtph2pd_256(a, src, k) }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
/// corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_pd)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2pd))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m256d {
    _mm256_mask_cvtph_pd(_mm256_setzero_pd(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
/// floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_pd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2pd))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_cvtph_pd(a: __m128h) -> __m512d {
    _mm512_mask_cvtph_pd(_mm512_setzero_pd(), 0xff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
/// dst when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_pd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2pd))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_mask_cvtph_pd(src: __m512d, k: __mmask8, a: __m128h) -> __m512d {
    unsafe { vcvtph2pd_512(a, src, k, _MM_FROUND_CUR_DIRECTION) }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
/// corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_pd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2pd))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m512d {
    _mm512_mask_cvtph_pd(_mm512_setzero_pd(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
/// floating-point elements, and store the results in dst.
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_pd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2pd, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_cvt_roundph_pd<const SAE: i32>(a: __m128h) -> __m512d {
    static_assert_sae!(SAE);
    _mm512_mask_cvt_roundph_pd::<SAE>(_mm512_setzero_pd(), 0xff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
/// dst when the corresponding mask bit is not set).
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_pd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2pd, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_mask_cvt_roundph_pd<const SAE: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m128h,
) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        vcvtph2pd_512(a, src, k, SAE)
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
/// corresponding mask bit is not set).
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_pd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2pd, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm512_maskz_cvt_roundph_pd<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512d {
    static_assert_sae!(SAE);
    _mm512_mask_cvt_roundph_pd::<SAE>(_mm512_setzero_pd(), k, a)
}

/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
/// floating-point element, store the result in the lower element of dst, and copy the upper element
/// from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_sd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2sd))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_cvtsh_sd(a: __m128d, b: __m128h) -> __m128d {
    _mm_mask_cvtsh_sd(a, 0xff, a, b)
}

/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
/// floating-point element, store the result in the lower element of dst using writemask k (the element is
/// copied from src to dst when mask bit 0 is not set), and copy the upper element from a to the upper element
/// of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_sd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2sd))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_mask_cvtsh_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128h) -> __m128d {
    unsafe { vcvtsh2sd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
}

/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
/// floating-point element, store the result in the lower element of dst using zeromask k (the element is
/// zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_sd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2sd))]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_maskz_cvtsh_sd(k: __mmask8, a: __m128d, b: __m128h) -> __m128d {
    _mm_mask_cvtsh_sd(_mm_set_sd(0.0), k, a, b)
}

/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
/// floating-point element, store the result in the lower element of dst, and copy the upper element from a
/// to the upper element of dst.
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_sd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_cvt_roundsh_sd<const SAE: i32>(a: __m128d, b: __m128h) -> __m128d {
    static_assert_sae!(SAE);
    _mm_mask_cvt_roundsh_sd::<SAE>(a, 0xff, a, b)
}

/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
/// floating-point element, store the result in the lower element of dst using writemask k (the element is
/// copied from src to dst when mask bit 0 is not set), and copy the upper element from a to the upper element
/// of dst.
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_sd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = 8))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_mask_cvt_roundsh_sd<const SAE: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128h,
) -> __m128d {
    unsafe {
        static_assert_sae!(SAE);
        vcvtsh2sd(a, b, src, k, SAE)
    }
}

/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
/// floating-point element, store the result in the lower element of dst using zeromask k (the element is
/// zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_sd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_maskz_cvt_roundsh_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128h) -> __m128d {
    static_assert_sae!(SAE);
    _mm_mask_cvt_roundsh_sd::<SAE>(_mm_set_sd(0.0), k, a, b)
}

/// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_h)
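///
/// A minimal usage sketch (illustrative only, not taken from the upstream documentation); it
/// assumes the calling code is itself compiled with the `avx512fp16` target feature:
///
/// ```ignore
/// // Extract the lowest f16 lane.
/// let a = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
/// let x: f16 = _mm_cvtsh_h(a);
/// assert_eq!(x, 1.0);
/// ```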
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtsh_h(a: __m128h) -> f16 {
    unsafe { simd_extract!(a, 0) }
}

/// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsh_h)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtsh_h(a: __m256h) -> f16 {
    unsafe { simd_extract!(a, 0) }
}

/// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsh_h)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtsh_h(a: __m512h) -> f16 {
    unsafe { simd_extract!(a, 0) }
}

/// Copy the lower 16-bit integer in a to dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si16)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtsi128_si16(a: __m128i) -> i16 {
    unsafe { simd_extract!(a.as_i16x8(), 0) }
}

/// Copy 16-bit integer a to the lower elements of dst, and zero the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi16_si128)
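///
/// A minimal usage sketch (illustrative only, not taken from the upstream documentation); it
/// assumes the calling code is itself compiled with the `avx512fp16` target feature:
///
/// ```ignore
/// // Place a 16-bit integer into lane 0 and read it back with _mm_cvtsi128_si16.
/// let v = _mm_cvtsi16_si128(-42);
/// assert_eq!(_mm_cvtsi128_si16(v), -42);
/// ```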
#[inline]
#[target_feature(enable = "avx512fp16")]
#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtsi16_si128(a: i16) -> __m128i {
    unsafe { transmute(simd_insert!(i16x8::ZERO, 0, a)) }
}

#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.avx512fp16.mask.cmp.sh"]
    unsafe fn vcmpsh(a: __m128h, b: __m128h, imm8: i32, mask: __mmask8, sae: i32) -> __mmask8;
    #[link_name = "llvm.x86.avx512fp16.vcomi.sh"]
    unsafe fn vcomish(a: __m128h, b: __m128h, imm8: i32, sae: i32) -> i32;

    #[link_name = "llvm.x86.avx512fp16.add.ph.512"]
    unsafe fn vaddph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.sub.ph.512"]
    unsafe fn vsubph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mul.ph.512"]
    unsafe fn vmulph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.div.ph.512"]
    unsafe fn vdivph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;

    #[link_name = "llvm.x86.avx512fp16.mask.add.sh.round"]
    unsafe fn vaddsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.sub.sh.round"]
    unsafe fn vsubsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.mul.sh.round"]
    unsafe fn vmulsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.div.sh.round"]
    unsafe fn vdivsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.128"]
    unsafe fn vfmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.256"]
    unsafe fn vfmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
    #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.512"]
    unsafe fn vfmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
    #[link_name = "llvm.x86.avx512fp16.mask.vfmul.csh"]
    unsafe fn vfmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;

    #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.128"]
    unsafe fn vfcmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.256"]
    unsafe fn vfcmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
    #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.512"]
    unsafe fn vfcmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
    #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.csh"]
    unsafe fn vfcmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;

    #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.128"]
    unsafe fn vfmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.128"]
    unsafe fn vfmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.256"]
    unsafe fn vfmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
    #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.256"]
    unsafe fn vfmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
    #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.512"]
    unsafe fn vfmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
    #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.512"]
    unsafe fn vfmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
    #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.csh"]
    unsafe fn vfmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.csh"]
    unsafe fn vfmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;

    #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.128"]
    unsafe fn vfcmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.128"]
    unsafe fn vfcmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.256"]
    unsafe fn vfcmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
    #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.256"]
    unsafe fn vfcmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
    #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.512"]
    unsafe fn vfcmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
    -> __m512;
    #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.512"]
    unsafe fn vfcmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
    -> __m512;
    #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.csh"]
    unsafe fn vfcmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.csh"]
    unsafe fn vfcmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;

    #[link_name = "llvm.x86.avx512fp16.vfmadd.ph.512"]
    unsafe fn vfmaddph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.vfmadd.f16"]
    unsafe fn vfmaddsh(a: f16, b: f16, c: f16, rounding: i32) -> f16;

    #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.512"]
    unsafe fn vfmaddsubph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;

    #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.128"]
    unsafe fn vrcpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.256"]
    unsafe fn vrcpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.512"]
    unsafe fn vrcpph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.rcp.sh"]
    unsafe fn vrcpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.128"]
    unsafe fn vrsqrtph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.256"]
    unsafe fn vrsqrtph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.512"]
    unsafe fn vrsqrtph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.sh"]
    unsafe fn vrsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.sqrt.ph.512"]
    unsafe fn vsqrtph_512(a: __m512h, rounding: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.sqrt.sh"]
    unsafe fn vsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.max.ph.128"]
    unsafe fn vmaxph_128(a: __m128h, b: __m128h) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.max.ph.256"]
    unsafe fn vmaxph_256(a: __m256h, b: __m256h) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.max.ph.512"]
    unsafe fn vmaxph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.max.sh.round"]
    unsafe fn vmaxsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.min.ph.128"]
    unsafe fn vminph_128(a: __m128h, b: __m128h) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.min.ph.256"]
    unsafe fn vminph_256(a: __m256h, b: __m256h) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.min.ph.512"]
    unsafe fn vminph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.min.sh.round"]
    unsafe fn vminsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.128"]
    unsafe fn vgetexpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.256"]
    unsafe fn vgetexpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.512"]
    unsafe fn vgetexpph_512(a: __m512h, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.getexp.sh"]
    unsafe fn vgetexpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.128"]
    unsafe fn vgetmantph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.256"]
    unsafe fn vgetmantph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.512"]
    unsafe fn vgetmantph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.getmant.sh"]
    unsafe fn vgetmantsh(
        a: __m128h,
        b: __m128h,
        imm8: i32,
        src: __m128h,
        k: __mmask8,
        sae: i32,
    ) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.128"]
    unsafe fn vrndscaleph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.256"]
    unsafe fn vrndscaleph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.512"]
    unsafe fn vrndscaleph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.rndscale.sh"]
    unsafe fn vrndscalesh(
        a: __m128h,
        b: __m128h,
        src: __m128h,
        k: __mmask8,
        imm8: i32,
        sae: i32,
    ) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.128"]
    unsafe fn vscalefph_128(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.256"]
    unsafe fn vscalefph_256(a: __m256h, b: __m256h, src: __m256h, k: __mmask16) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.512"]
    unsafe fn vscalefph_512(a: __m512h, b: __m512h, src: __m512h, k: __mmask32, rounding: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.scalef.sh"]
    unsafe fn vscalefsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.128"]
    unsafe fn vreduceph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.256"]
    unsafe fn vreduceph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.512"]
    unsafe fn vreduceph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.reduce.sh"]
    unsafe fn vreducesh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, imm8: i32, sae: i32)
    -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.fpclass.sh"]
    unsafe fn vfpclasssh(a: __m128h, imm8: i32, k: __mmask8) -> __mmask8;

    #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i16"]
    unsafe fn vcvtw2ph_128(a: i16x8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512.sitofp.round.v16f16.v16i16"]
    unsafe fn vcvtw2ph_256(a: i16x16, rounding: i32) -> __m256h;
    #[link_name = "llvm.x86.avx512.sitofp.round.v32f16.v32i16"]
    unsafe fn vcvtw2ph_512(a: i16x32, rounding: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8i16"]
    unsafe fn vcvtuw2ph_128(a: u16x8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512.uitofp.round.v16f16.v16i16"]
    unsafe fn vcvtuw2ph_256(a: u16x16, rounding: i32) -> __m256h;
    #[link_name = "llvm.x86.avx512.uitofp.round.v32f16.v32i16"]
    unsafe fn vcvtuw2ph_512(a: u16x32, rounding: i32) -> __m512h;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvtdq2ph.128"]
    unsafe fn vcvtdq2ph_128(a: i32x4, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i32"]
    unsafe fn vcvtdq2ph_256(a: i32x8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512.sitofp.round.v16f16.v16i32"]
    unsafe fn vcvtdq2ph_512(a: i32x16, rounding: i32) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.vcvtsi2sh"]
    unsafe fn vcvtsi2sh(a: __m128h, b: i32, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtudq2ph.128"]
    unsafe fn vcvtudq2ph_128(a: u32x4, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8i32"]
    unsafe fn vcvtudq2ph_256(a: u32x8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512.uitofp.round.v16f16.v16i32"]
    unsafe fn vcvtudq2ph_512(a: u32x16, rounding: i32) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.vcvtusi2sh"]
    unsafe fn vcvtusi2sh(a: __m128h, b: u32, rounding: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvtqq2ph.128"]
    unsafe fn vcvtqq2ph_128(a: i64x2, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtqq2ph.256"]
    unsafe fn vcvtqq2ph_256(a: i64x4, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i64"]
    unsafe fn vcvtqq2ph_512(a: i64x8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtuqq2ph.128"]
    unsafe fn vcvtuqq2ph_128(a: u64x2, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtuqq2ph.256"]
    unsafe fn vcvtuqq2ph_256(a: u64x4, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8i64"]
    unsafe fn vcvtuqq2ph_512(a: u64x8, rounding: i32) -> __m128h;

16807 #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.128"]
16808 unsafefn vcvtps2phx_128(a: __m128, src: __m128h, k: __mmask8) -> __m128h;
16809 #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.256"]
16810 unsafefn vcvtps2phx_256(a: __m256, src: __m128h, k: __mmask8) -> __m128h;
16811 #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.512"]
16812 unsafefn vcvtps2phx_512(a: __m512, src: __m256h, k: __mmask16, rounding: i32) -> __m256h;
16813 #[link_name = "llvm.x86.avx512fp16.mask.vcvtss2sh.round"]
16814 unsafefn vcvtss2sh(a: __m128h, b: __m128, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
16815
16816 #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.128"]
16817 unsafefn vcvtpd2ph_128(a: __m128d, src: __m128h, k: __mmask8) -> __m128h;
16818 #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.256"]
16819 unsafefn vcvtpd2ph_256(a: __m256d, src: __m128h, k: __mmask8) -> __m128h;
16820 #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.512"]
16821 unsafefn vcvtpd2ph_512(a: __m512d, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
16822 #[link_name = "llvm.x86.avx512fp16.mask.vcvtsd2sh.round"]
16823 unsafefn vcvtsd2sh(a: __m128h, b: __m128d, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
16824
16825 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.128"]
16826 unsafefn vcvtph2w_128(a: __m128h, src: i16x8, k: __mmask8) -> i16x8;
16827 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.256"]
16828 unsafefn vcvtph2w_256(a: __m256h, src: i16x16, k: __mmask16) -> i16x16;
16829 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.512"]
16830 unsafefn vcvtph2w_512(a: __m512h, src: i16x32, k: __mmask32, rounding: i32) -> i16x32;
16831 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.128"]
16832 unsafefn vcvtph2uw_128(a: __m128h, src: u16x8, k: __mmask8) -> u16x8;
16833 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.256"]
16834 unsafefn vcvtph2uw_256(a: __m256h, src: u16x16, k: __mmask16) -> u16x16;
16835 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.512"]
16836 unsafefn vcvtph2uw_512(a: __m512h, src: u16x32, k: __mmask32, sae: i32) -> u16x32;
16837
16838    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.128"]
16839    unsafe fn vcvttph2w_128(a: __m128h, src: i16x8, k: __mmask8) -> i16x8;
16840    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.256"]
16841    unsafe fn vcvttph2w_256(a: __m256h, src: i16x16, k: __mmask16) -> i16x16;
16842    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.512"]
16843    unsafe fn vcvttph2w_512(a: __m512h, src: i16x32, k: __mmask32, sae: i32) -> i16x32;
16844    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.128"]
16845    unsafe fn vcvttph2uw_128(a: __m128h, src: u16x8, k: __mmask8) -> u16x8;
16846    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.256"]
16847    unsafe fn vcvttph2uw_256(a: __m256h, src: u16x16, k: __mmask16) -> u16x16;
16848    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.512"]
16849    unsafe fn vcvttph2uw_512(a: __m512h, src: u16x32, k: __mmask32, sae: i32) -> u16x32;
16850
16851    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.128"]
16852    unsafe fn vcvtph2dq_128(a: __m128h, src: i32x4, k: __mmask8) -> i32x4;
16853    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.256"]
16854    unsafe fn vcvtph2dq_256(a: __m128h, src: i32x8, k: __mmask8) -> i32x8;
16855    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.512"]
16856    unsafe fn vcvtph2dq_512(a: __m256h, src: i32x16, k: __mmask16, rounding: i32) -> i32x16;
16857    #[link_name = "llvm.x86.avx512fp16.vcvtsh2si32"]
16858    unsafe fn vcvtsh2si32(a: __m128h, rounding: i32) -> i32;
16859    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.128"]
16860    unsafe fn vcvtph2udq_128(a: __m128h, src: u32x4, k: __mmask8) -> u32x4;
16861    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.256"]
16862    unsafe fn vcvtph2udq_256(a: __m128h, src: u32x8, k: __mmask8) -> u32x8;
16863    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.512"]
16864    unsafe fn vcvtph2udq_512(a: __m256h, src: u32x16, k: __mmask16, rounding: i32) -> u32x16;
16865    #[link_name = "llvm.x86.avx512fp16.vcvtsh2usi32"]
16866    unsafe fn vcvtsh2usi32(a: __m128h, sae: i32) -> u32;
16867
16868    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.128"]
16869    unsafe fn vcvttph2dq_128(a: __m128h, src: i32x4, k: __mmask8) -> i32x4;
16870    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.256"]
16871    unsafe fn vcvttph2dq_256(a: __m128h, src: i32x8, k: __mmask8) -> i32x8;
16872    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.512"]
16873    unsafe fn vcvttph2dq_512(a: __m256h, src: i32x16, k: __mmask16, sae: i32) -> i32x16;
16874    #[link_name = "llvm.x86.avx512fp16.vcvttsh2si32"]
16875    unsafe fn vcvttsh2si32(a: __m128h, sae: i32) -> i32;
16876    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.128"]
16877    unsafe fn vcvttph2udq_128(a: __m128h, src: u32x4, k: __mmask8) -> u32x4;
16878    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.256"]
16879    unsafe fn vcvttph2udq_256(a: __m128h, src: u32x8, k: __mmask8) -> u32x8;
16880    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.512"]
16881    unsafe fn vcvttph2udq_512(a: __m256h, src: u32x16, k: __mmask16, sae: i32) -> u32x16;
16882    #[link_name = "llvm.x86.avx512fp16.vcvttsh2usi32"]
16883    unsafe fn vcvttsh2usi32(a: __m128h, sae: i32) -> u32;
16884
16885    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.128"]
16886    unsafe fn vcvtph2qq_128(a: __m128h, src: i64x2, k: __mmask8) -> i64x2;
16887    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.256"]
16888    unsafe fn vcvtph2qq_256(a: __m128h, src: i64x4, k: __mmask8) -> i64x4;
16889    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.512"]
16890    unsafe fn vcvtph2qq_512(a: __m128h, src: i64x8, k: __mmask8, rounding: i32) -> i64x8;
16891    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.128"]
16892    unsafe fn vcvtph2uqq_128(a: __m128h, src: u64x2, k: __mmask8) -> u64x2;
16893    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.256"]
16894    unsafe fn vcvtph2uqq_256(a: __m128h, src: u64x4, k: __mmask8) -> u64x4;
16895    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.512"]
16896    unsafe fn vcvtph2uqq_512(a: __m128h, src: u64x8, k: __mmask8, rounding: i32) -> u64x8;
16897
16898    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.128"]
16899    unsafe fn vcvttph2qq_128(a: __m128h, src: i64x2, k: __mmask8) -> i64x2;
16900    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.256"]
16901    unsafe fn vcvttph2qq_256(a: __m128h, src: i64x4, k: __mmask8) -> i64x4;
16902    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.512"]
16903    unsafe fn vcvttph2qq_512(a: __m128h, src: i64x8, k: __mmask8, sae: i32) -> i64x8;
16904    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.128"]
16905    unsafe fn vcvttph2uqq_128(a: __m128h, src: u64x2, k: __mmask8) -> u64x2;
16906    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.256"]
16907    unsafe fn vcvttph2uqq_256(a: __m128h, src: u64x4, k: __mmask8) -> u64x4;
16908    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.512"]
16909    unsafe fn vcvttph2uqq_512(a: __m128h, src: u64x8, k: __mmask8, sae: i32) -> u64x8;
16910
16911    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.128"]
16912    unsafe fn vcvtph2psx_128(a: __m128h, src: __m128, k: __mmask8) -> __m128;
16913    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.256"]
16914    unsafe fn vcvtph2psx_256(a: __m128h, src: __m256, k: __mmask8) -> __m256;
16915    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.512"]
16916    unsafe fn vcvtph2psx_512(a: __m256h, src: __m512, k: __mmask16, sae: i32) -> __m512;
16917    #[link_name = "llvm.x86.avx512fp16.mask.vcvtsh2ss.round"]
16918    unsafe fn vcvtsh2ss(a: __m128, b: __m128h, src: __m128, k: __mmask8, sae: i32) -> __m128;
16919
16920    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.128"]
16921    unsafe fn vcvtph2pd_128(a: __m128h, src: __m128d, k: __mmask8) -> __m128d;
16922    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.256"]
16923    unsafe fn vcvtph2pd_256(a: __m128h, src: __m256d, k: __mmask8) -> __m256d;
16924    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.512"]
16925    unsafe fn vcvtph2pd_512(a: __m128h, src: __m512d, k: __mmask8, sae: i32) -> __m512d;
16926    #[link_name = "llvm.x86.avx512fp16.mask.vcvtsh2sd.round"]
16927    unsafe fn vcvtsh2sd(a: __m128d, b: __m128h, src: __m128d, k: __mmask8, sae: i32) -> __m128d;
16928
16929}
16930
16931#[cfg(test)]
16932mod tests {
16933 use crate::core_arch::assert_eq_const as assert_eq;
16934 use crate::core_arch::x86::*;
16935 use crate::ptr::{addr_of, addr_of_mut};
16936 use stdarch_test::simd_test;
16937
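    // Note: the `*_set1_pch` functions below are test-local helpers, not public intrinsics.
    // Each one splats an interleaved (re, im) pair across the vector, i.e. the packed
    // complex half-precision layout that the `*_pch` complex-arithmetic tests presumably
    // operate on.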
16938 #[target_feature(enable = "avx512fp16")]
16939 #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
16940 const fn _mm_set1_pch(re: f16, im: f16) -> __m128h {
16941 _mm_setr_ph(re, im, re, im, re, im, re, im)
16942 }
16943
16944 #[target_feature(enable = "avx512fp16")]
16945 #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
16946 const fn _mm256_set1_pch(re: f16, im: f16) -> __m256h {
16947 _mm256_setr_ph(
16948 re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
16949 )
16950 }
16951
16952 #[target_feature(enable = "avx512fp16")]
16953 #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
16954 const fn _mm512_set1_pch(re: f16, im: f16) -> __m512h {
16955 _mm512_setr_ph(
16956 re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
16957 re, im, re, im, re, im, re, im, re, im,
16958 )
16959 }
16960
16961 #[simd_test(enable = "avx512fp16,avx512vl")]
16962 const fn test_mm_set_ph() {
16963 let r = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
16964 let e = _mm_setr_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
16965 assert_eq_m128h(r, e);
16966 }
16967
16968 #[simd_test(enable = "avx512fp16,avx512vl")]
16969 const fn test_mm256_set_ph() {
16970 let r = _mm256_set_ph(
16971 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
16972 );
16973 let e = _mm256_setr_ph(
16974 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
16975 );
16976 assert_eq_m256h(r, e);
16977 }
16978
16979 #[simd_test(enable = "avx512fp16")]
16980 const fn test_mm512_set_ph() {
16981 let r = _mm512_set_ph(
16982 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
16983 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
16984 31.0, 32.0,
16985 );
16986 let e = _mm512_setr_ph(
16987 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
16988 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
16989 3.0, 2.0, 1.0,
16990 );
16991 assert_eq_m512h(r, e);
16992 }
16993
16994 #[simd_test(enable = "avx512fp16,avx512vl")]
16995 const fn test_mm_set_sh() {
16996 let r = _mm_set_sh(1.0);
16997 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0);
16998 assert_eq_m128h(r, e);
16999 }
17000
17001 #[simd_test(enable = "avx512fp16,avx512vl")]
17002 const fn test_mm_set1_ph() {
17003 let r = _mm_set1_ph(1.0);
17004 let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
17005 assert_eq_m128h(r, e);
17006 }
17007
17008 #[simd_test(enable = "avx512fp16,avx512vl")]
17009 const fn test_mm256_set1_ph() {
17010 let r = _mm256_set1_ph(1.0);
17011 let e = _mm256_set_ph(
17012 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
17013 );
17014 assert_eq_m256h(r, e);
17015 }
17016
17017 #[simd_test(enable = "avx512fp16")]
17018 const fn test_mm512_set1_ph() {
17019 let r = _mm512_set1_ph(1.0);
17020 let e = _mm512_set_ph(
17021 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
17022 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
17023 );
17024 assert_eq_m512h(r, e);
17025 }
17026
17027 #[simd_test(enable = "avx512fp16,avx512vl")]
17028 const fn test_mm_setr_ph() {
17029 let r = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17030 let e = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
17031 assert_eq_m128h(r, e);
17032 }
17033
17034 #[simd_test(enable = "avx512fp16,avx512vl")]
17035 const fn test_mm256_setr_ph() {
17036 let r = _mm256_setr_ph(
17037 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17038 );
17039 let e = _mm256_set_ph(
17040 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
17041 );
17042 assert_eq_m256h(r, e);
17043 }
17044
17045 #[simd_test(enable = "avx512fp16")]
17046 const fn test_mm512_setr_ph() {
17047 let r = _mm512_setr_ph(
17048 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17049 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17050 31.0, 32.0,
17051 );
17052 let e = _mm512_set_ph(
17053 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17054 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17055 3.0, 2.0, 1.0,
17056 );
17057 assert_eq_m512h(r, e);
17058 }
17059
17060 #[simd_test(enable = "avx512fp16,avx512vl")]
17061 const fn test_mm_setzero_ph() {
17062 let r = _mm_setzero_ph();
17063 let e = _mm_set1_ph(0.0);
17064 assert_eq_m128h(r, e);
17065 }
17066
17067 #[simd_test(enable = "avx512fp16,avx512vl")]
17068 const fn test_mm256_setzero_ph() {
17069 let r = _mm256_setzero_ph();
17070 let e = _mm256_set1_ph(0.0);
17071 assert_eq_m256h(r, e);
17072 }
17073
17074 #[simd_test(enable = "avx512fp16")]
17075 const fn test_mm512_setzero_ph() {
17076 let r = _mm512_setzero_ph();
17077 let e = _mm512_set1_ph(0.0);
17078 assert_eq_m512h(r, e);
17079 }
17080
17081 #[simd_test(enable = "avx512fp16,avx512vl")]
17082 const fn test_mm_castsi128_ph() {
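        // 0x3C00 is the IEEE 754 binary16 bit pattern of 1.0, so reinterpreting the integer
        // vector as `__m128h` must yield 1.0 in every lane.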
17083 let a = _mm_set1_epi16(0x3c00);
17084 let r = _mm_castsi128_ph(a);
17085 let e = _mm_set1_ph(1.0);
17086 assert_eq_m128h(r, e);
17087 }
17088
17089 #[simd_test(enable = "avx512fp16,avx512vl")]
17090 const fn test_mm256_castsi256_ph() {
17091 let a = _mm256_set1_epi16(0x3c00);
17092 let r = _mm256_castsi256_ph(a);
17093 let e = _mm256_set1_ph(1.0);
17094 assert_eq_m256h(r, e);
17095 }
17096
17097 #[simd_test(enable = "avx512fp16")]
17098 const fn test_mm512_castsi512_ph() {
17099 let a = _mm512_set1_epi16(0x3c00);
17100 let r = _mm512_castsi512_ph(a);
17101 let e = _mm512_set1_ph(1.0);
17102 assert_eq_m512h(r, e);
17103 }
17104
17105 #[simd_test(enable = "avx512fp16")]
17106 const fn test_mm_castph_si128() {
17107 let a = _mm_set1_ph(1.0);
17108 let r = _mm_castph_si128(a);
17109 let e = _mm_set1_epi16(0x3c00);
17110 assert_eq_m128i(r, e);
17111 }
17112
17113 #[simd_test(enable = "avx512fp16")]
17114 const fn test_mm256_castph_si256() {
17115 let a = _mm256_set1_ph(1.0);
17116 let r = _mm256_castph_si256(a);
17117 let e = _mm256_set1_epi16(0x3c00);
17118 assert_eq_m256i(r, e);
17119 }
17120
17121 #[simd_test(enable = "avx512fp16")]
17122 const fn test_mm512_castph_si512() {
17123 let a = _mm512_set1_ph(1.0);
17124 let r = _mm512_castph_si512(a);
17125 let e = _mm512_set1_epi16(0x3c00);
17126 assert_eq_m512i(r, e);
17127 }
17128
17129 #[simd_test(enable = "avx512fp16,avx512vl")]
17130 const fn test_mm_castps_ph() {
17131 let a = _mm_castsi128_ps(_mm_set1_epi16(0x3c00));
17132 let r = _mm_castps_ph(a);
17133 let e = _mm_set1_ph(1.0);
17134 assert_eq_m128h(r, e);
17135 }
17136
17137 #[simd_test(enable = "avx512fp16,avx512vl")]
17138 const fn test_mm256_castps_ph() {
17139 let a = _mm256_castsi256_ps(_mm256_set1_epi16(0x3c00));
17140 let r = _mm256_castps_ph(a);
17141 let e = _mm256_set1_ph(1.0);
17142 assert_eq_m256h(r, e);
17143 }
17144
17145 #[simd_test(enable = "avx512fp16")]
17146 const fn test_mm512_castps_ph() {
17147 let a = _mm512_castsi512_ps(_mm512_set1_epi16(0x3c00));
17148 let r = _mm512_castps_ph(a);
17149 let e = _mm512_set1_ph(1.0);
17150 assert_eq_m512h(r, e);
17151 }
17152
17153 #[simd_test(enable = "avx512fp16")]
17154 const fn test_mm_castph_ps() {
17155 let a = _mm_castsi128_ph(_mm_set1_epi32(0x3f800000));
17156 let r = _mm_castph_ps(a);
17157 let e = _mm_set1_ps(1.0);
17158 assert_eq_m128(r, e);
17159 }
17160
17161 #[simd_test(enable = "avx512fp16")]
17162 const fn test_mm256_castph_ps() {
17163 let a = _mm256_castsi256_ph(_mm256_set1_epi32(0x3f800000));
17164 let r = _mm256_castph_ps(a);
17165 let e = _mm256_set1_ps(1.0);
17166 assert_eq_m256(r, e);
17167 }
17168
17169 #[simd_test(enable = "avx512fp16")]
17170 const fn test_mm512_castph_ps() {
17171 let a = _mm512_castsi512_ph(_mm512_set1_epi32(0x3f800000));
17172 let r = _mm512_castph_ps(a);
17173 let e = _mm512_set1_ps(1.0);
17174 assert_eq_m512(r, e);
17175 }
17176
17177 #[simd_test(enable = "avx512fp16,avx512vl")]
17178 const fn test_mm_castpd_ph() {
17179 let a = _mm_castsi128_pd(_mm_set1_epi16(0x3c00));
17180 let r = _mm_castpd_ph(a);
17181 let e = _mm_set1_ph(1.0);
17182 assert_eq_m128h(r, e);
17183 }
17184
17185 #[simd_test(enable = "avx512fp16,avx512vl")]
17186 const fn test_mm256_castpd_ph() {
17187 let a = _mm256_castsi256_pd(_mm256_set1_epi16(0x3c00));
17188 let r = _mm256_castpd_ph(a);
17189 let e = _mm256_set1_ph(1.0);
17190 assert_eq_m256h(r, e);
17191 }
17192
17193 #[simd_test(enable = "avx512fp16")]
17194 const fn test_mm512_castpd_ph() {
17195 let a = _mm512_castsi512_pd(_mm512_set1_epi16(0x3c00));
17196 let r = _mm512_castpd_ph(a);
17197 let e = _mm512_set1_ph(1.0);
17198 assert_eq_m512h(r, e);
17199 }
17200
17201 #[simd_test(enable = "avx512fp16")]
17202 const fn test_mm_castph_pd() {
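        // 0x3FF0_0000_0000_0000 is the IEEE 754 binary64 bit pattern of 1.0, so the cast back
        // to `__m128d` must yield 1.0 in both lanes.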
17203 let a = _mm_castsi128_ph(_mm_set1_epi64x(0x3ff0000000000000));
17204 let r = _mm_castph_pd(a);
17205 let e = _mm_set1_pd(1.0);
17206 assert_eq_m128d(r, e);
17207 }
17208
17209 #[simd_test(enable = "avx512fp16")]
17210 const fn test_mm256_castph_pd() {
17211 let a = _mm256_castsi256_ph(_mm256_set1_epi64x(0x3ff0000000000000));
17212 let r = _mm256_castph_pd(a);
17213 let e = _mm256_set1_pd(1.0);
17214 assert_eq_m256d(r, e);
17215 }
17216
17217 #[simd_test(enable = "avx512fp16")]
17218 const fn test_mm512_castph_pd() {
17219 let a = _mm512_castsi512_ph(_mm512_set1_epi64(0x3ff0000000000000));
17220 let r = _mm512_castph_pd(a);
17221 let e = _mm512_set1_pd(1.0);
17222 assert_eq_m512d(r, e);
17223 }
17224
17225 #[simd_test(enable = "avx512fp16,avx512vl")]
17226 const fn test_mm256_castph256_ph128() {
17227 let a = _mm256_setr_ph(
17228 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
17229 );
17230 let r = _mm256_castph256_ph128(a);
17231 let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
17232 assert_eq_m128h(r, e);
17233 }
17234
17235 #[simd_test(enable = "avx512fp16,avx512vl")]
17236 const fn test_mm512_castph512_ph128() {
17237 let a = _mm512_setr_ph(
17238 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
17239 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
17240 );
17241 let r = _mm512_castph512_ph128(a);
17242 let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
17243 assert_eq_m128h(r, e);
17244 }
17245
17246 #[simd_test(enable = "avx512fp16,avx512vl")]
17247 const fn test_mm512_castph512_ph256() {
17248 let a = _mm512_setr_ph(
17249 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
17250 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
17251 );
17252 let r = _mm512_castph512_ph256(a);
17253 let e = _mm256_setr_ph(
17254 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
17255 );
17256 assert_eq_m256h(r, e);
17257 }
17258
17259 #[simd_test(enable = "avx512fp16,avx512vl")]
17260 const fn test_mm256_castph128_ph256() {
17261 let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
17262 let r = _mm256_castph128_ph256(a);
17263 assert_eq_m128h(_mm256_castph256_ph128(r), a);
17264 }
17265
17266 #[simd_test(enable = "avx512fp16,avx512vl")]
17267 const fn test_mm512_castph128_ph512() {
17268 let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
17269 let r = _mm512_castph128_ph512(a);
17270 assert_eq_m128h(_mm512_castph512_ph128(r), a);
17271 }
17272
17273 #[simd_test(enable = "avx512fp16,avx512vl")]
17274 const fn test_mm512_castph256_ph512() {
17275 let a = _mm256_setr_ph(
17276 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
17277 );
17278 let r = _mm512_castph256_ph512(a);
17279 assert_eq_m256h(_mm512_castph512_ph256(r), a);
17280 }
17281
17282 #[simd_test(enable = "avx512fp16,avx512vl")]
17283 const fn test_mm256_zextph128_ph256() {
17284 let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
17285 let r = _mm256_zextph128_ph256(a);
17286 let e = _mm256_setr_ph(
17287 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
17288 );
17289 assert_eq_m256h(r, e);
17290 }
17291
17292 #[simd_test(enable = "avx512fp16")]
17293 const fn test_mm512_zextph128_ph512() {
17294 let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
17295 let r = _mm512_zextph128_ph512(a);
17296 let e = _mm512_setr_ph(
17297 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
17298 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
17299 );
17300 assert_eq_m512h(r, e);
17301 }
17302
17303 #[simd_test(enable = "avx512fp16")]
17304 const fn test_mm512_zextph256_ph512() {
17305 let a = _mm256_setr_ph(
17306 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
17307 );
17308 let r = _mm512_zextph256_ph512(a);
17309 let e = _mm512_setr_ph(
17310 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 0., 0., 0., 0.,
17311 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
17312 );
17313 assert_eq_m512h(r, e);
17314 }
17315
17316 #[simd_test(enable = "avx512fp16,avx512vl")]
17317 fn test_mm_cmp_ph_mask() {
17318 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17319 let b = _mm_set_ph(1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0);
17320 let r = _mm_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
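        // `_mm_set_ph` places its first argument in the highest lane, so the four matching
        // elements (1.0..=4.0) map to the upper four mask bits.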
17321 assert_eq!(r, 0b11110000);
17322 }
17323
17324 #[simd_test(enable = "avx512fp16,avx512vl")]
17325 fn test_mm_mask_cmp_ph_mask() {
17326 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17327 let b = _mm_set_ph(1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0);
17328 let r = _mm_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b01010101, a, b);
17329 assert_eq!(r, 0b01010000);
17330 }
17331
17332 #[simd_test(enable = "avx512fp16,avx512vl")]
17333 fn test_mm256_cmp_ph_mask() {
17334 let a = _mm256_set_ph(
17335 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17336 );
17337 let b = _mm256_set_ph(
17338 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
17339 -16.0,
17340 );
17341 let r = _mm256_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
17342 assert_eq!(r, 0b1111000011110000);
17343 }
17344
17345 #[simd_test(enable = "avx512fp16,avx512vl")]
17346 fn test_mm256_mask_cmp_ph_mask() {
17347 let a = _mm256_set_ph(
17348 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17349 );
17350 let b = _mm256_set_ph(
17351 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
17352 -16.0,
17353 );
17354 let r = _mm256_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b0101010101010101, a, b);
17355 assert_eq!(r, 0b0101000001010000);
17356 }
17357
17358 #[simd_test(enable = "avx512fp16")]
17359 fn test_mm512_cmp_ph_mask() {
17360 let a = _mm512_set_ph(
17361 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17362 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17363 31.0, 32.0,
17364 );
17365 let b = _mm512_set_ph(
17366 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
17367 -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0,
17368 -29.0, -30.0, -31.0, -32.0,
17369 );
17370 let r = _mm512_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
17371 assert_eq!(r, 0b11110000111100001111000011110000);
17372 }
17373
17374 #[simd_test(enable = "avx512fp16")]
17375 fn test_mm512_mask_cmp_ph_mask() {
17376 let a = _mm512_set_ph(
17377 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17378 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17379 31.0, 32.0,
17380 );
17381 let b = _mm512_set_ph(
17382 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
17383 -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0,
17384 -29.0, -30.0, -31.0, -32.0,
17385 );
17386 let r = _mm512_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b01010101010101010101010101010101, a, b);
17387 assert_eq!(r, 0b01010000010100000101000001010000);
17388 }
17389
17390 #[simd_test(enable = "avx512fp16")]
17391 fn test_mm512_cmp_round_ph_mask() {
17392 let a = _mm512_set_ph(
17393 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17394 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17395 31.0, 32.0,
17396 );
17397 let b = _mm512_set_ph(
17398 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
17399 -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0,
17400 -29.0, -30.0, -31.0, -32.0,
17401 );
17402 let r = _mm512_cmp_round_ph_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
17403 assert_eq!(r, 0b11110000111100001111000011110000);
17404 }
17405
17406 #[simd_test(enable = "avx512fp16")]
17407 fn test_mm512_mask_cmp_round_ph_mask() {
17408 let a = _mm512_set_ph(
17409 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17410 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17411 31.0, 32.0,
17412 );
17413 let b = _mm512_set_ph(
17414 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
17415 -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0,
17416 -29.0, -30.0, -31.0, -32.0,
17417 );
17418 let r = _mm512_mask_cmp_round_ph_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(
17419 0b01010101010101010101010101010101,
17420 a,
17421 b,
17422 );
17423 assert_eq!(r, 0b01010000010100000101000001010000);
17424 }
17425
17426 #[simd_test(enable = "avx512fp16")]
17427 fn test_mm_cmp_round_sh_mask() {
17428 let a = _mm_set_sh(1.0);
17429 let b = _mm_set_sh(1.0);
17430 let r = _mm_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
17431 assert_eq!(r, 1);
17432 }
17433
17434 #[simd_test(enable = "avx512fp16")]
17435 fn test_mm_mask_cmp_round_sh_mask() {
17436 let a = _mm_set_sh(1.0);
17437 let b = _mm_set_sh(1.0);
17438 let r = _mm_mask_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(0, a, b);
17439 assert_eq!(r, 0);
17440 }
17441
17442 #[simd_test(enable = "avx512fp16")]
17443 fn test_mm_cmp_sh_mask() {
17444 let a = _mm_set_sh(1.0);
17445 let b = _mm_set_sh(1.0);
17446 let r = _mm_cmp_sh_mask::<_CMP_EQ_OQ>(a, b);
17447 assert_eq!(r, 1);
17448 }
17449
17450 #[simd_test(enable = "avx512fp16")]
17451 fn test_mm_mask_cmp_sh_mask() {
17452 let a = _mm_set_sh(1.0);
17453 let b = _mm_set_sh(1.0);
17454 let r = _mm_mask_cmp_sh_mask::<_CMP_EQ_OQ>(0, a, b);
17455 assert_eq!(r, 0);
17456 }
17457
17458 #[simd_test(enable = "avx512fp16")]
17459 fn test_mm_comi_round_sh() {
17460 let a = _mm_set_sh(1.0);
17461 let b = _mm_set_sh(1.0);
17462 let r = _mm_comi_round_sh::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
17463 assert_eq!(r, 1);
17464 }
17465
17466 #[simd_test(enable = "avx512fp16")]
17467 fn test_mm_comi_sh() {
17468 let a = _mm_set_sh(1.0);
17469 let b = _mm_set_sh(1.0);
17470 let r = _mm_comi_sh::<_CMP_EQ_OQ>(a, b);
17471 assert_eq!(r, 1);
17472 }
17473
17474 #[simd_test(enable = "avx512fp16")]
17475 fn test_mm_comieq_sh() {
17476 let a = _mm_set_sh(1.0);
17477 let b = _mm_set_sh(1.0);
17478 let r = _mm_comieq_sh(a, b);
17479 assert_eq!(r, 1);
17480 }
17481
17482 #[simd_test(enable = "avx512fp16")]
17483 fn test_mm_comige_sh() {
17484 let a = _mm_set_sh(2.0);
17485 let b = _mm_set_sh(1.0);
17486 let r = _mm_comige_sh(a, b);
17487 assert_eq!(r, 1);
17488 }
17489
17490 #[simd_test(enable = "avx512fp16")]
17491 fn test_mm_comigt_sh() {
17492 let a = _mm_set_sh(2.0);
17493 let b = _mm_set_sh(1.0);
17494 let r = _mm_comigt_sh(a, b);
17495 assert_eq!(r, 1);
17496 }
17497
17498 #[simd_test(enable = "avx512fp16")]
17499 fn test_mm_comile_sh() {
17500 let a = _mm_set_sh(1.0);
17501 let b = _mm_set_sh(2.0);
17502 let r = _mm_comile_sh(a, b);
17503 assert_eq!(r, 1);
17504 }
17505
17506 #[simd_test(enable = "avx512fp16")]
17507 fn test_mm_comilt_sh() {
17508 let a = _mm_set_sh(1.0);
17509 let b = _mm_set_sh(2.0);
17510 let r = _mm_comilt_sh(a, b);
17511 assert_eq!(r, 1);
17512 }
17513
17514 #[simd_test(enable = "avx512fp16")]
17515 fn test_mm_comineq_sh() {
17516 let a = _mm_set_sh(1.0);
17517 let b = _mm_set_sh(2.0);
17518 let r = _mm_comineq_sh(a, b);
17519 assert_eq!(r, 1);
17520 }
17521
17522 #[simd_test(enable = "avx512fp16")]
17523 fn test_mm_ucomieq_sh() {
17524 let a = _mm_set_sh(1.0);
17525 let b = _mm_set_sh(1.0);
17526 let r = _mm_ucomieq_sh(a, b);
17527 assert_eq!(r, 1);
17528 }
17529
17530 #[simd_test(enable = "avx512fp16")]
17531 fn test_mm_ucomige_sh() {
17532 let a = _mm_set_sh(2.0);
17533 let b = _mm_set_sh(1.0);
17534 let r = _mm_ucomige_sh(a, b);
17535 assert_eq!(r, 1);
17536 }
17537
17538 #[simd_test(enable = "avx512fp16")]
17539 fn test_mm_ucomigt_sh() {
17540 let a = _mm_set_sh(2.0);
17541 let b = _mm_set_sh(1.0);
17542 let r = _mm_ucomigt_sh(a, b);
17543 assert_eq!(r, 1);
17544 }
17545
17546 #[simd_test(enable = "avx512fp16")]
17547 fn test_mm_ucomile_sh() {
17548 let a = _mm_set_sh(1.0);
17549 let b = _mm_set_sh(2.0);
17550 let r = _mm_ucomile_sh(a, b);
17551 assert_eq!(r, 1);
17552 }
17553
17554 #[simd_test(enable = "avx512fp16")]
17555 fn test_mm_ucomilt_sh() {
17556 let a = _mm_set_sh(1.0);
17557 let b = _mm_set_sh(2.0);
17558 let r = _mm_ucomilt_sh(a, b);
17559 assert_eq!(r, 1);
17560 }
17561
17562 #[simd_test(enable = "avx512fp16")]
17563 fn test_mm_ucomineq_sh() {
17564 let a = _mm_set_sh(1.0);
17565 let b = _mm_set_sh(2.0);
17566 let r = _mm_ucomineq_sh(a, b);
17567 assert_eq!(r, 1);
17568 }
17569
17570 #[simd_test(enable = "avx512fp16,avx512vl")]
17571 const fn test_mm_load_ph() {
17572 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
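        // `_mm_load_ph` takes a (16-byte aligned) `*const f16`, hence the pointer cast from
        // the `*const __m128h` produced by `addr_of!`.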
17573 let b = unsafe { _mm_load_ph(addr_of!(a).cast()) };
17574 assert_eq_m128h(a, b);
17575 }
17576
17577 #[simd_test(enable = "avx512fp16,avx512vl")]
17578 const fn test_mm256_load_ph() {
17579 let a = _mm256_set_ph(
17580 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17581 );
17582 let b = unsafe { _mm256_load_ph(addr_of!(a).cast()) };
17583 assert_eq_m256h(a, b);
17584 }
17585
17586 #[simd_test(enable = "avx512fp16")]
17587 const fn test_mm512_load_ph() {
17588 let a = _mm512_set_ph(
17589 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17590 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17591 31.0, 32.0,
17592 );
17593 let b = unsafe { _mm512_load_ph(addr_of!(a).cast()) };
17594 assert_eq_m512h(a, b);
17595 }
17596
17597 #[simd_test(enable = "avx512fp16,avx512vl")]
17598 const fn test_mm_load_sh() {
17599 let a = _mm_set_sh(1.0);
17600 let b = unsafe { _mm_load_sh(addr_of!(a).cast()) };
17601 assert_eq_m128h(a, b);
17602 }
17603
17604 #[simd_test(enable = "avx512fp16,avx512vl")]
17605 fn test_mm_mask_load_sh() {
17606 let a = _mm_set_sh(1.0);
17607 let src = _mm_set_sh(2.);
17608 let b = unsafe { _mm_mask_load_sh(src, 1, addr_of!(a).cast()) };
17609 assert_eq_m128h(a, b);
17610 let b = unsafe { _mm_mask_load_sh(src, 0, addr_of!(a).cast()) };
17611 assert_eq_m128h(src, b);
17612 }
17613
17614 #[simd_test(enable = "avx512fp16,avx512vl")]
17615 fn test_mm_maskz_load_sh() {
17616 let a = _mm_set_sh(1.0);
17617 let b = unsafe { _mm_maskz_load_sh(1, addr_of!(a).cast()) };
17618 assert_eq_m128h(a, b);
17619 let b = unsafe { _mm_maskz_load_sh(0, addr_of!(a).cast()) };
17620 assert_eq_m128h(_mm_setzero_ph(), b);
17621 }
17622
17623 #[simd_test(enable = "avx512fp16,avx512vl")]
17624 const fn test_mm_loadu_ph() {
17625 let array = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
17626 let r = unsafe { _mm_loadu_ph(array.as_ptr()) };
17627 let e = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17628 assert_eq_m128h(r, e);
17629 }
17630
17631 #[simd_test(enable = "avx512fp16,avx512vl")]
17632 const fn test_mm256_loadu_ph() {
17633 let array = [
17634 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17635 ];
17636 let r = unsafe { _mm256_loadu_ph(array.as_ptr()) };
17637 let e = _mm256_setr_ph(
17638 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17639 );
17640 assert_eq_m256h(r, e);
17641 }
17642
17643 #[simd_test(enable = "avx512fp16")]
17644 const fn test_mm512_loadu_ph() {
17645 let array = [
17646 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17647 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17648 31.0, 32.0,
17649 ];
17650 let r = unsafe { _mm512_loadu_ph(array.as_ptr()) };
17651 let e = _mm512_setr_ph(
17652 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17653 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17654 31.0, 32.0,
17655 );
17656 assert_eq_m512h(r, e);
17657 }
17658
17659 #[simd_test(enable = "avx512fp16,avx512vl")]
17660 const fn test_mm_move_sh() {
17661 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17662 let b = _mm_set_sh(9.0);
17663 let r = _mm_move_sh(a, b);
17664 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0);
17665 assert_eq_m128h(r, e);
17666 }
17667
17668 #[simd_test(enable = "avx512fp16,avx512vl")]
17669 const fn test_mm_mask_move_sh() {
17670 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17671 let b = _mm_set_sh(9.0);
17672 let src = _mm_set_sh(10.0);
17673 let r = _mm_mask_move_sh(src, 0, a, b);
17674 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 10.0);
17675 assert_eq_m128h(r, e);
17676 }
17677
17678 #[simd_test(enable = "avx512fp16,avx512vl")]
17679 const fn test_mm_maskz_move_sh() {
17680 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17681 let b = _mm_set_sh(9.0);
17682 let r = _mm_maskz_move_sh(0, a, b);
17683 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 0.0);
17684 assert_eq_m128h(r, e);
17685 }
17686
17687 #[simd_test(enable = "avx512fp16,avx512vl")]
17688 const fn test_mm_store_ph() {
17689 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17690 let mut b = _mm_setzero_ph();
17691 unsafe {
17692 _mm_store_ph(addr_of_mut!(b).cast(), a);
17693 }
17694 assert_eq_m128h(a, b);
17695 }
17696
17697 #[simd_test(enable = "avx512fp16,avx512vl")]
17698 const fn test_mm256_store_ph() {
17699 let a = _mm256_set_ph(
17700 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17701 );
17702 let mut b = _mm256_setzero_ph();
17703 unsafe {
17704 _mm256_store_ph(addr_of_mut!(b).cast(), a);
17705 }
17706 assert_eq_m256h(a, b);
17707 }
17708
17709 #[simd_test(enable = "avx512fp16")]
17710 const fn test_mm512_store_ph() {
17711 let a = _mm512_set_ph(
17712 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17713 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17714 31.0, 32.0,
17715 );
17716 let mut b = _mm512_setzero_ph();
17717 unsafe {
17718 _mm512_store_ph(addr_of_mut!(b).cast(), a);
17719 }
17720 assert_eq_m512h(a, b);
17721 }
17722
17723 #[simd_test(enable = "avx512fp16,avx512vl")]
17724 const fn test_mm_store_sh() {
17725 let a = _mm_set_sh(1.0);
17726 let mut b = _mm_setzero_ph();
17727 unsafe {
17728 _mm_store_sh(addr_of_mut!(b).cast(), a);
17729 }
17730 assert_eq_m128h(a, b);
17731 }
17732
17733 #[simd_test(enable = "avx512fp16,avx512vl")]
17734 fn test_mm_mask_store_sh() {
17735 let a = _mm_set_sh(1.0);
17736 let mut b = _mm_setzero_ph();
17737 unsafe {
17738 _mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a);
17739 }
17740 assert_eq_m128h(_mm_setzero_ph(), b);
17741 unsafe {
17742 _mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a);
17743 }
17744 assert_eq_m128h(a, b);
17745 }
17746
17747 #[simd_test(enable = "avx512fp16,avx512vl")]
17748 const fn test_mm_storeu_ph() {
17749 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17750 let mut array = [0.0; 8];
17751 unsafe {
17752 _mm_storeu_ph(array.as_mut_ptr(), a);
17753 }
17754 assert_eq_m128h(a, unsafe { _mm_loadu_ph(array.as_ptr()) });
17755 }
17756
17757 #[simd_test(enable = "avx512fp16,avx512vl")]
17758 const fn test_mm256_storeu_ph() {
17759 let a = _mm256_set_ph(
17760 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17761 );
17762 let mut array = [0.0; 16];
17763 unsafe {
17764 _mm256_storeu_ph(array.as_mut_ptr(), a);
17765 }
17766 assert_eq_m256h(a, unsafe { _mm256_loadu_ph(array.as_ptr()) });
17767 }
17768
17769 #[simd_test(enable = "avx512fp16")]
17770 const fn test_mm512_storeu_ph() {
17771 let a = _mm512_set_ph(
17772 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17773 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17774 31.0, 32.0,
17775 );
17776 let mut array = [0.0; 32];
17777 unsafe {
17778 _mm512_storeu_ph(array.as_mut_ptr(), a);
17779 }
17780 assert_eq_m512h(a, unsafe { _mm512_loadu_ph(array.as_ptr()) });
17781 }
17782
17783 #[simd_test(enable = "avx512fp16,avx512vl")]
17784 const fn test_mm_add_ph() {
17785 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17786 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
17787 let r = _mm_add_ph(a, b);
17788 let e = _mm_set1_ph(9.0);
17789 assert_eq_m128h(r, e);
17790 }
17791
17792 #[simd_test(enable = "avx512fp16,avx512vl")]
17793 const fn test_mm_mask_add_ph() {
17794 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17795 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
17796 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
17797 let r = _mm_mask_add_ph(src, 0b01010101, a, b);
17798 let e = _mm_set_ph(10., 9., 12., 9., 14., 9., 16., 9.);
17799 assert_eq_m128h(r, e);
17800 }
17801
17802 #[simd_test(enable = "avx512fp16,avx512vl")]
17803 const fn test_mm_maskz_add_ph() {
17804 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17805 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
17806 let r = _mm_maskz_add_ph(0b01010101, a, b);
17807 let e = _mm_set_ph(0., 9., 0., 9., 0., 9., 0., 9.);
17808 assert_eq_m128h(r, e);
17809 }
17810
17811 #[simd_test(enable = "avx512fp16,avx512vl")]
17812 const fn test_mm256_add_ph() {
17813 let a = _mm256_set_ph(
17814 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17815 );
17816 let b = _mm256_set_ph(
17817 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
17818 );
17819 let r = _mm256_add_ph(a, b);
17820 let e = _mm256_set1_ph(17.0);
17821 assert_eq_m256h(r, e);
17822 }
17823
17824 #[simd_test(enable = "avx512fp16,avx512vl")]
17825 const fn test_mm256_mask_add_ph() {
17826 let a = _mm256_set_ph(
17827 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17828 );
17829 let b = _mm256_set_ph(
17830 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
17831 );
17832 let src = _mm256_set_ph(
17833 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
17834 );
17835 let r = _mm256_mask_add_ph(src, 0b0101010101010101, a, b);
17836 let e = _mm256_set_ph(
17837 18., 17., 20., 17., 22., 17., 24., 17., 26., 17., 28., 17., 30., 17., 32., 17.,
17838 );
17839 assert_eq_m256h(r, e);
17840 }
17841
17842 #[simd_test(enable = "avx512fp16,avx512vl")]
17843 const fn test_mm256_maskz_add_ph() {
17844 let a = _mm256_set_ph(
17845 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17846 );
17847 let b = _mm256_set_ph(
17848 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
17849 );
17850 let r = _mm256_maskz_add_ph(0b0101010101010101, a, b);
17851 let e = _mm256_set_ph(
17852 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17.,
17853 );
17854 assert_eq_m256h(r, e);
17855 }
17856
17857 #[simd_test(enable = "avx512fp16")]
17858 const fn test_mm512_add_ph() {
17859 let a = _mm512_set_ph(
17860 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17861 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17862 31.0, 32.0,
17863 );
17864 let b = _mm512_set_ph(
17865 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17866 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17867 3.0, 2.0, 1.0,
17868 );
17869 let r = _mm512_add_ph(a, b);
17870 let e = _mm512_set1_ph(33.0);
17871 assert_eq_m512h(r, e);
17872 }
17873
17874 #[simd_test(enable = "avx512fp16")]
17875 const fn test_mm512_mask_add_ph() {
17876 let a = _mm512_set_ph(
17877 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17878 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17879 31.0, 32.0,
17880 );
17881 let b = _mm512_set_ph(
17882 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17883 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17884 3.0, 2.0, 1.0,
17885 );
17886 let src = _mm512_set_ph(
17887 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
17888 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
17889 );
17890 let r = _mm512_mask_add_ph(src, 0b01010101010101010101010101010101, a, b);
17891 let e = _mm512_set_ph(
17892 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
17893 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
17894 );
17895 assert_eq_m512h(r, e);
17896 }
17897
17898 #[simd_test(enable = "avx512fp16")]
17899 const fn test_mm512_maskz_add_ph() {
17900 let a = _mm512_set_ph(
17901 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17902 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17903 31.0, 32.0,
17904 );
17905 let b = _mm512_set_ph(
17906 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17907 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17908 3.0, 2.0, 1.0,
17909 );
17910 let r = _mm512_maskz_add_ph(0b01010101010101010101010101010101, a, b);
17911 let e = _mm512_set_ph(
17912 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
17913 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
17914 );
17915 assert_eq_m512h(r, e);
17916 }
17917
17918 #[simd_test(enable = "avx512fp16")]
17919 fn test_mm512_add_round_ph() {
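        // `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC` requests round-to-nearest-even with
        // floating-point exceptions suppressed; the other `*_round_*` tests reuse this setting.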
17920 let a = _mm512_set_ph(
17921 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17922 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17923 31.0, 32.0,
17924 );
17925 let b = _mm512_set_ph(
17926 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17927 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17928 3.0, 2.0, 1.0,
17929 );
17930 let r = _mm512_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
17931 let e = _mm512_set1_ph(33.0);
17932 assert_eq_m512h(r, e);
17933 }
17934
17935 #[simd_test(enable = "avx512fp16")]
17936 fn test_mm512_mask_add_round_ph() {
17937 let a = _mm512_set_ph(
17938 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17939 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17940 31.0, 32.0,
17941 );
17942 let b = _mm512_set_ph(
17943 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17944 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17945 3.0, 2.0, 1.0,
17946 );
17947 let src = _mm512_set_ph(
17948 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
17949 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
17950 );
17951 let r = _mm512_mask_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
17952 src,
17953 0b01010101010101010101010101010101,
17954 a,
17955 b,
17956 );
17957 let e = _mm512_set_ph(
17958 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
17959 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
17960 );
17961 assert_eq_m512h(r, e);
17962 }
17963
17964 #[simd_test(enable = "avx512fp16")]
17965 fn test_mm512_maskz_add_round_ph() {
17966 let a = _mm512_set_ph(
17967 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17968 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17969 31.0, 32.0,
17970 );
17971 let b = _mm512_set_ph(
17972 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17973 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17974 3.0, 2.0, 1.0,
17975 );
17976 let r = _mm512_maskz_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
17977 0b01010101010101010101010101010101,
17978 a,
17979 b,
17980 );
17981 let e = _mm512_set_ph(
17982 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
17983 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
17984 );
17985 assert_eq_m512h(r, e);
17986 }
17987
17988 #[simd_test(enable = "avx512fp16,avx512vl")]
17989 fn test_mm_add_round_sh() {
17990 let a = _mm_set_sh(1.0);
17991 let b = _mm_set_sh(2.0);
17992 let r = _mm_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
17993 let e = _mm_set_sh(3.0);
17994 assert_eq_m128h(r, e);
17995 }
17996
17997 #[simd_test(enable = "avx512fp16,avx512vl")]
17998 fn test_mm_mask_add_round_sh() {
17999 let a = _mm_set_sh(1.0);
18000 let b = _mm_set_sh(2.0);
18001 let src = _mm_set_sh(4.0);
18002 let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18003 src, 0, a, b,
18004 );
18005 let e = _mm_set_sh(4.0);
18006 assert_eq_m128h(r, e);
18007 let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18008 src, 1, a, b,
18009 );
18010 let e = _mm_set_sh(3.0);
18011 assert_eq_m128h(r, e);
18012 }
18013
18014 #[simd_test(enable = "avx512fp16,avx512vl")]
18015 fn test_mm_maskz_add_round_sh() {
18016 let a = _mm_set_sh(1.0);
18017 let b = _mm_set_sh(2.0);
18018 let r =
18019 _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
18020 let e = _mm_set_sh(0.0);
18021 assert_eq_m128h(r, e);
18022 let r =
18023 _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
18024 let e = _mm_set_sh(3.0);
18025 assert_eq_m128h(r, e);
18026 }
18027
18028 #[simd_test(enable = "avx512fp16,avx512vl")]
18029 const fn test_mm_add_sh() {
18030 let a = _mm_set_sh(1.0);
18031 let b = _mm_set_sh(2.0);
18032 let r = _mm_add_sh(a, b);
18033 let e = _mm_set_sh(3.0);
18034 assert_eq_m128h(r, e);
18035 }
18036
18037 #[simd_test(enable = "avx512fp16,avx512vl")]
18038 const fn test_mm_mask_add_sh() {
18039 let a = _mm_set_sh(1.0);
18040 let b = _mm_set_sh(2.0);
18041 let src = _mm_set_sh(4.0);
18042 let r = _mm_mask_add_sh(src, 0, a, b);
18043 let e = _mm_set_sh(4.0);
18044 assert_eq_m128h(r, e);
18045 let r = _mm_mask_add_sh(src, 1, a, b);
18046 let e = _mm_set_sh(3.0);
18047 assert_eq_m128h(r, e);
18048 }
18049
18050 #[simd_test(enable = "avx512fp16,avx512vl")]
18051 const fn test_mm_maskz_add_sh() {
18052 let a = _mm_set_sh(1.0);
18053 let b = _mm_set_sh(2.0);
18054 let r = _mm_maskz_add_sh(0, a, b);
18055 let e = _mm_set_sh(0.0);
18056 assert_eq_m128h(r, e);
18057 let r = _mm_maskz_add_sh(1, a, b);
18058 let e = _mm_set_sh(3.0);
18059 assert_eq_m128h(r, e);
18060 }
18061
18062 #[simd_test(enable = "avx512fp16,avx512vl")]
18063 const fn test_mm_sub_ph() {
18064 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
18065 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
18066 let r = _mm_sub_ph(a, b);
18067 let e = _mm_set_ph(-7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0);
18068 assert_eq_m128h(r, e);
18069 }
18070
18071 #[simd_test(enable = "avx512fp16,avx512vl")]
18072 const fn test_mm_mask_sub_ph() {
18073 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
18074 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
18075 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
18076 let r = _mm_mask_sub_ph(src, 0b01010101, a, b);
18077 let e = _mm_set_ph(10., -5., 12., -1., 14., 3., 16., 7.);
18078 assert_eq_m128h(r, e);
18079 }
18080
18081 #[simd_test(enable = "avx512fp16,avx512vl")]
18082 const fn test_mm_maskz_sub_ph() {
18083 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
18084 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
18085 let r = _mm_maskz_sub_ph(0b01010101, a, b);
18086 let e = _mm_set_ph(0., -5., 0., -1., 0., 3., 0., 7.);
18087 assert_eq_m128h(r, e);
18088 }
18089
18090 #[simd_test(enable = "avx512fp16,avx512vl")]
18091 const fn test_mm256_sub_ph() {
18092 let a = _mm256_set_ph(
18093 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18094 );
18095 let b = _mm256_set_ph(
18096 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
18097 );
18098 let r = _mm256_sub_ph(a, b);
18099 let e = _mm256_set_ph(
18100 -15.0, -13.0, -11.0, -9.0, -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0,
18101 15.0,
18102 );
18103 assert_eq_m256h(r, e);
18104 }
18105
18106 #[simd_test(enable = "avx512fp16,avx512vl")]
18107 const fn test_mm256_mask_sub_ph() {
18108 let a = _mm256_set_ph(
18109 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18110 );
18111 let b = _mm256_set_ph(
18112 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
18113 );
18114 let src = _mm256_set_ph(
18115 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
18116 );
18117 let r = _mm256_mask_sub_ph(src, 0b0101010101010101, a, b);
18118 let e = _mm256_set_ph(
18119 18., -13., 20., -9., 22., -5., 24., -1., 26., 3., 28., 7., 30., 11., 32., 15.,
18120 );
18121 assert_eq_m256h(r, e);
18122 }
18123
18124 #[simd_test(enable = "avx512fp16,avx512vl")]
18125 const fn test_mm256_maskz_sub_ph() {
18126 let a = _mm256_set_ph(
18127 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18128 );
18129 let b = _mm256_set_ph(
18130 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
18131 );
18132 let r = _mm256_maskz_sub_ph(0b0101010101010101, a, b);
18133 let e = _mm256_set_ph(
18134 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., 0., 7., 0., 11., 0., 15.,
18135 );
18136 assert_eq_m256h(r, e);
18137 }
18138
18139 #[simd_test(enable = "avx512fp16")]
18140 const fn test_mm512_sub_ph() {
18141 let a = _mm512_set_ph(
18142 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18143 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
18144 31.0, 32.0,
18145 );
18146 let b = _mm512_set_ph(
18147 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18148 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
18149 3.0, 2.0, 1.0,
18150 );
18151 let r = _mm512_sub_ph(a, b);
18152 let e = _mm512_set_ph(
18153 -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
18154 -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
18155 23.0, 25.0, 27.0, 29.0, 31.0,
18156 );
18157 assert_eq_m512h(r, e);
18158 }
18159
18160 #[simd_test(enable = "avx512fp16")]
18161 const fn test_mm512_mask_sub_ph() {
18162 let a = _mm512_set_ph(
18163 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18164 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
18165 31.0, 32.0,
18166 );
18167 let b = _mm512_set_ph(
18168 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18169 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
18170 3.0, 2.0, 1.0,
18171 );
18172 let src = _mm512_set_ph(
18173 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
18174 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
18175 );
18176 let r = _mm512_mask_sub_ph(src, 0b01010101010101010101010101010101, a, b);
18177 let e = _mm512_set_ph(
18178 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
18179 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
18180 );
18181 assert_eq_m512h(r, e);
18182 }
18183
18184 #[simd_test(enable = "avx512fp16")]
18185 const fn test_mm512_maskz_sub_ph() {
18186 let a = _mm512_set_ph(
18187 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18188 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
18189 31.0, 32.0,
18190 );
18191 let b = _mm512_set_ph(
18192 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18193 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
18194 3.0, 2.0, 1.0,
18195 );
18196 let r = _mm512_maskz_sub_ph(0b01010101010101010101010101010101, a, b);
18197 let e = _mm512_set_ph(
18198 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
18199 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
18200 );
18201 assert_eq_m512h(r, e);
18202 }
18203
18204 #[simd_test(enable = "avx512fp16")]
18205 fn test_mm512_sub_round_ph() {
18206 let a = _mm512_set_ph(
18207 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18208 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
18209 31.0, 32.0,
18210 );
18211 let b = _mm512_set_ph(
18212 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18213 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
18214 3.0, 2.0, 1.0,
18215 );
18216 let r = _mm512_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
18217 let e = _mm512_set_ph(
18218 -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
18219 -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
18220 23.0, 25.0, 27.0, 29.0, 31.0,
18221 );
18222 assert_eq_m512h(r, e);
18223 }
18224
18225 #[simd_test(enable = "avx512fp16")]
18226 fn test_mm512_mask_sub_round_ph() {
18227 let a = _mm512_set_ph(
18228 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18229 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
18230 31.0, 32.0,
18231 );
18232 let b = _mm512_set_ph(
18233 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18234 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
18235 3.0, 2.0, 1.0,
18236 );
18237 let src = _mm512_set_ph(
18238 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
18239 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
18240 );
18241 let r = _mm512_mask_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18242 src,
18243 0b01010101010101010101010101010101,
18244 a,
18245 b,
18246 );
18247 let e = _mm512_set_ph(
18248 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
18249 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
18250 );
18251 assert_eq_m512h(r, e);
18252 }
18253
18254 #[simd_test(enable = "avx512fp16")]
18255 fn test_mm512_maskz_sub_round_ph() {
18256 let a = _mm512_set_ph(
18257 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18258 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
18259 31.0, 32.0,
18260 );
18261 let b = _mm512_set_ph(
18262 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18263 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
18264 3.0, 2.0, 1.0,
18265 );
18266 let r = _mm512_maskz_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18267 0b01010101010101010101010101010101,
18268 a,
18269 b,
18270 );
18271 let e = _mm512_set_ph(
18272 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
18273 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
18274 );
18275 assert_eq_m512h(r, e);
18276 }
18277
18278 #[simd_test(enable = "avx512fp16,avx512vl")]
18279 fn test_mm_sub_round_sh() {
18280 let a = _mm_set_sh(1.0);
18281 let b = _mm_set_sh(2.0);
18282 let r = _mm_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
18283 let e = _mm_set_sh(-1.0);
18284 assert_eq_m128h(r, e);
18285 }
18286
18287 #[simd_test(enable = "avx512fp16,avx512vl")]
18288 fn test_mm_mask_sub_round_sh() {
18289 let a = _mm_set_sh(1.0);
18290 let b = _mm_set_sh(2.0);
18291 let src = _mm_set_sh(4.0);
18292 let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18293 src, 0, a, b,
18294 );
18295 let e = _mm_set_sh(4.0);
18296 assert_eq_m128h(r, e);
18297 let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18298 src, 1, a, b,
18299 );
18300 let e = _mm_set_sh(-1.0);
18301 assert_eq_m128h(r, e);
18302 }
18303
18304 #[simd_test(enable = "avx512fp16,avx512vl")]
18305 fn test_mm_maskz_sub_round_sh() {
18306 let a = _mm_set_sh(1.0);
18307 let b = _mm_set_sh(2.0);
18308 let r =
18309 _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
18310 let e = _mm_set_sh(0.0);
18311 assert_eq_m128h(r, e);
18312 let r =
18313 _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
18314 let e = _mm_set_sh(-1.0);
18315 assert_eq_m128h(r, e);
18316 }
18317
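    // The plain (non-`_round_`) arithmetic tests below are `const fn`, so they also check that
    // the const-unstable intrinsics evaluate at compile time; the rounding variants presumably
    // lower to target-specific intrinsics that cannot be const-evaluated, so their tests stay
    // ordinary `fn`s.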
18318 #[simd_test(enable = "avx512fp16,avx512vl")]
18319 const fn test_mm_sub_sh() {
18320 let a = _mm_set_sh(1.0);
18321 let b = _mm_set_sh(2.0);
18322 let r = _mm_sub_sh(a, b);
18323 let e = _mm_set_sh(-1.0);
18324 assert_eq_m128h(r, e);
18325 }
18326
18327 #[simd_test(enable = "avx512fp16,avx512vl")]
18328 const fn test_mm_mask_sub_sh() {
18329 let a = _mm_set_sh(1.0);
18330 let b = _mm_set_sh(2.0);
18331 let src = _mm_set_sh(4.0);
18332 let r = _mm_mask_sub_sh(src, 0, a, b);
18333 let e = _mm_set_sh(4.0);
18334 assert_eq_m128h(r, e);
18335 let r = _mm_mask_sub_sh(src, 1, a, b);
18336 let e = _mm_set_sh(-1.0);
18337 assert_eq_m128h(r, e);
18338 }
18339
18340 #[simd_test(enable = "avx512fp16,avx512vl")]
18341 const fn test_mm_maskz_sub_sh() {
18342 let a = _mm_set_sh(1.0);
18343 let b = _mm_set_sh(2.0);
18344 let r = _mm_maskz_sub_sh(0, a, b);
18345 let e = _mm_set_sh(0.0);
18346 assert_eq_m128h(r, e);
18347 let r = _mm_maskz_sub_sh(1, a, b);
18348 let e = _mm_set_sh(-1.0);
18349 assert_eq_m128h(r, e);
18350 }
18351
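    // The multiply tests use small integer-valued inputs: every product here (at most 272) is
    // exactly representable in f16, which represents all integers up to 2048 exactly, so the
    // results can be compared for exact equality.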
18352 #[simd_test(enable = "avx512fp16,avx512vl")]
18353 const fn test_mm_mul_ph() {
18354 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
18355 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
18356 let r = _mm_mul_ph(a, b);
18357 let e = _mm_set_ph(8.0, 14.0, 18.0, 20.0, 20.0, 18.0, 14.0, 8.0);
18358 assert_eq_m128h(r, e);
18359 }
18360
18361 #[simd_test(enable = "avx512fp16,avx512vl")]
18362 const fn test_mm_mask_mul_ph() {
18363 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
18364 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
18365 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
18366 let r = _mm_mask_mul_ph(src, 0b01010101, a, b);
18367 let e = _mm_set_ph(10., 14., 12., 20., 14., 18., 16., 8.);
18368 assert_eq_m128h(r, e);
18369 }
18370
18371 #[simd_test(enable = "avx512fp16,avx512vl")]
18372 const fn test_mm_maskz_mul_ph() {
18373 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
18374 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
18375 let r = _mm_maskz_mul_ph(0b01010101, a, b);
18376 let e = _mm_set_ph(0., 14., 0., 20., 0., 18., 0., 8.);
18377 assert_eq_m128h(r, e);
18378 }
18379
18380 #[simd_test(enable = "avx512fp16,avx512vl")]
18381 const fn test_mm256_mul_ph() {
18382 let a = _mm256_set_ph(
18383 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18384 );
18385 let b = _mm256_set_ph(
18386 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
18387 );
18388 let r = _mm256_mul_ph(a, b);
18389 let e = _mm256_set_ph(
18390 16.0, 30.0, 42.0, 52.0, 60.0, 66.0, 70.0, 72.0, 72.0, 70.0, 66.0, 60.0, 52.0, 42.0,
18391 30.0, 16.0,
18392 );
18393 assert_eq_m256h(r, e);
18394 }
18395
18396 #[simd_test(enable = "avx512fp16,avx512vl")]
18397 const fn test_mm256_mask_mul_ph() {
18398 let a = _mm256_set_ph(
18399 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18400 );
18401 let b = _mm256_set_ph(
18402 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
18403 );
18404 let src = _mm256_set_ph(
18405 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
18406 );
18407 let r = _mm256_mask_mul_ph(src, 0b0101010101010101, a, b);
18408 let e = _mm256_set_ph(
18409 18., 30., 20., 52., 22., 66., 24., 72., 26., 70., 28., 60., 30., 42., 32., 16.,
18410 );
18411 assert_eq_m256h(r, e);
18412 }
18413
18414 #[simd_test(enable = "avx512fp16,avx512vl")]
18415 const fn test_mm256_maskz_mul_ph() {
18416 let a = _mm256_set_ph(
18417 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18418 );
18419 let b = _mm256_set_ph(
18420 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
18421 );
18422 let r = _mm256_maskz_mul_ph(0b0101010101010101, a, b);
18423 let e = _mm256_set_ph(
18424 0., 30., 0., 52., 0., 66., 0., 72., 0., 70., 0., 60., 0., 42., 0., 16.,
18425 );
18426 assert_eq_m256h(r, e);
18427 }
18428
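    // In the 512-bit case lane i of `a` holds 32 - i and lane i of `b` holds i + 1, so the
    // products are symmetric, peaking at 16 * 17 = 272 in the middle lanes and falling back to
    // 32 at both ends, which is what the expected vector encodes.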
18429 #[simd_test(enable = "avx512fp16")]
18430 const fn test_mm512_mul_ph() {
18431 let a = _mm512_set_ph(
18432 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18433 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
18434 31.0, 32.0,
18435 );
18436 let b = _mm512_set_ph(
18437 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18438 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
18439 3.0, 2.0, 1.0,
18440 );
18441 let r = _mm512_mul_ph(a, b);
18442 let e = _mm512_set_ph(
18443 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
18444 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
18445 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
18446 );
18447 assert_eq_m512h(r, e);
18448 }
18449
18450 #[simd_test(enable = "avx512fp16")]
18451 const fn test_mm512_mask_mul_ph() {
18452 let a = _mm512_set_ph(
18453 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18454 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
18455 31.0, 32.0,
18456 );
18457 let b = _mm512_set_ph(
18458 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18459 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
18460 3.0, 2.0, 1.0,
18461 );
18462 let src = _mm512_set_ph(
18463 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
18464 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
18465 );
18466 let r = _mm512_mask_mul_ph(src, 0b01010101010101010101010101010101, a, b);
18467 let e = _mm512_set_ph(
18468 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
18469 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
18470 );
18471 assert_eq_m512h(r, e);
18472 }
18473
18474 #[simd_test(enable = "avx512fp16")]
18475 const fn test_mm512_maskz_mul_ph() {
18476 let a = _mm512_set_ph(
18477 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18478 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
18479 31.0, 32.0,
18480 );
18481 let b = _mm512_set_ph(
18482 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18483 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
18484 3.0, 2.0, 1.0,
18485 );
18486 let r = _mm512_maskz_mul_ph(0b01010101010101010101010101010101, a, b);
18487 let e = _mm512_set_ph(
18488 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
18489 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
18490 );
18491 assert_eq_m512h(r, e);
18492 }
18493
18494 #[simd_test(enable = "avx512fp16")]
18495 fn test_mm512_mul_round_ph() {
18496 let a = _mm512_set_ph(
18497 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18498 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
18499 31.0, 32.0,
18500 );
18501 let b = _mm512_set_ph(
18502 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18503 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
18504 3.0, 2.0, 1.0,
18505 );
18506 let r = _mm512_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
18507 let e = _mm512_set_ph(
18508 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
18509 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
18510 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
18511 );
18512 assert_eq_m512h(r, e);
18513 }
18514
18515 #[simd_test(enable = "avx512fp16")]
18516 fn test_mm512_mask_mul_round_ph() {
18517 let a = _mm512_set_ph(
18518 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18519 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
18520 31.0, 32.0,
18521 );
18522 let b = _mm512_set_ph(
18523 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18524 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
18525 3.0, 2.0, 1.0,
18526 );
18527 let src = _mm512_set_ph(
18528 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
18529 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
18530 );
18531 let r = _mm512_mask_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18532 src,
18533 0b01010101010101010101010101010101,
18534 a,
18535 b,
18536 );
18537 let e = _mm512_set_ph(
18538 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
18539 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
18540 );
18541 assert_eq_m512h(r, e);
18542 }
18543
18544 #[simd_test(enable = "avx512fp16")]
18545 fn test_mm512_maskz_mul_round_ph() {
18546 let a = _mm512_set_ph(
18547 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18548 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
18549 31.0, 32.0,
18550 );
18551 let b = _mm512_set_ph(
18552 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18553 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
18554 3.0, 2.0, 1.0,
18555 );
18556 let r = _mm512_maskz_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18557 0b01010101010101010101010101010101,
18558 a,
18559 b,
18560 );
18561 let e = _mm512_set_ph(
18562 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
18563 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
18564 );
18565 assert_eq_m512h(r, e);
18566 }
18567
18568 #[simd_test(enable = "avx512fp16,avx512vl")]
18569 fn test_mm_mul_round_sh() {
18570 let a = _mm_set_sh(1.0);
18571 let b = _mm_set_sh(2.0);
18572 let r = _mm_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
18573 let e = _mm_set_sh(2.0);
18574 assert_eq_m128h(r, e);
18575 }
18576
18577 #[simd_test(enable = "avx512fp16,avx512vl")]
18578 fn test_mm_mask_mul_round_sh() {
18579 let a = _mm_set_sh(1.0);
18580 let b = _mm_set_sh(2.0);
18581 let src = _mm_set_sh(4.0);
18582 let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18583 src, 0, a, b,
18584 );
18585 let e = _mm_set_sh(4.0);
18586 assert_eq_m128h(r, e);
18587 let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18588 src, 1, a, b,
18589 );
18590 let e = _mm_set_sh(2.0);
18591 assert_eq_m128h(r, e);
18592 }
18593
18594 #[simd_test(enable = "avx512fp16,avx512vl")]
18595 fn test_mm_maskz_mul_round_sh() {
18596 let a = _mm_set_sh(1.0);
18597 let b = _mm_set_sh(2.0);
18598 let r =
18599 _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
18600 let e = _mm_set_sh(0.0);
18601 assert_eq_m128h(r, e);
18602 let r =
18603 _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
18604 let e = _mm_set_sh(2.0);
18605 assert_eq_m128h(r, e);
18606 }
18607
18608 #[simd_test(enable = "avx512fp16,avx512vl")]
18609 const fn test_mm_mul_sh() {
18610 let a = _mm_set_sh(1.0);
18611 let b = _mm_set_sh(2.0);
18612 let r = _mm_mul_sh(a, b);
18613 let e = _mm_set_sh(2.0);
18614 assert_eq_m128h(r, e);
18615 }
18616
18617 #[simd_test(enable = "avx512fp16,avx512vl")]
18618 const fn test_mm_mask_mul_sh() {
18619 let a = _mm_set_sh(1.0);
18620 let b = _mm_set_sh(2.0);
18621 let src = _mm_set_sh(4.0);
18622 let r = _mm_mask_mul_sh(src, 0, a, b);
18623 let e = _mm_set_sh(4.0);
18624 assert_eq_m128h(r, e);
18625 let r = _mm_mask_mul_sh(src, 1, a, b);
18626 let e = _mm_set_sh(2.0);
18627 assert_eq_m128h(r, e);
18628 }
18629
18630 #[simd_test(enable = "avx512fp16,avx512vl")]
18631 const fn test_mm_maskz_mul_sh() {
18632 let a = _mm_set_sh(1.0);
18633 let b = _mm_set_sh(2.0);
18634 let r = _mm_maskz_mul_sh(0, a, b);
18635 let e = _mm_set_sh(0.0);
18636 assert_eq_m128h(r, e);
18637 let r = _mm_maskz_mul_sh(1, a, b);
18638 let e = _mm_set_sh(2.0);
18639 assert_eq_m128h(r, e);
18640 }
18641
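    // The division tests splat 1.0 / 2.0 = 0.5, which is exact in f16; the masked variants reuse
    // the same even-lane mask pattern with a counting `src` vector.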
18642 #[simd_test(enable = "avx512fp16,avx512vl")]
18643 const fn test_mm_div_ph() {
18644 let a = _mm_set1_ph(1.0);
18645 let b = _mm_set1_ph(2.0);
18646 let r = _mm_div_ph(a, b);
18647 let e = _mm_set1_ph(0.5);
18648 assert_eq_m128h(r, e);
18649 }
18650
18651 #[simd_test(enable = "avx512fp16,avx512vl")]
18652 const fn test_mm_mask_div_ph() {
18653 let a = _mm_set1_ph(1.0);
18654 let b = _mm_set1_ph(2.0);
18655 let src = _mm_set_ph(4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0);
18656 let r = _mm_mask_div_ph(src, 0b01010101, a, b);
18657 let e = _mm_set_ph(4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5);
18658 assert_eq_m128h(r, e);
18659 }
18660
18661 #[simd_test(enable = "avx512fp16,avx512vl")]
18662 const fn test_mm_maskz_div_ph() {
18663 let a = _mm_set1_ph(1.0);
18664 let b = _mm_set1_ph(2.0);
18665 let r = _mm_maskz_div_ph(0b01010101, a, b);
18666 let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
18667 assert_eq_m128h(r, e);
18668 }
18669
18670 #[simd_test(enable = "avx512fp16,avx512vl")]
18671 const fn test_mm256_div_ph() {
18672 let a = _mm256_set1_ph(1.0);
18673 let b = _mm256_set1_ph(2.0);
18674 let r = _mm256_div_ph(a, b);
18675 let e = _mm256_set1_ph(0.5);
18676 assert_eq_m256h(r, e);
18677 }
18678
18679 #[simd_test(enable = "avx512fp16,avx512vl")]
18680 const fn test_mm256_mask_div_ph() {
18681 let a = _mm256_set1_ph(1.0);
18682 let b = _mm256_set1_ph(2.0);
18683 let src = _mm256_set_ph(
18684 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
18685 19.0,
18686 );
18687 let r = _mm256_mask_div_ph(src, 0b0101010101010101, a, b);
18688 let e = _mm256_set_ph(
18689 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
18690 );
18691 assert_eq_m256h(r, e);
18692 }
18693
18694 #[simd_test(enable = "avx512fp16,avx512vl")]
18695 const fn test_mm256_maskz_div_ph() {
18696 let a = _mm256_set1_ph(1.0);
18697 let b = _mm256_set1_ph(2.0);
18698 let r = _mm256_maskz_div_ph(0b0101010101010101, a, b);
18699 let e = _mm256_set_ph(
18700 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
18701 );
18702 assert_eq_m256h(r, e);
18703 }
18704
18705 #[simd_test(enable = "avx512fp16")]
18706 const fn test_mm512_div_ph() {
18707 let a = _mm512_set1_ph(1.0);
18708 let b = _mm512_set1_ph(2.0);
18709 let r = _mm512_div_ph(a, b);
18710 let e = _mm512_set1_ph(0.5);
18711 assert_eq_m512h(r, e);
18712 }
18713
18714 #[simd_test(enable = "avx512fp16")]
18715 const fn test_mm512_mask_div_ph() {
18716 let a = _mm512_set1_ph(1.0);
18717 let b = _mm512_set1_ph(2.0);
18718 let src = _mm512_set_ph(
18719 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
18720 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
18721 33.0, 34.0, 35.0,
18722 );
18723 let r = _mm512_mask_div_ph(src, 0b01010101010101010101010101010101, a, b);
18724 let e = _mm512_set_ph(
18725 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
18726 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
18727 );
18728 assert_eq_m512h(r, e);
18729 }
18730
18731 #[simd_test(enable = "avx512fp16")]
18732 const fn test_mm512_maskz_div_ph() {
18733 let a = _mm512_set1_ph(1.0);
18734 let b = _mm512_set1_ph(2.0);
18735 let r = _mm512_maskz_div_ph(0b01010101010101010101010101010101, a, b);
18736 let e = _mm512_set_ph(
18737 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
18738 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
18739 );
18740 assert_eq_m512h(r, e);
18741 }
18742
18743 #[simd_test(enable = "avx512fp16")]
18744 fn test_mm512_div_round_ph() {
18745 let a = _mm512_set1_ph(1.0);
18746 let b = _mm512_set1_ph(2.0);
18747 let r = _mm512_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
18748 let e = _mm512_set1_ph(0.5);
18749 assert_eq_m512h(r, e);
18750 }
18751
18752 #[simd_test(enable = "avx512fp16")]
18753 fn test_mm512_mask_div_round_ph() {
18754 let a = _mm512_set1_ph(1.0);
18755 let b = _mm512_set1_ph(2.0);
18756 let src = _mm512_set_ph(
18757 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
18758 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
18759 33.0, 34.0, 35.0,
18760 );
18761 let r = _mm512_mask_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18762 src,
18763 0b01010101010101010101010101010101,
18764 a,
18765 b,
18766 );
18767 let e = _mm512_set_ph(
18768 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
18769 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
18770 );
18771 assert_eq_m512h(r, e);
18772 }
18773
18774 #[simd_test(enable = "avx512fp16")]
18775 fn test_mm512_maskz_div_round_ph() {
18776 let a = _mm512_set1_ph(1.0);
18777 let b = _mm512_set1_ph(2.0);
18778 let r = _mm512_maskz_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18779 0b01010101010101010101010101010101,
18780 a,
18781 b,
18782 );
18783 let e = _mm512_set_ph(
18784 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
18785 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
18786 );
18787 assert_eq_m512h(r, e);
18788 }
18789
18790 #[simd_test(enable = "avx512fp16,avx512vl")]
18791 fn test_mm_div_round_sh() {
18792 let a = _mm_set_sh(1.0);
18793 let b = _mm_set_sh(2.0);
18794 let r = _mm_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
18795 let e = _mm_set_sh(0.5);
18796 assert_eq_m128h(r, e);
18797 }
18798
18799 #[simd_test(enable = "avx512fp16,avx512vl")]
18800 fn test_mm_mask_div_round_sh() {
18801 let a = _mm_set_sh(1.0);
18802 let b = _mm_set_sh(2.0);
18803 let src = _mm_set_sh(4.0);
18804 let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18805 src, 0, a, b,
18806 );
18807 let e = _mm_set_sh(4.0);
18808 assert_eq_m128h(r, e);
18809 let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18810 src, 1, a, b,
18811 );
18812 let e = _mm_set_sh(0.5);
18813 assert_eq_m128h(r, e);
18814 }
18815
18816 #[simd_test(enable = "avx512fp16,avx512vl")]
18817 fn test_mm_maskz_div_round_sh() {
18818 let a = _mm_set_sh(1.0);
18819 let b = _mm_set_sh(2.0);
18820 let r =
18821 _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
18822 let e = _mm_set_sh(0.0);
18823 assert_eq_m128h(r, e);
18824 let r =
18825 _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
18826 let e = _mm_set_sh(0.5);
18827 assert_eq_m128h(r, e);
18828 }
18829
18830 #[simd_test(enable = "avx512fp16,avx512vl")]
18831 const fn test_mm_div_sh() {
18832 let a = _mm_set_sh(1.0);
18833 let b = _mm_set_sh(2.0);
18834 let r = _mm_div_sh(a, b);
18835 let e = _mm_set_sh(0.5);
18836 assert_eq_m128h(r, e);
18837 }
18838
18839 #[simd_test(enable = "avx512fp16,avx512vl")]
18840 const fn test_mm_mask_div_sh() {
18841 let a = _mm_set_sh(1.0);
18842 let b = _mm_set_sh(2.0);
18843 let src = _mm_set_sh(4.0);
18844 let r = _mm_mask_div_sh(src, 0, a, b);
18845 let e = _mm_set_sh(4.0);
18846 assert_eq_m128h(r, e);
18847 let r = _mm_mask_div_sh(src, 1, a, b);
18848 let e = _mm_set_sh(0.5);
18849 assert_eq_m128h(r, e);
18850 }
18851
18852 #[simd_test(enable = "avx512fp16,avx512vl")]
18853 const fn test_mm_maskz_div_sh() {
18854 let a = _mm_set_sh(1.0);
18855 let b = _mm_set_sh(2.0);
18856 let r = _mm_maskz_div_sh(0, a, b);
18857 let e = _mm_set_sh(0.0);
18858 assert_eq_m128h(r, e);
18859 let r = _mm_maskz_div_sh(1, a, b);
18860 let e = _mm_set_sh(0.5);
18861 assert_eq_m128h(r, e);
18862 }
18863
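    // `pch` intrinsics treat each pair of adjacent f16 lanes as one complex number (real part in
    // the even lane, imaginary part in the odd lane), so a `__m128h` holds 4 complex values and
    // the masks below use one bit per pair. The two-argument `_mm_set1_pch(re, im)` (and the
    // 256/512-bit forms) appears to be a local test helper that splats that pair, since Rust has
    // no f16 complex literal. With a = b = i, the product i * i = -1 yields the expected
    // (-1.0, 0.0) pairs.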
18864 #[simd_test(enable = "avx512fp16,avx512vl")]
18865 fn test_mm_mul_pch() {
18866 let a = _mm_set1_pch(0.0, 1.0);
18867 let b = _mm_set1_pch(0.0, 1.0);
18868 let r = _mm_mul_pch(a, b);
18869 let e = _mm_set1_pch(-1.0, 0.0);
18870 assert_eq_m128h(r, e);
18871 }
18872
18873 #[simd_test(enable = "avx512fp16,avx512vl")]
18874 fn test_mm_mask_mul_pch() {
18875 let a = _mm_set1_pch(0.0, 1.0);
18876 let b = _mm_set1_pch(0.0, 1.0);
18877 let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
18878 let r = _mm_mask_mul_pch(src, 0b0101, a, b);
18879 let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
18880 assert_eq_m128h(r, e);
18881 }
18882
18883 #[simd_test(enable = "avx512fp16,avx512vl")]
18884 fn test_mm_maskz_mul_pch() {
18885 let a = _mm_set1_pch(0.0, 1.0);
18886 let b = _mm_set1_pch(0.0, 1.0);
18887 let r = _mm_maskz_mul_pch(0b0101, a, b);
18888 let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
18889 assert_eq_m128h(r, e);
18890 }
18891
18892 #[simd_test(enable = "avx512fp16,avx512vl")]
18893 fn test_mm256_mul_pch() {
18894 let a = _mm256_set1_pch(0.0, 1.0);
18895 let b = _mm256_set1_pch(0.0, 1.0);
18896 let r = _mm256_mul_pch(a, b);
18897 let e = _mm256_set1_pch(-1.0, 0.0);
18898 assert_eq_m256h(r, e);
18899 }
18900
18901 #[simd_test(enable = "avx512fp16,avx512vl")]
18902 fn test_mm256_mask_mul_pch() {
18903 let a = _mm256_set1_pch(0.0, 1.0);
18904 let b = _mm256_set1_pch(0.0, 1.0);
18905 let src = _mm256_setr_ph(
18906 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
18907 );
18908 let r = _mm256_mask_mul_pch(src, 0b01010101, a, b);
18909 let e = _mm256_setr_ph(
18910 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
18911 );
18912 assert_eq_m256h(r, e);
18913 }
18914
18915 #[simd_test(enable = "avx512fp16,avx512vl")]
18916 fn test_mm256_maskz_mul_pch() {
18917 let a = _mm256_set1_pch(0.0, 1.0);
18918 let b = _mm256_set1_pch(0.0, 1.0);
18919 let r = _mm256_maskz_mul_pch(0b01010101, a, b);
18920 let e = _mm256_setr_ph(
18921 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
18922 );
18923 assert_eq_m256h(r, e);
18924 }
18925
18926 #[simd_test(enable = "avx512fp16")]
18927 fn test_mm512_mul_pch() {
18928 let a = _mm512_set1_pch(0.0, 1.0);
18929 let b = _mm512_set1_pch(0.0, 1.0);
18930 let r = _mm512_mul_pch(a, b);
18931 let e = _mm512_set1_pch(-1.0, 0.0);
18932 assert_eq_m512h(r, e);
18933 }
18934
18935 #[simd_test(enable = "avx512fp16")]
18936 fn test_mm512_mask_mul_pch() {
18937 let a = _mm512_set1_pch(0.0, 1.0);
18938 let b = _mm512_set1_pch(0.0, 1.0);
18939 let src = _mm512_setr_ph(
18940 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
18941 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
18942 32.0, 33.0,
18943 );
18944 let r = _mm512_mask_mul_pch(src, 0b0101010101010101, a, b);
18945 let e = _mm512_setr_ph(
18946 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
18947 -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
18948 33.0,
18949 );
18950 assert_eq_m512h(r, e);
18951 }
18952
18953 #[simd_test(enable = "avx512fp16")]
18954 fn test_mm512_maskz_mul_pch() {
18955 let a = _mm512_set1_pch(0.0, 1.0);
18956 let b = _mm512_set1_pch(0.0, 1.0);
18957 let r = _mm512_maskz_mul_pch(0b0101010101010101, a, b);
18958 let e = _mm512_setr_ph(
18959 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
18960 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
18961 );
18962 assert_eq_m512h(r, e);
18963 }
18964
18965 #[simd_test(enable = "avx512fp16")]
18966 fn test_mm512_mul_round_pch() {
18967 let a = _mm512_set1_pch(0.0, 1.0);
18968 let b = _mm512_set1_pch(0.0, 1.0);
18969 let r = _mm512_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
18970 let e = _mm512_set1_pch(-1.0, 0.0);
18971 assert_eq_m512h(r, e);
18972 }
18973
18974 #[simd_test(enable = "avx512fp16")]
18975 fn test_mm512_mask_mul_round_pch() {
18976 let a = _mm512_set1_pch(0.0, 1.0);
18977 let b = _mm512_set1_pch(0.0, 1.0);
18978 let src = _mm512_setr_ph(
18979 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
18980 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
18981 32.0, 33.0,
18982 );
18983 let r = _mm512_mask_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18984 src,
18985 0b0101010101010101,
18986 a,
18987 b,
18988 );
18989 let e = _mm512_setr_ph(
18990 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
18991 -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
18992 33.0,
18993 );
18994 assert_eq_m512h(r, e);
18995 }
18996
18997 #[simd_test(enable = "avx512fp16")]
18998 fn test_mm512_maskz_mul_round_pch() {
18999 let a = _mm512_set1_pch(0.0, 1.0);
19000 let b = _mm512_set1_pch(0.0, 1.0);
19001 let r = _mm512_maskz_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19002 0b0101010101010101,
19003 a,
19004 b,
19005 );
19006 let e = _mm512_setr_ph(
19007 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19008 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19009 );
19010 assert_eq_m512h(r, e);
19011 }
19012
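    // Scalar `sch` variants multiply only the lowest complex pair (lanes 0 and 1) and copy
    // lanes 2..=7 from `a`; the masked forms consume only mask bit 0, which is why the expected
    // vectors keep `a`'s upper lanes 2.0..7.0 unchanged.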
19013 #[simd_test(enable = "avx512fp16,avx512vl")]
19014 fn test_mm_mul_round_sch() {
19015 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19016 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19017 let r = _mm_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
19018 let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19019 assert_eq_m128h(r, e);
19020 }
19021
19022 #[simd_test(enable = "avx512fp16,avx512vl")]
19023 fn test_mm_mask_mul_round_sch() {
19024 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19025 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19026 let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
19027 let r = _mm_mask_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19028 src, 0, a, b,
19029 );
19030 let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19031 assert_eq_m128h(r, e);
19032 }
19033
19034 #[simd_test(enable = "avx512fp16,avx512vl")]
19035 fn test_mm_maskz_mul_round_sch() {
19036 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19037 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19038 let r =
19039 _mm_maskz_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
19040 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19041 assert_eq_m128h(r, e);
19042 }
19043
19044 #[simd_test(enable = "avx512fp16,avx512vl")]
19045 fn test_mm_mul_sch() {
19046 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19047 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19048 let r = _mm_mul_sch(a, b);
19049 let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19050 assert_eq_m128h(r, e);
19051 }
19052
19053 #[simd_test(enable = "avx512fp16,avx512vl")]
19054 fn test_mm_mask_mul_sch() {
19055 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19056 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19057 let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
19058 let r = _mm_mask_mul_sch(src, 0, a, b);
19059 let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19060 assert_eq_m128h(r, e);
19061 }
19062
19063 #[simd_test(enable = "avx512fp16,avx512vl")]
19064 fn test_mm_maskz_mul_sch() {
19065 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19066 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19067 let r = _mm_maskz_mul_sch(0, a, b);
19068 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19069 assert_eq_m128h(r, e);
19070 }
19071
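    // Per Intel's guide, the `fmul_pch`/`fmul_sch` family performs the same operation as
    // `mul_pch`/`mul_sch`, so these tests mirror the ones above with identical inputs and
    // expected values.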
19072 #[simd_test(enable = "avx512fp16,avx512vl")]
19073 fn test_mm_fmul_pch() {
19074 let a = _mm_set1_pch(0.0, 1.0);
19075 let b = _mm_set1_pch(0.0, 1.0);
19076 let r = _mm_fmul_pch(a, b);
19077 let e = _mm_set1_pch(-1.0, 0.0);
19078 assert_eq_m128h(r, e);
19079 }
19080
19081 #[simd_test(enable = "avx512fp16,avx512vl")]
19082 fn test_mm_mask_fmul_pch() {
19083 let a = _mm_set1_pch(0.0, 1.0);
19084 let b = _mm_set1_pch(0.0, 1.0);
19085 let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
19086 let r = _mm_mask_fmul_pch(src, 0b0101, a, b);
19087 let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
19088 assert_eq_m128h(r, e);
19089 }
19090
19091 #[simd_test(enable = "avx512fp16,avx512vl")]
19092 fn test_mm_maskz_fmul_pch() {
19093 let a = _mm_set1_pch(0.0, 1.0);
19094 let b = _mm_set1_pch(0.0, 1.0);
19095 let r = _mm_maskz_fmul_pch(0b0101, a, b);
19096 let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
19097 assert_eq_m128h(r, e);
19098 }
19099
19100 #[simd_test(enable = "avx512fp16,avx512vl")]
19101 fn test_mm256_fmul_pch() {
19102 let a = _mm256_set1_pch(0.0, 1.0);
19103 let b = _mm256_set1_pch(0.0, 1.0);
19104 let r = _mm256_fmul_pch(a, b);
19105 let e = _mm256_set1_pch(-1.0, 0.0);
19106 assert_eq_m256h(r, e);
19107 }
19108
19109 #[simd_test(enable = "avx512fp16,avx512vl")]
19110 fn test_mm256_mask_fmul_pch() {
19111 let a = _mm256_set1_pch(0.0, 1.0);
19112 let b = _mm256_set1_pch(0.0, 1.0);
19113 let src = _mm256_setr_ph(
19114 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19115 );
19116 let r = _mm256_mask_fmul_pch(src, 0b01010101, a, b);
19117 let e = _mm256_setr_ph(
19118 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
19119 );
19120 assert_eq_m256h(r, e);
19121 }
19122
19123 #[simd_test(enable = "avx512fp16,avx512vl")]
19124 fn test_mm256_maskz_fmul_pch() {
19125 let a = _mm256_set1_pch(0.0, 1.0);
19126 let b = _mm256_set1_pch(0.0, 1.0);
19127 let r = _mm256_maskz_fmul_pch(0b01010101, a, b);
19128 let e = _mm256_setr_ph(
19129 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19130 );
19131 assert_eq_m256h(r, e);
19132 }
19133
19134 #[simd_test(enable = "avx512fp16")]
19135 fn test_mm512_fmul_pch() {
19136 let a = _mm512_set1_pch(0.0, 1.0);
19137 let b = _mm512_set1_pch(0.0, 1.0);
19138 let r = _mm512_fmul_pch(a, b);
19139 let e = _mm512_set1_pch(-1.0, 0.0);
19140 assert_eq_m512h(r, e);
19141 }
19142
19143 #[simd_test(enable = "avx512fp16")]
19144 fn test_mm512_mask_fmul_pch() {
19145 let a = _mm512_set1_pch(0.0, 1.0);
19146 let b = _mm512_set1_pch(0.0, 1.0);
19147 let src = _mm512_setr_ph(
19148 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19149 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
19150 32.0, 33.0,
19151 );
19152 let r = _mm512_mask_fmul_pch(src, 0b0101010101010101, a, b);
19153 let e = _mm512_setr_ph(
19154 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
19155 -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
19156 33.0,
19157 );
19158 assert_eq_m512h(r, e);
19159 }
19160
19161 #[simd_test(enable = "avx512fp16")]
19162 fn test_mm512_maskz_fmul_pch() {
19163 let a = _mm512_set1_pch(0.0, 1.0);
19164 let b = _mm512_set1_pch(0.0, 1.0);
19165 let r = _mm512_maskz_fmul_pch(0b0101010101010101, a, b);
19166 let e = _mm512_setr_ph(
19167 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19168 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19169 );
19170 assert_eq_m512h(r, e);
19171 }
19172
19173 #[simd_test(enable = "avx512fp16")]
19174 fn test_mm512_fmul_round_pch() {
19175 let a = _mm512_set1_pch(0.0, 1.0);
19176 let b = _mm512_set1_pch(0.0, 1.0);
19177 let r = _mm512_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
19178 let e = _mm512_set1_pch(-1.0, 0.0);
19179 assert_eq_m512h(r, e);
19180 }
19181
19182 #[simd_test(enable = "avx512fp16")]
19183 fn test_mm512_mask_fmul_round_pch() {
19184 let a = _mm512_set1_pch(0.0, 1.0);
19185 let b = _mm512_set1_pch(0.0, 1.0);
19186 let src = _mm512_setr_ph(
19187 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19188 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
19189 32.0, 33.0,
19190 );
19191 let r = _mm512_mask_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19192 src,
19193 0b0101010101010101,
19194 a,
19195 b,
19196 );
19197 let e = _mm512_setr_ph(
19198 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
19199 -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
19200 33.0,
19201 );
19202 assert_eq_m512h(r, e);
19203 }
19204
19205 #[simd_test(enable = "avx512fp16")]
19206 fn test_mm512_maskz_fmul_round_pch() {
19207 let a = _mm512_set1_pch(0.0, 1.0);
19208 let b = _mm512_set1_pch(0.0, 1.0);
19209 let r = _mm512_maskz_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19210 0b0101010101010101,
19211 a,
19212 b,
19213 );
19214 let e = _mm512_setr_ph(
19215 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19216 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19217 );
19218 assert_eq_m512h(r, e);
19219 }
19220
19221 #[simd_test(enable = "avx512fp16,avx512vl")]
19222 fn test_mm_fmul_round_sch() {
19223 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19224 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19225 let r = _mm_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
19226 let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19227 assert_eq_m128h(r, e);
19228 }
19229
19230 #[simd_test(enable = "avx512fp16,avx512vl")]
19231 fn test_mm_mask_fmul_round_sch() {
19232 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19233 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19234 let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
19235 let r = _mm_mask_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19236 src, 0, a, b,
19237 );
19238 let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19239 assert_eq_m128h(r, e);
19240 }
19241
19242 #[simd_test(enable = "avx512fp16,avx512vl")]
19243 fn test_mm_maskz_fmul_round_sch() {
19244 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19245 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19246 let r =
19247 _mm_maskz_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
19248 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19249 assert_eq_m128h(r, e);
19250 }
19251
19252 #[simd_test(enable = "avx512fp16,avx512vl")]
19253 fn test_mm_fmul_sch() {
19254 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19255 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19256 let r = _mm_fmul_sch(a, b);
19257 let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19258 assert_eq_m128h(r, e);
19259 }
19260
19261 #[simd_test(enable = "avx512fp16,avx512vl")]
19262 fn test_mm_mask_fmul_sch() {
19263 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19264 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19265 let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
19266 let r = _mm_mask_fmul_sch(src, 0, a, b);
19267 let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19268 assert_eq_m128h(r, e);
19269 }
19270
19271 #[simd_test(enable = "avx512fp16,avx512vl")]
19272 fn test_mm_maskz_fmul_sch() {
19273 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19274 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19275 let r = _mm_maskz_fmul_sch(0, a, b);
19276 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19277 assert_eq_m128h(r, e);
19278 }
19279
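    // `cmul` multiplies `a` by the complex conjugate of `b`. Here a = i and b = -i, so
    // conj(b) = i and a * conj(b) = i * i = -1, matching the (-1.0, 0.0) pairs expected below.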
19280 #[simd_test(enable = "avx512fp16,avx512vl")]
19281 fn test_mm_cmul_pch() {
19282 let a = _mm_set1_pch(0.0, 1.0);
19283 let b = _mm_set1_pch(0.0, -1.0);
19284 let r = _mm_cmul_pch(a, b);
19285 let e = _mm_set1_pch(-1.0, 0.0);
19286 assert_eq_m128h(r, e);
19287 }
19288
19289 #[simd_test(enable = "avx512fp16,avx512vl")]
19290 fn test_mm_mask_cmul_pch() {
19291 let a = _mm_set1_pch(0.0, 1.0);
19292 let b = _mm_set1_pch(0.0, -1.0);
19293 let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
19294 let r = _mm_mask_cmul_pch(src, 0b0101, a, b);
19295 let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
19296 assert_eq_m128h(r, e);
19297 }
19298
19299 #[simd_test(enable = "avx512fp16,avx512vl")]
19300 fn test_mm_maskz_cmul_pch() {
19301 let a = _mm_set1_pch(0.0, 1.0);
19302 let b = _mm_set1_pch(0.0, -1.0);
19303 let r = _mm_maskz_cmul_pch(0b0101, a, b);
19304 let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
19305 assert_eq_m128h(r, e);
19306 }
19307
19308 #[simd_test(enable = "avx512fp16,avx512vl")]
19309 fn test_mm256_cmul_pch() {
19310 let a = _mm256_set1_pch(0.0, 1.0);
19311 let b = _mm256_set1_pch(0.0, -1.0);
19312 let r = _mm256_cmul_pch(a, b);
19313 let e = _mm256_set1_pch(-1.0, 0.0);
19314 assert_eq_m256h(r, e);
19315 }
19316
19317 #[simd_test(enable = "avx512fp16,avx512vl")]
19318 fn test_mm256_mask_cmul_pch() {
19319 let a = _mm256_set1_pch(0.0, 1.0);
19320 let b = _mm256_set1_pch(0.0, -1.0);
19321 let src = _mm256_setr_ph(
19322 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19323 );
19324 let r = _mm256_mask_cmul_pch(src, 0b01010101, a, b);
19325 let e = _mm256_setr_ph(
19326 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
19327 );
19328 assert_eq_m256h(r, e);
19329 }
19330
19331 #[simd_test(enable = "avx512fp16,avx512vl")]
19332 fn test_mm256_maskz_cmul_pch() {
19333 let a = _mm256_set1_pch(0.0, 1.0);
19334 let b = _mm256_set1_pch(0.0, -1.0);
19335 let r = _mm256_maskz_cmul_pch(0b01010101, a, b);
19336 let e = _mm256_setr_ph(
19337 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19338 );
19339 assert_eq_m256h(r, e);
19340 }
19341
19342 #[simd_test(enable = "avx512fp16")]
19343 fn test_mm512_cmul_pch() {
19344 let a = _mm512_set1_pch(0.0, 1.0);
19345 let b = _mm512_set1_pch(0.0, -1.0);
19346 let r = _mm512_cmul_pch(a, b);
19347 let e = _mm512_set1_pch(-1.0, 0.0);
19348 assert_eq_m512h(r, e);
19349 }
19350
19351 #[simd_test(enable = "avx512fp16")]
19352 fn test_mm512_mask_cmul_pch() {
19353 let a = _mm512_set1_pch(0.0, 1.0);
19354 let b = _mm512_set1_pch(0.0, -1.0);
19355 let src = _mm512_setr_ph(
19356 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19357 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
19358 32.0, 33.0,
19359 );
19360 let r = _mm512_mask_cmul_pch(src, 0b0101010101010101, a, b);
19361 let e = _mm512_setr_ph(
19362 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
19363 -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
19364 33.0,
19365 );
19366 assert_eq_m512h(r, e);
19367 }
19368
19369 #[simd_test(enable = "avx512fp16")]
19370 fn test_mm512_maskz_cmul_pch() {
19371 let a = _mm512_set1_pch(0.0, 1.0);
19372 let b = _mm512_set1_pch(0.0, -1.0);
19373 let r = _mm512_maskz_cmul_pch(0b0101010101010101, a, b);
19374 let e = _mm512_setr_ph(
19375 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19376 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19377 );
19378 assert_eq_m512h(r, e);
19379 }
19380
19381 #[simd_test(enable = "avx512fp16")]
19382 fn test_mm512_cmul_round_pch() {
19383 let a = _mm512_set1_pch(0.0, 1.0);
19384 let b = _mm512_set1_pch(0.0, -1.0);
19385 let r = _mm512_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
19386 let e = _mm512_set1_pch(-1.0, 0.0);
19387 assert_eq_m512h(r, e);
19388 }
19389
19390 #[simd_test(enable = "avx512fp16")]
19391 fn test_mm512_mask_cmul_round_pch() {
19392 let a = _mm512_set1_pch(0.0, 1.0);
19393 let b = _mm512_set1_pch(0.0, -1.0);
19394 let src = _mm512_setr_ph(
19395 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19396 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
19397 32.0, 33.0,
19398 );
19399 let r = _mm512_mask_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19400 src,
19401 0b0101010101010101,
19402 a,
19403 b,
19404 );
19405 let e = _mm512_setr_ph(
19406 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
19407 -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
19408 33.0,
19409 );
19410 assert_eq_m512h(r, e);
19411 }
19412
19413 #[simd_test(enable = "avx512fp16")]
19414 fn test_mm512_maskz_cmul_round_pch() {
19415 let a = _mm512_set1_pch(0.0, 1.0);
19416 let b = _mm512_set1_pch(0.0, -1.0);
19417 let r = _mm512_maskz_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19418 0b0101010101010101,
19419 a,
19420 b,
19421 );
19422 let e = _mm512_setr_ph(
19423 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19424 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19425 );
19426 assert_eq_m512h(r, e);
19427 }
19428
19429 #[simd_test(enable = "avx512fp16,avx512vl")]
19430 fn test_mm_cmul_sch() {
19431 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19432 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19433 let r = _mm_cmul_sch(a, b);
19434 let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19435 assert_eq_m128h(r, e);
19436 }
19437
19438 #[simd_test(enable = "avx512fp16,avx512vl")]
19439 fn test_mm_mask_cmul_sch() {
19440 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19441 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19442 let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
19443 let r = _mm_mask_cmul_sch(src, 0, a, b);
19444 let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19445 assert_eq_m128h(r, e);
19446 }
19447
19448 #[simd_test(enable = "avx512fp16,avx512vl")]
19449 fn test_mm_maskz_cmul_sch() {
19450 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19451 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19452 let r = _mm_maskz_cmul_sch(0, a, b);
19453 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19454 assert_eq_m128h(r, e);
19455 }
19456
19457 #[simd_test(enable = "avx512fp16,avx512vl")]
19458 fn test_mm_cmul_round_sch() {
19459 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19460 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19461 let r = _mm_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
19462 let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19463 assert_eq_m128h(r, e);
19464 }
19465
19466 #[simd_test(enable = "avx512fp16,avx512vl")]
19467 fn test_mm_mask_cmul_round_sch() {
19468 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19469 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19470 let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
19471 let r = _mm_mask_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19472 src, 0, a, b,
19473 );
19474 let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19475 assert_eq_m128h(r, e);
19476 }
19477
19478 #[simd_test(enable = "avx512fp16,avx512vl")]
19479 fn test_mm_maskz_cmul_round_sch() {
19480 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19481 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19482 let r =
19483 _mm_maskz_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
19484 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19485 assert_eq_m128h(r, e);
19486 }
19487
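    // As with `fmul_pch`, the `fcmul_pch`/`fcmul_sch` family performs the same conjugate
    // multiplication as `cmul_pch`/`cmul_sch`, so the tests repeat the `cmul` cases.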
19488 #[simd_test(enable = "avx512fp16,avx512vl")]
19489 fn test_mm_fcmul_pch() {
19490 let a = _mm_set1_pch(0.0, 1.0);
19491 let b = _mm_set1_pch(0.0, -1.0);
19492 let r = _mm_fcmul_pch(a, b);
19493 let e = _mm_set1_pch(-1.0, 0.0);
19494 assert_eq_m128h(r, e);
19495 }
19496
19497 #[simd_test(enable = "avx512fp16,avx512vl")]
19498 fn test_mm_mask_fcmul_pch() {
19499 let a = _mm_set1_pch(0.0, 1.0);
19500 let b = _mm_set1_pch(0.0, -1.0);
19501 let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
19502 let r = _mm_mask_fcmul_pch(src, 0b0101, a, b);
19503 let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
19504 assert_eq_m128h(r, e);
19505 }
19506
19507 #[simd_test(enable = "avx512fp16,avx512vl")]
19508 fn test_mm_maskz_fcmul_pch() {
19509 let a = _mm_set1_pch(0.0, 1.0);
19510 let b = _mm_set1_pch(0.0, -1.0);
19511 let r = _mm_maskz_fcmul_pch(0b0101, a, b);
19512 let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
19513 assert_eq_m128h(r, e);
19514 }
19515
19516 #[simd_test(enable = "avx512fp16,avx512vl")]
19517 fn test_mm256_fcmul_pch() {
19518 let a = _mm256_set1_pch(0.0, 1.0);
19519 let b = _mm256_set1_pch(0.0, -1.0);
19520 let r = _mm256_fcmul_pch(a, b);
19521 let e = _mm256_set1_pch(-1.0, 0.0);
19522 assert_eq_m256h(r, e);
19523 }
19524
19525 #[simd_test(enable = "avx512fp16,avx512vl")]
19526 fn test_mm256_mask_fcmul_pch() {
19527 let a = _mm256_set1_pch(0.0, 1.0);
19528 let b = _mm256_set1_pch(0.0, -1.0);
19529 let src = _mm256_setr_ph(
19530 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19531 );
19532 let r = _mm256_mask_fcmul_pch(src, 0b01010101, a, b);
19533 let e = _mm256_setr_ph(
19534 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
19535 );
19536 assert_eq_m256h(r, e);
19537 }
19538
19539 #[simd_test(enable = "avx512fp16,avx512vl")]
19540 fn test_mm256_maskz_fcmul_pch() {
19541 let a = _mm256_set1_pch(0.0, 1.0);
19542 let b = _mm256_set1_pch(0.0, -1.0);
19543 let r = _mm256_maskz_fcmul_pch(0b01010101, a, b);
19544 let e = _mm256_setr_ph(
19545 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19546 );
19547 assert_eq_m256h(r, e);
19548 }
19549
19550 #[simd_test(enable = "avx512fp16")]
19551 fn test_mm512_fcmul_pch() {
19552 let a = _mm512_set1_pch(0.0, 1.0);
19553 let b = _mm512_set1_pch(0.0, -1.0);
19554 let r = _mm512_fcmul_pch(a, b);
19555 let e = _mm512_set1_pch(-1.0, 0.0);
19556 assert_eq_m512h(r, e);
19557 }
19558
19559 #[simd_test(enable = "avx512fp16")]
19560 fn test_mm512_mask_fcmul_pch() {
19561 let a = _mm512_set1_pch(0.0, 1.0);
19562 let b = _mm512_set1_pch(0.0, -1.0);
19563 let src = _mm512_setr_ph(
19564 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19565 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
19566 32.0, 33.0,
19567 );
19568 let r = _mm512_mask_fcmul_pch(src, 0b0101010101010101, a, b);
19569 let e = _mm512_setr_ph(
19570 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
19571 -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
19572 33.0,
19573 );
19574 assert_eq_m512h(r, e);
19575 }
19576
19577 #[simd_test(enable = "avx512fp16")]
19578 fn test_mm512_maskz_fcmul_pch() {
19579 let a = _mm512_set1_pch(0.0, 1.0);
19580 let b = _mm512_set1_pch(0.0, -1.0);
19581 let r = _mm512_maskz_fcmul_pch(0b0101010101010101, a, b);
19582 let e = _mm512_setr_ph(
19583 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19584 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19585 );
19586 assert_eq_m512h(r, e);
19587 }
19588
19589 #[simd_test(enable = "avx512fp16")]
19590 fn test_mm512_fcmul_round_pch() {
19591 let a = _mm512_set1_pch(0.0, 1.0);
19592 let b = _mm512_set1_pch(0.0, -1.0);
19593 let r = _mm512_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
19594 let e = _mm512_set1_pch(-1.0, 0.0);
19595 assert_eq_m512h(r, e);
19596 }
19597
19598 #[simd_test(enable = "avx512fp16")]
19599 fn test_mm512_mask_fcmul_round_pch() {
19600 let a = _mm512_set1_pch(0.0, 1.0);
19601 let b = _mm512_set1_pch(0.0, -1.0);
19602 let src = _mm512_setr_ph(
19603 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19604 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
19605 32.0, 33.0,
19606 );
19607 let r = _mm512_mask_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19608 src,
19609 0b0101010101010101,
19610 a,
19611 b,
19612 );
19613 let e = _mm512_setr_ph(
19614 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
19615 -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
19616 33.0,
19617 );
19618 assert_eq_m512h(r, e);
19619 }
19620
19621 #[simd_test(enable = "avx512fp16")]
19622 fn test_mm512_maskz_fcmul_round_pch() {
19623 let a = _mm512_set1_pch(0.0, 1.0);
19624 let b = _mm512_set1_pch(0.0, -1.0);
19625 let r = _mm512_maskz_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19626 0b0101010101010101,
19627 a,
19628 b,
19629 );
19630 let e = _mm512_setr_ph(
19631 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19632 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19633 );
19634 assert_eq_m512h(r, e);
19635 }
19636
19637 #[simd_test(enable = "avx512fp16,avx512vl")]
19638 fn test_mm_fcmul_sch() {
19639 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19640 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19641 let r = _mm_fcmul_sch(a, b);
19642 let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19643 assert_eq_m128h(r, e);
19644 }
19645
19646 #[simd_test(enable = "avx512fp16,avx512vl")]
19647 fn test_mm_mask_fcmul_sch() {
19648 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19649 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19650 let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
19651 let r = _mm_mask_fcmul_sch(src, 0, a, b);
19652 let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19653 assert_eq_m128h(r, e);
19654 }
19655
19656 #[simd_test(enable = "avx512fp16,avx512vl")]
19657 fn test_mm_maskz_fcmul_sch() {
19658 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19659 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19660 let r = _mm_maskz_fcmul_sch(0, a, b);
19661 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19662 assert_eq_m128h(r, e);
19663 }
19664
19665 #[simd_test(enable = "avx512fp16,avx512vl")]
19666 fn test_mm_fcmul_round_sch() {
19667 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19668 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19669 let r = _mm_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
19670 let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19671 assert_eq_m128h(r, e);
19672 }
19673
19674 #[simd_test(enable = "avx512fp16,avx512vl")]
19675 fn test_mm_mask_fcmul_round_sch() {
19676 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19677 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19678 let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
19679 let r = _mm_mask_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19680 src, 0, a, b,
19681 );
19682 let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19683 assert_eq_m128h(r, e);
19684 }
19685
19686 #[simd_test(enable = "avx512fp16,avx512vl")]
19687 fn test_mm_maskz_fcmul_round_sch() {
19688 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19689 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19690 let r =
19691 _mm_maskz_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
19692 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19693 assert_eq_m128h(r, e);
19694 }
19695
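    // `abs_ph` clears the sign bit of every f16 lane; the inputs mix signs (including 0.0), so
    // the expected vectors are simply the lane magnitudes.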
19696 #[simd_test(enable = "avx512fp16,avx512vl")]
19697 const fn test_mm_abs_ph() {
19698 let a = _mm_set_ph(-1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0);
19699 let r = _mm_abs_ph(a);
19700 let e = _mm_set_ph(1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0);
19701 assert_eq_m128h(r, e);
19702 }
19703
19704 #[simd_test(enable = "avx512fp16,avx512vl")]
19705 const fn test_mm256_abs_ph() {
19706 let a = _mm256_set_ph(
19707 -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
19708 -14.0,
19709 );
19710 let r = _mm256_abs_ph(a);
19711 let e = _mm256_set_ph(
19712 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
19713 );
19714 assert_eq_m256h(r, e);
19715 }
19716
19717 #[simd_test(enable = "avx512fp16")]
19718 const fn test_mm512_abs_ph() {
19719 let a = _mm512_set_ph(
19720 -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
19721 -14.0, 15.0, -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0,
19722 27.0, -28.0, 29.0, -30.0,
19723 );
19724 let r = _mm512_abs_ph(a);
19725 let e = _mm512_set_ph(
19726 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
19727 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0,
19728 29.0, 30.0,
19729 );
19730 assert_eq_m512h(r, e);
19731 }
19732
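    // `conj_pch` negates the imaginary (odd) lane of each complex pair and leaves the real lane
    // untouched; the masked forms again use one mask bit per pair.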
19733 #[simd_test(enable = "avx512fp16,avx512vl")]
19734 const fn test_mm_conj_pch() {
19735 let a = _mm_set1_pch(0.0, 1.0);
19736 let r = _mm_conj_pch(a);
19737 let e = _mm_set1_pch(0.0, -1.0);
19738 assert_eq_m128h(r, e);
19739 }
19740
19741 #[simd_test(enable = "avx512fp16,avx512vl")]
19742 const fn test_mm_mask_conj_pch() {
19743 let a = _mm_set1_pch(0.0, 1.0);
19744 let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
19745 let r = _mm_mask_conj_pch(src, 0b0101, a);
19746 let e = _mm_setr_ph(0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0);
19747 assert_eq_m128h(r, e);
19748 }
19749
19750 #[simd_test(enable = "avx512fp16,avx512vl")]
19751 const fn test_mm_maskz_conj_pch() {
19752 let a = _mm_set1_pch(0.0, 1.0);
19753 let r = _mm_maskz_conj_pch(0b0101, a);
19754 let e = _mm_setr_ph(0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0);
19755 assert_eq_m128h(r, e);
19756 }
19757
19758 #[simd_test(enable = "avx512fp16,avx512vl")]
19759 const fn test_mm256_conj_pch() {
19760 let a = _mm256_set1_pch(0.0, 1.0);
19761 let r = _mm256_conj_pch(a);
19762 let e = _mm256_set1_pch(0.0, -1.0);
19763 assert_eq_m256h(r, e);
19764 }
19765
19766 #[simd_test(enable = "avx512fp16,avx512vl")]
19767 const fn test_mm256_mask_conj_pch() {
19768 let a = _mm256_set1_pch(0.0, 1.0);
19769 let src = _mm256_setr_ph(
19770 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19771 );
19772 let r = _mm256_mask_conj_pch(src, 0b01010101, a);
19773 let e = _mm256_setr_ph(
19774 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
19775 );
19776 assert_eq_m256h(r, e);
19777 }
19778
19779 #[simd_test(enable = "avx512fp16,avx512vl")]
19780 const fn test_mm256_maskz_conj_pch() {
19781 let a = _mm256_set1_pch(0.0, 1.0);
19782 let r = _mm256_maskz_conj_pch(0b01010101, a);
19783 let e = _mm256_setr_ph(
19784 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
19785 );
19786 assert_eq_m256h(r, e);
19787 }
19788
19789 #[simd_test(enable = "avx512fp16")]
19790 const fn test_mm512_conj_pch() {
19791 let a = _mm512_set1_pch(0.0, 1.0);
19792 let r = _mm512_conj_pch(a);
19793 let e = _mm512_set1_pch(0.0, -1.0);
19794 assert_eq_m512h(r, e);
19795 }
19796
19797 #[simd_test(enable = "avx512fp16")]
19798 const fn test_mm512_mask_conj_pch() {
19799 let a = _mm512_set1_pch(0.0, 1.0);
19800 let src = _mm512_setr_ph(
19801 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19802 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
19803 32.0, 33.0,
19804 );
19805 let r = _mm512_mask_conj_pch(src, 0b0101010101010101, a);
19806 let e = _mm512_setr_ph(
19807 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
19808 0.0, -1.0, 20.0, 21.0, 0.0, -1.0, 24.0, 25.0, 0.0, -1.0, 28.0, 29.0, 0.0, -1.0, 32.0,
19809 33.0,
19810 );
19811 assert_eq_m512h(r, e);
19812 }
19813
19814 #[simd_test(enable = "avx512fp16")]
19815 const fn test_mm512_maskz_conj_pch() {
19816 let a = _mm512_set1_pch(0.0, 1.0);
19817 let r = _mm512_maskz_conj_pch(0b0101010101010101, a);
19818 let e = _mm512_setr_ph(
19819 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
19820 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
19821 );
19822 assert_eq_m512h(r, e);
19823 }
19824
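// Complex fused multiply-add: each complex lane computes a*b + c. With a = i,
// b = 2i and c = 3i this is i*2i + 3i = -2 + 3i, i.e. the (real, imag) pair
// (-2.0, 3.0). Masked-off lanes keep `a` (mask), `c` (mask3) or are zeroed (maskz).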
19825 #[simd_test(enable = "avx512fp16,avx512vl")]
19826 fn test_mm_fmadd_pch() {
19827 let a = _mm_set1_pch(0.0, 1.0);
19828 let b = _mm_set1_pch(0.0, 2.0);
19829 let c = _mm_set1_pch(0.0, 3.0);
19830 let r = _mm_fmadd_pch(a, b, c);
19831 let e = _mm_set1_pch(-2.0, 3.0);
19832 assert_eq_m128h(r, e);
19833 }
19834
19835 #[simd_test(enable = "avx512fp16,avx512vl")]
19836 fn test_mm_mask_fmadd_pch() {
19837 let a = _mm_set1_pch(0.0, 1.0);
19838 let b = _mm_set1_pch(0.0, 2.0);
19839 let c = _mm_set1_pch(0.0, 3.0);
19840 let r = _mm_mask_fmadd_pch(a, 0b0101, b, c);
19841 let e = _mm_setr_ph(-2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0);
19842 assert_eq_m128h(r, e);
19843 }
19844
19845 #[simd_test(enable = "avx512fp16,avx512vl")]
19846 fn test_mm_mask3_fmadd_pch() {
19847 let a = _mm_set1_pch(0.0, 1.0);
19848 let b = _mm_set1_pch(0.0, 2.0);
19849 let c = _mm_set1_pch(0.0, 3.0);
19850 let r = _mm_mask3_fmadd_pch(a, b, c, 0b0101);
19851 let e = _mm_setr_ph(-2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0);
19852 assert_eq_m128h(r, e);
19853 }
19854
19855 #[simd_test(enable = "avx512fp16,avx512vl")]
19856 fn test_mm_maskz_fmadd_pch() {
19857 let a = _mm_set1_pch(0.0, 1.0);
19858 let b = _mm_set1_pch(0.0, 2.0);
19859 let c = _mm_set1_pch(0.0, 3.0);
19860 let r = _mm_maskz_fmadd_pch(0b0101, a, b, c);
19861 let e = _mm_setr_ph(-2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0);
19862 assert_eq_m128h(r, e);
19863 }
19864
19865 #[simd_test(enable = "avx512fp16,avx512vl")]
19866 fn test_mm256_fmadd_pch() {
19867 let a = _mm256_set1_pch(0.0, 1.0);
19868 let b = _mm256_set1_pch(0.0, 2.0);
19869 let c = _mm256_set1_pch(0.0, 3.0);
19870 let r = _mm256_fmadd_pch(a, b, c);
19871 let e = _mm256_set1_pch(-2.0, 3.0);
19872 assert_eq_m256h(r, e);
19873 }
19874
19875 #[simd_test(enable = "avx512fp16,avx512vl")]
19876 fn test_mm256_mask_fmadd_pch() {
19877 let a = _mm256_set1_pch(0.0, 1.0);
19878 let b = _mm256_set1_pch(0.0, 2.0);
19879 let c = _mm256_set1_pch(0.0, 3.0);
19880 let r = _mm256_mask_fmadd_pch(a, 0b01010101, b, c);
19881 let e = _mm256_setr_ph(
19882 -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
19883 );
19884 assert_eq_m256h(r, e);
19885 }
19886
19887 #[simd_test(enable = "avx512fp16,avx512vl")]
19888 fn test_mm256_mask3_fmadd_pch() {
19889 let a = _mm256_set1_pch(0.0, 1.0);
19890 let b = _mm256_set1_pch(0.0, 2.0);
19891 let c = _mm256_set1_pch(0.0, 3.0);
19892 let r = _mm256_mask3_fmadd_pch(a, b, c, 0b01010101);
19893 let e = _mm256_setr_ph(
19894 -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
19895 );
19896 assert_eq_m256h(r, e);
19897 }
19898
19899 #[simd_test(enable = "avx512fp16,avx512vl")]
19900 fn test_mm256_maskz_fmadd_pch() {
19901 let a = _mm256_set1_pch(0.0, 1.0);
19902 let b = _mm256_set1_pch(0.0, 2.0);
19903 let c = _mm256_set1_pch(0.0, 3.0);
19904 let r = _mm256_maskz_fmadd_pch(0b01010101, a, b, c);
19905 let e = _mm256_setr_ph(
19906 -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
19907 );
19908 assert_eq_m256h(r, e);
19909 }
19910
19911 #[simd_test(enable = "avx512fp16")]
19912 fn test_mm512_fmadd_pch() {
19913 let a = _mm512_set1_pch(0.0, 1.0);
19914 let b = _mm512_set1_pch(0.0, 2.0);
19915 let c = _mm512_set1_pch(0.0, 3.0);
19916 let r = _mm512_fmadd_pch(a, b, c);
19917 let e = _mm512_set1_pch(-2.0, 3.0);
19918 assert_eq_m512h(r, e);
19919 }
19920
19921 #[simd_test(enable = "avx512fp16")]
19922 fn test_mm512_mask_fmadd_pch() {
19923 let a = _mm512_set1_pch(0.0, 1.0);
19924 let b = _mm512_set1_pch(0.0, 2.0);
19925 let c = _mm512_set1_pch(0.0, 3.0);
19926 let r = _mm512_mask_fmadd_pch(a, 0b0101010101010101, b, c);
19927 let e = _mm512_setr_ph(
19928 -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
19929 -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
19930 );
19931 assert_eq_m512h(r, e);
19932 }
19933
19934 #[simd_test(enable = "avx512fp16")]
19935 fn test_mm512_mask3_fmadd_pch() {
19936 let a = _mm512_set1_pch(0.0, 1.0);
19937 let b = _mm512_set1_pch(0.0, 2.0);
19938 let c = _mm512_set1_pch(0.0, 3.0);
19939 let r = _mm512_mask3_fmadd_pch(a, b, c, 0b0101010101010101);
19940 let e = _mm512_setr_ph(
19941 -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
19942 -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
19943 );
19944 assert_eq_m512h(r, e);
19945 }
19946
19947 #[simd_test(enable = "avx512fp16")]
19948 fn test_mm512_maskz_fmadd_pch() {
19949 let a = _mm512_set1_pch(0.0, 1.0);
19950 let b = _mm512_set1_pch(0.0, 2.0);
19951 let c = _mm512_set1_pch(0.0, 3.0);
19952 let r = _mm512_maskz_fmadd_pch(0b0101010101010101, a, b, c);
19953 let e = _mm512_setr_ph(
19954 -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
19955 -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
19956 );
19957 assert_eq_m512h(r, e);
19958 }
19959
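// The `_round_` variants take the rounding mode as a const generic;
// `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC` selects round-to-nearest-even with
// exceptions suppressed (SAE). All values used here are exactly representable, so the
// expected results match the non-rounding tests above.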
19960 #[simd_test(enable = "avx512fp16")]
19961 fn test_mm512_fmadd_round_pch() {
19962 let a = _mm512_set1_pch(0.0, 1.0);
19963 let b = _mm512_set1_pch(0.0, 2.0);
19964 let c = _mm512_set1_pch(0.0, 3.0);
19965 let r =
19966 _mm512_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
19967 let e = _mm512_set1_pch(-2.0, 3.0);
19968 assert_eq_m512h(r, e);
19969 }
19970
19971 #[simd_test(enable = "avx512fp16")]
19972 fn test_mm512_mask_fmadd_round_pch() {
19973 let a = _mm512_set1_pch(0.0, 1.0);
19974 let b = _mm512_set1_pch(0.0, 2.0);
19975 let c = _mm512_set1_pch(0.0, 3.0);
19976 let r = _mm512_mask_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19977 a,
19978 0b0101010101010101,
19979 b,
19980 c,
19981 );
19982 let e = _mm512_setr_ph(
19983 -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
19984 -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
19985 );
19986 assert_eq_m512h(r, e);
19987 }
19988
19989 #[simd_test(enable = "avx512fp16")]
19990 fn test_mm512_mask3_fmadd_round_pch() {
19991 let a = _mm512_set1_pch(0.0, 1.0);
19992 let b = _mm512_set1_pch(0.0, 2.0);
19993 let c = _mm512_set1_pch(0.0, 3.0);
19994 let r = _mm512_mask3_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19995 a,
19996 b,
19997 c,
19998 0b0101010101010101,
19999 );
20000 let e = _mm512_setr_ph(
20001 -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
20002 -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
20003 );
20004 assert_eq_m512h(r, e);
20005 }
20006
20007 #[simd_test(enable = "avx512fp16")]
20008 fn test_mm512_maskz_fmadd_round_pch() {
20009 let a = _mm512_set1_pch(0.0, 1.0);
20010 let b = _mm512_set1_pch(0.0, 2.0);
20011 let c = _mm512_set1_pch(0.0, 3.0);
20012 let r = _mm512_maskz_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20013 0b0101010101010101,
20014 a,
20015 b,
20016 c,
20017 );
20018 let e = _mm512_setr_ph(
20019 -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
20020 -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
20021 );
20022 assert_eq_m512h(r, e);
20023 }
20024
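// Scalar complex FMA (`_sch`): only the lowest complex pair (elements 0 and 1) is
// computed; the tests verify that the remaining six elements are copied through from
// `a` (plain/mask/maskz forms) or from `c` (mask3 form), using distinct sentinel
// values in the upper lanes.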
20025 #[simd_test(enable = "avx512fp16,avx512vl")]
20026 fn test_mm_fmadd_sch() {
20027 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20028 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20029 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20030 let r = _mm_fmadd_sch(a, b, c);
20031 let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20032 assert_eq_m128h(r, e);
20033 }
20034
20035 #[simd_test(enable = "avx512fp16,avx512vl")]
20036 fn test_mm_mask_fmadd_sch() {
20037 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20038 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20039 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20040 let r = _mm_mask_fmadd_sch(a, 0, b, c);
20041 let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20042 assert_eq_m128h(r, e);
20043 let r = _mm_mask_fmadd_sch(a, 1, b, c);
20044 let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20045 assert_eq_m128h(r, e);
20046 }
20047
20048 #[simd_test(enable = "avx512fp16,avx512vl")]
20049 fn test_mm_mask3_fmadd_sch() {
20050 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20051 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20052 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20053 let r = _mm_mask3_fmadd_sch(a, b, c, 0);
20054 let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20055 assert_eq_m128h(r, e);
20056 let r = _mm_mask3_fmadd_sch(a, b, c, 1);
20057 let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20058 assert_eq_m128h(r, e);
20059 }
20060
20061 #[simd_test(enable = "avx512fp16,avx512vl")]
20062 fn test_mm_maskz_fmadd_sch() {
20063 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20064 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20065 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20066 let r = _mm_maskz_fmadd_sch(0, a, b, c);
20067 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20068 assert_eq_m128h(r, e);
20069 let r = _mm_maskz_fmadd_sch(1, a, b, c);
20070 let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20071 assert_eq_m128h(r, e);
20072 }
20073
20074 #[simd_test(enable = "avx512fp16,avx512vl")]
20075 fn test_mm_fmadd_round_sch() {
20076 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20077 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20078 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20079 let r = _mm_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
20080 let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20081 assert_eq_m128h(r, e);
20082 }
20083
20084 #[simd_test(enable = "avx512fp16,avx512vl")]
20085 fn test_mm_mask_fmadd_round_sch() {
20086 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20087 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20088 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20089 let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20090 a, 0, b, c,
20091 );
20092 let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20093 assert_eq_m128h(r, e);
20094 let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20095 a, 1, b, c,
20096 );
20097 let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20098 assert_eq_m128h(r, e);
20099 }
20100
20101 #[simd_test(enable = "avx512fp16,avx512vl")]
20102 fn test_mm_mask3_fmadd_round_sch() {
20103 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20104 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20105 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20106 let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20107 a, b, c, 0,
20108 );
20109 let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20110 assert_eq_m128h(r, e);
20111 let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20112 a, b, c, 1,
20113 );
20114 let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20115 assert_eq_m128h(r, e);
20116 }
20117
20118 #[simd_test(enable = "avx512fp16,avx512vl")]
20119 fn test_mm_maskz_fmadd_round_sch() {
20120 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20121 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20122 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20123 let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20124 0, a, b, c,
20125 );
20126 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20127 assert_eq_m128h(r, e);
20128 let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20129 1, a, b, c,
20130 );
20131 let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20132 assert_eq_m128h(r, e);
20133 }
20134
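// `fcmadd` conjugates `a` before the complex multiply: conj(i)*2i + 3i = 2 + 3i,
// so every computed lane is the pair (2.0, 3.0) instead of (-2.0, 3.0).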
20135 #[simd_test(enable = "avx512fp16,avx512vl")]
20136 fn test_mm_fcmadd_pch() {
20137 let a = _mm_set1_pch(0.0, 1.0);
20138 let b = _mm_set1_pch(0.0, 2.0);
20139 let c = _mm_set1_pch(0.0, 3.0);
20140 let r = _mm_fcmadd_pch(a, b, c);
20141 let e = _mm_set1_pch(2.0, 3.0);
20142 assert_eq_m128h(r, e);
20143 }
20144
20145 #[simd_test(enable = "avx512fp16,avx512vl")]
20146 fn test_mm_mask_fcmadd_pch() {
20147 let a = _mm_set1_pch(0.0, 1.0);
20148 let b = _mm_set1_pch(0.0, 2.0);
20149 let c = _mm_set1_pch(0.0, 3.0);
20150 let r = _mm_mask_fcmadd_pch(a, 0b0101, b, c);
20151 let e = _mm_setr_ph(2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0);
20152 assert_eq_m128h(r, e);
20153 }
20154
20155 #[simd_test(enable = "avx512fp16,avx512vl")]
20156 fn test_mm_mask3_fcmadd_pch() {
20157 let a = _mm_set1_pch(0.0, 1.0);
20158 let b = _mm_set1_pch(0.0, 2.0);
20159 let c = _mm_set1_pch(0.0, 3.0);
20160 let r = _mm_mask3_fcmadd_pch(a, b, c, 0b0101);
20161 let e = _mm_setr_ph(2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0);
20162 assert_eq_m128h(r, e);
20163 }
20164
20165 #[simd_test(enable = "avx512fp16,avx512vl")]
20166 fn test_mm_maskz_fcmadd_pch() {
20167 let a = _mm_set1_pch(0.0, 1.0);
20168 let b = _mm_set1_pch(0.0, 2.0);
20169 let c = _mm_set1_pch(0.0, 3.0);
20170 let r = _mm_maskz_fcmadd_pch(0b0101, a, b, c);
20171 let e = _mm_setr_ph(2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0);
20172 assert_eq_m128h(r, e);
20173 }
20174
20175 #[simd_test(enable = "avx512fp16,avx512vl")]
20176 fn test_mm256_fcmadd_pch() {
20177 let a = _mm256_set1_pch(0.0, 1.0);
20178 let b = _mm256_set1_pch(0.0, 2.0);
20179 let c = _mm256_set1_pch(0.0, 3.0);
20180 let r = _mm256_fcmadd_pch(a, b, c);
20181 let e = _mm256_set1_pch(2.0, 3.0);
20182 assert_eq_m256h(r, e);
20183 }
20184
20185 #[simd_test(enable = "avx512fp16,avx512vl")]
20186 fn test_mm256_mask_fcmadd_pch() {
20187 let a = _mm256_set1_pch(0.0, 1.0);
20188 let b = _mm256_set1_pch(0.0, 2.0);
20189 let c = _mm256_set1_pch(0.0, 3.0);
20190 let r = _mm256_mask_fcmadd_pch(a, 0b01010101, b, c);
20191 let e = _mm256_setr_ph(
20192 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
20193 );
20194 assert_eq_m256h(r, e);
20195 }
20196
20197 #[simd_test(enable = "avx512fp16,avx512vl")]
20198 fn test_mm256_mask3_fcmadd_pch() {
20199 let a = _mm256_set1_pch(0.0, 1.0);
20200 let b = _mm256_set1_pch(0.0, 2.0);
20201 let c = _mm256_set1_pch(0.0, 3.0);
20202 let r = _mm256_mask3_fcmadd_pch(a, b, c, 0b01010101);
20203 let e = _mm256_setr_ph(
20204 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
20205 );
20206 assert_eq_m256h(r, e);
20207 }
20208
20209 #[simd_test(enable = "avx512fp16,avx512vl")]
20210 fn test_mm256_maskz_fcmadd_pch() {
20211 let a = _mm256_set1_pch(0.0, 1.0);
20212 let b = _mm256_set1_pch(0.0, 2.0);
20213 let c = _mm256_set1_pch(0.0, 3.0);
20214 let r = _mm256_maskz_fcmadd_pch(0b01010101, a, b, c);
20215 let e = _mm256_setr_ph(
20216 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
20217 );
20218 assert_eq_m256h(r, e);
20219 }
20220
20221 #[simd_test(enable = "avx512fp16")]
20222 fn test_mm512_fcmadd_pch() {
20223 let a = _mm512_set1_pch(0.0, 1.0);
20224 let b = _mm512_set1_pch(0.0, 2.0);
20225 let c = _mm512_set1_pch(0.0, 3.0);
20226 let r = _mm512_fcmadd_pch(a, b, c);
20227 let e = _mm512_set1_pch(2.0, 3.0);
20228 assert_eq_m512h(r, e);
20229 }
20230
20231 #[simd_test(enable = "avx512fp16")]
20232 fn test_mm512_mask_fcmadd_pch() {
20233 let a = _mm512_set1_pch(0.0, 1.0);
20234 let b = _mm512_set1_pch(0.0, 2.0);
20235 let c = _mm512_set1_pch(0.0, 3.0);
20236 let r = _mm512_mask_fcmadd_pch(a, 0b0101010101010101, b, c);
20237 let e = _mm512_setr_ph(
20238 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
20239 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
20240 );
20241 assert_eq_m512h(r, e);
20242 }
20243
20244 #[simd_test(enable = "avx512fp16")]
20245 fn test_mm512_mask3_fcmadd_pch() {
20246 let a = _mm512_set1_pch(0.0, 1.0);
20247 let b = _mm512_set1_pch(0.0, 2.0);
20248 let c = _mm512_set1_pch(0.0, 3.0);
20249 let r = _mm512_mask3_fcmadd_pch(a, b, c, 0b0101010101010101);
20250 let e = _mm512_setr_ph(
20251 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
20252 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
20253 );
20254 assert_eq_m512h(r, e);
20255 }
20256
20257 #[simd_test(enable = "avx512fp16")]
20258 fn test_mm512_maskz_fcmadd_pch() {
20259 let a = _mm512_set1_pch(0.0, 1.0);
20260 let b = _mm512_set1_pch(0.0, 2.0);
20261 let c = _mm512_set1_pch(0.0, 3.0);
20262 let r = _mm512_maskz_fcmadd_pch(0b0101010101010101, a, b, c);
20263 let e = _mm512_setr_ph(
20264 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
20265 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
20266 );
20267 assert_eq_m512h(r, e);
20268 }
20269
20270 #[simd_test(enable = "avx512fp16")]
20271 fn test_mm512_fcmadd_round_pch() {
20272 let a = _mm512_set1_pch(0.0, 1.0);
20273 let b = _mm512_set1_pch(0.0, 2.0);
20274 let c = _mm512_set1_pch(0.0, 3.0);
20275 let r =
20276 _mm512_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
20277 let e = _mm512_set1_pch(2.0, 3.0);
20278 assert_eq_m512h(r, e);
20279 }
20280
20281 #[simd_test(enable = "avx512fp16")]
20282 fn test_mm512_mask_fcmadd_round_pch() {
20283 let a = _mm512_set1_pch(0.0, 1.0);
20284 let b = _mm512_set1_pch(0.0, 2.0);
20285 let c = _mm512_set1_pch(0.0, 3.0);
20286 let r = _mm512_mask_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20287 a,
20288 0b0101010101010101,
20289 b,
20290 c,
20291 );
20292 let e = _mm512_setr_ph(
20293 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
20294 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
20295 );
20296 assert_eq_m512h(r, e);
20297 }
20298
20299 #[simd_test(enable = "avx512fp16")]
20300 fn test_mm512_mask3_fcmadd_round_pch() {
20301 let a = _mm512_set1_pch(0.0, 1.0);
20302 let b = _mm512_set1_pch(0.0, 2.0);
20303 let c = _mm512_set1_pch(0.0, 3.0);
20304 let r = _mm512_mask3_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20305 a,
20306 b,
20307 c,
20308 0b0101010101010101,
20309 );
20310 let e = _mm512_setr_ph(
20311 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
20312 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
20313 );
20314 assert_eq_m512h(r, e);
20315 }
20316
20317 #[simd_test(enable = "avx512fp16")]
20318 fn test_mm512_maskz_fcmadd_round_pch() {
20319 let a = _mm512_set1_pch(0.0, 1.0);
20320 let b = _mm512_set1_pch(0.0, 2.0);
20321 let c = _mm512_set1_pch(0.0, 3.0);
20322 let r = _mm512_maskz_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20323 0b0101010101010101,
20324 a,
20325 b,
20326 c,
20327 );
20328 let e = _mm512_setr_ph(
20329 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
20330 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
20331 );
20332 assert_eq_m512h(r, e);
20333 }
20334
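// Scalar conjugate complex FMA: the low pair becomes conj(i)*2i + 3i = (2.0, 3.0);
// the upper elements pass through exactly as in the `fmadd_sch` tests above.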
20335 #[simd_test(enable = "avx512fp16,avx512vl")]
20336 fn test_mm_fcmadd_sch() {
20337 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20338 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20339 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20340 let r = _mm_fcmadd_sch(a, b, c);
20341 let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20342 assert_eq_m128h(r, e);
20343 }
20344
20345 #[simd_test(enable = "avx512fp16,avx512vl")]
20346 fn test_mm_mask_fcmadd_sch() {
20347 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20348 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20349 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20350 let r = _mm_mask_fcmadd_sch(a, 0, b, c);
20351 let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20352 assert_eq_m128h(r, e);
20353 let r = _mm_mask_fcmadd_sch(a, 1, b, c);
20354 let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20355 assert_eq_m128h(r, e);
20356 }
20357
20358 #[simd_test(enable = "avx512fp16,avx512vl")]
20359 fn test_mm_mask3_fcmadd_sch() {
20360 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20361 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20362 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20363 let r = _mm_mask3_fcmadd_sch(a, b, c, 0);
20364 let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20365 assert_eq_m128h(r, e);
20366 let r = _mm_mask3_fcmadd_sch(a, b, c, 1);
20367 let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20368 assert_eq_m128h(r, e);
20369 }
20370
20371 #[simd_test(enable = "avx512fp16,avx512vl")]
20372 fn test_mm_maskz_fcmadd_sch() {
20373 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20374 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20375 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20376 let r = _mm_maskz_fcmadd_sch(0, a, b, c);
20377 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20378 assert_eq_m128h(r, e);
20379 let r = _mm_maskz_fcmadd_sch(1, a, b, c);
20380 let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20381 assert_eq_m128h(r, e);
20382 }
20383
20384 #[simd_test(enable = "avx512fp16,avx512vl")]
20385 fn test_mm_fcmadd_round_sch() {
20386 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20387 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20388 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20389 let r = _mm_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
20390 let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20391 assert_eq_m128h(r, e);
20392 }
20393
20394 #[simd_test(enable = "avx512fp16,avx512vl")]
20395 fn test_mm_mask_fcmadd_round_sch() {
20396 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20397 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20398 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20399 let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20400 a, 0, b, c,
20401 );
20402 let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20403 assert_eq_m128h(r, e);
20404 let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20405 a, 1, b, c,
20406 );
20407 let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20408 assert_eq_m128h(r, e);
20409 }
20410
20411 #[simd_test(enable = "avx512fp16,avx512vl")]
20412 fn test_mm_mask3_fcmadd_round_sch() {
20413 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20414 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20415 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20416 let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20417 a, b, c, 0,
20418 );
20419 let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20420 assert_eq_m128h(r, e);
20421 let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20422 a, b, c, 1,
20423 );
20424 let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20425 assert_eq_m128h(r, e);
20426 }
20427
20428 #[simd_test(enable = "avx512fp16,avx512vl")]
20429 fn test_mm_maskz_fcmadd_round_sch() {
20430 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20431 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20432 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20433 let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20434 0, a, b, c,
20435 );
20436 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20437 assert_eq_m128h(r, e);
20438 let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20439 1, a, b, c,
20440 );
20441 let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20442 assert_eq_m128h(r, e);
20443 }
20444
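// Plain (non-complex) packed FMA on f16 lanes: 1.0 * 2.0 + 3.0 = 5.0 per element.
// Here each mask bit controls a single f16 element, unlike the `_pch` tests where a
// bit covers a whole (real, imag) pair.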
20445 #[simd_test(enable = "avx512fp16,avx512vl")]
20446 const fn test_mm_fmadd_ph() {
20447 let a = _mm_set1_ph(1.0);
20448 let b = _mm_set1_ph(2.0);
20449 let c = _mm_set1_ph(3.0);
20450 let r = _mm_fmadd_ph(a, b, c);
20451 let e = _mm_set1_ph(5.0);
20452 assert_eq_m128h(r, e);
20453 }
20454
20455 #[simd_test(enable = "avx512fp16,avx512vl")]
20456 const fn test_mm_mask_fmadd_ph() {
20457 let a = _mm_set1_ph(1.0);
20458 let b = _mm_set1_ph(2.0);
20459 let c = _mm_set1_ph(3.0);
20460 let r = _mm_mask_fmadd_ph(a, 0b01010101, b, c);
20461 let e = _mm_set_ph(1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0);
20462 assert_eq_m128h(r, e);
20463 }
20464
20465 #[simd_test(enable = "avx512fp16,avx512vl")]
20466 const fn test_mm_mask3_fmadd_ph() {
20467 let a = _mm_set1_ph(1.0);
20468 let b = _mm_set1_ph(2.0);
20469 let c = _mm_set1_ph(3.0);
20470 let r = _mm_mask3_fmadd_ph(a, b, c, 0b01010101);
20471 let e = _mm_set_ph(3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0);
20472 assert_eq_m128h(r, e);
20473 }
20474
20475 #[simd_test(enable = "avx512fp16,avx512vl")]
20476 const fn test_mm_maskz_fmadd_ph() {
20477 let a = _mm_set1_ph(1.0);
20478 let b = _mm_set1_ph(2.0);
20479 let c = _mm_set1_ph(3.0);
20480 let r = _mm_maskz_fmadd_ph(0b01010101, a, b, c);
20481 let e = _mm_set_ph(0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0);
20482 assert_eq_m128h(r, e);
20483 }
20484
20485 #[simd_test(enable = "avx512fp16,avx512vl")]
20486 const fn test_mm256_fmadd_ph() {
20487 let a = _mm256_set1_ph(1.0);
20488 let b = _mm256_set1_ph(2.0);
20489 let c = _mm256_set1_ph(3.0);
20490 let r = _mm256_fmadd_ph(a, b, c);
20491 let e = _mm256_set1_ph(5.0);
20492 assert_eq_m256h(r, e);
20493 }
20494
20495 #[simd_test(enable = "avx512fp16,avx512vl")]
20496 const fn test_mm256_mask_fmadd_ph() {
20497 let a = _mm256_set1_ph(1.0);
20498 let b = _mm256_set1_ph(2.0);
20499 let c = _mm256_set1_ph(3.0);
20500 let r = _mm256_mask_fmadd_ph(a, 0b0101010101010101, b, c);
20501 let e = _mm256_set_ph(
20502 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
20503 );
20504 assert_eq_m256h(r, e);
20505 }
20506
20507 #[simd_test(enable = "avx512fp16,avx512vl")]
20508 const fn test_mm256_mask3_fmadd_ph() {
20509 let a = _mm256_set1_ph(1.0);
20510 let b = _mm256_set1_ph(2.0);
20511 let c = _mm256_set1_ph(3.0);
20512 let r = _mm256_mask3_fmadd_ph(a, b, c, 0b0101010101010101);
20513 let e = _mm256_set_ph(
20514 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
20515 );
20516 assert_eq_m256h(r, e);
20517 }
20518
20519 #[simd_test(enable = "avx512fp16,avx512vl")]
20520 const fn test_mm256_maskz_fmadd_ph() {
20521 let a = _mm256_set1_ph(1.0);
20522 let b = _mm256_set1_ph(2.0);
20523 let c = _mm256_set1_ph(3.0);
20524 let r = _mm256_maskz_fmadd_ph(0b0101010101010101, a, b, c);
20525 let e = _mm256_set_ph(
20526 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
20527 );
20528 assert_eq_m256h(r, e);
20529 }
20530
20531 #[simd_test(enable = "avx512fp16")]
20532 const fn test_mm512_fmadd_ph() {
20533 let a = _mm512_set1_ph(1.0);
20534 let b = _mm512_set1_ph(2.0);
20535 let c = _mm512_set1_ph(3.0);
20536 let r = _mm512_fmadd_ph(a, b, c);
20537 let e = _mm512_set1_ph(5.0);
20538 assert_eq_m512h(r, e);
20539 }
20540
20541 #[simd_test(enable = "avx512fp16")]
20542 const fn test_mm512_mask_fmadd_ph() {
20543 let a = _mm512_set1_ph(1.0);
20544 let b = _mm512_set1_ph(2.0);
20545 let c = _mm512_set1_ph(3.0);
20546 let r = _mm512_mask_fmadd_ph(a, 0b01010101010101010101010101010101, b, c);
20547 let e = _mm512_set_ph(
20548 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
20549 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
20550 );
20551 assert_eq_m512h(r, e);
20552 }
20553
20554 #[simd_test(enable = "avx512fp16")]
20555 const fn test_mm512_mask3_fmadd_ph() {
20556 let a = _mm512_set1_ph(1.0);
20557 let b = _mm512_set1_ph(2.0);
20558 let c = _mm512_set1_ph(3.0);
20559 let r = _mm512_mask3_fmadd_ph(a, b, c, 0b01010101010101010101010101010101);
20560 let e = _mm512_set_ph(
20561 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
20562 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
20563 );
20564 assert_eq_m512h(r, e);
20565 }
20566
20567 #[simd_test(enable = "avx512fp16")]
20568 const fn test_mm512_maskz_fmadd_ph() {
20569 let a = _mm512_set1_ph(1.0);
20570 let b = _mm512_set1_ph(2.0);
20571 let c = _mm512_set1_ph(3.0);
20572 let r = _mm512_maskz_fmadd_ph(0b01010101010101010101010101010101, a, b, c);
20573 let e = _mm512_set_ph(
20574 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
20575 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
20576 );
20577 assert_eq_m512h(r, e);
20578 }
20579
20580 #[simd_test(enable = "avx512fp16")]
20581 fn test_mm512_fmadd_round_ph() {
20582 let a = _mm512_set1_ph(1.0);
20583 let b = _mm512_set1_ph(2.0);
20584 let c = _mm512_set1_ph(3.0);
20585 let r = _mm512_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
20586 let e = _mm512_set1_ph(5.0);
20587 assert_eq_m512h(r, e);
20588 }
20589
20590 #[simd_test(enable = "avx512fp16")]
20591 fn test_mm512_mask_fmadd_round_ph() {
20592 let a = _mm512_set1_ph(1.0);
20593 let b = _mm512_set1_ph(2.0);
20594 let c = _mm512_set1_ph(3.0);
20595 let r = _mm512_mask_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20596 a,
20597 0b01010101010101010101010101010101,
20598 b,
20599 c,
20600 );
20601 let e = _mm512_set_ph(
20602 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
20603 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
20604 );
20605 assert_eq_m512h(r, e);
20606 }
20607
20608 #[simd_test(enable = "avx512fp16")]
20609 fn test_mm512_mask3_fmadd_round_ph() {
20610 let a = _mm512_set1_ph(1.0);
20611 let b = _mm512_set1_ph(2.0);
20612 let c = _mm512_set1_ph(3.0);
20613 let r = _mm512_mask3_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20614 a,
20615 b,
20616 c,
20617 0b01010101010101010101010101010101,
20618 );
20619 let e = _mm512_set_ph(
20620 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
20621 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
20622 );
20623 assert_eq_m512h(r, e);
20624 }
20625
20626 #[simd_test(enable = "avx512fp16")]
20627 fn test_mm512_maskz_fmadd_round_ph() {
20628 let a = _mm512_set1_ph(1.0);
20629 let b = _mm512_set1_ph(2.0);
20630 let c = _mm512_set1_ph(3.0);
20631 let r = _mm512_maskz_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20632 0b01010101010101010101010101010101,
20633 a,
20634 b,
20635 c,
20636 );
20637 let e = _mm512_set_ph(
20638 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
20639 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
20640 );
20641 assert_eq_m512h(r, e);
20642 }
20643
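// Scalar f16 FMA (`_sh`): element 0 becomes 1.0 * 2.0 + 3.0 = 5.0; elements 1..=7
// are copied from `a` (or from `c` in the mask3 form), which the sentinel values
// 10.0..16.0 and 30.0..36.0 make easy to check.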
20644 #[simd_test(enable = "avx512fp16,avx512vl")]
20645 const fn test_mm_fmadd_sh() {
20646 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20647 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20648 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20649 let r = _mm_fmadd_sh(a, b, c);
20650 let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
20651 assert_eq_m128h(r, e);
20652 }
20653
20654 #[simd_test(enable = "avx512fp16,avx512vl")]
20655 const fn test_mm_mask_fmadd_sh() {
20656 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20657 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20658 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20659 let r = _mm_mask_fmadd_sh(a, 0, b, c);
20660 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20661 assert_eq_m128h(r, e);
20662 let r = _mm_mask_fmadd_sh(a, 1, b, c);
20663 let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
20664 assert_eq_m128h(r, e);
20665 }
20666
20667 #[simd_test(enable = "avx512fp16,avx512vl")]
20668 const fn test_mm_mask3_fmadd_sh() {
20669 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20670 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20671 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20672 let r = _mm_mask3_fmadd_sh(a, b, c, 0);
20673 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20674 assert_eq_m128h(r, e);
20675 let r = _mm_mask3_fmadd_sh(a, b, c, 1);
20676 let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.);
20677 assert_eq_m128h(r, e);
20678 }
20679
20680 #[simd_test(enable = "avx512fp16,avx512vl")]
20681 const fn test_mm_maskz_fmadd_sh() {
20682 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20683 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20684 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20685 let r = _mm_maskz_fmadd_sh(0, a, b, c);
20686 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
20687 assert_eq_m128h(r, e);
20688 let r = _mm_maskz_fmadd_sh(1, a, b, c);
20689 let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
20690 assert_eq_m128h(r, e);
20691 }
20692
20693 #[simd_test(enable = "avx512fp16,avx512vl")]
20694 fn test_mm_fmadd_round_sh() {
20695 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20696 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20697 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20698 let r = _mm_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
20699 let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
20700 assert_eq_m128h(r, e);
20701 }
20702
20703 #[simd_test(enable = "avx512fp16,avx512vl")]
20704 fn test_mm_mask_fmadd_round_sh() {
20705 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20706 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20707 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20708 let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20709 a, 0, b, c,
20710 );
20711 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20712 assert_eq_m128h(r, e);
20713 let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20714 a, 1, b, c,
20715 );
20716 let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
20717 assert_eq_m128h(r, e);
20718 }
20719
20720 #[simd_test(enable = "avx512fp16,avx512vl")]
20721 fn test_mm_mask3_fmadd_round_sh() {
20722 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20723 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20724 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20725 let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20726 a, b, c, 0,
20727 );
20728 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20729 assert_eq_m128h(r, e);
20730 let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20731 a, b, c, 1,
20732 );
20733 let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.);
20734 assert_eq_m128h(r, e);
20735 }
20736
20737 #[simd_test(enable = "avx512fp16,avx512vl")]
20738 fn test_mm_maskz_fmadd_round_sh() {
20739 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20740 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20741 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20742 let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20743 0, a, b, c,
20744 );
20745 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
20746 assert_eq_m128h(r, e);
20747 let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20748 1, a, b, c,
20749 );
20750 let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
20751 assert_eq_m128h(r, e);
20752 }
20753
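// `fmsub` computes a*b - c, so 1.0 * 2.0 - 3.0 = -1.0 in every selected lane.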
20754 #[simd_test(enable = "avx512fp16,avx512vl")]
20755 const fn test_mm_fmsub_ph() {
20756 let a = _mm_set1_ph(1.0);
20757 let b = _mm_set1_ph(2.0);
20758 let c = _mm_set1_ph(3.0);
20759 let r = _mm_fmsub_ph(a, b, c);
20760 let e = _mm_set1_ph(-1.0);
20761 assert_eq_m128h(r, e);
20762 }
20763
20764 #[simd_test(enable = "avx512fp16,avx512vl")]
20765 const fn test_mm_mask_fmsub_ph() {
20766 let a = _mm_set1_ph(1.0);
20767 let b = _mm_set1_ph(2.0);
20768 let c = _mm_set1_ph(3.0);
20769 let r = _mm_mask_fmsub_ph(a, 0b01010101, b, c);
20770 let e = _mm_set_ph(1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0);
20771 assert_eq_m128h(r, e);
20772 }
20773
20774 #[simd_test(enable = "avx512fp16,avx512vl")]
20775 const fn test_mm_mask3_fmsub_ph() {
20776 let a = _mm_set1_ph(1.0);
20777 let b = _mm_set1_ph(2.0);
20778 let c = _mm_set1_ph(3.0);
20779 let r = _mm_mask3_fmsub_ph(a, b, c, 0b01010101);
20780 let e = _mm_set_ph(3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0);
20781 assert_eq_m128h(r, e);
20782 }
20783
20784 #[simd_test(enable = "avx512fp16,avx512vl")]
20785 const fn test_mm_maskz_fmsub_ph() {
20786 let a = _mm_set1_ph(1.0);
20787 let b = _mm_set1_ph(2.0);
20788 let c = _mm_set1_ph(3.0);
20789 let r = _mm_maskz_fmsub_ph(0b01010101, a, b, c);
20790 let e = _mm_set_ph(0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0);
20791 assert_eq_m128h(r, e);
20792 }
20793
20794 #[simd_test(enable = "avx512fp16,avx512vl")]
20795 const fn test_mm256_fmsub_ph() {
20796 let a = _mm256_set1_ph(1.0);
20797 let b = _mm256_set1_ph(2.0);
20798 let c = _mm256_set1_ph(3.0);
20799 let r = _mm256_fmsub_ph(a, b, c);
20800 let e = _mm256_set1_ph(-1.0);
20801 assert_eq_m256h(r, e);
20802 }
20803
20804 #[simd_test(enable = "avx512fp16,avx512vl")]
20805 const fn test_mm256_mask_fmsub_ph() {
20806 let a = _mm256_set1_ph(1.0);
20807 let b = _mm256_set1_ph(2.0);
20808 let c = _mm256_set1_ph(3.0);
20809 let r = _mm256_mask_fmsub_ph(a, 0b0101010101010101, b, c);
20810 let e = _mm256_set_ph(
20811 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
20812 );
20813 assert_eq_m256h(r, e);
20814 }
20815
20816 #[simd_test(enable = "avx512fp16,avx512vl")]
20817 const fn test_mm256_mask3_fmsub_ph() {
20818 let a = _mm256_set1_ph(1.0);
20819 let b = _mm256_set1_ph(2.0);
20820 let c = _mm256_set1_ph(3.0);
20821 let r = _mm256_mask3_fmsub_ph(a, b, c, 0b0101010101010101);
20822 let e = _mm256_set_ph(
20823 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
20824 );
20825 assert_eq_m256h(r, e);
20826 }
20827
20828 #[simd_test(enable = "avx512fp16,avx512vl")]
20829 const fn test_mm256_maskz_fmsub_ph() {
20830 let a = _mm256_set1_ph(1.0);
20831 let b = _mm256_set1_ph(2.0);
20832 let c = _mm256_set1_ph(3.0);
20833 let r = _mm256_maskz_fmsub_ph(0b0101010101010101, a, b, c);
20834 let e = _mm256_set_ph(
20835 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
20836 );
20837 assert_eq_m256h(r, e);
20838 }
20839
20840 #[simd_test(enable = "avx512fp16")]
20841 const fn test_mm512_fmsub_ph() {
20842 let a = _mm512_set1_ph(1.0);
20843 let b = _mm512_set1_ph(2.0);
20844 let c = _mm512_set1_ph(3.0);
20845 let r = _mm512_fmsub_ph(a, b, c);
20846 let e = _mm512_set1_ph(-1.0);
20847 assert_eq_m512h(r, e);
20848 }
20849
20850 #[simd_test(enable = "avx512fp16")]
20851 const fn test_mm512_mask_fmsub_ph() {
20852 let a = _mm512_set1_ph(1.0);
20853 let b = _mm512_set1_ph(2.0);
20854 let c = _mm512_set1_ph(3.0);
20855 let r = _mm512_mask_fmsub_ph(a, 0b01010101010101010101010101010101, b, c);
20856 let e = _mm512_set_ph(
20857 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
20858 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
20859 );
20860 assert_eq_m512h(r, e);
20861 }
20862
20863 #[simd_test(enable = "avx512fp16")]
20864 const fn test_mm512_mask3_fmsub_ph() {
20865 let a = _mm512_set1_ph(1.0);
20866 let b = _mm512_set1_ph(2.0);
20867 let c = _mm512_set1_ph(3.0);
20868 let r = _mm512_mask3_fmsub_ph(a, b, c, 0b01010101010101010101010101010101);
20869 let e = _mm512_set_ph(
20870 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
20871 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
20872 );
20873 assert_eq_m512h(r, e);
20874 }
20875
20876 #[simd_test(enable = "avx512fp16")]
20877 const fn test_mm512_maskz_fmsub_ph() {
20878 let a = _mm512_set1_ph(1.0);
20879 let b = _mm512_set1_ph(2.0);
20880 let c = _mm512_set1_ph(3.0);
20881 let r = _mm512_maskz_fmsub_ph(0b01010101010101010101010101010101, a, b, c);
20882 let e = _mm512_set_ph(
20883 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
20884 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
20885 );
20886 assert_eq_m512h(r, e);
20887 }
20888
20889 #[simd_test(enable = "avx512fp16")]
20890 fn test_mm512_fmsub_round_ph() {
20891 let a = _mm512_set1_ph(1.0);
20892 let b = _mm512_set1_ph(2.0);
20893 let c = _mm512_set1_ph(3.0);
20894 let r = _mm512_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
20895 let e = _mm512_set1_ph(-1.0);
20896 assert_eq_m512h(r, e);
20897 }
20898
20899 #[simd_test(enable = "avx512fp16")]
20900 fn test_mm512_mask_fmsub_round_ph() {
20901 let a = _mm512_set1_ph(1.0);
20902 let b = _mm512_set1_ph(2.0);
20903 let c = _mm512_set1_ph(3.0);
20904 let r = _mm512_mask_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20905 a,
20906 0b01010101010101010101010101010101,
20907 b,
20908 c,
20909 );
20910 let e = _mm512_set_ph(
20911 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
20912 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
20913 );
20914 assert_eq_m512h(r, e);
20915 }
20916
20917 #[simd_test(enable = "avx512fp16")]
20918 fn test_mm512_mask3_fmsub_round_ph() {
20919 let a = _mm512_set1_ph(1.0);
20920 let b = _mm512_set1_ph(2.0);
20921 let c = _mm512_set1_ph(3.0);
20922 let r = _mm512_mask3_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20923 a,
20924 b,
20925 c,
20926 0b01010101010101010101010101010101,
20927 );
20928 let e = _mm512_set_ph(
20929 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
20930 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
20931 );
20932 assert_eq_m512h(r, e);
20933 }
20934
20935 #[simd_test(enable = "avx512fp16")]
20936 fn test_mm512_maskz_fmsub_round_ph() {
20937 let a = _mm512_set1_ph(1.0);
20938 let b = _mm512_set1_ph(2.0);
20939 let c = _mm512_set1_ph(3.0);
20940 let r = _mm512_maskz_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20941 0b01010101010101010101010101010101,
20942 a,
20943 b,
20944 c,
20945 );
20946 let e = _mm512_set_ph(
20947 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
20948 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
20949 );
20950 assert_eq_m512h(r, e);
20951 }
20952
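// Scalar fused multiply-subtract: element 0 is 1.0 * 2.0 - 3.0 = -1.0; the upper
// elements pass through as in the `fmadd_sh` tests.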
20953 #[simd_test(enable = "avx512fp16,avx512vl")]
20954 const fn test_mm_fmsub_sh() {
20955 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20956 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20957 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20958 let r = _mm_fmsub_sh(a, b, c);
20959 let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
20960 assert_eq_m128h(r, e);
20961 }
20962
20963 #[simd_test(enable = "avx512fp16,avx512vl")]
20964 const fn test_mm_mask_fmsub_sh() {
20965 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20966 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20967 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20968 let r = _mm_mask_fmsub_sh(a, 0, b, c);
20969 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20970 assert_eq_m128h(r, e);
20971 let r = _mm_mask_fmsub_sh(a, 1, b, c);
20972 let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
20973 assert_eq_m128h(r, e);
20974 }
20975
20976 #[simd_test(enable = "avx512fp16,avx512vl")]
20977 const fn test_mm_mask3_fmsub_sh() {
20978 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20979 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20980 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20981 let r = _mm_mask3_fmsub_sh(a, b, c, 0);
20982 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20983 assert_eq_m128h(r, e);
20984 let r = _mm_mask3_fmsub_sh(a, b, c, 1);
20985 let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
20986 assert_eq_m128h(r, e);
20987 }
20988
20989 #[simd_test(enable = "avx512fp16,avx512vl")]
20990 const fn test_mm_maskz_fmsub_sh() {
20991 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20992 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20993 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20994 let r = _mm_maskz_fmsub_sh(0, a, b, c);
20995 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
20996 assert_eq_m128h(r, e);
20997 let r = _mm_maskz_fmsub_sh(1, a, b, c);
20998 let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
20999 assert_eq_m128h(r, e);
21000 }
21001
21002 #[simd_test(enable = "avx512fp16,avx512vl")]
21003 fn test_mm_fmsub_round_sh() {
21004 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21005 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21006 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21007 let r = _mm_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
21008 let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
21009 assert_eq_m128h(r, e);
21010 }
21011
21012 #[simd_test(enable = "avx512fp16,avx512vl")]
21013 fn test_mm_mask_fmsub_round_sh() {
21014 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21015 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21016 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21017 let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21018 a, 0, b, c,
21019 );
21020 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21021 assert_eq_m128h(r, e);
21022 let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21023 a, 1, b, c,
21024 );
21025 let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
21026 assert_eq_m128h(r, e);
21027 }
21028
21029 #[simd_test(enable = "avx512fp16,avx512vl")]
21030 fn test_mm_mask3_fmsub_round_sh() {
21031 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21032 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21033 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21034 let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21035 a, b, c, 0,
21036 );
21037 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21038 assert_eq_m128h(r, e);
21039 let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21040 a, b, c, 1,
21041 );
21042 let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
21043 assert_eq_m128h(r, e);
21044 }
21045
21046 #[simd_test(enable = "avx512fp16,avx512vl")]
21047 fn test_mm_maskz_fmsub_round_sh() {
21048 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21049 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21050 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21051 let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21052 0, a, b, c,
21053 );
21054 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
21055 assert_eq_m128h(r, e);
21056 let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21057 1, a, b, c,
21058 );
21059 let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
21060 assert_eq_m128h(r, e);
21061 }
21062
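// `fnmadd` negates the product: -(a*b) + c = -(1.0*2.0) + 3.0 = 1.0. Note that the
// computed value happens to equal `a`, so in the mask test every lane ends up 1.0
// whether or not its mask bit is set.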
21063 #[simd_test(enable = "avx512fp16,avx512vl")]
21064 const fn test_mm_fnmadd_ph() {
21065 let a = _mm_set1_ph(1.0);
21066 let b = _mm_set1_ph(2.0);
21067 let c = _mm_set1_ph(3.0);
21068 let r = _mm_fnmadd_ph(a, b, c);
21069 let e = _mm_set1_ph(1.0);
21070 assert_eq_m128h(r, e);
21071 }
21072
21073 #[simd_test(enable = "avx512fp16,avx512vl")]
21074 const fn test_mm_mask_fnmadd_ph() {
21075 let a = _mm_set1_ph(1.0);
21076 let b = _mm_set1_ph(2.0);
21077 let c = _mm_set1_ph(3.0);
21078 let r = _mm_mask_fnmadd_ph(a, 0b01010101, b, c);
21079 let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
21080 assert_eq_m128h(r, e);
21081 }
21082
21083 #[simd_test(enable = "avx512fp16,avx512vl")]
21084 const fn test_mm_mask3_fnmadd_ph() {
21085 let a = _mm_set1_ph(1.0);
21086 let b = _mm_set1_ph(2.0);
21087 let c = _mm_set1_ph(3.0);
21088 let r = _mm_mask3_fnmadd_ph(a, b, c, 0b01010101);
21089 let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0);
21090 assert_eq_m128h(r, e);
21091 }
21092
21093 #[simd_test(enable = "avx512fp16,avx512vl")]
21094 const fn test_mm_maskz_fnmadd_ph() {
21095 let a = _mm_set1_ph(1.0);
21096 let b = _mm_set1_ph(2.0);
21097 let c = _mm_set1_ph(3.0);
21098 let r = _mm_maskz_fnmadd_ph(0b01010101, a, b, c);
21099 let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
21100 assert_eq_m128h(r, e);
21101 }
21102
21103 #[simd_test(enable = "avx512fp16,avx512vl")]
21104 const fn test_mm256_fnmadd_ph() {
21105 let a = _mm256_set1_ph(1.0);
21106 let b = _mm256_set1_ph(2.0);
21107 let c = _mm256_set1_ph(3.0);
21108 let r = _mm256_fnmadd_ph(a, b, c);
21109 let e = _mm256_set1_ph(1.0);
21110 assert_eq_m256h(r, e);
21111 }
21112
21113 #[simd_test(enable = "avx512fp16,avx512vl")]
21114 const fn test_mm256_mask_fnmadd_ph() {
21115 let a = _mm256_set1_ph(1.0);
21116 let b = _mm256_set1_ph(2.0);
21117 let c = _mm256_set1_ph(3.0);
21118 let r = _mm256_mask_fnmadd_ph(a, 0b0101010101010101, b, c);
21119 let e = _mm256_set_ph(
21120 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
21121 );
21122 assert_eq_m256h(r, e);
21123 }
21124
21125 #[simd_test(enable = "avx512fp16,avx512vl")]
21126 const fn test_mm256_mask3_fnmadd_ph() {
21127 let a = _mm256_set1_ph(1.0);
21128 let b = _mm256_set1_ph(2.0);
21129 let c = _mm256_set1_ph(3.0);
21130 let r = _mm256_mask3_fnmadd_ph(a, b, c, 0b0101010101010101);
21131 let e = _mm256_set_ph(
21132 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
21133 );
21134 assert_eq_m256h(r, e);
21135 }
21136
21137 #[simd_test(enable = "avx512fp16,avx512vl")]
21138 const fn test_mm256_maskz_fnmadd_ph() {
21139 let a = _mm256_set1_ph(1.0);
21140 let b = _mm256_set1_ph(2.0);
21141 let c = _mm256_set1_ph(3.0);
21142 let r = _mm256_maskz_fnmadd_ph(0b0101010101010101, a, b, c);
21143 let e = _mm256_set_ph(
21144 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
21145 );
21146 assert_eq_m256h(r, e);
21147 }
21148
21149 #[simd_test(enable = "avx512fp16")]
21150 const fn test_mm512_fnmadd_ph() {
21151 let a = _mm512_set1_ph(1.0);
21152 let b = _mm512_set1_ph(2.0);
21153 let c = _mm512_set1_ph(3.0);
21154 let r = _mm512_fnmadd_ph(a, b, c);
21155 let e = _mm512_set1_ph(1.0);
21156 assert_eq_m512h(r, e);
21157 }
21158
21159 #[simd_test(enable = "avx512fp16")]
21160 const fn test_mm512_mask_fnmadd_ph() {
21161 let a = _mm512_set1_ph(1.0);
21162 let b = _mm512_set1_ph(2.0);
21163 let c = _mm512_set1_ph(3.0);
21164 let r = _mm512_mask_fnmadd_ph(a, 0b01010101010101010101010101010101, b, c);
21165 let e = _mm512_set_ph(
21166 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
21167 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
21168 );
21169 assert_eq_m512h(r, e);
21170 }
21171
21172 #[simd_test(enable = "avx512fp16")]
21173 const fn test_mm512_mask3_fnmadd_ph() {
21174 let a = _mm512_set1_ph(1.0);
21175 let b = _mm512_set1_ph(2.0);
21176 let c = _mm512_set1_ph(3.0);
21177 let r = _mm512_mask3_fnmadd_ph(a, b, c, 0b01010101010101010101010101010101);
21178 let e = _mm512_set_ph(
21179 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
21180 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
21181 );
21182 assert_eq_m512h(r, e);
21183 }
21184
21185 #[simd_test(enable = "avx512fp16")]
21186 const fn test_mm512_maskz_fnmadd_ph() {
21187 let a = _mm512_set1_ph(1.0);
21188 let b = _mm512_set1_ph(2.0);
21189 let c = _mm512_set1_ph(3.0);
21190 let r = _mm512_maskz_fnmadd_ph(0b01010101010101010101010101010101, a, b, c);
21191 let e = _mm512_set_ph(
21192 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
21193 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
21194 );
21195 assert_eq_m512h(r, e);
21196 }
21197
21198 #[simd_test(enable = "avx512fp16")]
21199 fn test_mm512_fnmadd_round_ph() {
21200 let a = _mm512_set1_ph(1.0);
21201 let b = _mm512_set1_ph(2.0);
21202 let c = _mm512_set1_ph(3.0);
21203 let r =
21204 _mm512_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
21205 let e = _mm512_set1_ph(1.0);
21206 assert_eq_m512h(r, e);
21207 }
21208
21209 #[simd_test(enable = "avx512fp16")]
21210 fn test_mm512_mask_fnmadd_round_ph() {
21211 let a = _mm512_set1_ph(1.0);
21212 let b = _mm512_set1_ph(2.0);
21213 let c = _mm512_set1_ph(3.0);
21214 let r = _mm512_mask_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21215 a,
21216 0b01010101010101010101010101010101,
21217 b,
21218 c,
21219 );
21220 let e = _mm512_set_ph(
21221 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
21222 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
21223 );
21224 assert_eq_m512h(r, e);
21225 }
21226
21227 #[simd_test(enable = "avx512fp16")]
21228 fn test_mm512_mask3_fnmadd_round_ph() {
21229 let a = _mm512_set1_ph(1.0);
21230 let b = _mm512_set1_ph(2.0);
21231 let c = _mm512_set1_ph(3.0);
21232 let r = _mm512_mask3_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21233 a,
21234 b,
21235 c,
21236 0b01010101010101010101010101010101,
21237 );
21238 let e = _mm512_set_ph(
21239 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
21240 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
21241 );
21242 assert_eq_m512h(r, e);
21243 }
21244
21245 #[simd_test(enable = "avx512fp16")]
21246 fn test_mm512_maskz_fnmadd_round_ph() {
21247 let a = _mm512_set1_ph(1.0);
21248 let b = _mm512_set1_ph(2.0);
21249 let c = _mm512_set1_ph(3.0);
21250 let r = _mm512_maskz_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21251 0b01010101010101010101010101010101,
21252 a,
21253 b,
21254 c,
21255 );
21256 let e = _mm512_set_ph(
21257 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
21258 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
21259 );
21260 assert_eq_m512h(r, e);
21261 }
21262
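// The `_sh` scalar variants operate only on element 0: the low lane is -(a[0] * b[0]) + c[0] and
// the upper seven lanes are copied from `a` (from `c` for the `mask3` forms). Only bit 0 of the
// mask is consulted, which is why these tests pass masks of 0 and 1.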
21263 #[simd_test(enable = "avx512fp16,avx512vl")]
21264 const fn test_mm_fnmadd_sh() {
21265 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21266 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21267 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21268 let r = _mm_fnmadd_sh(a, b, c);
21269 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21270 assert_eq_m128h(r, e);
21271 }
21272
21273 #[simd_test(enable = "avx512fp16,avx512vl")]
21274 const fn test_mm_mask_fnmadd_sh() {
21275 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21276 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21277 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21278 let r = _mm_mask_fnmadd_sh(a, 0, b, c);
21279 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21280 assert_eq_m128h(r, e);
21281 let r = _mm_mask_fnmadd_sh(a, 1, b, c);
21282 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21283 assert_eq_m128h(r, e);
21284 }
21285
21286 #[simd_test(enable = "avx512fp16,avx512vl")]
21287 const fn test_mm_mask3_fnmadd_sh() {
21288 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21289 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21290 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21291 let r = _mm_mask3_fnmadd_sh(a, b, c, 0);
21292 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21293 assert_eq_m128h(r, e);
21294 let r = _mm_mask3_fnmadd_sh(a, b, c, 1);
21295 let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
21296 assert_eq_m128h(r, e);
21297 }
21298
21299 #[simd_test(enable = "avx512fp16,avx512vl")]
21300 const fn test_mm_maskz_fnmadd_sh() {
21301 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21302 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21303 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21304 let r = _mm_maskz_fnmadd_sh(0, a, b, c);
21305 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
21306 assert_eq_m128h(r, e);
21307 let r = _mm_maskz_fnmadd_sh(1, a, b, c);
21308 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21309 assert_eq_m128h(r, e);
21310 }
21311
21312 #[simd_test(enable = "avx512fp16,avx512vl")]
21313 fn test_mm_fnmadd_round_sh() {
21314 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21315 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21316 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21317 let r = _mm_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
21318 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21319 assert_eq_m128h(r, e);
21320 }
21321
21322 #[simd_test(enable = "avx512fp16,avx512vl")]
21323 fn test_mm_mask_fnmadd_round_sh() {
21324 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21325 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21326 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21327 let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21328 a, 0, b, c,
21329 );
21330 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21331 assert_eq_m128h(r, e);
21332 let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21333 a, 1, b, c,
21334 );
21335 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21336 assert_eq_m128h(r, e);
21337 }
21338
21339 #[simd_test(enable = "avx512fp16,avx512vl")]
21340 fn test_mm_mask3_fnmadd_round_sh() {
21341 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21342 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21343 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21344 let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21345 a, b, c, 0,
21346 );
21347 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21348 assert_eq_m128h(r, e);
21349 let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21350 a, b, c, 1,
21351 );
21352 let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
21353 assert_eq_m128h(r, e);
21354 }
21355
21356 #[simd_test(enable = "avx512fp16,avx512vl")]
21357 fn test_mm_maskz_fnmadd_round_sh() {
21358 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21359 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21360 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21361 let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21362 0, a, b, c,
21363 );
21364 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
21365 assert_eq_m128h(r, e);
21366 let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21367 1, a, b, c,
21368 );
21369 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21370 assert_eq_m128h(r, e);
21371 }
21372
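// fnmsub computes -(a * b) - c per lane: with a = 1.0, b = 2.0, c = 3.0 each computed lane is
// -2.0 - 3.0 = -5.0. Masking follows the same pattern as above (`mask` keeps `a`, `mask3` keeps
// `c`, `maskz` zeroes inactive lanes).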
21373 #[simd_test(enable = "avx512fp16,avx512vl")]
21374 const fn test_mm_fnmsub_ph() {
21375 let a = _mm_set1_ph(1.0);
21376 let b = _mm_set1_ph(2.0);
21377 let c = _mm_set1_ph(3.0);
21378 let r = _mm_fnmsub_ph(a, b, c);
21379 let e = _mm_set1_ph(-5.0);
21380 assert_eq_m128h(r, e);
21381 }
21382
21383 #[simd_test(enable = "avx512fp16,avx512vl")]
21384 const fn test_mm_mask_fnmsub_ph() {
21385 let a = _mm_set1_ph(1.0);
21386 let b = _mm_set1_ph(2.0);
21387 let c = _mm_set1_ph(3.0);
21388 let r = _mm_mask_fnmsub_ph(a, 0b01010101, b, c);
21389 let e = _mm_set_ph(1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0);
21390 assert_eq_m128h(r, e);
21391 }
21392
21393 #[simd_test(enable = "avx512fp16,avx512vl")]
21394 const fn test_mm_mask3_fnmsub_ph() {
21395 let a = _mm_set1_ph(1.0);
21396 let b = _mm_set1_ph(2.0);
21397 let c = _mm_set1_ph(3.0);
21398 let r = _mm_mask3_fnmsub_ph(a, b, c, 0b01010101);
21399 let e = _mm_set_ph(3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0);
21400 assert_eq_m128h(r, e);
21401 }
21402
21403 #[simd_test(enable = "avx512fp16,avx512vl")]
21404 const fn test_mm_maskz_fnmsub_ph() {
21405 let a = _mm_set1_ph(1.0);
21406 let b = _mm_set1_ph(2.0);
21407 let c = _mm_set1_ph(3.0);
21408 let r = _mm_maskz_fnmsub_ph(0b01010101, a, b, c);
21409 let e = _mm_set_ph(0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0);
21410 assert_eq_m128h(r, e);
21411 }
21412
21413 #[simd_test(enable = "avx512fp16,avx512vl")]
21414 const fn test_mm256_fnmsub_ph() {
21415 let a = _mm256_set1_ph(1.0);
21416 let b = _mm256_set1_ph(2.0);
21417 let c = _mm256_set1_ph(3.0);
21418 let r = _mm256_fnmsub_ph(a, b, c);
21419 let e = _mm256_set1_ph(-5.0);
21420 assert_eq_m256h(r, e);
21421 }
21422
21423 #[simd_test(enable = "avx512fp16,avx512vl")]
21424 const fn test_mm256_mask_fnmsub_ph() {
21425 let a = _mm256_set1_ph(1.0);
21426 let b = _mm256_set1_ph(2.0);
21427 let c = _mm256_set1_ph(3.0);
21428 let r = _mm256_mask_fnmsub_ph(a, 0b0101010101010101, b, c);
21429 let e = _mm256_set_ph(
21430 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
21431 );
21432 assert_eq_m256h(r, e);
21433 }
21434
21435 #[simd_test(enable = "avx512fp16,avx512vl")]
21436 const fn test_mm256_mask3_fnmsub_ph() {
21437 let a = _mm256_set1_ph(1.0);
21438 let b = _mm256_set1_ph(2.0);
21439 let c = _mm256_set1_ph(3.0);
21440 let r = _mm256_mask3_fnmsub_ph(a, b, c, 0b0101010101010101);
21441 let e = _mm256_set_ph(
21442 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
21443 );
21444 assert_eq_m256h(r, e);
21445 }
21446
21447 #[simd_test(enable = "avx512fp16,avx512vl")]
21448 const fn test_mm256_maskz_fnmsub_ph() {
21449 let a = _mm256_set1_ph(1.0);
21450 let b = _mm256_set1_ph(2.0);
21451 let c = _mm256_set1_ph(3.0);
21452 let r = _mm256_maskz_fnmsub_ph(0b0101010101010101, a, b, c);
21453 let e = _mm256_set_ph(
21454 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
21455 );
21456 assert_eq_m256h(r, e);
21457 }
21458
21459 #[simd_test(enable = "avx512fp16")]
21460 const fn test_mm512_fnmsub_ph() {
21461 let a = _mm512_set1_ph(1.0);
21462 let b = _mm512_set1_ph(2.0);
21463 let c = _mm512_set1_ph(3.0);
21464 let r = _mm512_fnmsub_ph(a, b, c);
21465 let e = _mm512_set1_ph(-5.0);
21466 assert_eq_m512h(r, e);
21467 }
21468
21469 #[simd_test(enable = "avx512fp16")]
21470 const fn test_mm512_mask_fnmsub_ph() {
21471 let a = _mm512_set1_ph(1.0);
21472 let b = _mm512_set1_ph(2.0);
21473 let c = _mm512_set1_ph(3.0);
21474 let r = _mm512_mask_fnmsub_ph(a, 0b01010101010101010101010101010101, b, c);
21475 let e = _mm512_set_ph(
21476 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
21477 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
21478 );
21479 assert_eq_m512h(r, e);
21480 }
21481
21482 #[simd_test(enable = "avx512fp16")]
21483 const fn test_mm512_mask3_fnmsub_ph() {
21484 let a = _mm512_set1_ph(1.0);
21485 let b = _mm512_set1_ph(2.0);
21486 let c = _mm512_set1_ph(3.0);
21487 let r = _mm512_mask3_fnmsub_ph(a, b, c, 0b01010101010101010101010101010101);
21488 let e = _mm512_set_ph(
21489 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
21490 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
21491 );
21492 assert_eq_m512h(r, e);
21493 }
21494
21495 #[simd_test(enable = "avx512fp16")]
21496 const fn test_mm512_maskz_fnmsub_ph() {
21497 let a = _mm512_set1_ph(1.0);
21498 let b = _mm512_set1_ph(2.0);
21499 let c = _mm512_set1_ph(3.0);
21500 let r = _mm512_maskz_fnmsub_ph(0b01010101010101010101010101010101, a, b, c);
21501 let e = _mm512_set_ph(
21502 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
21503 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
21504 );
21505 assert_eq_m512h(r, e);
21506 }
21507
21508 #[simd_test(enable = "avx512fp16")]
21509 fn test_mm512_fnmsub_round_ph() {
21510 let a = _mm512_set1_ph(1.0);
21511 let b = _mm512_set1_ph(2.0);
21512 let c = _mm512_set1_ph(3.0);
21513 let r =
21514 _mm512_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
21515 let e = _mm512_set1_ph(-5.0);
21516 assert_eq_m512h(r, e);
21517 }
21518
21519 #[simd_test(enable = "avx512fp16")]
21520 fn test_mm512_mask_fnmsub_round_ph() {
21521 let a = _mm512_set1_ph(1.0);
21522 let b = _mm512_set1_ph(2.0);
21523 let c = _mm512_set1_ph(3.0);
21524 let r = _mm512_mask_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21525 a,
21526 0b01010101010101010101010101010101,
21527 b,
21528 c,
21529 );
21530 let e = _mm512_set_ph(
21531 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
21532 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
21533 );
21534 assert_eq_m512h(r, e);
21535 }
21536
21537 #[simd_test(enable = "avx512fp16")]
21538 fn test_mm512_mask3_fnmsub_round_ph() {
21539 let a = _mm512_set1_ph(1.0);
21540 let b = _mm512_set1_ph(2.0);
21541 let c = _mm512_set1_ph(3.0);
21542 let r = _mm512_mask3_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21543 a,
21544 b,
21545 c,
21546 0b01010101010101010101010101010101,
21547 );
21548 let e = _mm512_set_ph(
21549 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
21550 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
21551 );
21552 assert_eq_m512h(r, e);
21553 }
21554
21555 #[simd_test(enable = "avx512fp16")]
21556 fn test_mm512_maskz_fnmsub_round_ph() {
21557 let a = _mm512_set1_ph(1.0);
21558 let b = _mm512_set1_ph(2.0);
21559 let c = _mm512_set1_ph(3.0);
21560 let r = _mm512_maskz_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21561 0b01010101010101010101010101010101,
21562 a,
21563 b,
21564 c,
21565 );
21566 let e = _mm512_set_ph(
21567 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
21568 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
21569 );
21570 assert_eq_m512h(r, e);
21571 }
21572
21573 #[simd_test(enable = "avx512fp16,avx512vl")]
21574 const fn test_mm_fnmsub_sh() {
21575 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21576 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21577 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21578 let r = _mm_fnmsub_sh(a, b, c);
21579 let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
21580 assert_eq_m128h(r, e);
21581 }
21582
21583 #[simd_test(enable = "avx512fp16,avx512vl")]
21584 const fn test_mm_mask_fnmsub_sh() {
21585 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21586 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21587 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21588 let r = _mm_mask_fnmsub_sh(a, 0, b, c);
21589 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21590 assert_eq_m128h(r, e);
21591 let r = _mm_mask_fnmsub_sh(a, 1, b, c);
21592 let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
21593 assert_eq_m128h(r, e);
21594 }
21595
21596 #[simd_test(enable = "avx512fp16,avx512vl")]
21597 const fn test_mm_mask3_fnmsub_sh() {
21598 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21599 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21600 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21601 let r = _mm_mask3_fnmsub_sh(a, b, c, 0);
21602 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21603 assert_eq_m128h(r, e);
21604 let r = _mm_mask3_fnmsub_sh(a, b, c, 1);
21605 let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
21606 assert_eq_m128h(r, e);
21607 }
21608
21609 #[simd_test(enable = "avx512fp16,avx512vl")]
21610 const fn test_mm_maskz_fnmsub_sh() {
21611 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21612 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21613 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21614 let r = _mm_maskz_fnmsub_sh(0, a, b, c);
21615 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
21616 assert_eq_m128h(r, e);
21617 let r = _mm_maskz_fnmsub_sh(1, a, b, c);
21618 let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
21619 assert_eq_m128h(r, e);
21620 }
21621
21622 #[simd_test(enable = "avx512fp16,avx512vl")]
21623 fn test_mm_fnmsub_round_sh() {
21624 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21625 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21626 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21627 let r = _mm_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
21628 let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
21629 assert_eq_m128h(r, e);
21630 }
21631
21632 #[simd_test(enable = "avx512fp16,avx512vl")]
21633 fn test_mm_mask_fnmsub_round_sh() {
21634 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21635 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21636 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21637 let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21638 a, 0, b, c,
21639 );
21640 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21641 assert_eq_m128h(r, e);
21642 let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21643 a, 1, b, c,
21644 );
21645 let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
21646 assert_eq_m128h(r, e);
21647 }
21648
21649 #[simd_test(enable = "avx512fp16,avx512vl")]
21650 fn test_mm_mask3_fnmsub_round_sh() {
21651 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21652 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21653 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21654 let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21655 a, b, c, 0,
21656 );
21657 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21658 assert_eq_m128h(r, e);
21659 let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21660 a, b, c, 1,
21661 );
21662 let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
21663 assert_eq_m128h(r, e);
21664 }
21665
21666 #[simd_test(enable = "avx512fp16,avx512vl")]
21667 fn test_mm_maskz_fnmsub_round_sh() {
21668 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21669 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21670 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21671 let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21672 0, a, b, c,
21673 );
21674 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
21675 assert_eq_m128h(r, e);
21676 let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21677 1, a, b, c,
21678 );
21679 let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
21680 assert_eq_m128h(r, e);
21681 }
21682
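// fmaddsub interleaves subtraction and addition: even-indexed lanes compute a * b - c = -1.0 and
// odd-indexed lanes compute a * b + c = 5.0, which is why the expected vectors alternate
// (recall that `_mm*_set_ph` lists elements from the highest index down to element 0).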
21683 #[simd_test(enable = "avx512fp16,avx512vl")]
21684 const fn test_mm_fmaddsub_ph() {
21685 let a = _mm_set1_ph(1.0);
21686 let b = _mm_set1_ph(2.0);
21687 let c = _mm_set1_ph(3.0);
21688 let r = _mm_fmaddsub_ph(a, b, c);
21689 let e = _mm_set_ph(5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0);
21690 assert_eq_m128h(r, e);
21691 }
21692
21693 #[simd_test(enable = "avx512fp16,avx512vl")]
21694 const fn test_mm_mask_fmaddsub_ph() {
21695 let a = _mm_set1_ph(1.0);
21696 let b = _mm_set1_ph(2.0);
21697 let c = _mm_set1_ph(3.0);
21698 let r = _mm_mask_fmaddsub_ph(a, 0b00110011, b, c);
21699 let e = _mm_set_ph(1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0);
21700 assert_eq_m128h(r, e);
21701 }
21702
21703 #[simd_test(enable = "avx512fp16,avx512vl")]
21704 const fn test_mm_mask3_fmaddsub_ph() {
21705 let a = _mm_set1_ph(1.0);
21706 let b = _mm_set1_ph(2.0);
21707 let c = _mm_set1_ph(3.0);
21708 let r = _mm_mask3_fmaddsub_ph(a, b, c, 0b00110011);
21709 let e = _mm_set_ph(3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0);
21710 assert_eq_m128h(r, e);
21711 }
21712
21713 #[simd_test(enable = "avx512fp16,avx512vl")]
21714 const fn test_mm_maskz_fmaddsub_ph() {
21715 let a = _mm_set1_ph(1.0);
21716 let b = _mm_set1_ph(2.0);
21717 let c = _mm_set1_ph(3.0);
21718 let r = _mm_maskz_fmaddsub_ph(0b00110011, a, b, c);
21719 let e = _mm_set_ph(0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0);
21720 assert_eq_m128h(r, e);
21721 }
21722
21723 #[simd_test(enable = "avx512fp16,avx512vl")]
21724 const fn test_mm256_fmaddsub_ph() {
21725 let a = _mm256_set1_ph(1.0);
21726 let b = _mm256_set1_ph(2.0);
21727 let c = _mm256_set1_ph(3.0);
21728 let r = _mm256_fmaddsub_ph(a, b, c);
21729 let e = _mm256_set_ph(
21730 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
21731 );
21732 assert_eq_m256h(r, e);
21733 }
21734
21735 #[simd_test(enable = "avx512fp16,avx512vl")]
21736 const fn test_mm256_mask_fmaddsub_ph() {
21737 let a = _mm256_set1_ph(1.0);
21738 let b = _mm256_set1_ph(2.0);
21739 let c = _mm256_set1_ph(3.0);
21740 let r = _mm256_mask_fmaddsub_ph(a, 0b0011001100110011, b, c);
21741 let e = _mm256_set_ph(
21742 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
21743 );
21744 assert_eq_m256h(r, e);
21745 }
21746
21747 #[simd_test(enable = "avx512fp16,avx512vl")]
21748 const fn test_mm256_mask3_fmaddsub_ph() {
21749 let a = _mm256_set1_ph(1.0);
21750 let b = _mm256_set1_ph(2.0);
21751 let c = _mm256_set1_ph(3.0);
21752 let r = _mm256_mask3_fmaddsub_ph(a, b, c, 0b0011001100110011);
21753 let e = _mm256_set_ph(
21754 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
21755 );
21756 assert_eq_m256h(r, e);
21757 }
21758
21759 #[simd_test(enable = "avx512fp16,avx512vl")]
21760 const fn test_mm256_maskz_fmaddsub_ph() {
21761 let a = _mm256_set1_ph(1.0);
21762 let b = _mm256_set1_ph(2.0);
21763 let c = _mm256_set1_ph(3.0);
21764 let r = _mm256_maskz_fmaddsub_ph(0b0011001100110011, a, b, c);
21765 let e = _mm256_set_ph(
21766 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
21767 );
21768 assert_eq_m256h(r, e);
21769 }
21770
21771 #[simd_test(enable = "avx512fp16")]
21772 const fn test_mm512_fmaddsub_ph() {
21773 let a = _mm512_set1_ph(1.0);
21774 let b = _mm512_set1_ph(2.0);
21775 let c = _mm512_set1_ph(3.0);
21776 let r = _mm512_fmaddsub_ph(a, b, c);
21777 let e = _mm512_set_ph(
21778 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
21779 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
21780 );
21781 assert_eq_m512h(r, e);
21782 }
21783
21784 #[simd_test(enable = "avx512fp16")]
21785 const fn test_mm512_mask_fmaddsub_ph() {
21786 let a = _mm512_set1_ph(1.0);
21787 let b = _mm512_set1_ph(2.0);
21788 let c = _mm512_set1_ph(3.0);
21789 let r = _mm512_mask_fmaddsub_ph(a, 0b00110011001100110011001100110011, b, c);
21790 let e = _mm512_set_ph(
21791 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
21792 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
21793 );
21794 assert_eq_m512h(r, e);
21795 }
21796
21797 #[simd_test(enable = "avx512fp16")]
21798 const fn test_mm512_mask3_fmaddsub_ph() {
21799 let a = _mm512_set1_ph(1.0);
21800 let b = _mm512_set1_ph(2.0);
21801 let c = _mm512_set1_ph(3.0);
21802 let r = _mm512_mask3_fmaddsub_ph(a, b, c, 0b00110011001100110011001100110011);
21803 let e = _mm512_set_ph(
21804 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
21805 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
21806 );
21807 assert_eq_m512h(r, e);
21808 }
21809
21810 #[simd_test(enable = "avx512fp16")]
21811 const fn test_mm512_maskz_fmaddsub_ph() {
21812 let a = _mm512_set1_ph(1.0);
21813 let b = _mm512_set1_ph(2.0);
21814 let c = _mm512_set1_ph(3.0);
21815 let r = _mm512_maskz_fmaddsub_ph(0b00110011001100110011001100110011, a, b, c);
21816 let e = _mm512_set_ph(
21817 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
21818 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
21819 );
21820 assert_eq_m512h(r, e);
21821 }
21822
21823 #[simd_test(enable = "avx512fp16")]
21824 fn test_mm512_fmaddsub_round_ph() {
21825 let a = _mm512_set1_ph(1.0);
21826 let b = _mm512_set1_ph(2.0);
21827 let c = _mm512_set1_ph(3.0);
21828 let r =
21829 _mm512_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
21830 let e = _mm512_set_ph(
21831 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
21832 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
21833 );
21834 assert_eq_m512h(r, e);
21835 }
21836
21837 #[simd_test(enable = "avx512fp16")]
21838 fn test_mm512_mask_fmaddsub_round_ph() {
21839 let a = _mm512_set1_ph(1.0);
21840 let b = _mm512_set1_ph(2.0);
21841 let c = _mm512_set1_ph(3.0);
21842 let r = _mm512_mask_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21843 a,
21844 0b00110011001100110011001100110011,
21845 b,
21846 c,
21847 );
21848 let e = _mm512_set_ph(
21849 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
21850 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
21851 );
21852 assert_eq_m512h(r, e);
21853 }
21854
21855 #[simd_test(enable = "avx512fp16")]
21856 fn test_mm512_mask3_fmaddsub_round_ph() {
21857 let a = _mm512_set1_ph(1.0);
21858 let b = _mm512_set1_ph(2.0);
21859 let c = _mm512_set1_ph(3.0);
21860 let r = _mm512_mask3_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21861 a,
21862 b,
21863 c,
21864 0b00110011001100110011001100110011,
21865 );
21866 let e = _mm512_set_ph(
21867 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
21868 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
21869 );
21870 assert_eq_m512h(r, e);
21871 }
21872
21873 #[simd_test(enable = "avx512fp16")]
21874 fn test_mm512_maskz_fmaddsub_round_ph() {
21875 let a = _mm512_set1_ph(1.0);
21876 let b = _mm512_set1_ph(2.0);
21877 let c = _mm512_set1_ph(3.0);
21878 let r = _mm512_maskz_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21879 0b00110011001100110011001100110011,
21880 a,
21881 b,
21882 c,
21883 );
21884 let e = _mm512_set_ph(
21885 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
21886 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
21887 );
21888 assert_eq_m512h(r, e);
21889 }
21890
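// fmsubadd is the opposite interleave: even-indexed lanes compute a * b + c = 5.0 and
// odd-indexed lanes compute a * b - c = -1.0, so the alternating pattern is flipped relative to
// fmaddsub above.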
21891 #[simd_test(enable = "avx512fp16,avx512vl")]
21892 const fn test_mm_fmsubadd_ph() {
21893 let a = _mm_set1_ph(1.0);
21894 let b = _mm_set1_ph(2.0);
21895 let c = _mm_set1_ph(3.0);
21896 let r = _mm_fmsubadd_ph(a, b, c);
21897 let e = _mm_set_ph(-1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0);
21898 assert_eq_m128h(r, e);
21899 }
21900
21901 #[simd_test(enable = "avx512fp16,avx512vl")]
21902 const fn test_mm_mask_fmsubadd_ph() {
21903 let a = _mm_set1_ph(1.0);
21904 let b = _mm_set1_ph(2.0);
21905 let c = _mm_set1_ph(3.0);
21906 let r = _mm_mask_fmsubadd_ph(a, 0b00110011, b, c);
21907 let e = _mm_set_ph(1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0);
21908 assert_eq_m128h(r, e);
21909 }
21910
21911 #[simd_test(enable = "avx512fp16,avx512vl")]
21912 const fn test_mm_mask3_fmsubadd_ph() {
21913 let a = _mm_set1_ph(1.0);
21914 let b = _mm_set1_ph(2.0);
21915 let c = _mm_set1_ph(3.0);
21916 let r = _mm_mask3_fmsubadd_ph(a, b, c, 0b00110011);
21917 let e = _mm_set_ph(3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0);
21918 assert_eq_m128h(r, e);
21919 }
21920
21921 #[simd_test(enable = "avx512fp16,avx512vl")]
21922 const fn test_mm_maskz_fmsubadd_ph() {
21923 let a = _mm_set1_ph(1.0);
21924 let b = _mm_set1_ph(2.0);
21925 let c = _mm_set1_ph(3.0);
21926 let r = _mm_maskz_fmsubadd_ph(0b00110011, a, b, c);
21927 let e = _mm_set_ph(0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0);
21928 assert_eq_m128h(r, e);
21929 }
21930
21931 #[simd_test(enable = "avx512fp16,avx512vl")]
21932 const fn test_mm256_fmsubadd_ph() {
21933 let a = _mm256_set1_ph(1.0);
21934 let b = _mm256_set1_ph(2.0);
21935 let c = _mm256_set1_ph(3.0);
21936 let r = _mm256_fmsubadd_ph(a, b, c);
21937 let e = _mm256_set_ph(
21938 -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
21939 );
21940 assert_eq_m256h(r, e);
21941 }
21942
21943 #[simd_test(enable = "avx512fp16,avx512vl")]
21944 const fn test_mm256_mask_fmsubadd_ph() {
21945 let a = _mm256_set1_ph(1.0);
21946 let b = _mm256_set1_ph(2.0);
21947 let c = _mm256_set1_ph(3.0);
21948 let r = _mm256_mask_fmsubadd_ph(a, 0b0011001100110011, b, c);
21949 let e = _mm256_set_ph(
21950 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
21951 );
21952 assert_eq_m256h(r, e);
21953 }
21954
21955 #[simd_test(enable = "avx512fp16,avx512vl")]
21956 const fn test_mm256_mask3_fmsubadd_ph() {
21957 let a = _mm256_set1_ph(1.0);
21958 let b = _mm256_set1_ph(2.0);
21959 let c = _mm256_set1_ph(3.0);
21960 let r = _mm256_mask3_fmsubadd_ph(a, b, c, 0b0011001100110011);
21961 let e = _mm256_set_ph(
21962 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
21963 );
21964 assert_eq_m256h(r, e);
21965 }
21966
21967 #[simd_test(enable = "avx512fp16,avx512vl")]
21968 const fn test_mm256_maskz_fmsubadd_ph() {
21969 let a = _mm256_set1_ph(1.0);
21970 let b = _mm256_set1_ph(2.0);
21971 let c = _mm256_set1_ph(3.0);
21972 let r = _mm256_maskz_fmsubadd_ph(0b0011001100110011, a, b, c);
21973 let e = _mm256_set_ph(
21974 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
21975 );
21976 assert_eq_m256h(r, e);
21977 }
21978
21979 #[simd_test(enable = "avx512fp16")]
21980 const fn test_mm512_fmsubadd_ph() {
21981 let a = _mm512_set1_ph(1.0);
21982 let b = _mm512_set1_ph(2.0);
21983 let c = _mm512_set1_ph(3.0);
21984 let r = _mm512_fmsubadd_ph(a, b, c);
21985 let e = _mm512_set_ph(
21986 -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
21987 -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
21988 );
21989 assert_eq_m512h(r, e);
21990 }
21991
21992 #[simd_test(enable = "avx512fp16")]
21993 const fn test_mm512_mask_fmsubadd_ph() {
21994 let a = _mm512_set1_ph(1.0);
21995 let b = _mm512_set1_ph(2.0);
21996 let c = _mm512_set1_ph(3.0);
21997 let r = _mm512_mask_fmsubadd_ph(a, 0b00110011001100110011001100110011, b, c);
21998 let e = _mm512_set_ph(
21999 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
22000 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
22001 );
22002 assert_eq_m512h(r, e);
22003 }
22004
22005 #[simd_test(enable = "avx512fp16")]
22006 const fn test_mm512_mask3_fmsubadd_ph() {
22007 let a = _mm512_set1_ph(1.0);
22008 let b = _mm512_set1_ph(2.0);
22009 let c = _mm512_set1_ph(3.0);
22010 let r = _mm512_mask3_fmsubadd_ph(a, b, c, 0b00110011001100110011001100110011);
22011 let e = _mm512_set_ph(
22012 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
22013 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
22014 );
22015 assert_eq_m512h(r, e);
22016 }
22017
22018 #[simd_test(enable = "avx512fp16")]
22019 const fn test_mm512_maskz_fmsubadd_ph() {
22020 let a = _mm512_set1_ph(1.0);
22021 let b = _mm512_set1_ph(2.0);
22022 let c = _mm512_set1_ph(3.0);
22023 let r = _mm512_maskz_fmsubadd_ph(0b00110011001100110011001100110011, a, b, c);
22024 let e = _mm512_set_ph(
22025 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
22026 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
22027 );
22028 assert_eq_m512h(r, e);
22029 }
22030
22031 #[simd_test(enable = "avx512fp16")]
22032 fn test_mm512_fmsubadd_round_ph() {
22033 let a = _mm512_set1_ph(1.0);
22034 let b = _mm512_set1_ph(2.0);
22035 let c = _mm512_set1_ph(3.0);
22036 let r =
22037 _mm512_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
22038 let e = _mm512_set_ph(
22039 -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
22040 -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
22041 );
22042 assert_eq_m512h(r, e);
22043 }
22044
22045 #[simd_test(enable = "avx512fp16")]
22046 fn test_mm512_mask_fmsubadd_round_ph() {
22047 let a = _mm512_set1_ph(1.0);
22048 let b = _mm512_set1_ph(2.0);
22049 let c = _mm512_set1_ph(3.0);
22050 let r = _mm512_mask_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22051 a,
22052 0b00110011001100110011001100110011,
22053 b,
22054 c,
22055 );
22056 let e = _mm512_set_ph(
22057 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
22058 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
22059 );
22060 assert_eq_m512h(r, e);
22061 }
22062
22063 #[simd_test(enable = "avx512fp16")]
22064 fn test_mm512_mask3_fmsubadd_round_ph() {
22065 let a = _mm512_set1_ph(1.0);
22066 let b = _mm512_set1_ph(2.0);
22067 let c = _mm512_set1_ph(3.0);
22068 let r = _mm512_mask3_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22069 a,
22070 b,
22071 c,
22072 0b00110011001100110011001100110011,
22073 );
22074 let e = _mm512_set_ph(
22075 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
22076 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
22077 );
22078 assert_eq_m512h(r, e);
22079 }
22080
22081 #[simd_test(enable = "avx512fp16")]
22082 fn test_mm512_maskz_fmsubadd_round_ph() {
22083 let a = _mm512_set1_ph(1.0);
22084 let b = _mm512_set1_ph(2.0);
22085 let c = _mm512_set1_ph(3.0);
22086 let r = _mm512_maskz_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22087 0b00110011001100110011001100110011,
22088 a,
22089 b,
22090 c,
22091 );
22092 let e = _mm512_set_ph(
22093 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
22094 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
22095 );
22096 assert_eq_m512h(r, e);
22097 }
22098
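// rcp computes an approximate per-lane reciprocal. These tests use the power-of-two input 2.0,
// for which the hardware approximation is expected to return exactly 0.5, allowing an exact
// equality assertion.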
22099 #[simd_test(enable = "avx512fp16,avx512vl")]
22100 fn test_mm_rcp_ph() {
22101 let a = _mm_set1_ph(2.0);
22102 let r = _mm_rcp_ph(a);
22103 let e = _mm_set1_ph(0.5);
22104 assert_eq_m128h(r, e);
22105 }
22106
22107 #[simd_test(enable = "avx512fp16,avx512vl")]
22108 fn test_mm_mask_rcp_ph() {
22109 let a = _mm_set1_ph(2.0);
22110 let src = _mm_set1_ph(1.0);
22111 let r = _mm_mask_rcp_ph(src, 0b01010101, a);
22112 let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5);
22113 assert_eq_m128h(r, e);
22114 }
22115
22116 #[simd_test(enable = "avx512fp16,avx512vl")]
22117 fn test_mm_maskz_rcp_ph() {
22118 let a = _mm_set1_ph(2.0);
22119 let r = _mm_maskz_rcp_ph(0b01010101, a);
22120 let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
22121 assert_eq_m128h(r, e);
22122 }
22123
22124 #[simd_test(enable = "avx512fp16,avx512vl")]
22125 fn test_mm256_rcp_ph() {
22126 let a = _mm256_set1_ph(2.0);
22127 let r = _mm256_rcp_ph(a);
22128 let e = _mm256_set1_ph(0.5);
22129 assert_eq_m256h(r, e);
22130 }
22131
22132 #[simd_test(enable = "avx512fp16,avx512vl")]
22133 fn test_mm256_mask_rcp_ph() {
22134 let a = _mm256_set1_ph(2.0);
22135 let src = _mm256_set1_ph(1.0);
22136 let r = _mm256_mask_rcp_ph(src, 0b0101010101010101, a);
22137 let e = _mm256_set_ph(
22138 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
22139 );
22140 assert_eq_m256h(r, e);
22141 }
22142
22143 #[simd_test(enable = "avx512fp16,avx512vl")]
22144 fn test_mm256_maskz_rcp_ph() {
22145 let a = _mm256_set1_ph(2.0);
22146 let r = _mm256_maskz_rcp_ph(0b0101010101010101, a);
22147 let e = _mm256_set_ph(
22148 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
22149 );
22150 assert_eq_m256h(r, e);
22151 }
22152
22153 #[simd_test(enable = "avx512fp16")]
22154 fn test_mm512_rcp_ph() {
22155 let a = _mm512_set1_ph(2.0);
22156 let r = _mm512_rcp_ph(a);
22157 let e = _mm512_set1_ph(0.5);
22158 assert_eq_m512h(r, e);
22159 }
22160
22161 #[simd_test(enable = "avx512fp16")]
22162 fn test_mm512_mask_rcp_ph() {
22163 let a = _mm512_set1_ph(2.0);
22164 let src = _mm512_set1_ph(1.0);
22165 let r = _mm512_mask_rcp_ph(src, 0b01010101010101010101010101010101, a);
22166 let e = _mm512_set_ph(
22167 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0,
22168 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
22169 );
22170 assert_eq_m512h(r, e);
22171 }
22172
22173 #[simd_test(enable = "avx512fp16")]
22174 fn test_mm512_maskz_rcp_ph() {
22175 let a = _mm512_set1_ph(2.0);
22176 let r = _mm512_maskz_rcp_ph(0b01010101010101010101010101010101, a);
22177 let e = _mm512_set_ph(
22178 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
22179 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
22180 );
22181 assert_eq_m512h(r, e);
22182 }
22183
22184 #[simd_test(enable = "avx512fp16,avx512vl")]
22185 fn test_mm_rcp_sh() {
22186 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22187 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22188 let r = _mm_rcp_sh(a, b);
22189 let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22190 assert_eq_m128h(r, e);
22191 }
22192
22193 #[simd_test(enable = "avx512fp16,avx512vl")]
22194 fn test_mm_mask_rcp_sh() {
22195 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22196 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22197 let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
22198 let r = _mm_mask_rcp_sh(src, 0, a, b);
22199 let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22200 assert_eq_m128h(r, e);
22201 let r = _mm_mask_rcp_sh(src, 1, a, b);
22202 let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22203 assert_eq_m128h(r, e);
22204 }
22205
22206 #[simd_test(enable = "avx512fp16,avx512vl")]
22207 fn test_mm_maskz_rcp_sh() {
22208 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22209 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22210 let r = _mm_maskz_rcp_sh(0, a, b);
22211 let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22212 assert_eq_m128h(r, e);
22213 let r = _mm_maskz_rcp_sh(1, a, b);
22214 let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22215 assert_eq_m128h(r, e);
22216 }
22217
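// rsqrt computes an approximate reciprocal square root. With the input 4.0 the expected result
// 1 / sqrt(4.0) = 0.5 is again a value the approximation is expected to hit exactly.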
22218 #[simd_test(enable = "avx512fp16,avx512vl")]
22219 fn test_mm_rsqrt_ph() {
22220 let a = _mm_set1_ph(4.0);
22221 let r = _mm_rsqrt_ph(a);
22222 let e = _mm_set1_ph(0.5);
22223 assert_eq_m128h(r, e);
22224 }
22225
22226 #[simd_test(enable = "avx512fp16,avx512vl")]
22227 fn test_mm_mask_rsqrt_ph() {
22228 let a = _mm_set1_ph(4.0);
22229 let src = _mm_set1_ph(1.0);
22230 let r = _mm_mask_rsqrt_ph(src, 0b01010101, a);
22231 let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5);
22232 assert_eq_m128h(r, e);
22233 }
22234
22235 #[simd_test(enable = "avx512fp16,avx512vl")]
22236 fn test_mm_maskz_rsqrt_ph() {
22237 let a = _mm_set1_ph(4.0);
22238 let r = _mm_maskz_rsqrt_ph(0b01010101, a);
22239 let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
22240 assert_eq_m128h(r, e);
22241 }
22242
22243 #[simd_test(enable = "avx512fp16,avx512vl")]
22244 fn test_mm256_rsqrt_ph() {
22245 let a = _mm256_set1_ph(4.0);
22246 let r = _mm256_rsqrt_ph(a);
22247 let e = _mm256_set1_ph(0.5);
22248 assert_eq_m256h(r, e);
22249 }
22250
22251 #[simd_test(enable = "avx512fp16,avx512vl")]
22252 fn test_mm256_mask_rsqrt_ph() {
22253 let a = _mm256_set1_ph(4.0);
22254 let src = _mm256_set1_ph(1.0);
22255 let r = _mm256_mask_rsqrt_ph(src, 0b0101010101010101, a);
22256 let e = _mm256_set_ph(
22257 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
22258 );
22259 assert_eq_m256h(r, e);
22260 }
22261
22262 #[simd_test(enable = "avx512fp16,avx512vl")]
22263 fn test_mm256_maskz_rsqrt_ph() {
22264 let a = _mm256_set1_ph(4.0);
22265 let r = _mm256_maskz_rsqrt_ph(0b0101010101010101, a);
22266 let e = _mm256_set_ph(
22267 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
22268 );
22269 assert_eq_m256h(r, e);
22270 }
22271
22272 #[simd_test(enable = "avx512fp16")]
22273 fn test_mm512_rsqrt_ph() {
22274 let a = _mm512_set1_ph(4.0);
22275 let r = _mm512_rsqrt_ph(a);
22276 let e = _mm512_set1_ph(0.5);
22277 assert_eq_m512h(r, e);
22278 }
22279
22280 #[simd_test(enable = "avx512fp16")]
22281 fn test_mm512_mask_rsqrt_ph() {
22282 let a = _mm512_set1_ph(4.0);
22283 let src = _mm512_set1_ph(1.0);
22284 let r = _mm512_mask_rsqrt_ph(src, 0b01010101010101010101010101010101, a);
22285 let e = _mm512_set_ph(
22286 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0,
22287 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
22288 );
22289 assert_eq_m512h(r, e);
22290 }
22291
22292 #[simd_test(enable = "avx512fp16")]
22293 fn test_mm512_maskz_rsqrt_ph() {
22294 let a = _mm512_set1_ph(4.0);
22295 let r = _mm512_maskz_rsqrt_ph(0b01010101010101010101010101010101, a);
22296 let e = _mm512_set_ph(
22297 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
22298 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
22299 );
22300 assert_eq_m512h(r, e);
22301 }
22302
22303 #[simd_test(enable = "avx512fp16,avx512vl")]
22304 fn test_mm_rsqrt_sh() {
22305 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22306 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
22307 let r = _mm_rsqrt_sh(a, b);
22308 let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22309 assert_eq_m128h(r, e);
22310 }
22311
22312 #[simd_test(enable = "avx512fp16,avx512vl")]
22313 fn test_mm_mask_rsqrt_sh() {
22314 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22315 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
22316 let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
22317 let r = _mm_mask_rsqrt_sh(src, 0, a, b);
22318 let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22319 assert_eq_m128h(r, e);
22320 let r = _mm_mask_rsqrt_sh(src, 1, a, b);
22321 let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22322 assert_eq_m128h(r, e);
22323 }
22324
22325 #[simd_test(enable = "avx512fp16,avx512vl")]
22326 fn test_mm_maskz_rsqrt_sh() {
22327 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22328 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
22329 let r = _mm_maskz_rsqrt_sh(0, a, b);
22330 let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22331 assert_eq_m128h(r, e);
22332 let r = _mm_maskz_rsqrt_sh(1, a, b);
22333 let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22334 assert_eq_m128h(r, e);
22335 }
22336
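// Unlike rcp/rsqrt, sqrt is a correctly rounded square root rather than an approximation;
// sqrt(4.0) = 2.0 exactly.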
22337 #[simd_test(enable = "avx512fp16,avx512vl")]
22338 fn test_mm_sqrt_ph() {
22339 let a = _mm_set1_ph(4.0);
22340 let r = _mm_sqrt_ph(a);
22341 let e = _mm_set1_ph(2.0);
22342 assert_eq_m128h(r, e);
22343 }
22344
22345 #[simd_test(enable = "avx512fp16,avx512vl")]
22346 fn test_mm_mask_sqrt_ph() {
22347 let a = _mm_set1_ph(4.0);
22348 let src = _mm_set1_ph(1.0);
22349 let r = _mm_mask_sqrt_ph(src, 0b01010101, a);
22350 let e = _mm_set_ph(1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0);
22351 assert_eq_m128h(r, e);
22352 }
22353
22354 #[simd_test(enable = "avx512fp16,avx512vl")]
22355 fn test_mm_maskz_sqrt_ph() {
22356 let a = _mm_set1_ph(4.0);
22357 let r = _mm_maskz_sqrt_ph(0b01010101, a);
22358 let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0);
22359 assert_eq_m128h(r, e);
22360 }
22361
22362 #[simd_test(enable = "avx512fp16,avx512vl")]
22363 fn test_mm256_sqrt_ph() {
22364 let a = _mm256_set1_ph(4.0);
22365 let r = _mm256_sqrt_ph(a);
22366 let e = _mm256_set1_ph(2.0);
22367 assert_eq_m256h(r, e);
22368 }
22369
22370 #[simd_test(enable = "avx512fp16,avx512vl")]
22371 fn test_mm256_mask_sqrt_ph() {
22372 let a = _mm256_set1_ph(4.0);
22373 let src = _mm256_set1_ph(1.0);
22374 let r = _mm256_mask_sqrt_ph(src, 0b0101010101010101, a);
22375 let e = _mm256_set_ph(
22376 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
22377 );
22378 assert_eq_m256h(r, e);
22379 }
22380
22381 #[simd_test(enable = "avx512fp16,avx512vl")]
22382 fn test_mm256_maskz_sqrt_ph() {
22383 let a = _mm256_set1_ph(4.0);
22384 let r = _mm256_maskz_sqrt_ph(0b0101010101010101, a);
22385 let e = _mm256_set_ph(
22386 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
22387 );
22388 assert_eq_m256h(r, e);
22389 }
22390
22391 #[simd_test(enable = "avx512fp16")]
22392 fn test_mm512_sqrt_ph() {
22393 let a = _mm512_set1_ph(4.0);
22394 let r = _mm512_sqrt_ph(a);
22395 let e = _mm512_set1_ph(2.0);
22396 assert_eq_m512h(r, e);
22397 }
22398
22399 #[simd_test(enable = "avx512fp16")]
22400 fn test_mm512_mask_sqrt_ph() {
22401 let a = _mm512_set1_ph(4.0);
22402 let src = _mm512_set1_ph(1.0);
22403 let r = _mm512_mask_sqrt_ph(src, 0b01010101010101010101010101010101, a);
22404 let e = _mm512_set_ph(
22405 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
22406 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
22407 );
22408 assert_eq_m512h(r, e);
22409 }
22410
22411 #[simd_test(enable = "avx512fp16")]
22412 fn test_mm512_maskz_sqrt_ph() {
22413 let a = _mm512_set1_ph(4.0);
22414 let r = _mm512_maskz_sqrt_ph(0b01010101010101010101010101010101, a);
22415 let e = _mm512_set_ph(
22416 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
22417 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
22418 );
22419 assert_eq_m512h(r, e);
22420 }
22421
22422 #[simd_test(enable = "avx512fp16")]
22423 fn test_mm512_sqrt_round_ph() {
22424 let a = _mm512_set1_ph(4.0);
22425 let r = _mm512_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
22426 let e = _mm512_set1_ph(2.0);
22427 assert_eq_m512h(r, e);
22428 }
22429
22430 #[simd_test(enable = "avx512fp16")]
22431 fn test_mm512_mask_sqrt_round_ph() {
22432 let a = _mm512_set1_ph(4.0);
22433 let src = _mm512_set1_ph(1.0);
22434 let r = _mm512_mask_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22435 src,
22436 0b01010101010101010101010101010101,
22437 a,
22438 );
22439 let e = _mm512_set_ph(
22440 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
22441 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
22442 );
22443 assert_eq_m512h(r, e);
22444 }
22445
22446 #[simd_test(enable = "avx512fp16")]
22447 fn test_mm512_maskz_sqrt_round_ph() {
22448 let a = _mm512_set1_ph(4.0);
22449 let r = _mm512_maskz_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22450 0b01010101010101010101010101010101,
22451 a,
22452 );
22453 let e = _mm512_set_ph(
22454 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
22455 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
22456 );
22457 assert_eq_m512h(r, e);
22458 }
22459
22460 #[simd_test(enable = "avx512fp16,avx512vl")]
22461 fn test_mm_sqrt_sh() {
22462 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22463 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
22464 let r = _mm_sqrt_sh(a, b);
22465 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22466 assert_eq_m128h(r, e);
22467 }
22468
22469 #[simd_test(enable = "avx512fp16,avx512vl")]
22470 fn test_mm_mask_sqrt_sh() {
22471 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22472 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
22473 let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
22474 let r = _mm_mask_sqrt_sh(src, 0, a, b);
22475 let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22476 assert_eq_m128h(r, e);
22477 let r = _mm_mask_sqrt_sh(src, 1, a, b);
22478 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22479 assert_eq_m128h(r, e);
22480 }
22481
22482 #[simd_test(enable = "avx512fp16,avx512vl")]
22483 fn test_mm_maskz_sqrt_sh() {
22484 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22485 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
22486 let r = _mm_maskz_sqrt_sh(0, a, b);
22487 let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22488 assert_eq_m128h(r, e);
22489 let r = _mm_maskz_sqrt_sh(1, a, b);
22490 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22491 assert_eq_m128h(r, e);
22492 }
22493
22494 #[simd_test(enable = "avx512fp16,avx512vl")]
22495 fn test_mm_sqrt_round_sh() {
22496 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22497 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
22498 let r = _mm_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
22499 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22500 assert_eq_m128h(r, e);
22501 }
22502
22503 #[simd_test(enable = "avx512fp16,avx512vl")]
22504 fn test_mm_mask_sqrt_round_sh() {
22505 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22506 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
22507 let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
22508 let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22509 src, 0, a, b,
22510 );
22511 let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22512 assert_eq_m128h(r, e);
22513 let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22514 src, 1, a, b,
22515 );
22516 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22517 assert_eq_m128h(r, e);
22518 }
22519
22520 #[simd_test(enable = "avx512fp16,avx512vl")]
22521 fn test_mm_maskz_sqrt_round_sh() {
22522 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22523 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
22524 let r =
22525 _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
22526 let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22527 assert_eq_m128h(r, e);
22528 let r =
22529 _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
22530 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22531 assert_eq_m128h(r, e);
22532 }
22533
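// max returns the per-lane maximum. As with other x86 max/min operations, the second operand is
// returned when the inputs are unordered (NaN) or both zero, but these tests only use finite,
// distinct values, so the result is simply 2.0.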
22534 #[simd_test(enable = "avx512fp16,avx512vl")]
22535 fn test_mm_max_ph() {
22536 let a = _mm_set1_ph(2.0);
22537 let b = _mm_set1_ph(1.0);
22538 let r = _mm_max_ph(a, b);
22539 let e = _mm_set1_ph(2.0);
22540 assert_eq_m128h(r, e);
22541 }
22542
22543 #[simd_test(enable = "avx512fp16,avx512vl")]
22544 fn test_mm_mask_max_ph() {
22545 let a = _mm_set1_ph(2.0);
22546 let b = _mm_set1_ph(1.0);
22547 let src = _mm_set1_ph(3.0);
22548 let r = _mm_mask_max_ph(src, 0b01010101, a, b);
22549 let e = _mm_set_ph(3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0);
22550 assert_eq_m128h(r, e);
22551 }
22552
22553 #[simd_test(enable = "avx512fp16,avx512vl")]
22554 fn test_mm_maskz_max_ph() {
22555 let a = _mm_set1_ph(2.0);
22556 let b = _mm_set1_ph(1.0);
22557 let r = _mm_maskz_max_ph(0b01010101, a, b);
22558 let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0);
22559 assert_eq_m128h(r, e);
22560 }
22561
22562 #[simd_test(enable = "avx512fp16,avx512vl")]
22563 fn test_mm256_max_ph() {
22564 let a = _mm256_set1_ph(2.0);
22565 let b = _mm256_set1_ph(1.0);
22566 let r = _mm256_max_ph(a, b);
22567 let e = _mm256_set1_ph(2.0);
22568 assert_eq_m256h(r, e);
22569 }
22570
22571 #[simd_test(enable = "avx512fp16,avx512vl")]
22572 fn test_mm256_mask_max_ph() {
22573 let a = _mm256_set1_ph(2.0);
22574 let b = _mm256_set1_ph(1.0);
22575 let src = _mm256_set1_ph(3.0);
22576 let r = _mm256_mask_max_ph(src, 0b0101010101010101, a, b);
22577 let e = _mm256_set_ph(
22578 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
22579 );
22580 assert_eq_m256h(r, e);
22581 }
22582
22583 #[simd_test(enable = "avx512fp16,avx512vl")]
22584 fn test_mm256_maskz_max_ph() {
22585 let a = _mm256_set1_ph(2.0);
22586 let b = _mm256_set1_ph(1.0);
22587 let r = _mm256_maskz_max_ph(0b0101010101010101, a, b);
22588 let e = _mm256_set_ph(
22589 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
22590 );
22591 assert_eq_m256h(r, e);
22592 }
22593
22594 #[simd_test(enable = "avx512fp16")]
22595 fn test_mm512_max_ph() {
22596 let a = _mm512_set1_ph(2.0);
22597 let b = _mm512_set1_ph(1.0);
22598 let r = _mm512_max_ph(a, b);
22599 let e = _mm512_set1_ph(2.0);
22600 assert_eq_m512h(r, e);
22601 }
22602
22603 #[simd_test(enable = "avx512fp16")]
22604 fn test_mm512_mask_max_ph() {
22605 let a = _mm512_set1_ph(2.0);
22606 let b = _mm512_set1_ph(1.0);
22607 let src = _mm512_set1_ph(3.0);
22608 let r = _mm512_mask_max_ph(src, 0b01010101010101010101010101010101, a, b);
22609 let e = _mm512_set_ph(
22610 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
22611 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
22612 );
22613 assert_eq_m512h(r, e);
22614 }
22615
22616 #[simd_test(enable = "avx512fp16")]
22617 fn test_mm512_maskz_max_ph() {
22618 let a = _mm512_set1_ph(2.0);
22619 let b = _mm512_set1_ph(1.0);
22620 let r = _mm512_maskz_max_ph(0b01010101010101010101010101010101, a, b);
22621 let e = _mm512_set_ph(
22622 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
22623 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
22624 );
22625 assert_eq_m512h(r, e);
22626 }
22627
22628 #[simd_test(enable = "avx512fp16")]
22629 fn test_mm512_max_round_ph() {
22630 let a = _mm512_set1_ph(2.0);
22631 let b = _mm512_set1_ph(1.0);
22632 let r = _mm512_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
22633 let e = _mm512_set1_ph(2.0);
22634 assert_eq_m512h(r, e);
22635 }
22636
22637 #[simd_test(enable = "avx512fp16")]
22638 fn test_mm512_mask_max_round_ph() {
22639 let a = _mm512_set1_ph(2.0);
22640 let b = _mm512_set1_ph(1.0);
22641 let src = _mm512_set1_ph(3.0);
22642 let r = _mm512_mask_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22643 src,
22644 0b01010101010101010101010101010101,
22645 a,
22646 b,
22647 );
22648 let e = _mm512_set_ph(
22649 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
22650 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
22651 );
22652 assert_eq_m512h(r, e);
22653 }
22654
22655 #[simd_test(enable = "avx512fp16")]
22656 fn test_mm512_maskz_max_round_ph() {
22657 let a = _mm512_set1_ph(2.0);
22658 let b = _mm512_set1_ph(1.0);
22659 let r = _mm512_maskz_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22660 0b01010101010101010101010101010101,
22661 a,
22662 b,
22663 );
22664 let e = _mm512_set_ph(
22665 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
22666 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
22667 );
22668 assert_eq_m512h(r, e);
22669 }
22670
22671 #[simd_test(enable = "avx512fp16,avx512vl")]
22672 fn test_mm_max_sh() {
22673 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22674 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22675 let r = _mm_max_sh(a, b);
22676 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22677 assert_eq_m128h(r, e);
22678 }
22679
22680 #[simd_test(enable = "avx512fp16,avx512vl")]
22681 fn test_mm_mask_max_sh() {
22682 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22683 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22684 let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
22685 let r = _mm_mask_max_sh(src, 0, a, b);
22686 let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22687 assert_eq_m128h(r, e);
22688 let r = _mm_mask_max_sh(src, 1, a, b);
22689 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22690 assert_eq_m128h(r, e);
22691 }
22692
22693 #[simd_test(enable = "avx512fp16,avx512vl")]
22694 fn test_mm_maskz_max_sh() {
22695 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22696 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22697 let r = _mm_maskz_max_sh(0, a, b);
22698 let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22699 assert_eq_m128h(r, e);
22700 let r = _mm_maskz_max_sh(1, a, b);
22701 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22702 assert_eq_m128h(r, e);
22703 }
22704
22705 #[simd_test(enable = "avx512fp16,avx512vl")]
22706 fn test_mm_max_round_sh() {
22707 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22708 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22709 let r = _mm_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
22710 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22711 assert_eq_m128h(r, e);
22712 }
22713
22714 #[simd_test(enable = "avx512fp16,avx512vl")]
22715 fn test_mm_mask_max_round_sh() {
22716 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22717 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22718 let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
22719 let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22720 src, 0, a, b,
22721 );
22722 let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22723 assert_eq_m128h(r, e);
22724 let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22725 src, 1, a, b,
22726 );
22727 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22728 assert_eq_m128h(r, e);
22729 }
22730
22731 #[simd_test(enable = "avx512fp16,avx512vl")]
22732 fn test_mm_maskz_max_round_sh() {
22733 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22734 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22735 let r =
22736 _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
22737 let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22738 assert_eq_m128h(r, e);
22739 let r =
22740 _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
22741 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22742 assert_eq_m128h(r, e);
22743 }
22744
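    // Mask bits map to lanes from the least-significant bit upward: a set bit keeps the computed
    // result, a clear bit keeps `src` (mask variants) or zeroes the lane (maskz variants). Note
    // that `_mm*_set_ph` lists elements from the highest lane down, so the expected patterns read
    // back-to-front relative to the mask.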
22745 #[simd_test(enable = "avx512fp16,avx512vl")]
22746 fn test_mm_min_ph() {
22747 let a = _mm_set1_ph(2.0);
22748 let b = _mm_set1_ph(1.0);
22749 let r = _mm_min_ph(a, b);
22750 let e = _mm_set1_ph(1.0);
22751 assert_eq_m128h(r, e);
22752 }
22753
22754 #[simd_test(enable = "avx512fp16,avx512vl")]
22755 fn test_mm_mask_min_ph() {
22756 let a = _mm_set1_ph(2.0);
22757 let b = _mm_set1_ph(1.0);
22758 let src = _mm_set1_ph(3.0);
22759 let r = _mm_mask_min_ph(src, 0b01010101, a, b);
22760 let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0);
22761 assert_eq_m128h(r, e);
22762 }
22763
22764 #[simd_test(enable = "avx512fp16,avx512vl")]
22765 fn test_mm_maskz_min_ph() {
22766 let a = _mm_set1_ph(2.0);
22767 let b = _mm_set1_ph(1.0);
22768 let r = _mm_maskz_min_ph(0b01010101, a, b);
22769 let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
22770 assert_eq_m128h(r, e);
22771 }
22772
22773 #[simd_test(enable = "avx512fp16,avx512vl")]
22774 fn test_mm256_min_ph() {
22775 let a = _mm256_set1_ph(2.0);
22776 let b = _mm256_set1_ph(1.0);
22777 let r = _mm256_min_ph(a, b);
22778 let e = _mm256_set1_ph(1.0);
22779 assert_eq_m256h(r, e);
22780 }
22781
22782 #[simd_test(enable = "avx512fp16,avx512vl")]
22783 fn test_mm256_mask_min_ph() {
22784 let a = _mm256_set1_ph(2.0);
22785 let b = _mm256_set1_ph(1.0);
22786 let src = _mm256_set1_ph(3.0);
22787 let r = _mm256_mask_min_ph(src, 0b0101010101010101, a, b);
22788 let e = _mm256_set_ph(
22789 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
22790 );
22791 assert_eq_m256h(r, e);
22792 }
22793
22794 #[simd_test(enable = "avx512fp16,avx512vl")]
22795 fn test_mm256_maskz_min_ph() {
22796 let a = _mm256_set1_ph(2.0);
22797 let b = _mm256_set1_ph(1.0);
22798 let r = _mm256_maskz_min_ph(0b0101010101010101, a, b);
22799 let e = _mm256_set_ph(
22800 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
22801 );
22802 assert_eq_m256h(r, e);
22803 }
22804
22805 #[simd_test(enable = "avx512fp16")]
22806 fn test_mm512_min_ph() {
22807 let a = _mm512_set1_ph(2.0);
22808 let b = _mm512_set1_ph(1.0);
22809 let r = _mm512_min_ph(a, b);
22810 let e = _mm512_set1_ph(1.0);
22811 assert_eq_m512h(r, e);
22812 }
22813
22814 #[simd_test(enable = "avx512fp16")]
22815 fn test_mm512_mask_min_ph() {
22816 let a = _mm512_set1_ph(2.0);
22817 let b = _mm512_set1_ph(1.0);
22818 let src = _mm512_set1_ph(3.0);
22819 let r = _mm512_mask_min_ph(src, 0b01010101010101010101010101010101, a, b);
22820 let e = _mm512_set_ph(
22821 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
22822 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
22823 );
22824 assert_eq_m512h(r, e);
22825 }
22826
22827 #[simd_test(enable = "avx512fp16")]
22828 fn test_mm512_maskz_min_ph() {
22829 let a = _mm512_set1_ph(2.0);
22830 let b = _mm512_set1_ph(1.0);
22831 let r = _mm512_maskz_min_ph(0b01010101010101010101010101010101, a, b);
22832 let e = _mm512_set_ph(
22833 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
22834 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
22835 );
22836 assert_eq_m512h(r, e);
22837 }
22838
22839 #[simd_test(enable = "avx512fp16")]
22840 fn test_mm512_min_round_ph() {
22841 let a = _mm512_set1_ph(2.0);
22842 let b = _mm512_set1_ph(1.0);
22843 let r = _mm512_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
22844 let e = _mm512_set1_ph(1.0);
22845 assert_eq_m512h(r, e);
22846 }
22847
22848 #[simd_test(enable = "avx512fp16")]
22849 fn test_mm512_mask_min_round_ph() {
22850 let a = _mm512_set1_ph(2.0);
22851 let b = _mm512_set1_ph(1.0);
22852 let src = _mm512_set1_ph(3.0);
22853 let r = _mm512_mask_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22854 src,
22855 0b01010101010101010101010101010101,
22856 a,
22857 b,
22858 );
22859 let e = _mm512_set_ph(
22860 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
22861 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
22862 );
22863 assert_eq_m512h(r, e);
22864 }
22865
22866 #[simd_test(enable = "avx512fp16")]
22867 fn test_mm512_maskz_min_round_ph() {
22868 let a = _mm512_set1_ph(2.0);
22869 let b = _mm512_set1_ph(1.0);
22870 let r = _mm512_maskz_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22871 0b01010101010101010101010101010101,
22872 a,
22873 b,
22874 );
22875 let e = _mm512_set_ph(
22876 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
22877 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
22878 );
22879 assert_eq_m512h(r, e);
22880 }
22881
22882 #[simd_test(enable = "avx512fp16,avx512vl")]
22883 fn test_mm_min_sh() {
22884 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22885 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22886 let r = _mm_min_sh(a, b);
22887 let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22888 assert_eq_m128h(r, e);
22889 }
22890
22891 #[simd_test(enable = "avx512fp16,avx512vl")]
22892 fn test_mm_mask_min_sh() {
22893 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22894 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22895 let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
22896 let r = _mm_mask_min_sh(src, 0, a, b);
22897 let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22898 assert_eq_m128h(r, e);
22899 let r = _mm_mask_min_sh(src, 1, a, b);
22900 let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22901 assert_eq_m128h(r, e);
22902 }
22903
22904 #[simd_test(enable = "avx512fp16,avx512vl")]
22905 fn test_mm_maskz_min_sh() {
22906 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22907 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22908 let r = _mm_maskz_min_sh(0, a, b);
22909 let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22910 assert_eq_m128h(r, e);
22911 let r = _mm_maskz_min_sh(1, a, b);
22912 let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22913 assert_eq_m128h(r, e);
22914 }
22915
22916 #[simd_test(enable = "avx512fp16,avx512vl")]
22917 fn test_mm_min_round_sh() {
22918 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22919 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22920 let r = _mm_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
22921 let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22922 assert_eq_m128h(r, e);
22923 }
22924
22925 #[simd_test(enable = "avx512fp16,avx512vl")]
22926 fn test_mm_mask_min_round_sh() {
22927 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22928 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22929 let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
22930 let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22931 src, 0, a, b,
22932 );
22933 let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22934 assert_eq_m128h(r, e);
22935 let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22936 src, 1, a, b,
22937 );
22938 let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22939 assert_eq_m128h(r, e);
22940 }
22941
22942 #[simd_test(enable = "avx512fp16,avx512vl")]
22943 fn test_mm_maskz_min_round_sh() {
22944 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22945 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22946 let r =
22947 _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
22948 let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22949 assert_eq_m128h(r, e);
22950 let r =
22951 _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
22952 let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22953 assert_eq_m128h(r, e);
22954 }
22955
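    // getexp extracts the unbiased exponent of each lane as an f16, i.e. floor(log2(|x|));
    // for x = 3.0 that is 1.0.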
22956 #[simd_test(enable = "avx512fp16,avx512vl")]
22957 fn test_mm_getexp_ph() {
22958 let a = _mm_set1_ph(3.0);
22959 let r = _mm_getexp_ph(a);
22960 let e = _mm_set1_ph(1.0);
22961 assert_eq_m128h(r, e);
22962 }
22963
22964 #[simd_test(enable = "avx512fp16,avx512vl")]
22965 fn test_mm_mask_getexp_ph() {
22966 let a = _mm_set1_ph(3.0);
22967 let src = _mm_set1_ph(4.0);
22968 let r = _mm_mask_getexp_ph(src, 0b01010101, a);
22969 let e = _mm_set_ph(4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0);
22970 assert_eq_m128h(r, e);
22971 }
22972
22973 #[simd_test(enable = "avx512fp16,avx512vl")]
22974 fn test_mm_maskz_getexp_ph() {
22975 let a = _mm_set1_ph(3.0);
22976 let r = _mm_maskz_getexp_ph(0b01010101, a);
22977 let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
22978 assert_eq_m128h(r, e);
22979 }
22980
22981 #[simd_test(enable = "avx512fp16,avx512vl")]
22982 fn test_mm256_getexp_ph() {
22983 let a = _mm256_set1_ph(3.0);
22984 let r = _mm256_getexp_ph(a);
22985 let e = _mm256_set1_ph(1.0);
22986 assert_eq_m256h(r, e);
22987 }
22988
22989 #[simd_test(enable = "avx512fp16,avx512vl")]
22990 fn test_mm256_mask_getexp_ph() {
22991 let a = _mm256_set1_ph(3.0);
22992 let src = _mm256_set1_ph(4.0);
22993 let r = _mm256_mask_getexp_ph(src, 0b0101010101010101, a);
22994 let e = _mm256_set_ph(
22995 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0,
22996 );
22997 assert_eq_m256h(r, e);
22998 }
22999
23000 #[simd_test(enable = "avx512fp16,avx512vl")]
23001 fn test_mm256_maskz_getexp_ph() {
23002 let a = _mm256_set1_ph(3.0);
23003 let r = _mm256_maskz_getexp_ph(0b0101010101010101, a);
23004 let e = _mm256_set_ph(
23005 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
23006 );
23007 assert_eq_m256h(r, e);
23008 }
23009
23010 #[simd_test(enable = "avx512fp16")]
23011 fn test_mm512_getexp_ph() {
23012 let a = _mm512_set1_ph(3.0);
23013 let r = _mm512_getexp_ph(a);
23014 let e = _mm512_set1_ph(1.0);
23015 assert_eq_m512h(r, e);
23016 }
23017
23018 #[simd_test(enable = "avx512fp16")]
23019 fn test_mm512_mask_getexp_ph() {
23020 let a = _mm512_set1_ph(3.0);
23021 let src = _mm512_set1_ph(4.0);
23022 let r = _mm512_mask_getexp_ph(src, 0b01010101010101010101010101010101, a);
23023 let e = _mm512_set_ph(
23024 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0,
23025 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0,
23026 );
23027 assert_eq_m512h(r, e);
23028 }
23029
23030 #[simd_test(enable = "avx512fp16")]
23031 fn test_mm512_maskz_getexp_ph() {
23032 let a = _mm512_set1_ph(3.0);
23033 let r = _mm512_maskz_getexp_ph(0b01010101010101010101010101010101, a);
23034 let e = _mm512_set_ph(
23035 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
23036 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
23037 );
23038 assert_eq_m512h(r, e);
23039 }
23040
23041 #[simd_test(enable = "avx512fp16")]
23042 fn test_mm512_getexp_round_ph() {
23043 let a = _mm512_set1_ph(3.0);
23044 let r = _mm512_getexp_round_ph::<_MM_FROUND_NO_EXC>(a);
23045 let e = _mm512_set1_ph(1.0);
23046 assert_eq_m512h(r, e);
23047 }
23048
23049 #[simd_test(enable = "avx512fp16")]
23050 fn test_mm512_mask_getexp_round_ph() {
23051 let a = _mm512_set1_ph(3.0);
23052 let src = _mm512_set1_ph(4.0);
23053 let r = _mm512_mask_getexp_round_ph::<_MM_FROUND_NO_EXC>(
23054 src,
23055 0b01010101010101010101010101010101,
23056 a,
23057 );
23058 let e = _mm512_set_ph(
23059 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0,
23060 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0,
23061 );
23062 assert_eq_m512h(r, e);
23063 }
23064
23065 #[simd_test(enable = "avx512fp16")]
23066 fn test_mm512_maskz_getexp_round_ph() {
23067 let a = _mm512_set1_ph(3.0);
23068 let r = _mm512_maskz_getexp_round_ph::<_MM_FROUND_NO_EXC>(
23069 0b01010101010101010101010101010101,
23070 a,
23071 );
23072 let e = _mm512_set_ph(
23073 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
23074 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
23075 );
23076 assert_eq_m512h(r, e);
23077 }
23078
23079 #[simd_test(enable = "avx512fp16,avx512vl")]
23080 fn test_mm_getexp_sh() {
23081 let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
23082 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23083 let r = _mm_getexp_sh(a, b);
23084 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23085 assert_eq_m128h(r, e);
23086 }
23087
23088 #[simd_test(enable = "avx512fp16,avx512vl")]
23089 fn test_mm_mask_getexp_sh() {
23090 let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
23091 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23092 let src = _mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.);
23093 let r = _mm_mask_getexp_sh(src, 0, a, b);
23094 let e = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
23095 assert_eq_m128h(r, e);
23096 let r = _mm_mask_getexp_sh(src, 1, a, b);
23097 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23098 assert_eq_m128h(r, e);
23099 }
23100
23101 #[simd_test(enable = "avx512fp16,avx512vl")]
23102 fn test_mm_maskz_getexp_sh() {
23103 let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
23104 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23105 let r = _mm_maskz_getexp_sh(0, a, b);
23106 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23107 assert_eq_m128h(r, e);
23108 let r = _mm_maskz_getexp_sh(1, a, b);
23109 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23110 assert_eq_m128h(r, e);
23111 }
23112
23113 #[simd_test(enable = "avx512fp16,avx512vl")]
23114 fn test_mm_getexp_round_sh() {
23115 let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
23116 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23117 let r = _mm_getexp_round_sh::<_MM_FROUND_NO_EXC>(a, b);
23118 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23119 assert_eq_m128h(r, e);
23120 }
23121
23122 #[simd_test(enable = "avx512fp16,avx512vl")]
23123 fn test_mm_mask_getexp_round_sh() {
23124 let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
23125 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23126 let src = _mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.);
23127 let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, 0, a, b);
23128 let e = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
23129 assert_eq_m128h(r, e);
23130 let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, 1, a, b);
23131 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23132 assert_eq_m128h(r, e);
23133 }
23134
23135 #[simd_test(enable = "avx512fp16,avx512vl")]
23136 fn test_mm_maskz_getexp_round_sh() {
23137 let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
23138 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23139 let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(0, a, b);
23140 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23141 assert_eq_m128h(r, e);
23142 let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(1, a, b);
23143 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23144 assert_eq_m128h(r, e);
23145 }
23146
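    // getmant normalizes each mantissa into the interval selected by the first const parameter;
    // with _MM_MANT_NORM_P75_1P5 the result lies in [0.75, 1.5), so 10.0 = 1.25 * 2^3 yields 1.25.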
23147 #[simd_test(enable = "avx512fp16,avx512vl")]
23148 fn test_mm_getmant_ph() {
23149 let a = _mm_set1_ph(10.0);
23150 let r = _mm_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
23151 let e = _mm_set1_ph(1.25);
23152 assert_eq_m128h(r, e);
23153 }
23154
23155 #[simd_test(enable = "avx512fp16,avx512vl")]
23156 fn test_mm_mask_getmant_ph() {
23157 let a = _mm_set1_ph(10.0);
23158 let src = _mm_set1_ph(20.0);
23159 let r = _mm_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 0b01010101, a);
23160 let e = _mm_set_ph(20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25);
23161 assert_eq_m128h(r, e);
23162 }
23163
23164 #[simd_test(enable = "avx512fp16,avx512vl")]
23165 fn test_mm_maskz_getmant_ph() {
23166 let a = _mm_set1_ph(10.0);
23167 let r = _mm_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0b01010101, a);
23168 let e = _mm_set_ph(0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25);
23169 assert_eq_m128h(r, e);
23170 }
23171
23172 #[simd_test(enable = "avx512fp16,avx512vl")]
23173 fn test_mm256_getmant_ph() {
23174 let a = _mm256_set1_ph(10.0);
23175 let r = _mm256_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
23176 let e = _mm256_set1_ph(1.25);
23177 assert_eq_m256h(r, e);
23178 }
23179
23180 #[simd_test(enable = "avx512fp16,avx512vl")]
23181 fn test_mm256_mask_getmant_ph() {
23182 let a = _mm256_set1_ph(10.0);
23183 let src = _mm256_set1_ph(20.0);
23184 let r = _mm256_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
23185 src,
23186 0b0101010101010101,
23187 a,
23188 );
23189 let e = _mm256_set_ph(
23190 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
23191 20.0, 1.25,
23192 );
23193 assert_eq_m256h(r, e);
23194 }
23195
23196 #[simd_test(enable = "avx512fp16,avx512vl")]
23197 fn test_mm256_maskz_getmant_ph() {
23198 let a = _mm256_set1_ph(10.0);
23199 let r = _mm256_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
23200 0b0101010101010101,
23201 a,
23202 );
23203 let e = _mm256_set_ph(
23204 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
23205 );
23206 assert_eq_m256h(r, e);
23207 }
23208
23209 #[simd_test(enable = "avx512fp16")]
23210 fn test_mm512_getmant_ph() {
23211 let a = _mm512_set1_ph(10.0);
23212 let r = _mm512_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
23213 let e = _mm512_set1_ph(1.25);
23214 assert_eq_m512h(r, e);
23215 }
23216
23217 #[simd_test(enable = "avx512fp16")]
23218 fn test_mm512_mask_getmant_ph() {
23219 let a = _mm512_set1_ph(10.0);
23220 let src = _mm512_set1_ph(20.0);
23221 let r = _mm512_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
23222 src,
23223 0b01010101010101010101010101010101,
23224 a,
23225 );
23226 let e = _mm512_set_ph(
23227 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
23228 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
23229 20.0, 1.25, 20.0, 1.25,
23230 );
23231 assert_eq_m512h(r, e);
23232 }
23233
23234 #[simd_test(enable = "avx512fp16")]
23235 fn test_mm512_maskz_getmant_ph() {
23236 let a = _mm512_set1_ph(10.0);
23237 let r = _mm512_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
23238 0b01010101010101010101010101010101,
23239 a,
23240 );
23241 let e = _mm512_set_ph(
23242 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
23243 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
23244 );
23245 assert_eq_m512h(r, e);
23246 }
23247
23248 #[simd_test(enable = "avx512fp16")]
23249 fn test_mm512_getmant_round_ph() {
23250 let a = _mm512_set1_ph(10.0);
23251 let r =
23252 _mm512_getmant_round_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>(
23253 a,
23254 );
23255 let e = _mm512_set1_ph(1.25);
23256 assert_eq_m512h(r, e);
23257 }
23258
23259 #[simd_test(enable = "avx512fp16")]
23260 fn test_mm512_mask_getmant_round_ph() {
23261 let a = _mm512_set1_ph(10.0);
23262 let src = _mm512_set1_ph(20.0);
23263 let r = _mm512_mask_getmant_round_ph::<
23264 _MM_MANT_NORM_P75_1P5,
23265 _MM_MANT_SIGN_NAN,
23266 _MM_FROUND_NO_EXC,
23267 >(src, 0b01010101010101010101010101010101, a);
23268 let e = _mm512_set_ph(
23269 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
23270 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
23271 20.0, 1.25, 20.0, 1.25,
23272 );
23273 assert_eq_m512h(r, e);
23274 }
23275
23276 #[simd_test(enable = "avx512fp16")]
23277 fn test_mm512_maskz_getmant_round_ph() {
23278 let a = _mm512_set1_ph(10.0);
23279 let r = _mm512_maskz_getmant_round_ph::<
23280 _MM_MANT_NORM_P75_1P5,
23281 _MM_MANT_SIGN_NAN,
23282 _MM_FROUND_NO_EXC,
23283 >(0b01010101010101010101010101010101, a);
23284 let e = _mm512_set_ph(
23285 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
23286 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
23287 );
23288 assert_eq_m512h(r, e);
23289 }
23290
23291 #[simd_test(enable = "avx512fp16,avx512vl")]
23292 fn test_mm_getmant_sh() {
23293 let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
23294 let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
23295 let r = _mm_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a, b);
23296 let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
23297 assert_eq_m128h(r, e);
23298 }
23299
23300 #[simd_test(enable = "avx512fp16,avx512vl")]
23301 fn test_mm_mask_getmant_sh() {
23302 let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
23303 let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
23304 let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.);
23305 let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 0, a, b);
23306 let e = _mm_setr_ph(20.0, 10., 11., 12., 13., 14., 15., 16.);
23307 assert_eq_m128h(r, e);
23308 let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 1, a, b);
23309 let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
23310 assert_eq_m128h(r, e);
23311 }
23312
23313 #[simd_test(enable = "avx512fp16,avx512vl")]
23314 fn test_mm_maskz_getmant_sh() {
23315 let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
23316 let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
23317 let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0, a, b);
23318 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23319 assert_eq_m128h(r, e);
23320 let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(1, a, b);
23321 let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
23322 assert_eq_m128h(r, e);
23323 }
23324
23325 #[simd_test(enable = "avx512fp16,avx512vl")]
23326 fn test_mm_getmant_round_sh() {
23327 let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
23328 let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
23329 let r = _mm_getmant_round_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>(
23330 a, b,
23331 );
23332 let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
23333 assert_eq_m128h(r, e);
23334 }
23335
23336 #[simd_test(enable = "avx512fp16,avx512vl")]
23337 fn test_mm_mask_getmant_round_sh() {
23338 let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
23339 let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
23340 let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.);
23341 let r = _mm_mask_getmant_round_sh::<
23342 _MM_MANT_NORM_P75_1P5,
23343 _MM_MANT_SIGN_NAN,
23344 _MM_FROUND_NO_EXC,
23345 >(src, 0, a, b);
23346 let e = _mm_setr_ph(20.0, 10., 11., 12., 13., 14., 15., 16.);
23347 assert_eq_m128h(r, e);
23348 let r = _mm_mask_getmant_round_sh::<
23349 _MM_MANT_NORM_P75_1P5,
23350 _MM_MANT_SIGN_NAN,
23351 _MM_FROUND_NO_EXC,
23352 >(src, 1, a, b);
23353 let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
23354 assert_eq_m128h(r, e);
23355 }
23356
23357 #[simd_test(enable = "avx512fp16,avx512vl")]
23358 fn test_mm_maskz_getmant_round_sh() {
23359 let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
23360 let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
23361 let r = _mm_maskz_getmant_round_sh::<
23362 _MM_MANT_NORM_P75_1P5,
23363 _MM_MANT_SIGN_NAN,
23364 _MM_FROUND_NO_EXC,
23365 >(0, a, b);
23366 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23367 assert_eq_m128h(r, e);
23368 let r = _mm_maskz_getmant_round_sh::<
23369 _MM_MANT_NORM_P75_1P5,
23370 _MM_MANT_SIGN_NAN,
23371 _MM_FROUND_NO_EXC,
23372 >(1, a, b);
23373 let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
23374 assert_eq_m128h(r, e);
23375 }
23376
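    // roundscale rounds each lane to M = imm8[7:4] fraction bits; with imm8 = 0 it rounds to the
    // nearest integer, so 1.1 becomes 1.0.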
23377 #[simd_test(enable = "avx512fp16,avx512vl")]
23378 fn test_mm_roundscale_ph() {
23379 let a = _mm_set1_ph(1.1);
23380 let r = _mm_roundscale_ph::<0>(a);
23381 let e = _mm_set1_ph(1.0);
23382 assert_eq_m128h(r, e);
23383 }
23384
23385 #[simd_test(enable = "avx512fp16,avx512vl")]
23386 fn test_mm_mask_roundscale_ph() {
23387 let a = _mm_set1_ph(1.1);
23388 let src = _mm_set1_ph(2.0);
23389 let r = _mm_mask_roundscale_ph::<0>(src, 0b01010101, a);
23390 let e = _mm_set_ph(2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0);
23391 assert_eq_m128h(r, e);
23392 }
23393
23394 #[simd_test(enable = "avx512fp16,avx512vl")]
23395 fn test_mm_maskz_roundscale_ph() {
23396 let a = _mm_set1_ph(1.1);
23397 let r = _mm_maskz_roundscale_ph::<0>(0b01010101, a);
23398 let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
23399 assert_eq_m128h(r, e);
23400 }
23401
23402 #[simd_test(enable = "avx512fp16,avx512vl")]
23403 fn test_mm256_roundscale_ph() {
23404 let a = _mm256_set1_ph(1.1);
23405 let r = _mm256_roundscale_ph::<0>(a);
23406 let e = _mm256_set1_ph(1.0);
23407 assert_eq_m256h(r, e);
23408 }
23409
23410 #[simd_test(enable = "avx512fp16,avx512vl")]
23411 fn test_mm256_mask_roundscale_ph() {
23412 let a = _mm256_set1_ph(1.1);
23413 let src = _mm256_set1_ph(2.0);
23414 let r = _mm256_mask_roundscale_ph::<0>(src, 0b0101010101010101, a);
23415 let e = _mm256_set_ph(
23416 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
23417 );
23418 assert_eq_m256h(r, e);
23419 }
23420
23421 #[simd_test(enable = "avx512fp16,avx512vl")]
23422 fn test_mm256_maskz_roundscale_ph() {
23423 let a = _mm256_set1_ph(1.1);
23424 let r = _mm256_maskz_roundscale_ph::<0>(0b0101010101010101, a);
23425 let e = _mm256_set_ph(
23426 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
23427 );
23428 assert_eq_m256h(r, e);
23429 }
23430
23431 #[simd_test(enable = "avx512fp16")]
23432 fn test_mm512_roundscale_ph() {
23433 let a = _mm512_set1_ph(1.1);
23434 let r = _mm512_roundscale_ph::<0>(a);
23435 let e = _mm512_set1_ph(1.0);
23436 assert_eq_m512h(r, e);
23437 }
23438
23439 #[simd_test(enable = "avx512fp16")]
23440 fn test_mm512_mask_roundscale_ph() {
23441 let a = _mm512_set1_ph(1.1);
23442 let src = _mm512_set1_ph(2.0);
23443 let r = _mm512_mask_roundscale_ph::<0>(src, 0b01010101010101010101010101010101, a);
23444 let e = _mm512_set_ph(
23445 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
23446 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
23447 );
23448 assert_eq_m512h(r, e);
23449 }
23450
23451 #[simd_test(enable = "avx512fp16")]
23452 fn test_mm512_maskz_roundscale_ph() {
23453 let a = _mm512_set1_ph(1.1);
23454 let r = _mm512_maskz_roundscale_ph::<0>(0b01010101010101010101010101010101, a);
23455 let e = _mm512_set_ph(
23456 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
23457 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
23458 );
23459 assert_eq_m512h(r, e);
23460 }
23461
23462 #[simd_test(enable = "avx512fp16")]
23463 fn test_mm512_roundscale_round_ph() {
23464 let a = _mm512_set1_ph(1.1);
23465 let r = _mm512_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(a);
23466 let e = _mm512_set1_ph(1.0);
23467 assert_eq_m512h(r, e);
23468 }
23469
23470 #[simd_test(enable = "avx512fp16")]
23471 fn test_mm512_mask_roundscale_round_ph() {
23472 let a = _mm512_set1_ph(1.1);
23473 let src = _mm512_set1_ph(2.0);
23474 let r = _mm512_mask_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(
23475 src,
23476 0b01010101010101010101010101010101,
23477 a,
23478 );
23479 let e = _mm512_set_ph(
23480 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
23481 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
23482 );
23483 assert_eq_m512h(r, e);
23484 }
23485
23486 #[simd_test(enable = "avx512fp16")]
23487 fn test_mm512_maskz_roundscale_round_ph() {
23488 let a = _mm512_set1_ph(1.1);
23489 let r = _mm512_maskz_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(
23490 0b01010101010101010101010101010101,
23491 a,
23492 );
23493 let e = _mm512_set_ph(
23494 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
23495 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
23496 );
23497 assert_eq_m512h(r, e);
23498 }
23499
23500 #[simd_test(enable = "avx512fp16,avx512vl")]
23501 fn test_mm_roundscale_sh() {
23502 let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23503 let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
23504 let r = _mm_roundscale_sh::<0>(a, b);
23505 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23506 assert_eq_m128h(r, e);
23507 }
23508
23509 #[simd_test(enable = "avx512fp16,avx512vl")]
23510 fn test_mm_mask_roundscale_sh() {
23511 let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23512 let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
23513 let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
23514 let r = _mm_mask_roundscale_sh::<0>(src, 0, a, b);
23515 let e = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23516 assert_eq_m128h(r, e);
23517 let r = _mm_mask_roundscale_sh::<0>(src, 1, a, b);
23518 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23519 assert_eq_m128h(r, e);
23520 }
23521
23522 #[simd_test(enable = "avx512fp16,avx512vl")]
23523 fn test_mm_maskz_roundscale_sh() {
23524 let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23525 let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
23526 let r = _mm_maskz_roundscale_sh::<0>(0, a, b);
23527 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23528 assert_eq_m128h(r, e);
23529 let r = _mm_maskz_roundscale_sh::<0>(1, a, b);
23530 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23531 assert_eq_m128h(r, e);
23532 }
23533
23534 #[simd_test(enable = "avx512fp16,avx512vl")]
23535 fn test_mm_roundscale_round_sh() {
23536 let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23537 let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
23538 let r = _mm_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(a, b);
23539 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23540 assert_eq_m128h(r, e);
23541 }
23542
23543 #[simd_test(enable = "avx512fp16,avx512vl")]
23544 fn test_mm_mask_roundscale_round_sh() {
23545 let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23546 let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
23547 let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
23548 let r = _mm_mask_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(src, 0, a, b);
23549 let e = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23550 assert_eq_m128h(r, e);
23551 let r = _mm_mask_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(src, 1, a, b);
23552 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23553 assert_eq_m128h(r, e);
23554 }
23555
23556 #[simd_test(enable = "avx512fp16,avx512vl")]
23557 fn test_mm_maskz_roundscale_round_sh() {
23558 let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23559 let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
23560 let r = _mm_maskz_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(0, a, b);
23561 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23562 assert_eq_m128h(r, e);
23563 let r = _mm_maskz_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(1, a, b);
23564 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23565 assert_eq_m128h(r, e);
23566 }
23567
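    // scalef computes a * 2^floor(b) per lane, so 1.0 scaled by b = 3.0 gives 8.0.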
23568 #[simd_test(enable = "avx512fp16,avx512vl")]
23569 fn test_mm_scalef_ph() {
23570 let a = _mm_set1_ph(1.);
23571 let b = _mm_set1_ph(3.);
23572 let r = _mm_scalef_ph(a, b);
23573 let e = _mm_set1_ph(8.0);
23574 assert_eq_m128h(r, e);
23575 }
23576
23577 #[simd_test(enable = "avx512fp16,avx512vl")]
23578 fn test_mm_mask_scalef_ph() {
23579 let a = _mm_set1_ph(1.);
23580 let b = _mm_set1_ph(3.);
23581 let src = _mm_set1_ph(2.);
23582 let r = _mm_mask_scalef_ph(src, 0b01010101, a, b);
23583 let e = _mm_set_ph(2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0);
23584 assert_eq_m128h(r, e);
23585 }
23586
23587 #[simd_test(enable = "avx512fp16,avx512vl")]
23588 fn test_mm_maskz_scalef_ph() {
23589 let a = _mm_set1_ph(1.);
23590 let b = _mm_set1_ph(3.);
23591 let r = _mm_maskz_scalef_ph(0b01010101, a, b);
23592 let e = _mm_set_ph(0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0);
23593 assert_eq_m128h(r, e);
23594 }
23595
23596 #[simd_test(enable = "avx512fp16,avx512vl")]
23597 fn test_mm256_scalef_ph() {
23598 let a = _mm256_set1_ph(1.);
23599 let b = _mm256_set1_ph(3.);
23600 let r = _mm256_scalef_ph(a, b);
23601 let e = _mm256_set1_ph(8.0);
23602 assert_eq_m256h(r, e);
23603 }
23604
23605 #[simd_test(enable = "avx512fp16,avx512vl")]
23606 fn test_mm256_mask_scalef_ph() {
23607 let a = _mm256_set1_ph(1.);
23608 let b = _mm256_set1_ph(3.);
23609 let src = _mm256_set1_ph(2.);
23610 let r = _mm256_mask_scalef_ph(src, 0b0101010101010101, a, b);
23611 let e = _mm256_set_ph(
23612 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0,
23613 );
23614 assert_eq_m256h(r, e);
23615 }
23616
23617 #[simd_test(enable = "avx512fp16,avx512vl")]
23618 fn test_mm256_maskz_scalef_ph() {
23619 let a = _mm256_set1_ph(1.);
23620 let b = _mm256_set1_ph(3.);
23621 let r = _mm256_maskz_scalef_ph(0b0101010101010101, a, b);
23622 let e = _mm256_set_ph(
23623 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0,
23624 );
23625 assert_eq_m256h(r, e);
23626 }
23627
23628 #[simd_test(enable = "avx512fp16")]
23629 fn test_mm512_scalef_ph() {
23630 let a = _mm512_set1_ph(1.);
23631 let b = _mm512_set1_ph(3.);
23632 let r = _mm512_scalef_ph(a, b);
23633 let e = _mm512_set1_ph(8.0);
23634 assert_eq_m512h(r, e);
23635 }
23636
23637 #[simd_test(enable = "avx512fp16")]
23638 fn test_mm512_mask_scalef_ph() {
23639 let a = _mm512_set1_ph(1.);
23640 let b = _mm512_set1_ph(3.);
23641 let src = _mm512_set1_ph(2.);
23642 let r = _mm512_mask_scalef_ph(src, 0b01010101010101010101010101010101, a, b);
23643 let e = _mm512_set_ph(
23644 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0,
23645 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0,
23646 );
23647 assert_eq_m512h(r, e);
23648 }
23649
23650 #[simd_test(enable = "avx512fp16")]
23651 fn test_mm512_maskz_scalef_ph() {
23652 let a = _mm512_set1_ph(1.);
23653 let b = _mm512_set1_ph(3.);
23654 let r = _mm512_maskz_scalef_ph(0b01010101010101010101010101010101, a, b);
23655 let e = _mm512_set_ph(
23656 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0,
23657 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0,
23658 );
23659 assert_eq_m512h(r, e);
23660 }
23661
23662 #[simd_test(enable = "avx512fp16")]
23663 fn test_mm512_scalef_round_ph() {
23664 let a = _mm512_set1_ph(1.);
23665 let b = _mm512_set1_ph(3.);
23666 let r = _mm512_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
23667 let e = _mm512_set1_ph(8.0);
23668 assert_eq_m512h(r, e);
23669 }
23670
23671 #[simd_test(enable = "avx512fp16")]
23672 fn test_mm512_mask_scalef_round_ph() {
23673 let a = _mm512_set1_ph(1.);
23674 let b = _mm512_set1_ph(3.);
23675 let src = _mm512_set1_ph(2.);
23676 let r = _mm512_mask_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
23677 src,
23678 0b01010101010101010101010101010101,
23679 a,
23680 b,
23681 );
23682 let e = _mm512_set_ph(
23683 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0,
23684 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0,
23685 );
23686 assert_eq_m512h(r, e);
23687 }
23688
23689 #[simd_test(enable = "avx512fp16")]
23690 fn test_mm512_maskz_scalef_round_ph() {
23691 let a = _mm512_set1_ph(1.);
23692 let b = _mm512_set1_ph(3.);
23693 let r = _mm512_maskz_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
23694 0b01010101010101010101010101010101,
23695 a,
23696 b,
23697 );
23698 let e = _mm512_set_ph(
23699 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0,
23700 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0,
23701 );
23702 assert_eq_m512h(r, e);
23703 }
23704
23705 #[simd_test(enable = "avx512fp16,avx512vl")]
23706 fn test_mm_scalef_sh() {
23707 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23708 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23709 let r = _mm_scalef_sh(a, b);
23710 let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
23711 assert_eq_m128h(r, e);
23712 }
23713
23714 #[simd_test(enable = "avx512fp16,avx512vl")]
23715 fn test_mm_mask_scalef_sh() {
23716 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23717 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23718 let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
23719 let r = _mm_mask_scalef_sh(src, 0, a, b);
23720 let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23721 assert_eq_m128h(r, e);
23722 let r = _mm_mask_scalef_sh(src, 1, a, b);
23723 let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
23724 assert_eq_m128h(r, e);
23725 }
23726
23727 #[simd_test(enable = "avx512fp16,avx512vl")]
23728 fn test_mm_maskz_scalef_sh() {
23729 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23730 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23731 let r = _mm_maskz_scalef_sh(0, a, b);
23732 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23733 assert_eq_m128h(r, e);
23734 let r = _mm_maskz_scalef_sh(1, a, b);
23735 let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
23736 assert_eq_m128h(r, e);
23737 }
23738
23739 #[simd_test(enable = "avx512fp16,avx512vl")]
23740 fn test_mm_scalef_round_sh() {
23741 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23742 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23743 let r = _mm_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
23744 let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
23745 assert_eq_m128h(r, e);
23746 }
23747
23748 #[simd_test(enable = "avx512fp16,avx512vl")]
23749 fn test_mm_mask_scalef_round_sh() {
23750 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23751 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23752 let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
23753 let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
23754 src, 0, a, b,
23755 );
23756 let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23757 assert_eq_m128h(r, e);
23758 let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
23759 src, 1, a, b,
23760 );
23761 let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
23762 assert_eq_m128h(r, e);
23763 }
23764
23765 #[simd_test(enable = "avx512fp16,avx512vl")]
23766 fn test_mm_maskz_scalef_round_sh() {
23767 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23768 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23769 let r =
23770 _mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
23771 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23772 assert_eq_m128h(r, e);
23773 let r =
23774 _mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
23775 let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
23776 assert_eq_m128h(r, e);
23777 }
23778
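    // reduce returns x minus x rounded to M = imm8[7:4] fraction bits; 16 | _MM_FROUND_TO_ZERO
    // gives M = 1 with truncation, so 1.25 rounds down to 1.0 and the reduced value is 0.25.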
23779 #[simd_test(enable = "avx512fp16,avx512vl")]
23780 fn test_mm_reduce_ph() {
23781 let a = _mm_set1_ph(1.25);
23782 let r = _mm_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
23783 let e = _mm_set1_ph(0.25);
23784 assert_eq_m128h(r, e);
23785 }
23786
23787 #[simd_test(enable = "avx512fp16,avx512vl")]
23788 fn test_mm_mask_reduce_ph() {
23789 let a = _mm_set1_ph(1.25);
23790 let src = _mm_set1_ph(2.0);
23791 let r = _mm_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01010101, a);
23792 let e = _mm_set_ph(2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25);
23793 assert_eq_m128h(r, e);
23794 }
23795
23796 #[simd_test(enable = "avx512fp16,avx512vl")]
23797 fn test_mm_maskz_reduce_ph() {
23798 let a = _mm_set1_ph(1.25);
23799 let r = _mm_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01010101, a);
23800 let e = _mm_set_ph(0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25);
23801 assert_eq_m128h(r, e);
23802 }
23803
23804 #[simd_test(enable = "avx512fp16,avx512vl")]
23805 fn test_mm256_reduce_ph() {
23806 let a = _mm256_set1_ph(1.25);
23807 let r = _mm256_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
23808 let e = _mm256_set1_ph(0.25);
23809 assert_eq_m256h(r, e);
23810 }
23811
23812 #[simd_test(enable = "avx512fp16,avx512vl")]
23813 fn test_mm256_mask_reduce_ph() {
23814 let a = _mm256_set1_ph(1.25);
23815 let src = _mm256_set1_ph(2.0);
23816 let r = _mm256_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0101010101010101, a);
23817 let e = _mm256_set_ph(
23818 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
23819 );
23820 assert_eq_m256h(r, e);
23821 }
23822
23823 #[simd_test(enable = "avx512fp16,avx512vl")]
23824 fn test_mm256_maskz_reduce_ph() {
23825 let a = _mm256_set1_ph(1.25);
23826 let r = _mm256_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0101010101010101, a);
23827 let e = _mm256_set_ph(
23828 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
23829 );
23830 assert_eq_m256h(r, e);
23831 }
23832
23833 #[simd_test(enable = "avx512fp16")]
23834 fn test_mm512_reduce_ph() {
23835 let a = _mm512_set1_ph(1.25);
23836 let r = _mm512_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
23837 let e = _mm512_set1_ph(0.25);
23838 assert_eq_m512h(r, e);
23839 }
23840
23841 #[simd_test(enable = "avx512fp16")]
23842 fn test_mm512_mask_reduce_ph() {
23843 let a = _mm512_set1_ph(1.25);
23844 let src = _mm512_set1_ph(2.0);
23845 let r = _mm512_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(
23846 src,
23847 0b01010101010101010101010101010101,
23848 a,
23849 );
23850 let e = _mm512_set_ph(
23851 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
23852 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
23853 );
23854 assert_eq_m512h(r, e);
23855 }
23856
23857 #[simd_test(enable = "avx512fp16")]
23858 fn test_mm512_maskz_reduce_ph() {
23859 let a = _mm512_set1_ph(1.25);
23860 let r = _mm512_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(
23861 0b01010101010101010101010101010101,
23862 a,
23863 );
23864 let e = _mm512_set_ph(
23865 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
23866 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
23867 );
23868 assert_eq_m512h(r, e);
23869 }
23870
23871 #[simd_test(enable = "avx512fp16")]
23872 fn test_mm512_reduce_round_ph() {
23873 let a = _mm512_set1_ph(1.25);
23874 let r = _mm512_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a);
23875 let e = _mm512_set1_ph(0.25);
23876 assert_eq_m512h(r, e);
23877 }
23878
23879 #[simd_test(enable = "avx512fp16")]
23880 fn test_mm512_mask_reduce_round_ph() {
23881 let a = _mm512_set1_ph(1.25);
23882 let src = _mm512_set1_ph(2.0);
23883 let r = _mm512_mask_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
23884 src,
23885 0b01010101010101010101010101010101,
23886 a,
23887 );
23888 let e = _mm512_set_ph(
23889 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
23890 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
23891 );
23892 assert_eq_m512h(r, e);
23893 }
23894
23895 #[simd_test(enable = "avx512fp16")]
23896 fn test_mm512_maskz_reduce_round_ph() {
23897 let a = _mm512_set1_ph(1.25);
23898 let r = _mm512_maskz_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
23899 0b01010101010101010101010101010101,
23900 a,
23901 );
23902 let e = _mm512_set_ph(
23903 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
23904 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
23905 );
23906 assert_eq_m512h(r, e);
23907 }
23908
23909 #[simd_test(enable = "avx512fp16,avx512vl")]
23910 fn test_mm_reduce_sh() {
23911 let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23912 let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
23913 let r = _mm_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b);
23914 let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
23915 assert_eq_m128h(r, e);
23916 }
23917
23918 #[simd_test(enable = "avx512fp16,avx512vl")]
23919 fn test_mm_mask_reduce_sh() {
23920 let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23921 let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
23922 let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
23923 let r = _mm_mask_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0, a, b);
23924 let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23925 assert_eq_m128h(r, e);
23926 let r = _mm_mask_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 1, a, b);
23927 let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
23928 assert_eq_m128h(r, e);
23929 }
23930
23931 #[simd_test(enable = "avx512fp16,avx512vl")]
23932 fn test_mm_maskz_reduce_sh() {
23933 let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23934 let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
23935 let r = _mm_maskz_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(0, a, b);
23936 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23937 assert_eq_m128h(r, e);
23938 let r = _mm_maskz_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(1, a, b);
23939 let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
23940 assert_eq_m128h(r, e);
23941 }
23942
23943 #[simd_test(enable = "avx512fp16,avx512vl")]
23944 fn test_mm_reduce_round_sh() {
23945 let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23946 let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
23947 let r = _mm_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b);
23948 let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
23949 assert_eq_m128h(r, e);
23950 }
23951
23952 #[simd_test(enable = "avx512fp16,avx512vl")]
23953 fn test_mm_mask_reduce_round_sh() {
23954 let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23955 let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
23956 let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
23957 let r = _mm_mask_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
23958 src, 0, a, b,
23959 );
23960 let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23961 assert_eq_m128h(r, e);
23962 let r = _mm_mask_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
23963 src, 1, a, b,
23964 );
23965 let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
23966 assert_eq_m128h(r, e);
23967 }
23968
23969 #[simd_test(enable = "avx512fp16,avx512vl")]
23970 fn test_mm_maskz_reduce_round_sh() {
23971 let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23972 let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
23973 let r =
23974 _mm_maskz_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0, a, b);
23975 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23976 assert_eq_m128h(r, e);
23977 let r =
23978 _mm_maskz_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(1, a, b);
23979 let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
23980 assert_eq_m128h(r, e);
23981 }
23982
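    // reduce_add/mul/max/min collapse every lane into a single f16. Since 1.2 is not exactly
    // representable in half precision, the expected products (18.5, 342.3) already account for
    // the rounding of the inputs and of the intermediate results.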
23983 #[simd_test(enable = "avx512fp16,avx512vl")]
23984 const fn test_mm_reduce_add_ph() {
23985 let a = _mm_set1_ph(2.0);
23986 let r = _mm_reduce_add_ph(a);
23987 assert_eq!(r, 16.0);
23988 }
23989
23990 #[simd_test(enable = "avx512fp16,avx512vl")]
23991 const fn test_mm256_reduce_add_ph() {
23992 let a = _mm256_set1_ph(2.0);
23993 let r = _mm256_reduce_add_ph(a);
23994 assert_eq!(r, 32.0);
23995 }
23996
23997 #[simd_test(enable = "avx512fp16")]
23998 const fn test_mm512_reduce_add_ph() {
23999 let a = _mm512_set1_ph(2.0);
24000 let r = _mm512_reduce_add_ph(a);
24001 assert_eq!(r, 64.0);
24002 }
24003
24004 #[simd_test(enable = "avx512fp16,avx512vl")]
24005 const fn test_mm_reduce_mul_ph() {
24006 let a = _mm_set1_ph(2.0);
24007 let r = _mm_reduce_mul_ph(a);
24008 assert_eq!(r, 256.0);
24009 }
24010
24011 #[simd_test(enable = "avx512fp16,avx512vl")]
24012 const fn test_mm256_reduce_mul_ph() {
24013 let a = _mm256_set1_ph(1.2);
24014 let r = _mm256_reduce_mul_ph(a);
24015 assert_eq!(r, 18.5);
24016 }
24017
24018 #[simd_test(enable = "avx512fp16")]
24019 const fn test_mm512_reduce_mul_ph() {
24020 let a = _mm512_set1_ph(1.2);
24021 let r = _mm512_reduce_mul_ph(a);
24022 assert_eq!(r, 342.3);
24023 }
24024
24025 #[simd_test(enable = "avx512fp16,avx512vl")]
24026 fn test_mm_reduce_max_ph() {
24027 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24028 let r = _mm_reduce_max_ph(a);
24029 assert_eq!(r, 8.0);
24030 }
24031
24032 #[simd_test(enable = "avx512fp16,avx512vl")]
24033 fn test_mm256_reduce_max_ph() {
24034 let a = _mm256_set_ph(
24035 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24036 );
24037 let r = _mm256_reduce_max_ph(a);
24038 assert_eq!(r, 16.0);
24039 }
24040
24041 #[simd_test(enable = "avx512fp16")]
24042 fn test_mm512_reduce_max_ph() {
24043 let a = _mm512_set_ph(
24044 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24045 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
24046 31.0, 32.0,
24047 );
24048 let r = _mm512_reduce_max_ph(a);
24049 assert_eq!(r, 32.0);
24050 }
24051
24052 #[simd_test(enable = "avx512fp16,avx512vl")]
24053 fn test_mm_reduce_min_ph() {
24054 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24055 let r = _mm_reduce_min_ph(a);
24056 assert_eq!(r, 1.0);
24057 }
24058
24059 #[simd_test(enable = "avx512fp16,avx512vl")]
24060 fn test_mm256_reduce_min_ph() {
24061 let a = _mm256_set_ph(
24062 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24063 );
24064 let r = _mm256_reduce_min_ph(a);
24065 assert_eq!(r, 1.0);
24066 }
24067
24068 #[simd_test(enable = "avx512fp16")]
24069 fn test_mm512_reduce_min_ph() {
24070 let a = _mm512_set_ph(
24071 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24072 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
24073 31.0, 32.0,
24074 );
24075 let r = _mm512_reduce_min_ph(a);
24076 assert_eq!(r, 1.0);
24077 }
24078
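    // fpclass checks each lane against the categories selected by imm8 and returns a bitmask of
    // matching lanes; 0x18 sets bits 3 and 4, i.e. positive and negative infinity.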
24079 #[simd_test(enable = "avx512fp16,avx512vl")]
24080 fn test_mm_fpclass_ph_mask() {
24081 let a = _mm_set_ph(
24082 1.,
24083 f16::INFINITY,
24084 f16::NEG_INFINITY,
24085 0.0,
24086 -0.0,
24087 -2.0,
24088 f16::NAN,
24089 5.9e-8, // Denormal
24090 );
24091 let r = _mm_fpclass_ph_mask::<0x18>(a); // infinities
24092 assert_eq!(r, 0b01100000);
24093 }
24094
24095 #[simd_test(enable = "avx512fp16,avx512vl")]
24096 fn test_mm_mask_fpclass_ph_mask() {
24097 let a = _mm_set_ph(
24098 1.,
24099 f16::INFINITY,
24100 f16::NEG_INFINITY,
24101 0.0,
24102 -0.0,
24103 -2.0,
24104 f16::NAN,
24105 5.9e-8, // Denormal
24106 );
24107 let r = _mm_mask_fpclass_ph_mask::<0x18>(0b01010101, a);
24108 assert_eq!(r, 0b01000000);
24109 }
24110
24111 #[simd_test(enable = "avx512fp16,avx512vl")]
24112 fn test_mm256_fpclass_ph_mask() {
24113 let a = _mm256_set_ph(
24114 1.,
24115 f16::INFINITY,
24116 f16::NEG_INFINITY,
24117 0.0,
24118 -0.0,
24119 -2.0,
24120 f16::NAN,
24121 5.9e-8, // Denormal
24122 1.,
24123 f16::INFINITY,
24124 f16::NEG_INFINITY,
24125 0.0,
24126 -0.0,
24127 -2.0,
24128 f16::NAN,
24129 5.9e-8, // Denormal
24130 );
24131 let r = _mm256_fpclass_ph_mask::<0x18>(a); // infinities
24132 assert_eq!(r, 0b0110000001100000);
24133 }
24134
24135 #[simd_test(enable = "avx512fp16,avx512vl")]
24136 fn test_mm256_mask_fpclass_ph_mask() {
24137 let a = _mm256_set_ph(
24138 1.,
24139 f16::INFINITY,
24140 f16::NEG_INFINITY,
24141 0.0,
24142 -0.0,
24143 -2.0,
24144 f16::NAN,
24145 5.9e-8, // Denormal
24146 1.,
24147 f16::INFINITY,
24148 f16::NEG_INFINITY,
24149 0.0,
24150 -0.0,
24151 -2.0,
24152 f16::NAN,
24153 5.9e-8, // Denormal
24154 );
24155 let r = _mm256_mask_fpclass_ph_mask::<0x18>(0b0101010101010101, a);
24156 assert_eq!(r, 0b0100000001000000);
24157 }
24158
24159 #[simd_test(enable = "avx512fp16")]
24160 fn test_mm512_fpclass_ph_mask() {
24161 let a = _mm512_set_ph(
24162 1.,
24163 f16::INFINITY,
24164 f16::NEG_INFINITY,
24165 0.0,
24166 -0.0,
24167 -2.0,
24168 f16::NAN,
24169 5.9e-8, // Denormal
24170 1.,
24171 f16::INFINITY,
24172 f16::NEG_INFINITY,
24173 0.0,
24174 -0.0,
24175 -2.0,
24176 f16::NAN,
24177 5.9e-8, // Denormal
24178 1.,
24179 f16::INFINITY,
24180 f16::NEG_INFINITY,
24181 0.0,
24182 -0.0,
24183 -2.0,
24184 f16::NAN,
24185 5.9e-8, // Denormal
24186 1.,
24187 f16::INFINITY,
24188 f16::NEG_INFINITY,
24189 0.0,
24190 -0.0,
24191 -2.0,
24192 f16::NAN,
24193 5.9e-8, // Denormal
24194 );
24195 let r = _mm512_fpclass_ph_mask::<0x18>(a); // infinities
24196 assert_eq!(r, 0b01100000011000000110000001100000);
24197 }
24198
24199 #[simd_test(enable = "avx512fp16")]
24200 fn test_mm512_mask_fpclass_ph_mask() {
24201 let a = _mm512_set_ph(
24202 1.,
24203 f16::INFINITY,
24204 f16::NEG_INFINITY,
24205 0.0,
24206 -0.0,
24207 -2.0,
24208 f16::NAN,
24209 5.9e-8, // Denormal
24210 1.,
24211 f16::INFINITY,
24212 f16::NEG_INFINITY,
24213 0.0,
24214 -0.0,
24215 -2.0,
24216 f16::NAN,
24217 5.9e-8, // Denormal
24218 1.,
24219 f16::INFINITY,
24220 f16::NEG_INFINITY,
24221 0.0,
24222 -0.0,
24223 -2.0,
24224 f16::NAN,
24225 5.9e-8, // Denormal
24226 1.,
24227 f16::INFINITY,
24228 f16::NEG_INFINITY,
24229 0.0,
24230 -0.0,
24231 -2.0,
24232 f16::NAN,
24233 5.9e-8, // Denormal
24234 );
24235 let r = _mm512_mask_fpclass_ph_mask::<0x18>(0b01010101010101010101010101010101, a);
24236 assert_eq!(r, 0b01000000010000000100000001000000);
24237 }
24238
24239 #[simd_test(enable = "avx512fp16")]
24240 fn test_mm_fpclass_sh_mask() {
24241 let a = _mm_set_sh(f16::INFINITY);
24242 let r = _mm_fpclass_sh_mask::<0x18>(a);
24243 assert_eq!(r, 1);
24244 }
24245
24246 #[simd_test(enable = "avx512fp16")]
24247 fn test_mm_mask_fpclass_sh_mask() {
24248 let a = _mm_set_sh(f16::INFINITY);
24249 let r = _mm_mask_fpclass_sh_mask::<0x18>(0, a);
24250 assert_eq!(r, 0);
24251 let r = _mm_mask_fpclass_sh_mask::<0x18>(1, a);
24252 assert_eq!(r, 1);
24253 }
24254
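    // mask_blend picks the lane from `b` where the mask bit is set and from `a` where it is clear.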
24255 #[simd_test(enable = "avx512fp16,avx512vl")]
24256 const fn test_mm_mask_blend_ph() {
24257 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24258 let b = _mm_set_ph(-1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0);
24259 let r = _mm_mask_blend_ph(0b01010101, a, b);
24260 let e = _mm_set_ph(1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0);
24261 assert_eq_m128h(r, e);
24262 }
24263
24264 #[simd_test(enable = "avx512fp16,avx512vl")]
24265 const fn test_mm256_mask_blend_ph() {
24266 let a = _mm256_set_ph(
24267 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24268 );
24269 let b = _mm256_set_ph(
24270 -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0,
24271 -14.0, -15.0, -16.0,
24272 );
24273 let r = _mm256_mask_blend_ph(0b0101010101010101, a, b);
24274 let e = _mm256_set_ph(
24275 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, -14.0, 15.0,
24276 -16.0,
24277 );
24278 assert_eq_m256h(r, e);
24279 }
24280
24281 #[simd_test(enable = "avx512fp16")]
24282 const fn test_mm512_mask_blend_ph() {
24283 let a = _mm512_set_ph(
24284 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24285 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
24286 31.0, 32.0,
24287 );
24288 let b = _mm512_set_ph(
24289 -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0,
24290 -14.0, -15.0, -16.0, -17.0, -18.0, -19.0, -20.0, -21.0, -22.0, -23.0, -24.0, -25.0,
24291 -26.0, -27.0, -28.0, -29.0, -30.0, -31.0, -32.0,
24292 );
24293 let r = _mm512_mask_blend_ph(0b01010101010101010101010101010101, a, b);
24294 let e = _mm512_set_ph(
24295 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, -14.0, 15.0,
24296 -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0, 27.0, -28.0,
24297 29.0, -30.0, 31.0, -32.0,
24298 );
24299 assert_eq_m512h(r, e);
24300 }
24301
24302 #[simd_test(enable = "avx512fp16,avx512vl")]
24303 fn test_mm_permutex2var_ph() {
24304 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24305 let b = _mm_setr_ph(9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
24306 let idx = _mm_setr_epi16(0, 2, 4, 6, 8, 10, 12, 14);
24307 let r = _mm_permutex2var_ph(a, idx, b);
24308 let e = _mm_setr_ph(1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0);
24309 assert_eq_m128h(r, e);
24310 }
24311
24312 #[simd_test(enable = "avx512fp16,avx512vl")]
24313 fn test_mm256_permutex2var_ph() {
24314 let a = _mm256_setr_ph(
24315 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24316 );
24317 let b = _mm256_setr_ph(
24318 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
24319 31.0, 32.0,
24320 );
24321 let idx = _mm256_setr_epi16(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
24322 let r = _mm256_permutex2var_ph(a, idx, b);
24323 let e = _mm256_setr_ph(
24324 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0,
24325 31.0,
24326 );
24327 assert_eq_m256h(r, e);
24328 }
24329
24330 #[simd_test(enable = "avx512fp16")]
24331 fn test_mm512_permutex2var_ph() {
24332 let a = _mm512_setr_ph(
24333 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24334 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
24335 31.0, 32.0,
24336 );
24337 let b = _mm512_setr_ph(
24338 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0,
24339 47.0, 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0,
24340 61.0, 62.0, 63.0, 64.0,
24341 );
24342 let idx = _mm512_set_epi16(
24343 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, 30, 28, 26, 24, 22, 20,
24344 18, 16, 14, 12, 10, 8, 6, 4, 2, 0,
24345 );
24346 let r = _mm512_permutex2var_ph(a, idx, b);
24347 let e = _mm512_setr_ph(
24348 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0,
24349 31.0, 33.0, 35.0, 37.0, 39.0, 41.0, 43.0, 45.0, 47.0, 49.0, 51.0, 53.0, 55.0, 57.0,
24350 59.0, 61.0, 63.0,
24351 );
24352 assert_eq_m512h(r, e);
24353 }
24354
24355 #[simd_test(enable = "avx512fp16,avx512vl")]
24356 fn test_mm_permutexvar_ph() {
24357 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24358 let idx = _mm_set_epi16(0, 2, 4, 6, 1, 3, 5, 7);
24359 let r = _mm_permutexvar_ph(idx, a);
24360 let e = _mm_setr_ph(1.0, 3.0, 5.0, 7.0, 2.0, 4.0, 6.0, 8.0);
24361 assert_eq_m128h(r, e);
24362 }
24363
24364 #[simd_test(enable = "avx512fp16,avx512vl")]
24365 fn test_mm256_permutexvar_ph() {
24366 let a = _mm256_set_ph(
24367 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24368 );
24369 let idx = _mm256_set_epi16(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
24370 let r = _mm256_permutexvar_ph(idx, a);
24371 let e = _mm256_setr_ph(
24372 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0,
24373 );
24374 assert_eq_m256h(r, e);
24375 }
24376
24377 #[simd_test(enable = "avx512fp16")]
24378 fn test_mm512_permutexvar_ph() {
24379 let a = _mm512_set_ph(
24380 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24381 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
24382 31.0, 32.0,
24383 );
24384 let idx = _mm512_set_epi16(
24385 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 1, 3, 5, 7, 9, 11, 13, 15,
24386 17, 19, 21, 23, 25, 27, 29, 31,
24387 );
24388 let r = _mm512_permutexvar_ph(idx, a);
24389 let e = _mm512_setr_ph(
24390 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0,
24391 31.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 22.0, 24.0, 26.0, 28.0,
24392 30.0, 32.0,
24393 );
24394 assert_eq_m512h(r, e);
24395 }
24396
24397 #[simd_test(enable = "avx512fp16,avx512vl")]
24398 fn test_mm_cvtepi16_ph() {
24399 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
24400 let r = _mm_cvtepi16_ph(a);
24401 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24402 assert_eq_m128h(r, e);
24403 }
24404
24405 #[simd_test(enable = "avx512fp16,avx512vl")]
24406 fn test_mm_mask_cvtepi16_ph() {
24407 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
24408 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24409 let r = _mm_mask_cvtepi16_ph(src, 0b01010101, a);
24410 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
24411 assert_eq_m128h(r, e);
24412 }
24413
24414 #[simd_test(enable = "avx512fp16,avx512vl")]
24415 fn test_mm_maskz_cvtepi16_ph() {
24416 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
24417 let r = _mm_maskz_cvtepi16_ph(0b01010101, a);
24418 let e = _mm_set_ph(0., 2., 0., 4., 0., 6., 0., 8.);
24419 assert_eq_m128h(r, e);
24420 }
24421
24422 #[simd_test(enable = "avx512fp16,avx512vl")]
24423 fn test_mm256_cvtepi16_ph() {
24424 let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24425 let r = _mm256_cvtepi16_ph(a);
24426 let e = _mm256_set_ph(
24427 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24428 );
24429 assert_eq_m256h(r, e);
24430 }
24431
24432 #[simd_test(enable = "avx512fp16,avx512vl")]
24433 fn test_mm256_mask_cvtepi16_ph() {
24434 let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24435 let src = _mm256_set_ph(
24436 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
24437 );
24438 let r = _mm256_mask_cvtepi16_ph(src, 0b0101010101010101, a);
24439 let e = _mm256_set_ph(
24440 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16.,
24441 );
24442 assert_eq_m256h(r, e);
24443 }
24444
24445 #[simd_test(enable = "avx512fp16,avx512vl")]
24446 fn test_mm256_maskz_cvtepi16_ph() {
24447 let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24448 let r = _mm256_maskz_cvtepi16_ph(0b0101010101010101, a);
24449 let e = _mm256_set_ph(
24450 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16.,
24451 );
24452 assert_eq_m256h(r, e);
24453 }
24454
24455 #[simd_test(enable = "avx512fp16")]
24456 fn test_mm512_cvtepi16_ph() {
24457 let a = _mm512_set_epi16(
24458 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24459 25, 26, 27, 28, 29, 30, 31, 32,
24460 );
24461 let r = _mm512_cvtepi16_ph(a);
24462 let e = _mm512_set_ph(
24463 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24464 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
24465 31.0, 32.0,
24466 );
24467 assert_eq_m512h(r, e);
24468 }
24469
24470 #[simd_test(enable = "avx512fp16")]
24471 fn test_mm512_mask_cvtepi16_ph() {
24472 let a = _mm512_set_epi16(
24473 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24474 25, 26, 27, 28, 29, 30, 31, 32,
24475 );
24476 let src = _mm512_set_ph(
24477 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
24478 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41.,
24479 );
24480 let r = _mm512_mask_cvtepi16_ph(src, 0b01010101010101010101010101010101, a);
24481 let e = _mm512_set_ph(
24482 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18.,
24483 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32.,
24484 );
24485 assert_eq_m512h(r, e);
24486 }
24487
24488 #[simd_test(enable = "avx512fp16")]
24489 fn test_mm512_maskz_cvtepi16_ph() {
24490 let a = _mm512_set_epi16(
24491 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24492 25, 26, 27, 28, 29, 30, 31, 32,
24493 );
24494 let r = _mm512_maskz_cvtepi16_ph(0b01010101010101010101010101010101, a);
24495 let e = _mm512_set_ph(
24496 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20.,
24497 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32.,
24498 );
24499 assert_eq_m512h(r, e);
24500 }
24501
24502 #[simd_test(enable = "avx512fp16")]
24503 fn test_mm512_cvt_roundepi16_ph() {
24504 let a = _mm512_set_epi16(
24505 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24506 25, 26, 27, 28, 29, 30, 31, 32,
24507 );
24508 let r = _mm512_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
24509 let e = _mm512_set_ph(
24510 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24511 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
24512 31.0, 32.0,
24513 );
24514 assert_eq_m512h(r, e);
24515 }
24516
24517 #[simd_test(enable = "avx512fp16")]
24518 fn test_mm512_mask_cvt_roundepi16_ph() {
24519 let a = _mm512_set_epi16(
24520 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24521 25, 26, 27, 28, 29, 30, 31, 32,
24522 );
24523 let src = _mm512_set_ph(
24524 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
24525 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41.,
24526 );
24527 let r = _mm512_mask_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24528 src,
24529 0b01010101010101010101010101010101,
24530 a,
24531 );
24532 let e = _mm512_set_ph(
24533 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18.,
24534 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32.,
24535 );
24536 assert_eq_m512h(r, e);
24537 }
24538
24539 #[simd_test(enable = "avx512fp16")]
24540 fn test_mm512_maskz_cvt_roundepi16_ph() {
24541 let a = _mm512_set_epi16(
24542 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24543 25, 26, 27, 28, 29, 30, 31, 32,
24544 );
24545 let r = _mm512_maskz_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24546 0b01010101010101010101010101010101,
24547 a,
24548 );
24549 let e = _mm512_set_ph(
24550 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20.,
24551 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32.,
24552 );
24553 assert_eq_m512h(r, e);
24554 }
24555
24556 #[simd_test(enable = "avx512fp16,avx512vl")]
24557 fn test_mm_cvtepu16_ph() {
24558 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
24559 let r = _mm_cvtepu16_ph(a);
24560 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24561 assert_eq_m128h(r, e);
24562 }
24563
24564 #[simd_test(enable = "avx512fp16,avx512vl")]
24565 fn test_mm_mask_cvtepu16_ph() {
24566 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
24567 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24568 let r = _mm_mask_cvtepu16_ph(src, 0b01010101, a);
24569 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
24570 assert_eq_m128h(r, e);
24571 }
24572
24573 #[simd_test(enable = "avx512fp16,avx512vl")]
24574 fn test_mm_maskz_cvtepu16_ph() {
24575 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
24576 let r = _mm_maskz_cvtepu16_ph(0b01010101, a);
24577 let e = _mm_set_ph(0., 2., 0., 4., 0., 6., 0., 8.);
24578 assert_eq_m128h(r, e);
24579 }
24580
24581 #[simd_test(enable = "avx512fp16,avx512vl")]
24582 fn test_mm256_cvtepu16_ph() {
24583 let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24584 let r = _mm256_cvtepu16_ph(a);
24585 let e = _mm256_set_ph(
24586 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24587 );
24588 assert_eq_m256h(r, e);
24589 }
24590
24591 #[simd_test(enable = "avx512fp16,avx512vl")]
24592 fn test_mm256_mask_cvtepu16_ph() {
24593 let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24594 let src = _mm256_set_ph(
24595 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
24596 );
24597 let r = _mm256_mask_cvtepu16_ph(src, 0b0101010101010101, a);
24598 let e = _mm256_set_ph(
24599 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16.,
24600 );
24601 assert_eq_m256h(r, e);
24602 }
24603
24604 #[simd_test(enable = "avx512fp16,avx512vl")]
24605 fn test_mm256_maskz_cvtepu16_ph() {
24606 let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24607 let r = _mm256_maskz_cvtepu16_ph(0b0101010101010101, a);
24608 let e = _mm256_set_ph(
24609 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16.,
24610 );
24611 assert_eq_m256h(r, e);
24612 }
24613
24614 #[simd_test(enable = "avx512fp16")]
24615 fn test_mm512_cvtepu16_ph() {
24616 let a = _mm512_set_epi16(
24617 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24618 25, 26, 27, 28, 29, 30, 31, 32,
24619 );
24620 let r = _mm512_cvtepu16_ph(a);
24621 let e = _mm512_set_ph(
24622 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24623 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
24624 31.0, 32.0,
24625 );
24626 assert_eq_m512h(r, e);
24627 }
24628
24629 #[simd_test(enable = "avx512fp16")]
24630 fn test_mm512_mask_cvtepu16_ph() {
24631 let a = _mm512_set_epi16(
24632 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24633 25, 26, 27, 28, 29, 30, 31, 32,
24634 );
24635 let src = _mm512_set_ph(
24636 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
24637 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41.,
24638 );
24639 let r = _mm512_mask_cvtepu16_ph(src, 0b01010101010101010101010101010101, a);
24640 let e = _mm512_set_ph(
24641 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18.,
24642 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32.,
24643 );
24644 assert_eq_m512h(r, e);
24645 }
24646
24647 #[simd_test(enable = "avx512fp16")]
24648 fn test_mm512_maskz_cvtepu16_ph() {
24649 let a = _mm512_set_epi16(
24650 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24651 25, 26, 27, 28, 29, 30, 31, 32,
24652 );
24653 let r = _mm512_maskz_cvtepu16_ph(0b01010101010101010101010101010101, a);
24654 let e = _mm512_set_ph(
24655 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20.,
24656 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32.,
24657 );
24658 assert_eq_m512h(r, e);
24659 }
24660
24661 #[simd_test(enable = "avx512fp16")]
24662 fn test_mm512_cvt_roundepu16_ph() {
24663 let a = _mm512_set_epi16(
24664 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24665 25, 26, 27, 28, 29, 30, 31, 32,
24666 );
24667 let r = _mm512_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
24668 let e = _mm512_set_ph(
24669 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24670 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
24671 31.0, 32.0,
24672 );
24673 assert_eq_m512h(r, e);
24674 }
24675
24676 #[simd_test(enable = "avx512fp16")]
24677 fn test_mm512_mask_cvt_roundepu16_ph() {
24678 let a = _mm512_set_epi16(
24679 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24680 25, 26, 27, 28, 29, 30, 31, 32,
24681 );
24682 let src = _mm512_set_ph(
24683 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
24684 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41.,
24685 );
24686 let r = _mm512_mask_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24687 src,
24688 0b01010101010101010101010101010101,
24689 a,
24690 );
24691 let e = _mm512_set_ph(
24692 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18.,
24693 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32.,
24694 );
24695 assert_eq_m512h(r, e);
24696 }
24697
24698 #[simd_test(enable = "avx512fp16")]
24699 fn test_mm512_maskz_cvt_roundepu16_ph() {
24700 let a = _mm512_set_epi16(
24701 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24702 25, 26, 27, 28, 29, 30, 31, 32,
24703 );
24704 let r = _mm512_maskz_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24705 0b01010101010101010101010101010101,
24706 a,
24707 );
24708 let e = _mm512_set_ph(
24709 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20.,
24710 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32.,
24711 );
24712 assert_eq_m512h(r, e);
24713 }
24714
24715 #[simd_test(enable = "avx512fp16,avx512vl")]
24716 fn test_mm_cvtepi32_ph() {
24717 let a = _mm_set_epi32(1, 2, 3, 4);
24718 let r = _mm_cvtepi32_ph(a);
24719 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
24720 assert_eq_m128h(r, e);
24721 }
24722
24723 #[simd_test(enable = "avx512fp16,avx512vl")]
24724 fn test_mm_mask_cvtepi32_ph() {
24725 let a = _mm_set_epi32(1, 2, 3, 4);
24726 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24727 let r = _mm_mask_cvtepi32_ph(src, 0b0101, a);
24728 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2., 16., 4.);
24729 assert_eq_m128h(r, e);
24730 }
24731
24732 #[simd_test(enable = "avx512fp16,avx512vl")]
24733 fn test_mm_maskz_cvtepi32_ph() {
24734 let a = _mm_set_epi32(1, 2, 3, 4);
24735 let r = _mm_maskz_cvtepi32_ph(0b0101, a);
24736 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2., 0.0, 4.);
24737 assert_eq_m128h(r, e);
24738 }
24739
24740 #[simd_test(enable = "avx512fp16,avx512vl")]
24741 fn test_mm256_cvtepi32_ph() {
24742 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
24743 let r = _mm256_cvtepi32_ph(a);
24744 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24745 assert_eq_m128h(r, e);
24746 }
24747
24748 #[simd_test(enable = "avx512fp16,avx512vl")]
24749 fn test_mm256_mask_cvtepi32_ph() {
24750 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
24751 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24752 let r = _mm256_mask_cvtepi32_ph(src, 0b01010101, a);
24753 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
24754 assert_eq_m128h(r, e);
24755 }
24756
24757 #[simd_test(enable = "avx512fp16,avx512vl")]
24758 fn test_mm256_maskz_cvtepi32_ph() {
24759 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
24760 let r = _mm256_maskz_cvtepi32_ph(0b01010101, a);
24761 let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
24762 assert_eq_m128h(r, e);
24763 }
24764
24765 #[simd_test(enable = "avx512fp16")]
24766 fn test_mm512_cvtepi32_ph() {
24767 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24768 let r = _mm512_cvtepi32_ph(a);
24769 let e = _mm256_set_ph(
24770 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24771 );
24772 assert_eq_m256h(r, e);
24773 }
24774
24775 #[simd_test(enable = "avx512fp16,avx512vl")]
24776 fn test_mm512_mask_cvtepi32_ph() {
24777 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24778 let src = _mm256_set_ph(
24779 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
24780 );
24781 let r = _mm512_mask_cvtepi32_ph(src, 0b0101010101010101, a);
24782 let e = _mm256_set_ph(
24783 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16.,
24784 );
24785 assert_eq_m256h(r, e);
24786 }
24787
24788 #[simd_test(enable = "avx512fp16,avx512vl")]
24789 fn test_mm512_maskz_cvtepi32_ph() {
24790 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24791 let r = _mm512_maskz_cvtepi32_ph(0b0101010101010101, a);
24792 let e = _mm256_set_ph(
24793 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
24794 );
24795 assert_eq_m256h(r, e);
24796 }
24797
24798 #[simd_test(enable = "avx512fp16,avx512vl")]
24799 fn test_mm512_cvt_roundepi32_ph() {
24800 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24801 let r = _mm512_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
24802 let e = _mm256_set_ph(
24803 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24804 );
24805 assert_eq_m256h(r, e);
24806 }
24807
24808 #[simd_test(enable = "avx512fp16,avx512vl")]
24809 fn test_mm512_mask_cvt_roundepi32_ph() {
24810 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24811 let src = _mm256_set_ph(
24812 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
24813 );
24814 let r = _mm512_mask_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24815 src,
24816 0b0101010101010101,
24817 a,
24818 );
24819 let e = _mm256_set_ph(
24820 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16.,
24821 );
24822 assert_eq_m256h(r, e);
24823 }
24824
24825 #[simd_test(enable = "avx512fp16,avx512vl")]
24826 fn test_mm512_maskz_cvt_roundepi32_ph() {
24827 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24828 let r = _mm512_maskz_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24829 0b0101010101010101,
24830 a,
24831 );
24832 let e = _mm256_set_ph(
24833 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
24834 );
24835 assert_eq_m256h(r, e);
24836 }
24837
24838 #[simd_test(enable = "avx512fp16,avx512vl")]
24839 fn test_mm_cvti32_sh() {
24840 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24841 let r = _mm_cvti32_sh(a, 10);
24842 let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24843 assert_eq_m128h(r, e);
24844 }
24845
24846 #[simd_test(enable = "avx512fp16,avx512vl")]
24847 fn test_mm_cvt_roundi32_sh() {
24848 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24849 let r = _mm_cvt_roundi32_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10);
24850 let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24851 assert_eq_m128h(r, e);
24852 }
24853
24854 #[simd_test(enable = "avx512fp16,avx512vl")]
24855 fn test_mm_cvtepu32_ph() {
24856 let a = _mm_set_epi32(1, 2, 3, 4);
24857 let r = _mm_cvtepu32_ph(a);
24858 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
24859 assert_eq_m128h(r, e);
24860 }
24861
24862 #[simd_test(enable = "avx512fp16,avx512vl")]
24863 fn test_mm_mask_cvtepu32_ph() {
24864 let a = _mm_set_epi32(1, 2, 3, 4);
24865 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24866 let r = _mm_mask_cvtepu32_ph(src, 0b0101, a);
24867 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2., 16., 4.);
24868 assert_eq_m128h(r, e);
24869 }
24870
24871 #[simd_test(enable = "avx512fp16,avx512vl")]
24872 fn test_mm_maskz_cvtepu32_ph() {
24873 let a = _mm_set_epi32(1, 2, 3, 4);
24874 let r = _mm_maskz_cvtepu32_ph(0b0101, a);
24875 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2., 0.0, 4.);
24876 assert_eq_m128h(r, e);
24877 }
24878
24879 #[simd_test(enable = "avx512fp16,avx512vl")]
24880 fn test_mm256_cvtepu32_ph() {
24881 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
24882 let r = _mm256_cvtepu32_ph(a);
24883 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24884 assert_eq_m128h(r, e);
24885 }
24886
24887 #[simd_test(enable = "avx512fp16,avx512vl")]
24888 fn test_mm256_mask_cvtepu32_ph() {
24889 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
24890 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24891 let r = _mm256_mask_cvtepu32_ph(src, 0b01010101, a);
24892 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
24893 assert_eq_m128h(r, e);
24894 }
24895
24896 #[simd_test(enable = "avx512fp16,avx512vl")]
24897 fn test_mm256_maskz_cvtepu32_ph() {
24898 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
24899 let r = _mm256_maskz_cvtepu32_ph(0b01010101, a);
24900 let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
24901 assert_eq_m128h(r, e);
24902 }
24903
24904 #[simd_test(enable = "avx512fp16,avx512vl")]
24905 fn test_mm512_cvtepu32_ph() {
24906 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24907 let r = _mm512_cvtepu32_ph(a);
24908 let e = _mm256_set_ph(
24909 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24910 );
24911 assert_eq_m256h(r, e);
24912 }
24913
24914 #[simd_test(enable = "avx512fp16,avx512vl")]
24915 fn test_mm512_mask_cvtepu32_ph() {
24916 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24917 let src = _mm256_set_ph(
24918 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
24919 );
24920 let r = _mm512_mask_cvtepu32_ph(src, 0b0101010101010101, a);
24921 let e = _mm256_set_ph(
24922 10., 2.0, 12., 4.0, 14., 6.0, 16., 8.0, 18., 10.0, 20., 12.0, 22., 14.0, 24., 16.0,
24923 );
24924 assert_eq_m256h(r, e);
24925 }
24926
24927 #[simd_test(enable = "avx512fp16,avx512vl")]
24928 fn test_mm512_maskz_cvtepu32_ph() {
24929 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24930 let r = _mm512_maskz_cvtepu32_ph(0b0101010101010101, a);
24931 let e = _mm256_set_ph(
24932 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
24933 );
24934 assert_eq_m256h(r, e);
24935 }
24936
24937 #[simd_test(enable = "avx512fp16,avx512vl")]
24938 fn test_mm512_cvt_roundepu32_ph() {
24939 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24940 let r = _mm512_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
24941 let e = _mm256_set_ph(
24942 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24943 );
24944 assert_eq_m256h(r, e);
24945 }
24946
24947 #[simd_test(enable = "avx512fp16,avx512vl")]
24948 fn test_mm512_mask_cvt_roundepu32_ph() {
24949 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24950 let src = _mm256_set_ph(
24951 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
24952 );
24953 let r = _mm512_mask_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24954 src,
24955 0b0101010101010101,
24956 a,
24957 );
24958 let e = _mm256_set_ph(
24959 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0,
24960 16.0,
24961 );
24962 assert_eq_m256h(r, e);
24963 }
24964
24965 #[simd_test(enable = "avx512fp16,avx512vl")]
24966 fn test_mm512_maskz_cvt_roundepu32_ph() {
24967 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24968 let r = _mm512_maskz_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24969 0b0101010101010101,
24970 a,
24971 );
24972 let e = _mm256_set_ph(
24973 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
24974 );
24975 assert_eq_m256h(r, e);
24976 }
24977
24978 #[simd_test(enable = "avx512fp16,avx512vl")]
24979 fn test_mm_cvtu32_sh() {
24980 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24981 let r = _mm_cvtu32_sh(a, 10);
24982 let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24983 assert_eq_m128h(r, e);
24984 }
24985
24986 #[simd_test(enable = "avx512fp16,avx512vl")]
24987 fn test_mm_cvt_roundu32_sh() {
24988 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24989 let r = _mm_cvt_roundu32_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10);
24990 let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24991 assert_eq_m128h(r, e);
24992 }
24993
24994 #[simd_test(enable = "avx512fp16,avx512vl")]
24995 fn test_mm_cvtepi64_ph() {
24996 let a = _mm_set_epi64x(1, 2);
24997 let r = _mm_cvtepi64_ph(a);
24998 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
24999 assert_eq_m128h(r, e);
25000 }
25001
25002 #[simd_test(enable = "avx512fp16,avx512vl")]
25003 fn test_mm_mask_cvtepi64_ph() {
25004 let a = _mm_set_epi64x(1, 2);
25005 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25006 let r = _mm_mask_cvtepi64_ph(src, 0b01, a);
25007 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.);
25008 assert_eq_m128h(r, e);
25009 }
25010
25011 #[simd_test(enable = "avx512fp16,avx512vl")]
25012 fn test_mm_maskz_cvtepi64_ph() {
25013 let a = _mm_set_epi64x(1, 2);
25014 let r = _mm_maskz_cvtepi64_ph(0b01, a);
25015 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.);
25016 assert_eq_m128h(r, e);
25017 }
25018
25019 #[simd_test(enable = "avx512fp16,avx512vl")]
25020 fn test_mm256_cvtepi64_ph() {
25021 let a = _mm256_set_epi64x(1, 2, 3, 4);
25022 let r = _mm256_cvtepi64_ph(a);
25023 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
25024 assert_eq_m128h(r, e);
25025 }
25026
25027 #[simd_test(enable = "avx512fp16,avx512vl")]
25028 fn test_mm256_mask_cvtepi64_ph() {
25029 let a = _mm256_set_epi64x(1, 2, 3, 4);
25030 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25031 let r = _mm256_mask_cvtepi64_ph(src, 0b0101, a);
25032 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0);
25033 assert_eq_m128h(r, e);
25034 }
25035
25036 #[simd_test(enable = "avx512fp16,avx512vl")]
25037 fn test_mm256_maskz_cvtepi64_ph() {
25038 let a = _mm256_set_epi64x(1, 2, 3, 4);
25039 let r = _mm256_maskz_cvtepi64_ph(0b0101, a);
25040 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0);
25041 assert_eq_m128h(r, e);
25042 }
25043
25044 #[simd_test(enable = "avx512fp16,avx512vl")]
25045 fn test_mm512_cvtepi64_ph() {
25046 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
25047 let r = _mm512_cvtepi64_ph(a);
25048 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25049 assert_eq_m128h(r, e);
25050 }
25051
25052 #[simd_test(enable = "avx512fp16,avx512vl")]
25053 fn test_mm512_mask_cvtepi64_ph() {
25054 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
25055 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25056 let r = _mm512_mask_cvtepi64_ph(src, 0b01010101, a);
25057 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
25058 assert_eq_m128h(r, e);
25059 }
25060
25061 #[simd_test(enable = "avx512fp16,avx512vl")]
25062 fn test_mm512_maskz_cvtepi64_ph() {
25063 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
25064 let r = _mm512_maskz_cvtepi64_ph(0b01010101, a);
25065 let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
25066 assert_eq_m128h(r, e);
25067 }
25068
25069 #[simd_test(enable = "avx512fp16,avx512vl")]
25070 fn test_mm512_cvt_roundepi64_ph() {
25071 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
25072 let r = _mm512_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
25073 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25074 assert_eq_m128h(r, e);
25075 }
25076
25077 #[simd_test(enable = "avx512fp16")]
25078 fn test_mm512_mask_cvt_roundepi64_ph() {
25079 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
25080 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25081 let r = _mm512_mask_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25082 src, 0b01010101, a,
25083 );
25084 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
25085 assert_eq_m128h(r, e);
25086 }
25087
25088 #[simd_test(enable = "avx512fp16,avx512vl")]
25089 fn test_mm512_maskz_cvt_roundepi64_ph() {
25090 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
25091 let r = _mm512_maskz_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25092 0b01010101, a,
25093 );
25094 let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
25095 assert_eq_m128h(r, e);
25096 }
25097
25098 #[simd_test(enable = "avx512fp16,avx512vl")]
25099 fn test_mm_cvtepu64_ph() {
25100 let a = _mm_set_epi64x(1, 2);
25101 let r = _mm_cvtepu64_ph(a);
25102 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
25103 assert_eq_m128h(r, e);
25104 }
25105
25106 #[simd_test(enable = "avx512fp16,avx512vl")]
25107 fn test_mm_mask_cvtepu64_ph() {
25108 let a = _mm_set_epi64x(1, 2);
25109 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25110 let r = _mm_mask_cvtepu64_ph(src, 0b01, a);
25111 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.);
25112 assert_eq_m128h(r, e);
25113 }
25114
25115 #[simd_test(enable = "avx512fp16,avx512vl")]
25116 fn test_mm_maskz_cvtepu64_ph() {
25117 let a = _mm_set_epi64x(1, 2);
25118 let r = _mm_maskz_cvtepu64_ph(0b01, a);
25119 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0);
25120 assert_eq_m128h(r, e);
25121 }
25122
25123 #[simd_test(enable = "avx512fp16,avx512vl")]
25124 fn test_mm256_cvtepu64_ph() {
25125 let a = _mm256_set_epi64x(1, 2, 3, 4);
25126 let r = _mm256_cvtepu64_ph(a);
25127 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
25128 assert_eq_m128h(r, e);
25129 }
25130
25131 #[simd_test(enable = "avx512fp16,avx512vl")]
25132 fn test_mm256_mask_cvtepu64_ph() {
25133 let a = _mm256_set_epi64x(1, 2, 3, 4);
25134 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25135 let r = _mm256_mask_cvtepu64_ph(src, 0b0101, a);
25136 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0);
25137 assert_eq_m128h(r, e);
25138 }
25139
25140 #[simd_test(enable = "avx512fp16,avx512vl")]
25141 fn test_mm256_maskz_cvtepu64_ph() {
25142 let a = _mm256_set_epi64x(1, 2, 3, 4);
25143 let r = _mm256_maskz_cvtepu64_ph(0b0101, a);
25144 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0);
25145 assert_eq_m128h(r, e);
25146 }
25147
25148 #[simd_test(enable = "avx512fp16,avx512vl")]
25149 fn test_mm512_cvtepu64_ph() {
25150 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
25151 let r = _mm512_cvtepu64_ph(a);
25152 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25153 assert_eq_m128h(r, e);
25154 }
25155
25156 #[simd_test(enable = "avx512fp16,avx512vl")]
25157 fn test_mm512_mask_cvtepu64_ph() {
25158 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
25159 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25160 let r = _mm512_mask_cvtepu64_ph(src, 0b01010101, a);
25161 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
25162 assert_eq_m128h(r, e);
25163 }
25164
25165 #[simd_test(enable = "avx512fp16,avx512vl")]
25166 fn test_mm512_maskz_cvtepu64_ph() {
25167 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
25168 let r = _mm512_maskz_cvtepu64_ph(0b01010101, a);
25169 let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
25170 assert_eq_m128h(r, e);
25171 }
25172
25173 #[simd_test(enable = "avx512fp16,avx512vl")]
25174 fn test_mm512_cvt_roundepu64_ph() {
25175 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
25176 let r = _mm512_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
25177 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25178 assert_eq_m128h(r, e);
25179 }
25180
25181 #[simd_test(enable = "avx512fp16,avx512vl")]
25182 fn test_mm512_mask_cvt_roundepu64_ph() {
25183 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
25184 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25185 let r = _mm512_mask_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25186 src, 0b01010101, a,
25187 );
25188 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
25189 assert_eq_m128h(r, e);
25190 }
25191
25192 #[simd_test(enable = "avx512fp16,avx512vl")]
25193 fn test_mm512_maskz_cvt_roundepu64_ph() {
25194 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
25195 let r = _mm512_maskz_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25196 0b01010101, a,
25197 );
25198 let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
25199 assert_eq_m128h(r, e);
25200 }
25201
25202 #[simd_test(enable = "avx512fp16,avx512vl")]
25203 fn test_mm_cvtxps_ph() {
25204 let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
25205 let r = _mm_cvtxps_ph(a);
25206 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
25207 assert_eq_m128h(r, e);
25208 }
25209
25210 #[simd_test(enable = "avx512fp16,avx512vl")]
25211 fn test_mm_mask_cvtxps_ph() {
25212 let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
25213 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25214 let r = _mm_mask_cvtxps_ph(src, 0b0101, a);
25215 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16., 4.0);
25216 assert_eq_m128h(r, e);
25217 }
25218
25219 #[simd_test(enable = "avx512fp16,avx512vl")]
25220 fn test_mm_maskz_cvtxps_ph() {
25221 let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
25222 let r = _mm_maskz_cvtxps_ph(0b0101, a);
25223 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0);
25224 assert_eq_m128h(r, e);
25225 }
25226
25227 #[simd_test(enable = "avx512fp16,avx512vl")]
25228 fn test_mm256_cvtxps_ph() {
25229 let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25230 let r = _mm256_cvtxps_ph(a);
25231 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25232 assert_eq_m128h(r, e);
25233 }
25234
25235 #[simd_test(enable = "avx512fp16,avx512vl")]
25236 fn test_mm256_mask_cvtxps_ph() {
25237 let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25238 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25239 let r = _mm256_mask_cvtxps_ph(src, 0b01010101, a);
25240 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
25241 assert_eq_m128h(r, e);
25242 }
25243
25244 #[simd_test(enable = "avx512fp16,avx512vl")]
25245 fn test_mm256_maskz_cvtxps_ph() {
25246 let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25247 let r = _mm256_maskz_cvtxps_ph(0b01010101, a);
25248 let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
25249 assert_eq_m128h(r, e);
25250 }
25251
25252 #[simd_test(enable = "avx512fp16,avx512vl")]
25253 fn test_mm512_cvtxps_ph() {
25254 let a = _mm512_set_ps(
25255 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25256 );
25257 let r = _mm512_cvtxps_ph(a);
25258 let e = _mm256_set_ph(
25259 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25260 );
25261 assert_eq_m256h(r, e);
25262 }
25263
25264 #[simd_test(enable = "avx512fp16,avx512vl")]
25265 fn test_mm512_mask_cvtxps_ph() {
25266 let a = _mm512_set_ps(
25267 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25268 );
25269 let src = _mm256_set_ph(
25270 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
25271 );
25272 let r = _mm512_mask_cvtxps_ph(src, 0b0101010101010101, a);
25273 let e = _mm256_set_ph(
25274 10., 2.0, 12., 4.0, 14., 6.0, 16., 8.0, 18., 10.0, 20., 12.0, 22., 14.0, 24., 16.0,
25275 );
25276 assert_eq_m256h(r, e);
25277 }
25278
25279 #[simd_test(enable = "avx512fp16,avx512vl")]
25280 fn test_mm512_maskz_cvtxps_ph() {
25281 let a = _mm512_set_ps(
25282 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25283 );
25284 let r = _mm512_maskz_cvtxps_ph(0b0101010101010101, a);
25285 let e = _mm256_set_ph(
25286 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
25287 );
25288 assert_eq_m256h(r, e);
25289 }
25290
25291 #[simd_test(enable = "avx512fp16,avx512vl")]
25292 fn test_mm512_cvtx_roundps_ph() {
25293 let a = _mm512_set_ps(
25294 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25295 );
25296 let r = _mm512_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
25297 let e = _mm256_set_ph(
25298 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25299 );
25300 assert_eq_m256h(r, e);
25301 }
25302
25303 #[simd_test(enable = "avx512fp16,avx512vl")]
25304 fn test_mm512_mask_cvtx_roundps_ph() {
25305 let a = _mm512_set_ps(
25306 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25307 );
25308 let src = _mm256_set_ph(
25309 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
25310 );
25311 let r = _mm512_mask_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25312 src,
25313 0b0101010101010101,
25314 a,
25315 );
25316 let e = _mm256_set_ph(
25317 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0,
25318 16.0,
25319 );
25320 assert_eq_m256h(r, e);
25321 }
25322
25323 #[simd_test(enable = "avx512fp16,avx512vl")]
25324 fn test_mm512_maskz_cvtx_roundps_ph() {
25325 let a = _mm512_set_ps(
25326 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25327 );
25328 let r = _mm512_maskz_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25329 0b0101010101010101,
25330 a,
25331 );
25332 let e = _mm256_set_ph(
25333 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
25334 );
25335 assert_eq_m256h(r, e);
25336 }
25337
25338 #[simd_test(enable = "avx512fp16,avx512vl")]
25339 fn test_mm_cvtss_sh() {
25340 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25341 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
25342 let r = _mm_cvtss_sh(a, b);
25343 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25344 assert_eq_m128h(r, e);
25345 }
25346
25347 #[simd_test(enable = "avx512fp16,avx512vl")]
25348 fn test_mm_mask_cvtss_sh() {
25349 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25350 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
25351 let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.);
25352 let r = _mm_mask_cvtss_sh(src, 0, a, b);
25353 let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.);
25354 assert_eq_m128h(r, e);
25355 let r = _mm_mask_cvtss_sh(src, 1, a, b);
25356 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25357 assert_eq_m128h(r, e);
25358 }
25359
25360 #[simd_test(enable = "avx512fp16,avx512vl")]
25361 fn test_mm_maskz_cvtss_sh() {
25362 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25363 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
25364 let r = _mm_maskz_cvtss_sh(0, a, b);
25365 let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.);
25366 assert_eq_m128h(r, e);
25367 let r = _mm_maskz_cvtss_sh(1, a, b);
25368 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25369 assert_eq_m128h(r, e);
25370 }
25371
25372 #[simd_test(enable = "avx512fp16,avx512vl")]
25373 fn test_mm_cvt_roundss_sh() {
25374 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25375 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
25376 let r = _mm_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
25377 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25378 assert_eq_m128h(r, e);
25379 }
25380
25381 #[simd_test(enable = "avx512fp16,avx512vl")]
25382 fn test_mm_mask_cvt_roundss_sh() {
25383 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25384 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
25385 let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.);
25386 let r = _mm_mask_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25387 src, 0, a, b,
25388 );
25389 let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.);
25390 assert_eq_m128h(r, e);
25391 let r = _mm_mask_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25392 src, 1, a, b,
25393 );
25394 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25395 assert_eq_m128h(r, e);
25396 }
25397
25398 #[simd_test(enable = "avx512fp16,avx512vl")]
25399 fn test_mm_maskz_cvt_roundss_sh() {
25400 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25401 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
25402 let r =
25403 _mm_maskz_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
25404 let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.);
25405 assert_eq_m128h(r, e);
25406 let r =
25407 _mm_maskz_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
25408 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25409 assert_eq_m128h(r, e);
25410 }
25411
25412 #[simd_test(enable = "avx512fp16,avx512vl")]
25413 fn test_mm_cvtpd_ph() {
25414 let a = _mm_set_pd(1.0, 2.0);
25415 let r = _mm_cvtpd_ph(a);
25416 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
25417 assert_eq_m128h(r, e);
25418 }
25419
25420 #[simd_test(enable = "avx512fp16,avx512vl")]
25421 fn test_mm_mask_cvtpd_ph() {
25422 let a = _mm_set_pd(1.0, 2.0);
25423 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25424 let r = _mm_mask_cvtpd_ph(src, 0b01, a);
25425 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.);
25426 assert_eq_m128h(r, e);
25427 }
25428
25429 #[simd_test(enable = "avx512fp16,avx512vl")]
25430 fn test_mm_maskz_cvtpd_ph() {
25431 let a = _mm_set_pd(1.0, 2.0);
25432 let r = _mm_maskz_cvtpd_ph(0b01, a);
25433 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0);
25434 assert_eq_m128h(r, e);
25435 }
25436
25437 #[simd_test(enable = "avx512fp16,avx512vl")]
25438 fn test_mm256_cvtpd_ph() {
25439 let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
25440 let r = _mm256_cvtpd_ph(a);
25441 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
25442 assert_eq_m128h(r, e);
25443 }
25444
25445 #[simd_test(enable = "avx512fp16,avx512vl")]
25446 fn test_mm256_mask_cvtpd_ph() {
25447 let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
25448 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25449 let r = _mm256_mask_cvtpd_ph(src, 0b0101, a);
25450 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0);
25451 assert_eq_m128h(r, e);
25452 }
25453
25454 #[simd_test(enable = "avx512fp16,avx512vl")]
25455 fn test_mm256_maskz_cvtpd_ph() {
25456 let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
25457 let r = _mm256_maskz_cvtpd_ph(0b0101, a);
25458 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0);
25459 assert_eq_m128h(r, e);
25460 }
25461
25462 #[simd_test(enable = "avx512fp16,avx512vl")]
25463 fn test_mm512_cvtpd_ph() {
25464 let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25465 let r = _mm512_cvtpd_ph(a);
25466 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25467 assert_eq_m128h(r, e);
25468 }
25469
25470 #[simd_test(enable = "avx512fp16,avx512vl")]
25471 fn test_mm512_mask_cvtpd_ph() {
25472 let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25473 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25474 let r = _mm512_mask_cvtpd_ph(src, 0b01010101, a);
25475 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
25476 assert_eq_m128h(r, e);
25477 }
25478
25479 #[simd_test(enable = "avx512fp16,avx512vl")]
25480 fn test_mm512_maskz_cvtpd_ph() {
25481 let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25482 let r = _mm512_maskz_cvtpd_ph(0b01010101, a);
25483 let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
25484 assert_eq_m128h(r, e);
25485 }
25486
25487 #[simd_test(enable = "avx512fp16,avx512vl")]
25488 fn test_mm512_cvt_roundpd_ph() {
25489 let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25490 let r = _mm512_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
25491 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25492 assert_eq_m128h(r, e);
25493 }
25494
25495 #[simd_test(enable = "avx512fp16,avx512vl")]
25496 fn test_mm512_mask_cvt_roundpd_ph() {
25497 let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25498 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25499 let r = _mm512_mask_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25500 src, 0b01010101, a,
25501 );
25502 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
25503 assert_eq_m128h(r, e);
25504 }
25505
25506 #[simd_test(enable = "avx512fp16,avx512vl")]
25507 fn test_mm512_maskz_cvt_roundpd_ph() {
25508 let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25509 let r = _mm512_maskz_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25510 0b01010101, a,
25511 );
25512 let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
25513 assert_eq_m128h(r, e);
25514 }
25515
25516 #[simd_test(enable = "avx512fp16,avx512vl")]
25517 fn test_mm_cvtsd_sh() {
25518 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25519 let b = _mm_setr_pd(1.0, 2.0);
25520 let r = _mm_cvtsd_sh(a, b);
25521 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25522 assert_eq_m128h(r, e);
25523 }
25524
25525 #[simd_test(enable = "avx512fp16,avx512vl")]
25526 fn test_mm_mask_cvtsd_sh() {
25527 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25528 let b = _mm_setr_pd(1.0, 2.0);
25529 let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.);
25530 let r = _mm_mask_cvtsd_sh(src, 0, a, b);
25531 let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.);
25532 assert_eq_m128h(r, e);
25533 let r = _mm_mask_cvtsd_sh(src, 1, a, b);
25534 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25535 assert_eq_m128h(r, e);
25536 }
25537
25538 #[simd_test(enable = "avx512fp16,avx512vl")]
25539 fn test_mm_maskz_cvtsd_sh() {
25540 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25541 let b = _mm_setr_pd(1.0, 2.0);
25542 let r = _mm_maskz_cvtsd_sh(0, a, b);
25543 let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.);
25544 assert_eq_m128h(r, e);
25545 let r = _mm_maskz_cvtsd_sh(1, a, b);
25546 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25547 assert_eq_m128h(r, e);
25548 }
25549
25550 #[simd_test(enable = "avx512fp16,avx512vl")]
25551 fn test_mm_cvt_roundsd_sh() {
25552 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25553 let b = _mm_setr_pd(1.0, 2.0);
25554 let r = _mm_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
25555 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25556 assert_eq_m128h(r, e);
25557 }
25558
25559 #[simd_test(enable = "avx512fp16,avx512vl")]
25560 fn test_mm_mask_cvt_roundsd_sh() {
25561 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25562 let b = _mm_setr_pd(1.0, 2.0);
25563 let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.);
25564 let r = _mm_mask_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25565 src, 0, a, b,
25566 );
25567 let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.);
25568 assert_eq_m128h(r, e);
25569 let r = _mm_mask_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25570 src, 1, a, b,
25571 );
25572 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25573 assert_eq_m128h(r, e);
25574 }
25575
25576 #[simd_test(enable = "avx512fp16,avx512vl")]
25577 fn test_mm_maskz_cvt_roundsd_sh() {
25578 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25579 let b = _mm_setr_pd(1.0, 2.0);
25580 let r =
25581 _mm_maskz_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
25582 let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.);
25583 assert_eq_m128h(r, e);
25584 let r =
25585 _mm_maskz_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
25586 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25587 assert_eq_m128h(r, e);
25588 }
25589
25590 #[simd_test(enable = "avx512fp16,avx512vl")]
25591 fn test_mm_cvtph_epi16() {
25592 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25593 let r = _mm_cvttph_epi16(a);
25594 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
25595 assert_eq_m128i(r, e);
25596 }
25597
25598 #[simd_test(enable = "avx512fp16,avx512vl")]
25599 fn test_mm_mask_cvtph_epi16() {
25600 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25601 let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17);
25602 let r = _mm_mask_cvttph_epi16(src, 0b01010101, a);
25603 let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8);
25604 assert_eq_m128i(r, e);
25605 }
25606
25607 #[simd_test(enable = "avx512fp16,avx512vl")]
25608 fn test_mm_maskz_cvtph_epi16() {
25609 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25610 let r = _mm_maskz_cvttph_epi16(0b01010101, a);
25611 let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8);
25612 assert_eq_m128i(r, e);
25613 }
25614
25615 #[simd_test(enable = "avx512fp16,avx512vl")]
25616 fn test_mm256_cvtph_epi16() {
25617 let a = _mm256_set_ph(
25618 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25619 );
25620 let r = _mm256_cvttph_epi16(a);
25621 let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
25622 assert_eq_m256i(r, e);
25623 }
25624
25625 #[simd_test(enable = "avx512fp16,avx512vl")]
25626 fn test_mm256_mask_cvtph_epi16() {
25627 let a = _mm256_set_ph(
25628 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25629 );
25630 let src = _mm256_set_epi16(
25631 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
25632 );
25633 let r = _mm256_mask_cvttph_epi16(src, 0b0101010101010101, a);
25634 let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
25635 assert_eq_m256i(r, e);
25636 }
25637
25638 #[simd_test(enable = "avx512fp16,avx512vl")]
25639 fn test_mm256_maskz_cvtph_epi16() {
25640 let a = _mm256_set_ph(
25641 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25642 );
25643 let r = _mm256_maskz_cvttph_epi16(0b0101010101010101, a);
25644 let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
25645 assert_eq_m256i(r, e);
25646 }
25647
25648 #[simd_test(enable = "avx512fp16")]
25649 fn test_mm512_cvtph_epi16() {
25650 let a = _mm512_set_ph(
25651 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25652 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25653 31.0, 32.0,
25654 );
25655 let r = _mm512_cvttph_epi16(a);
25656 let e = _mm512_set_epi16(
25657 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25658 25, 26, 27, 28, 29, 30, 31, 32,
25659 );
25660 assert_eq_m512i(r, e);
25661 }
25662
25663 #[simd_test(enable = "avx512fp16")]
25664 fn test_mm512_mask_cvtph_epi16() {
25665 let a = _mm512_set_ph(
25666 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25667 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25668 31.0, 32.0,
25669 );
25670 let src = _mm512_set_epi16(
25671 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
25672 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
25673 );
25674 let r = _mm512_mask_cvttph_epi16(src, 0b01010101010101010101010101010101, a);
25675 let e = _mm512_set_epi16(
25676 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
25677 24, 34, 26, 36, 28, 38, 30, 40, 32,
25678 );
25679 assert_eq_m512i(r, e);
25680 }
25681
25682 #[simd_test(enable = "avx512fp16")]
25683 fn test_mm512_maskz_cvtph_epi16() {
25684 let a = _mm512_set_ph(
25685 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25686 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25687 31.0, 32.0,
25688 );
25689 let r = _mm512_maskz_cvttph_epi16(0b01010101010101010101010101010101, a);
25690 let e = _mm512_set_epi16(
25691 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
25692 0, 28, 0, 30, 0, 32,
25693 );
25694 assert_eq_m512i(r, e);
25695 }
25696
25697 #[simd_test(enable = "avx512fp16")]
25698 fn test_mm512_cvt_roundph_epi16() {
25699 let a = _mm512_set_ph(
25700 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25701 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25702 31.0, 32.0,
25703 );
25704 let r = _mm512_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(a);
25705 let e = _mm512_set_epi16(
25706 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25707 25, 26, 27, 28, 29, 30, 31, 32,
25708 );
25709 assert_eq_m512i(r, e);
25710 }
25711
25712 #[simd_test(enable = "avx512fp16")]
25713 fn test_mm512_mask_cvt_roundph_epi16() {
25714 let a = _mm512_set_ph(
25715 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25716 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25717 31.0, 32.0,
25718 );
25719 let src = _mm512_set_epi16(
25720 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
25721 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
25722 );
        let r = _mm512_mask_cvt_roundph_epi16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25724 src,
25725 0b01010101010101010101010101010101,
25726 a,
25727 );
25728 let e = _mm512_set_epi16(
25729 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
25730 24, 34, 26, 36, 28, 38, 30, 40, 32,
25731 );
25732 assert_eq_m512i(r, e);
25733 }
25734
25735 #[simd_test(enable = "avx512fp16")]
25736 fn test_mm512_maskz_cvt_roundph_epi16() {
25737 let a = _mm512_set_ph(
25738 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25739 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25740 31.0, 32.0,
25741 );
        let r = _mm512_maskz_cvt_roundph_epi16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25743 0b01010101010101010101010101010101,
25744 a,
25745 );
25746 let e = _mm512_set_epi16(
25747 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
25748 0, 28, 0, 30, 0, 32,
25749 );
25750 assert_eq_m512i(r, e);
25751 }
25752
25753 #[simd_test(enable = "avx512fp16,avx512vl")]
25754 fn test_mm_cvtph_epu16() {
25755 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvtph_epu16(a);
25757 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
25758 assert_eq_m128i(r, e);
25759 }
25760
25761 #[simd_test(enable = "avx512fp16,avx512vl")]
25762 fn test_mm_mask_cvtph_epu16() {
25763 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25764 let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17);
        let r = _mm_mask_cvtph_epu16(src, 0b01010101, a);
25766 let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8);
25767 assert_eq_m128i(r, e);
25768 }
25769
25770 #[simd_test(enable = "avx512fp16,avx512vl")]
25771 fn test_mm_maskz_cvtph_epu16() {
25772 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_maskz_cvtph_epu16(0b01010101, a);
25774 let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8);
25775 assert_eq_m128i(r, e);
25776 }
25777
25778 #[simd_test(enable = "avx512fp16,avx512vl")]
25779 fn test_mm256_cvtph_epu16() {
25780 let a = _mm256_set_ph(
25781 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25782 );
        let r = _mm256_cvtph_epu16(a);
25784 let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
25785 assert_eq_m256i(r, e);
25786 }
25787
25788 #[simd_test(enable = "avx512fp16,avx512vl")]
25789 fn test_mm256_mask_cvtph_epu16() {
25790 let a = _mm256_set_ph(
25791 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25792 );
25793 let src = _mm256_set_epi16(
25794 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
25795 );
        let r = _mm256_mask_cvtph_epu16(src, 0b0101010101010101, a);
25797 let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
25798 assert_eq_m256i(r, e);
25799 }
25800
25801 #[simd_test(enable = "avx512fp16,avx512vl")]
25802 fn test_mm256_maskz_cvtph_epu16() {
25803 let a = _mm256_set_ph(
25804 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25805 );
        let r = _mm256_maskz_cvtph_epu16(0b0101010101010101, a);
25807 let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
25808 assert_eq_m256i(r, e);
25809 }
25810
25811 #[simd_test(enable = "avx512fp16")]
25812 fn test_mm512_cvtph_epu16() {
25813 let a = _mm512_set_ph(
25814 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25815 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25816 31.0, 32.0,
25817 );
        let r = _mm512_cvtph_epu16(a);
25819 let e = _mm512_set_epi16(
25820 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25821 25, 26, 27, 28, 29, 30, 31, 32,
25822 );
25823 assert_eq_m512i(r, e);
25824 }
25825
25826 #[simd_test(enable = "avx512fp16")]
25827 fn test_mm512_mask_cvtph_epu16() {
25828 let a = _mm512_set_ph(
25829 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25830 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25831 31.0, 32.0,
25832 );
25833 let src = _mm512_set_epi16(
25834 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
25835 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
25836 );
        let r = _mm512_mask_cvtph_epu16(src, 0b01010101010101010101010101010101, a);
25838 let e = _mm512_set_epi16(
25839 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
25840 24, 34, 26, 36, 28, 38, 30, 40, 32,
25841 );
25842 assert_eq_m512i(r, e);
25843 }
25844
25845 #[simd_test(enable = "avx512fp16")]
25846 fn test_mm512_maskz_cvtph_epu16() {
25847 let a = _mm512_set_ph(
25848 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25849 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25850 31.0, 32.0,
25851 );
        let r = _mm512_maskz_cvtph_epu16(0b01010101010101010101010101010101, a);
25853 let e = _mm512_set_epi16(
25854 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
25855 0, 28, 0, 30, 0, 32,
25856 );
25857 assert_eq_m512i(r, e);
25858 }
25859
25860 #[simd_test(enable = "avx512fp16")]
25861 fn test_mm512_cvt_roundph_epu16() {
25862 let a = _mm512_set_ph(
25863 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25864 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25865 31.0, 32.0,
25866 );
25867 let r = _mm512_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
25868 let e = _mm512_set_epi16(
25869 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25870 25, 26, 27, 28, 29, 30, 31, 32,
25871 );
25872 assert_eq_m512i(r, e);
25873 }
25874
25875 #[simd_test(enable = "avx512fp16")]
25876 fn test_mm512_mask_cvt_roundph_epu16() {
25877 let a = _mm512_set_ph(
25878 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25879 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25880 31.0, 32.0,
25881 );
25882 let src = _mm512_set_epi16(
25883 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
25884 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
25885 );
25886 let r = _mm512_mask_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25887 src,
25888 0b01010101010101010101010101010101,
25889 a,
25890 );
25891 let e = _mm512_set_epi16(
25892 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
25893 24, 34, 26, 36, 28, 38, 30, 40, 32,
25894 );
25895 assert_eq_m512i(r, e);
25896 }
25897
25898 #[simd_test(enable = "avx512fp16")]
25899 fn test_mm512_maskz_cvt_roundph_epu16() {
25900 let a = _mm512_set_ph(
25901 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25902 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25903 31.0, 32.0,
25904 );
25905 let r = _mm512_maskz_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25906 0b01010101010101010101010101010101,
25907 a,
25908 );
25909 let e = _mm512_set_epi16(
25910 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
25911 0, 28, 0, 30, 0, 32,
25912 );
25913 assert_eq_m512i(r, e);
25914 }
25915
25916 #[simd_test(enable = "avx512fp16,avx512vl")]
25917 fn test_mm_cvttph_epi16() {
25918 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25919 let r = _mm_cvttph_epi16(a);
25920 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
25921 assert_eq_m128i(r, e);
25922 }
25923
25924 #[simd_test(enable = "avx512fp16,avx512vl")]
25925 fn test_mm_mask_cvttph_epi16() {
25926 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25927 let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17);
25928 let r = _mm_mask_cvttph_epi16(src, 0b01010101, a);
25929 let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8);
25930 assert_eq_m128i(r, e);
25931 }
25932
25933 #[simd_test(enable = "avx512fp16,avx512vl")]
25934 fn test_mm_maskz_cvttph_epi16() {
25935 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25936 let r = _mm_maskz_cvttph_epi16(0b01010101, a);
25937 let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8);
25938 assert_eq_m128i(r, e);
25939 }
25940
25941 #[simd_test(enable = "avx512fp16,avx512vl")]
25942 fn test_mm256_cvttph_epi16() {
25943 let a = _mm256_set_ph(
25944 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25945 );
25946 let r = _mm256_cvttph_epi16(a);
25947 let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
25948 assert_eq_m256i(r, e);
25949 }
25950
25951 #[simd_test(enable = "avx512fp16,avx512vl")]
25952 fn test_mm256_mask_cvttph_epi16() {
25953 let a = _mm256_set_ph(
25954 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25955 );
25956 let src = _mm256_set_epi16(
25957 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
25958 );
25959 let r = _mm256_mask_cvttph_epi16(src, 0b0101010101010101, a);
25960 let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
25961 assert_eq_m256i(r, e);
25962 }
25963
25964 #[simd_test(enable = "avx512fp16,avx512vl")]
25965 fn test_mm256_maskz_cvttph_epi16() {
25966 let a = _mm256_set_ph(
25967 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25968 );
25969 let r = _mm256_maskz_cvttph_epi16(0b0101010101010101, a);
25970 let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
25971 assert_eq_m256i(r, e);
25972 }
25973
25974 #[simd_test(enable = "avx512fp16")]
25975 fn test_mm512_cvttph_epi16() {
25976 let a = _mm512_set_ph(
25977 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25978 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25979 31.0, 32.0,
25980 );
25981 let r = _mm512_cvttph_epi16(a);
25982 let e = _mm512_set_epi16(
25983 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25984 25, 26, 27, 28, 29, 30, 31, 32,
25985 );
25986 assert_eq_m512i(r, e);
25987 }
25988
25989 #[simd_test(enable = "avx512fp16")]
25990 fn test_mm512_mask_cvttph_epi16() {
25991 let a = _mm512_set_ph(
25992 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25993 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25994 31.0, 32.0,
25995 );
25996 let src = _mm512_set_epi16(
25997 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
25998 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
25999 );
26000 let r = _mm512_mask_cvttph_epi16(src, 0b01010101010101010101010101010101, a);
26001 let e = _mm512_set_epi16(
26002 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
26003 24, 34, 26, 36, 28, 38, 30, 40, 32,
26004 );
26005 assert_eq_m512i(r, e);
26006 }
26007
26008 #[simd_test(enable = "avx512fp16")]
26009 fn test_mm512_maskz_cvttph_epi16() {
26010 let a = _mm512_set_ph(
26011 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26012 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
26013 31.0, 32.0,
26014 );
26015 let r = _mm512_maskz_cvttph_epi16(0b01010101010101010101010101010101, a);
26016 let e = _mm512_set_epi16(
26017 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
26018 0, 28, 0, 30, 0, 32,
26019 );
26020 assert_eq_m512i(r, e);
26021 }
26022
26023 #[simd_test(enable = "avx512fp16")]
26024 fn test_mm512_cvtt_roundph_epi16() {
26025 let a = _mm512_set_ph(
26026 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26027 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
26028 31.0, 32.0,
26029 );
26030 let r = _mm512_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(a);
26031 let e = _mm512_set_epi16(
26032 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
26033 25, 26, 27, 28, 29, 30, 31, 32,
26034 );
26035 assert_eq_m512i(r, e);
26036 }
26037
26038 #[simd_test(enable = "avx512fp16")]
26039 fn test_mm512_mask_cvtt_roundph_epi16() {
26040 let a = _mm512_set_ph(
26041 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26042 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
26043 31.0, 32.0,
26044 );
26045 let src = _mm512_set_epi16(
26046 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
26047 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
26048 );
26049 let r = _mm512_mask_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(
26050 src,
26051 0b01010101010101010101010101010101,
26052 a,
26053 );
26054 let e = _mm512_set_epi16(
26055 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
26056 24, 34, 26, 36, 28, 38, 30, 40, 32,
26057 );
26058 assert_eq_m512i(r, e);
26059 }
26060
26061 #[simd_test(enable = "avx512fp16")]
26062 fn test_mm512_maskz_cvtt_roundph_epi16() {
26063 let a = _mm512_set_ph(
26064 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26065 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
26066 31.0, 32.0,
26067 );
26068 let r = _mm512_maskz_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(
26069 0b01010101010101010101010101010101,
26070 a,
26071 );
26072 let e = _mm512_set_epi16(
26073 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
26074 0, 28, 0, 30, 0, 32,
26075 );
26076 assert_eq_m512i(r, e);
26077 }
26078
26079 #[simd_test(enable = "avx512fp16,avx512vl")]
26080 fn test_mm_cvttph_epu16() {
26081 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26082 let r = _mm_cvttph_epu16(a);
26083 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
26084 assert_eq_m128i(r, e);
26085 }
26086
26087 #[simd_test(enable = "avx512fp16,avx512vl")]
26088 fn test_mm_mask_cvttph_epu16() {
26089 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26090 let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17);
26091 let r = _mm_mask_cvttph_epu16(src, 0b01010101, a);
26092 let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8);
26093 assert_eq_m128i(r, e);
26094 }
26095
26096 #[simd_test(enable = "avx512fp16,avx512vl")]
26097 fn test_mm_maskz_cvttph_epu16() {
26098 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26099 let r = _mm_maskz_cvttph_epu16(0b01010101, a);
26100 let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8);
26101 assert_eq_m128i(r, e);
26102 }
26103
26104 #[simd_test(enable = "avx512fp16,avx512vl")]
26105 fn test_mm256_cvttph_epu16() {
26106 let a = _mm256_set_ph(
26107 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26108 );
26109 let r = _mm256_cvttph_epu16(a);
26110 let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
26111 assert_eq_m256i(r, e);
26112 }
26113
26114 #[simd_test(enable = "avx512fp16,avx512vl")]
26115 fn test_mm256_mask_cvttph_epu16() {
26116 let a = _mm256_set_ph(
26117 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26118 );
26119 let src = _mm256_set_epi16(
26120 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26121 );
26122 let r = _mm256_mask_cvttph_epu16(src, 0b0101010101010101, a);
26123 let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
26124 assert_eq_m256i(r, e);
26125 }
26126
26127 #[simd_test(enable = "avx512fp16,avx512vl")]
26128 fn test_mm256_maskz_cvttph_epu16() {
26129 let a = _mm256_set_ph(
26130 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26131 );
26132 let r = _mm256_maskz_cvttph_epu16(0b0101010101010101, a);
26133 let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
26134 assert_eq_m256i(r, e);
26135 }
26136
26137 #[simd_test(enable = "avx512fp16")]
26138 fn test_mm512_cvttph_epu16() {
26139 let a = _mm512_set_ph(
26140 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26141 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
26142 31.0, 32.0,
26143 );
26144 let r = _mm512_cvttph_epu16(a);
26145 let e = _mm512_set_epi16(
26146 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
26147 25, 26, 27, 28, 29, 30, 31, 32,
26148 );
26149 assert_eq_m512i(r, e);
26150 }
26151
26152 #[simd_test(enable = "avx512fp16")]
26153 fn test_mm512_mask_cvttph_epu16() {
26154 let a = _mm512_set_ph(
26155 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26156 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
26157 31.0, 32.0,
26158 );
26159 let src = _mm512_set_epi16(
26160 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
26161 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
26162 );
26163 let r = _mm512_mask_cvttph_epu16(src, 0b01010101010101010101010101010101, a);
26164 let e = _mm512_set_epi16(
26165 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
26166 24, 34, 26, 36, 28, 38, 30, 40, 32,
26167 );
26168 assert_eq_m512i(r, e);
26169 }
26170
26171 #[simd_test(enable = "avx512fp16")]
26172 fn test_mm512_maskz_cvttph_epu16() {
26173 let a = _mm512_set_ph(
26174 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26175 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
26176 31.0, 32.0,
26177 );
26178 let r = _mm512_maskz_cvttph_epu16(0b01010101010101010101010101010101, a);
26179 let e = _mm512_set_epi16(
26180 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
26181 0, 28, 0, 30, 0, 32,
26182 );
26183 assert_eq_m512i(r, e);
26184 }
26185
26186 #[simd_test(enable = "avx512fp16")]
26187 fn test_mm512_cvtt_roundph_epu16() {
26188 let a = _mm512_set_ph(
26189 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26190 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
26191 31.0, 32.0,
26192 );
26193 let r = _mm512_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>(a);
26194 let e = _mm512_set_epi16(
26195 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
26196 25, 26, 27, 28, 29, 30, 31, 32,
26197 );
26198 assert_eq_m512i(r, e);
26199 }
26200
26201 #[simd_test(enable = "avx512fp16")]
26202 fn test_mm512_mask_cvtt_roundph_epu16() {
26203 let a = _mm512_set_ph(
26204 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26205 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
26206 31.0, 32.0,
26207 );
26208 let src = _mm512_set_epi16(
26209 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
26210 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
26211 );
26212 let r = _mm512_mask_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>(
26213 src,
26214 0b01010101010101010101010101010101,
26215 a,
26216 );
26217 let e = _mm512_set_epi16(
26218 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
26219 24, 34, 26, 36, 28, 38, 30, 40, 32,
26220 );
26221 assert_eq_m512i(r, e);
26222 }
26223
26224 #[simd_test(enable = "avx512fp16")]
26225 fn test_mm512_maskz_cvtt_roundph_epu16() {
26226 let a = _mm512_set_ph(
26227 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26228 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
26229 31.0, 32.0,
26230 );
26231 let r = _mm512_maskz_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>(
26232 0b01010101010101010101010101010101,
26233 a,
26234 );
26235 let e = _mm512_set_epi16(
26236 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
26237 0, 28, 0, 30, 0, 32,
26238 );
26239 assert_eq_m512i(r, e);
26240 }
26241
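    // For the half-to-32-bit conversions, the 128-bit forms convert only the lowest four
    // half-precision elements of the source (hence the zero padding in `a`); the 256- and
    // 512-bit forms consume the whole source vector.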
26242 #[simd_test(enable = "avx512fp16,avx512vl")]
26243 fn test_mm_cvtph_epi32() {
26244 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26245 let r = _mm_cvtph_epi32(a);
26246 let e = _mm_set_epi32(1, 2, 3, 4);
26247 assert_eq_m128i(r, e);
26248 }
26249
26250 #[simd_test(enable = "avx512fp16,avx512vl")]
26251 fn test_mm_mask_cvtph_epi32() {
26252 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26253 let src = _mm_set_epi32(10, 11, 12, 13);
26254 let r = _mm_mask_cvtph_epi32(src, 0b0101, a);
26255 let e = _mm_set_epi32(10, 2, 12, 4);
26256 assert_eq_m128i(r, e);
26257 }
26258
26259 #[simd_test(enable = "avx512fp16,avx512vl")]
26260 fn test_mm_maskz_cvtph_epi32() {
26261 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26262 let r = _mm_maskz_cvtph_epi32(0b0101, a);
26263 let e = _mm_set_epi32(0, 2, 0, 4);
26264 assert_eq_m128i(r, e);
26265 }
26266
26267 #[simd_test(enable = "avx512fp16,avx512vl")]
26268 fn test_mm256_cvtph_epi32() {
26269 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26270 let r = _mm256_cvtph_epi32(a);
26271 let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
26272 assert_eq_m256i(r, e);
26273 }
26274
26275 #[simd_test(enable = "avx512fp16,avx512vl")]
26276 fn test_mm256_mask_cvtph_epi32() {
26277 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26278 let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17);
26279 let r = _mm256_mask_cvtph_epi32(src, 0b01010101, a);
26280 let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8);
26281 assert_eq_m256i(r, e);
26282 }
26283
26284 #[simd_test(enable = "avx512fp16,avx512vl")]
26285 fn test_mm256_maskz_cvtph_epi32() {
26286 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26287 let r = _mm256_maskz_cvtph_epi32(0b01010101, a);
26288 let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8);
26289 assert_eq_m256i(r, e);
26290 }
26291
26292 #[simd_test(enable = "avx512fp16")]
26293 fn test_mm512_cvtph_epi32() {
26294 let a = _mm256_set_ph(
26295 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26296 );
26297 let r = _mm512_cvtph_epi32(a);
26298 let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
26299 assert_eq_m512i(r, e);
26300 }
26301
26302 #[simd_test(enable = "avx512fp16")]
26303 fn test_mm512_mask_cvtph_epi32() {
26304 let a = _mm256_set_ph(
26305 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26306 );
26307 let src = _mm512_set_epi32(
26308 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26309 );
26310 let r = _mm512_mask_cvtph_epi32(src, 0b0101010101010101, a);
26311 let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
26312 assert_eq_m512i(r, e);
26313 }
26314
26315 #[simd_test(enable = "avx512fp16")]
26316 fn test_mm512_maskz_cvtph_epi32() {
26317 let a = _mm256_set_ph(
26318 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26319 );
26320 let r = _mm512_maskz_cvtph_epi32(0b0101010101010101, a);
26321 let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
26322 assert_eq_m512i(r, e);
26323 }
26324
26325 #[simd_test(enable = "avx512fp16")]
26326 fn test_mm512_cvt_roundph_epi32() {
26327 let a = _mm256_set_ph(
26328 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26329 );
26330 let r = _mm512_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
26331 let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
26332 assert_eq_m512i(r, e);
26333 }
26334
26335 #[simd_test(enable = "avx512fp16")]
26336 fn test_mm512_mask_cvt_roundph_epi32() {
26337 let a = _mm256_set_ph(
26338 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26339 );
26340 let src = _mm512_set_epi32(
26341 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26342 );
26343 let r = _mm512_mask_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
26344 src,
26345 0b0101010101010101,
26346 a,
26347 );
26348 let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
26349 assert_eq_m512i(r, e);
26350 }
26351
26352 #[simd_test(enable = "avx512fp16")]
26353 fn test_mm512_maskz_cvt_roundph_epi32() {
26354 let a = _mm256_set_ph(
26355 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26356 );
26357 let r = _mm512_maskz_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
26358 0b0101010101010101,
26359 a,
26360 );
26361 let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
26362 assert_eq_m512i(r, e);
26363 }
26364
26365 #[simd_test(enable = "avx512fp16")]
26366 fn test_mm_cvtsh_i32() {
26367 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26368 let r = _mm_cvtsh_i32(a);
26369 assert_eq!(r, 1);
26370 }
26371
26372 #[simd_test(enable = "avx512fp16")]
26373 fn test_mm_cvt_roundsh_i32() {
26374 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26375 let r = _mm_cvt_roundsh_i32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
26376 assert_eq!(r, 1);
26377 }
26378
26379 #[simd_test(enable = "avx512fp16,avx512vl")]
26380 fn test_mm_cvtph_epu32() {
26381 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26382 let r = _mm_cvtph_epu32(a);
26383 let e = _mm_set_epi32(1, 2, 3, 4);
26384 assert_eq_m128i(r, e);
26385 }
26386
26387 #[simd_test(enable = "avx512fp16,avx512vl")]
26388 fn test_mm_mask_cvtph_epu32() {
26389 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26390 let src = _mm_set_epi32(10, 11, 12, 13);
26391 let r = _mm_mask_cvtph_epu32(src, 0b0101, a);
26392 let e = _mm_set_epi32(10, 2, 12, 4);
26393 assert_eq_m128i(r, e);
26394 }
26395
26396 #[simd_test(enable = "avx512fp16,avx512vl")]
26397 fn test_mm_maskz_cvtph_epu32() {
26398 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26399 let r = _mm_maskz_cvtph_epu32(0b0101, a);
26400 let e = _mm_set_epi32(0, 2, 0, 4);
26401 assert_eq_m128i(r, e);
26402 }
26403
26404 #[simd_test(enable = "avx512fp16,avx512vl")]
26405 fn test_mm256_cvtph_epu32() {
26406 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26407 let r = _mm256_cvtph_epu32(a);
26408 let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
26409 assert_eq_m256i(r, e);
26410 }
26411
26412 #[simd_test(enable = "avx512fp16,avx512vl")]
26413 fn test_mm256_mask_cvtph_epu32() {
26414 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26415 let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17);
26416 let r = _mm256_mask_cvtph_epu32(src, 0b01010101, a);
26417 let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8);
26418 assert_eq_m256i(r, e);
26419 }
26420
26421 #[simd_test(enable = "avx512fp16,avx512vl")]
26422 fn test_mm256_maskz_cvtph_epu32() {
26423 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26424 let r = _mm256_maskz_cvtph_epu32(0b01010101, a);
26425 let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8);
26426 assert_eq_m256i(r, e);
26427 }
26428
26429 #[simd_test(enable = "avx512fp16")]
26430 fn test_mm512_cvtph_epu32() {
26431 let a = _mm256_set_ph(
26432 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26433 );
26434 let r = _mm512_cvtph_epu32(a);
26435 let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
26436 assert_eq_m512i(r, e);
26437 }
26438
26439 #[simd_test(enable = "avx512fp16")]
26440 fn test_mm512_mask_cvtph_epu32() {
26441 let a = _mm256_set_ph(
26442 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26443 );
26444 let src = _mm512_set_epi32(
26445 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26446 );
26447 let r = _mm512_mask_cvtph_epu32(src, 0b0101010101010101, a);
26448 let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
26449 assert_eq_m512i(r, e);
26450 }
26451
26452 #[simd_test(enable = "avx512fp16")]
26453 fn test_mm512_maskz_cvtph_epu32() {
26454 let a = _mm256_set_ph(
26455 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26456 );
26457 let r = _mm512_maskz_cvtph_epu32(0b0101010101010101, a);
26458 let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
26459 assert_eq_m512i(r, e);
26460 }
26461
26462 #[simd_test(enable = "avx512fp16")]
26463 fn test_mm512_cvt_roundph_epu32() {
26464 let a = _mm256_set_ph(
26465 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26466 );
26467 let r = _mm512_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
26468 let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
26469 assert_eq_m512i(r, e);
26470 }
26471
26472 #[simd_test(enable = "avx512fp16")]
26473 fn test_mm512_mask_cvt_roundph_epu32() {
26474 let a = _mm256_set_ph(
26475 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26476 );
26477 let src = _mm512_set_epi32(
26478 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26479 );
26480 let r = _mm512_mask_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
26481 src,
26482 0b0101010101010101,
26483 a,
26484 );
26485 let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
26486 assert_eq_m512i(r, e);
26487 }
26488
26489 #[simd_test(enable = "avx512fp16")]
26490 fn test_mm512_maskz_cvt_roundph_epu32() {
26491 let a = _mm256_set_ph(
26492 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26493 );
26494 let r = _mm512_maskz_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
26495 0b0101010101010101,
26496 a,
26497 );
26498 let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
26499 assert_eq_m512i(r, e);
26500 }
26501
26502 #[simd_test(enable = "avx512fp16")]
26503 fn test_mm_cvtsh_u32() {
26504 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26505 let r = _mm_cvtsh_u32(a);
26506 assert_eq!(r, 1);
26507 }
26508
26509 #[simd_test(enable = "avx512fp16")]
26510 fn test_mm_cvt_roundsh_u32() {
26511 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26512 let r = _mm_cvt_roundsh_u32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
26513 assert_eq!(r, 1);
26514 }
26515
26516 #[simd_test(enable = "avx512fp16,avx512vl")]
26517 fn test_mm_cvttph_epi32() {
26518 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26519 let r = _mm_cvttph_epi32(a);
26520 let e = _mm_set_epi32(1, 2, 3, 4);
26521 assert_eq_m128i(r, e);
26522 }
26523
26524 #[simd_test(enable = "avx512fp16,avx512vl")]
26525 fn test_mm_mask_cvttph_epi32() {
26526 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26527 let src = _mm_set_epi32(10, 11, 12, 13);
26528 let r = _mm_mask_cvttph_epi32(src, 0b0101, a);
26529 let e = _mm_set_epi32(10, 2, 12, 4);
26530 assert_eq_m128i(r, e);
26531 }
26532
26533 #[simd_test(enable = "avx512fp16,avx512vl")]
26534 fn test_mm_maskz_cvttph_epi32() {
26535 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26536 let r = _mm_maskz_cvttph_epi32(0b0101, a);
26537 let e = _mm_set_epi32(0, 2, 0, 4);
26538 assert_eq_m128i(r, e);
26539 }
26540
26541 #[simd_test(enable = "avx512fp16,avx512vl")]
26542 fn test_mm256_cvttph_epi32() {
26543 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26544 let r = _mm256_cvttph_epi32(a);
26545 let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
26546 assert_eq_m256i(r, e);
26547 }
26548
26549 #[simd_test(enable = "avx512fp16,avx512vl")]
26550 fn test_mm256_mask_cvttph_epi32() {
26551 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26552 let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17);
26553 let r = _mm256_mask_cvttph_epi32(src, 0b01010101, a);
26554 let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8);
26555 assert_eq_m256i(r, e);
26556 }
26557
26558 #[simd_test(enable = "avx512fp16,avx512vl")]
26559 fn test_mm256_maskz_cvttph_epi32() {
26560 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26561 let r = _mm256_maskz_cvttph_epi32(0b01010101, a);
26562 let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8);
26563 assert_eq_m256i(r, e);
26564 }
26565
26566 #[simd_test(enable = "avx512fp16")]
26567 fn test_mm512_cvttph_epi32() {
26568 let a = _mm256_set_ph(
26569 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26570 );
26571 let r = _mm512_cvttph_epi32(a);
26572 let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
26573 assert_eq_m512i(r, e);
26574 }
26575
26576 #[simd_test(enable = "avx512fp16")]
26577 fn test_mm512_mask_cvttph_epi32() {
26578 let a = _mm256_set_ph(
26579 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26580 );
26581 let src = _mm512_set_epi32(
26582 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26583 );
26584 let r = _mm512_mask_cvttph_epi32(src, 0b0101010101010101, a);
26585 let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
26586 assert_eq_m512i(r, e);
26587 }
26588
26589 #[simd_test(enable = "avx512fp16")]
26590 fn test_mm512_maskz_cvttph_epi32() {
26591 let a = _mm256_set_ph(
26592 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26593 );
26594 let r = _mm512_maskz_cvttph_epi32(0b0101010101010101, a);
26595 let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
26596 assert_eq_m512i(r, e);
26597 }
26598
26599 #[simd_test(enable = "avx512fp16")]
26600 fn test_mm512_cvtt_roundph_epi32() {
26601 let a = _mm256_set_ph(
26602 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26603 );
26604 let r = _mm512_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(a);
26605 let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
26606 assert_eq_m512i(r, e);
26607 }
26608
26609 #[simd_test(enable = "avx512fp16")]
26610 fn test_mm512_mask_cvtt_roundph_epi32() {
26611 let a = _mm256_set_ph(
26612 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26613 );
26614 let src = _mm512_set_epi32(
26615 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26616 );
26617 let r = _mm512_mask_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a);
26618 let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
26619 assert_eq_m512i(r, e);
26620 }
26621
26622 #[simd_test(enable = "avx512fp16")]
26623 fn test_mm512_maskz_cvtt_roundph_epi32() {
26624 let a = _mm256_set_ph(
26625 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26626 );
26627 let r = _mm512_maskz_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(0b0101010101010101, a);
26628 let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
26629 assert_eq_m512i(r, e);
26630 }
26631
26632 #[simd_test(enable = "avx512fp16")]
26633 fn test_mm_cvttsh_i32() {
26634 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26635 let r = _mm_cvttsh_i32(a);
26636 assert_eq!(r, 1);
26637 }
26638
26639 #[simd_test(enable = "avx512fp16")]
26640 fn test_mm_cvtt_roundsh_i32() {
26641 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26642 let r = _mm_cvtt_roundsh_i32::<_MM_FROUND_NO_EXC>(a);
26643 assert_eq!(r, 1);
26644 }
26645
26646 #[simd_test(enable = "avx512fp16,avx512vl")]
26647 fn test_mm_cvttph_epu32() {
26648 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26649 let r = _mm_cvttph_epu32(a);
26650 let e = _mm_set_epi32(1, 2, 3, 4);
26651 assert_eq_m128i(r, e);
26652 }
26653
26654 #[simd_test(enable = "avx512fp16,avx512vl")]
26655 fn test_mm_mask_cvttph_epu32() {
26656 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26657 let src = _mm_set_epi32(10, 11, 12, 13);
26658 let r = _mm_mask_cvttph_epu32(src, 0b0101, a);
26659 let e = _mm_set_epi32(10, 2, 12, 4);
26660 assert_eq_m128i(r, e);
26661 }
26662
26663 #[simd_test(enable = "avx512fp16,avx512vl")]
26664 fn test_mm_maskz_cvttph_epu32() {
26665 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26666 let r = _mm_maskz_cvttph_epu32(0b0101, a);
26667 let e = _mm_set_epi32(0, 2, 0, 4);
26668 assert_eq_m128i(r, e);
26669 }
26670
26671 #[simd_test(enable = "avx512fp16,avx512vl")]
26672 fn test_mm256_cvttph_epu32() {
26673 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26674 let r = _mm256_cvttph_epu32(a);
26675 let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
26676 assert_eq_m256i(r, e);
26677 }
26678
26679 #[simd_test(enable = "avx512fp16,avx512vl")]
26680 fn test_mm256_mask_cvttph_epu32() {
26681 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26682 let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17);
26683 let r = _mm256_mask_cvttph_epu32(src, 0b01010101, a);
26684 let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8);
26685 assert_eq_m256i(r, e);
26686 }
26687
26688 #[simd_test(enable = "avx512fp16,avx512vl")]
26689 fn test_mm256_maskz_cvttph_epu32() {
26690 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26691 let r = _mm256_maskz_cvttph_epu32(0b01010101, a);
26692 let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8);
26693 assert_eq_m256i(r, e);
26694 }
26695
26696 #[simd_test(enable = "avx512fp16")]
26697 fn test_mm512_cvttph_epu32() {
26698 let a = _mm256_set_ph(
26699 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26700 );
26701 let r = _mm512_cvttph_epu32(a);
26702 let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
26703 assert_eq_m512i(r, e);
26704 }
26705
26706 #[simd_test(enable = "avx512fp16")]
26707 fn test_mm512_mask_cvttph_epu32() {
26708 let a = _mm256_set_ph(
26709 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26710 );
26711 let src = _mm512_set_epi32(
26712 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26713 );
26714 let r = _mm512_mask_cvttph_epu32(src, 0b0101010101010101, a);
26715 let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
26716 assert_eq_m512i(r, e);
26717 }
26718
26719 #[simd_test(enable = "avx512fp16")]
26720 fn test_mm512_maskz_cvttph_epu32() {
26721 let a = _mm256_set_ph(
26722 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26723 );
26724 let r = _mm512_maskz_cvttph_epu32(0b0101010101010101, a);
26725 let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
26726 assert_eq_m512i(r, e);
26727 }
26728
26729 #[simd_test(enable = "avx512fp16")]
26730 fn test_mm512_cvtt_roundph_epu32() {
26731 let a = _mm256_set_ph(
26732 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26733 );
26734 let r = _mm512_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(a);
26735 let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
26736 assert_eq_m512i(r, e);
26737 }
26738
26739 #[simd_test(enable = "avx512fp16")]
26740 fn test_mm512_mask_cvtt_roundph_epu32() {
26741 let a = _mm256_set_ph(
26742 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26743 );
26744 let src = _mm512_set_epi32(
26745 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26746 );
26747 let r = _mm512_mask_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a);
26748 let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
26749 assert_eq_m512i(r, e);
26750 }
26751
26752 #[simd_test(enable = "avx512fp16")]
26753 fn test_mm512_maskz_cvtt_roundph_epu32() {
26754 let a = _mm256_set_ph(
26755 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26756 );
26757 let r = _mm512_maskz_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(0b0101010101010101, a);
26758 let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
26759 assert_eq_m512i(r, e);
26760 }
26761
26762 #[simd_test(enable = "avx512fp16")]
26763 fn test_mm_cvttsh_u32() {
26764 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26765 let r = _mm_cvttsh_u32(a);
26766 assert_eq!(r, 1);
26767 }
26768
26769 #[simd_test(enable = "avx512fp16")]
26770 fn test_mm_cvtt_roundsh_u32() {
26771 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26772 let r = _mm_cvtt_roundsh_u32::<_MM_FROUND_NO_EXC>(a);
26773 assert_eq!(r, 1);
26774 }
26775
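    // The half-to-64-bit conversions use only the lowest two (128-bit destination) or four
    // (256-bit destination) half-precision elements of the source, which is why `a` is
    // zero-padded; the 512-bit forms convert all eight elements.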
26776 #[simd_test(enable = "avx512fp16,avx512vl")]
26777 fn test_mm_cvtph_epi64() {
26778 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
26779 let r = _mm_cvtph_epi64(a);
26780 let e = _mm_set_epi64x(1, 2);
26781 assert_eq_m128i(r, e);
26782 }
26783
26784 #[simd_test(enable = "avx512fp16,avx512vl")]
26785 fn test_mm_mask_cvtph_epi64() {
26786 let src = _mm_set_epi64x(3, 4);
26787 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
26788 let r = _mm_mask_cvtph_epi64(src, 0b01, a);
26789 let e = _mm_set_epi64x(3, 2);
26790 assert_eq_m128i(r, e);
26791 }
26792
26793 #[simd_test(enable = "avx512fp16,avx512vl")]
26794 fn test_mm_maskz_cvtph_epi64() {
26795 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
26796 let r = _mm_maskz_cvtph_epi64(0b01, a);
26797 let e = _mm_set_epi64x(0, 2);
26798 assert_eq_m128i(r, e);
26799 }
26800
26801 #[simd_test(enable = "avx512fp16,avx512vl")]
26802 fn test_mm256_cvtph_epi64() {
26803 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26804 let r = _mm256_cvtph_epi64(a);
26805 let e = _mm256_set_epi64x(1, 2, 3, 4);
26806 assert_eq_m256i(r, e);
26807 }
26808
26809 #[simd_test(enable = "avx512fp16,avx512vl")]
26810 fn test_mm256_mask_cvtph_epi64() {
26811 let src = _mm256_set_epi64x(5, 6, 7, 8);
26812 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26813 let r = _mm256_mask_cvtph_epi64(src, 0b0101, a);
26814 let e = _mm256_set_epi64x(5, 2, 7, 4);
26815 assert_eq_m256i(r, e);
26816 }
26817
26818 #[simd_test(enable = "avx512fp16,avx512vl")]
26819 fn test_mm256_maskz_cvtph_epi64() {
26820 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26821 let r = _mm256_maskz_cvtph_epi64(0b0101, a);
26822 let e = _mm256_set_epi64x(0, 2, 0, 4);
26823 assert_eq_m256i(r, e);
26824 }
26825
26826 #[simd_test(enable = "avx512fp16")]
26827 fn test_mm512_cvtph_epi64() {
26828 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26829 let r = _mm512_cvtph_epi64(a);
26830 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
26831 assert_eq_m512i(r, e);
26832 }
26833
26834 #[simd_test(enable = "avx512fp16")]
26835 fn test_mm512_mask_cvtph_epi64() {
26836 let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
26837 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26838 let r = _mm512_mask_cvtph_epi64(src, 0b01010101, a);
26839 let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
26840 assert_eq_m512i(r, e);
26841 }
26842
26843 #[simd_test(enable = "avx512fp16")]
26844 fn test_mm512_maskz_cvtph_epi64() {
26845 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26846 let r = _mm512_maskz_cvtph_epi64(0b01010101, a);
26847 let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
26848 assert_eq_m512i(r, e);
26849 }
26850
26851 #[simd_test(enable = "avx512fp16")]
26852 fn test_mm512_cvt_roundph_epi64() {
26853 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26854 let r = _mm512_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
26855 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
26856 assert_eq_m512i(r, e);
26857 }
26858
26859 #[simd_test(enable = "avx512fp16")]
26860 fn test_mm512_mask_cvt_roundph_epi64() {
26861 let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
26862 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26863 let r = _mm512_mask_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
26864 src, 0b01010101, a,
26865 );
26866 let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
26867 assert_eq_m512i(r, e);
26868 }
26869
26870 #[simd_test(enable = "avx512fp16")]
26871 fn test_mm512_maskz_cvt_roundph_epi64() {
26872 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26873 let r = _mm512_maskz_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
26874 0b01010101, a,
26875 );
26876 let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
26877 assert_eq_m512i(r, e);
26878 }
26879
26880 #[simd_test(enable = "avx512fp16,avx512vl")]
26881 fn test_mm_cvtph_epu64() {
26882 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
26883 let r = _mm_cvtph_epu64(a);
26884 let e = _mm_set_epi64x(1, 2);
26885 assert_eq_m128i(r, e);
26886 }
26887
26888 #[simd_test(enable = "avx512fp16,avx512vl")]
26889 fn test_mm_mask_cvtph_epu64() {
26890 let src = _mm_set_epi64x(3, 4);
26891 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
26892 let r = _mm_mask_cvtph_epu64(src, 0b01, a);
26893 let e = _mm_set_epi64x(3, 2);
26894 assert_eq_m128i(r, e);
26895 }
26896
26897 #[simd_test(enable = "avx512fp16,avx512vl")]
26898 fn test_mm_maskz_cvtph_epu64() {
26899 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
26900 let r = _mm_maskz_cvtph_epu64(0b01, a);
26901 let e = _mm_set_epi64x(0, 2);
26902 assert_eq_m128i(r, e);
26903 }
26904
26905 #[simd_test(enable = "avx512fp16,avx512vl")]
26906 fn test_mm256_cvtph_epu64() {
26907 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26908 let r = _mm256_cvtph_epu64(a);
26909 let e = _mm256_set_epi64x(1, 2, 3, 4);
26910 assert_eq_m256i(r, e);
26911 }
26912
26913 #[simd_test(enable = "avx512fp16,avx512vl")]
26914 fn test_mm256_mask_cvtph_epu64() {
26915 let src = _mm256_set_epi64x(5, 6, 7, 8);
26916 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26917 let r = _mm256_mask_cvtph_epu64(src, 0b0101, a);
26918 let e = _mm256_set_epi64x(5, 2, 7, 4);
26919 assert_eq_m256i(r, e);
26920 }
26921
26922 #[simd_test(enable = "avx512fp16,avx512vl")]
26923 fn test_mm256_maskz_cvtph_epu64() {
26924 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26925 let r = _mm256_maskz_cvtph_epu64(0b0101, a);
26926 let e = _mm256_set_epi64x(0, 2, 0, 4);
26927 assert_eq_m256i(r, e);
26928 }
26929
26930 #[simd_test(enable = "avx512fp16")]
26931 fn test_mm512_cvtph_epu64() {
26932 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26933 let r = _mm512_cvtph_epu64(a);
26934 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
26935 assert_eq_m512i(r, e);
26936 }
26937
26938 #[simd_test(enable = "avx512fp16")]
26939 fn test_mm512_mask_cvtph_epu64() {
26940 let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
26941 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26942 let r = _mm512_mask_cvtph_epu64(src, 0b01010101, a);
26943 let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
26944 assert_eq_m512i(r, e);
26945 }
26946
26947 #[simd_test(enable = "avx512fp16")]
26948 fn test_mm512_maskz_cvtph_epu64() {
26949 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26950 let r = _mm512_maskz_cvtph_epu64(0b01010101, a);
26951 let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
26952 assert_eq_m512i(r, e);
26953 }
26954
26955 #[simd_test(enable = "avx512fp16")]
26956 fn test_mm512_cvt_roundph_epu64() {
26957 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26958 let r = _mm512_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
26959 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
26960 assert_eq_m512i(r, e);
26961 }
26962
26963 #[simd_test(enable = "avx512fp16")]
26964 fn test_mm512_mask_cvt_roundph_epu64() {
26965 let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
26966 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26967 let r = _mm512_mask_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
26968 src, 0b01010101, a,
26969 );
26970 let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
26971 assert_eq_m512i(r, e);
26972 }
26973
26974 #[simd_test(enable = "avx512fp16")]
26975 fn test_mm512_maskz_cvt_roundph_epu64() {
26976 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26977 let r = _mm512_maskz_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
26978 0b01010101, a,
26979 );
26980 let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
26981 assert_eq_m512i(r, e);
26982 }
26983
26984 #[simd_test(enable = "avx512fp16,avx512vl")]
26985 fn test_mm_cvttph_epi64() {
26986 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
26987 let r = _mm_cvttph_epi64(a);
26988 let e = _mm_set_epi64x(1, 2);
26989 assert_eq_m128i(r, e);
26990 }
26991
26992 #[simd_test(enable = "avx512fp16,avx512vl")]
26993 fn test_mm_mask_cvttph_epi64() {
26994 let src = _mm_set_epi64x(3, 4);
26995 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
26996 let r = _mm_mask_cvttph_epi64(src, 0b01, a);
26997 let e = _mm_set_epi64x(3, 2);
26998 assert_eq_m128i(r, e);
26999 }
27000
27001 #[simd_test(enable = "avx512fp16,avx512vl")]
27002 fn test_mm_maskz_cvttph_epi64() {
27003 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
27004 let r = _mm_maskz_cvttph_epi64(0b01, a);
27005 let e = _mm_set_epi64x(0, 2);
27006 assert_eq_m128i(r, e);
27007 }
27008
27009 #[simd_test(enable = "avx512fp16,avx512vl")]
27010 fn test_mm256_cvttph_epi64() {
27011 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
27012 let r = _mm256_cvttph_epi64(a);
27013 let e = _mm256_set_epi64x(1, 2, 3, 4);
27014 assert_eq_m256i(r, e);
27015 }
27016
27017 #[simd_test(enable = "avx512fp16,avx512vl")]
27018 fn test_mm256_mask_cvttph_epi64() {
27019 let src = _mm256_set_epi64x(5, 6, 7, 8);
27020 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
27021 let r = _mm256_mask_cvttph_epi64(src, 0b0101, a);
27022 let e = _mm256_set_epi64x(5, 2, 7, 4);
27023 assert_eq_m256i(r, e);
27024 }
27025
27026 #[simd_test(enable = "avx512fp16,avx512vl")]
27027 fn test_mm256_maskz_cvttph_epi64() {
27028 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
27029 let r = _mm256_maskz_cvttph_epi64(0b0101, a);
27030 let e = _mm256_set_epi64x(0, 2, 0, 4);
27031 assert_eq_m256i(r, e);
27032 }
27033
27034 #[simd_test(enable = "avx512fp16")]
27035 fn test_mm512_cvttph_epi64() {
27036 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
27037 let r = _mm512_cvttph_epi64(a);
27038 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
27039 assert_eq_m512i(r, e);
27040 }
27041
27042 #[simd_test(enable = "avx512fp16")]
27043 fn test_mm512_mask_cvttph_epi64() {
27044 let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
27045 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
27046 let r = _mm512_mask_cvttph_epi64(src, 0b01010101, a);
27047 let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
27048 assert_eq_m512i(r, e);
27049 }
27050
27051 #[simd_test(enable = "avx512fp16")]
27052 fn test_mm512_maskz_cvttph_epi64() {
27053 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
27054 let r = _mm512_maskz_cvttph_epi64(0b01010101, a);
27055 let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
27056 assert_eq_m512i(r, e);
27057 }
27058
27059 #[simd_test(enable = "avx512fp16")]
27060 fn test_mm512_cvtt_roundph_epi64() {
27061 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
27062 let r = _mm512_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(a);
27063 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
27064 assert_eq_m512i(r, e);
27065 }
27066
27067 #[simd_test(enable = "avx512fp16")]
27068 fn test_mm512_mask_cvtt_roundph_epi64() {
27069 let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
27070 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
27071 let r = _mm512_mask_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(src, 0b01010101, a);
27072 let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
27073 assert_eq_m512i(r, e);
27074 }
27075
27076 #[simd_test(enable = "avx512fp16")]
27077 fn test_mm512_maskz_cvtt_roundph_epi64() {
27078 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
27079 let r = _mm512_maskz_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(0b01010101, a);
27080 let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
27081 assert_eq_m512i(r, e);
27082 }
27083
27084 #[simd_test(enable = "avx512fp16,avx512vl")]
27085 fn test_mm_cvttph_epu64() {
27086 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
27087 let r = _mm_cvttph_epu64(a);
27088 let e = _mm_set_epi64x(1, 2);
27089 assert_eq_m128i(r, e);
27090 }
27091
27092 #[simd_test(enable = "avx512fp16,avx512vl")]
27093 fn test_mm_mask_cvttph_epu64() {
27094 let src = _mm_set_epi64x(3, 4);
27095 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
27096 let r = _mm_mask_cvttph_epu64(src, 0b01, a);
27097 let e = _mm_set_epi64x(3, 2);
27098 assert_eq_m128i(r, e);
27099 }
27100
27101 #[simd_test(enable = "avx512fp16,avx512vl")]
27102 fn test_mm_maskz_cvttph_epu64() {
27103 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
27104 let r = _mm_maskz_cvttph_epu64(0b01, a);
27105 let e = _mm_set_epi64x(0, 2);
27106 assert_eq_m128i(r, e);
27107 }
27108
27109 #[simd_test(enable = "avx512fp16,avx512vl")]
27110 fn test_mm256_cvttph_epu64() {
27111 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
27112 let r = _mm256_cvttph_epu64(a);
27113 let e = _mm256_set_epi64x(1, 2, 3, 4);
27114 assert_eq_m256i(r, e);
27115 }
27116
27117 #[simd_test(enable = "avx512fp16,avx512vl")]
27118 fn test_mm256_mask_cvttph_epu64() {
27119 let src = _mm256_set_epi64x(5, 6, 7, 8);
27120 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
27121 let r = _mm256_mask_cvttph_epu64(src, 0b0101, a);
27122 let e = _mm256_set_epi64x(5, 2, 7, 4);
27123 assert_eq_m256i(r, e);
27124 }
27125
27126 #[simd_test(enable = "avx512fp16,avx512vl")]
27127 fn test_mm256_maskz_cvttph_epu64() {
27128 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
27129 let r = _mm256_maskz_cvttph_epu64(0b0101, a);
27130 let e = _mm256_set_epi64x(0, 2, 0, 4);
27131 assert_eq_m256i(r, e);
27132 }
27133
    #[simd_test(enable = "avx512fp16")]
    fn test_mm512_cvttph_epu64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_cvttph_epu64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm512_mask_cvttph_epu64() {
        let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_mask_cvttph_epu64(src, 0b01010101, a);
        let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm512_maskz_cvttph_epu64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_maskz_cvttph_epu64(0b01010101, a);
        let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm512_cvtt_roundph_epu64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm512_mask_cvtt_roundph_epu64() {
        let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_mask_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(src, 0b01010101, a);
        let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm512_maskz_cvtt_roundph_epu64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_maskz_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(0b01010101, a);
        let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m512i(r, e);
    }

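    // `_mm*_cvtxph_ps` is the AVX512-FP16 form of the half-to-single
    // conversion; unlike the older F16C `_mm*_cvtph_ps`, it takes an
    // `__m128h`/`__m256h` vector rather than an integer vector of packed
    // half-precision bit patterns.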
    #[simd_test(enable = "avx512fp16,avx512vl")]
    fn test_mm_cvtxph_ps() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm_cvtxph_ps(a);
        let e = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    fn test_mm_mask_cvtxph_ps() {
        let src = _mm_set_ps(10.0, 11.0, 12.0, 13.0);
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm_mask_cvtxph_ps(src, 0b0101, a);
        let e = _mm_set_ps(10.0, 2.0, 12.0, 4.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    fn test_mm_maskz_cvtxph_ps() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm_maskz_cvtxph_ps(0b0101, a);
        let e = _mm_set_ps(0.0, 2.0, 0.0, 4.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    fn test_mm256_cvtxph_ps() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm256_cvtxph_ps(a);
        let e = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    fn test_mm256_mask_cvtxph_ps() {
        let src = _mm256_set_ps(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm256_mask_cvtxph_ps(src, 0b01010101, a);
        let e = _mm256_set_ps(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    fn test_mm256_maskz_cvtxph_ps() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm256_maskz_cvtxph_ps(0b01010101, a);
        let e = _mm256_set_ps(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm512_cvtxph_ps() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_cvtxph_ps(a);
        let e = _mm512_set_ps(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm512_mask_cvtxph_ps() {
        let src = _mm512_set_ps(
            10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0,
            24.0, 25.0,
        );
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_mask_cvtxph_ps(src, 0b0101010101010101, a);
        let e = _mm512_set_ps(
            10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0,
            16.0,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm512_maskz_cvtxph_ps() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_maskz_cvtxph_ps(0b0101010101010101, a);
        let e = _mm512_set_ps(
            0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm512_cvtx_roundph_ps() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_ps(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm512_mask_cvtx_roundph_ps() {
        let src = _mm512_set_ps(
            10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0,
            24.0, 25.0,
        );
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_mask_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a);
        let e = _mm512_set_ps(
            10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0,
            16.0,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm512_maskz_cvtx_roundph_ps() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_maskz_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(0b0101010101010101, a);
        let e = _mm512_set_ps(
            0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
        );
        assert_eq_m512(r, e);
    }

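    // The scalar `cvtsh_ss` family converts the low f16 element of `b` into
    // the low f32 element of the result and copies the upper three f32 lanes
    // from `a`; the masked forms apply the mask bit to lane 0 only.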
    #[simd_test(enable = "avx512fp16")]
    fn test_mm_cvtsh_ss() {
        let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let r = _mm_cvtsh_ss(a, b);
        let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm_mask_cvtsh_ss() {
        let src = _mm_setr_ps(3.0, 11.0, 12.0, 13.0);
        let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let r = _mm_mask_cvtsh_ss(src, 0, a, b);
        let e = _mm_setr_ps(3.0, 20.0, 21.0, 22.0);
        assert_eq_m128(r, e);
        let r = _mm_mask_cvtsh_ss(src, 1, a, b);
        let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm_maskz_cvtsh_ss() {
        let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let r = _mm_maskz_cvtsh_ss(0, a, b);
        let e = _mm_setr_ps(0.0, 20.0, 21.0, 22.0);
        assert_eq_m128(r, e);
        let r = _mm_maskz_cvtsh_ss(1, a, b);
        let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm_cvt_roundsh_ss() {
        let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let r = _mm_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(a, b);
        let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm_mask_cvt_roundsh_ss() {
        let src = _mm_setr_ps(3.0, 11.0, 12.0, 13.0);
        let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let r = _mm_mask_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(src, 0, a, b);
        let e = _mm_setr_ps(3.0, 20.0, 21.0, 22.0);
        assert_eq_m128(r, e);
        let r = _mm_mask_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(src, 1, a, b);
        let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm_maskz_cvt_roundsh_ss() {
        let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let r = _mm_maskz_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(0, a, b);
        let e = _mm_setr_ps(0.0, 20.0, 21.0, 22.0);
        assert_eq_m128(r, e);
        let r = _mm_maskz_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(1, a, b);
        let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    fn test_mm_cvtph_pd() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_cvtph_pd(a);
        let e = _mm_set_pd(1.0, 2.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    fn test_mm_mask_cvtph_pd() {
        let src = _mm_set_pd(10.0, 11.0);
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_mask_cvtph_pd(src, 0b01, a);
        let e = _mm_set_pd(10.0, 2.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    fn test_mm_maskz_cvtph_pd() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_maskz_cvtph_pd(0b01, a);
        let e = _mm_set_pd(0.0, 2.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    fn test_mm256_cvtph_pd() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_cvtph_pd(a);
        let e = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    fn test_mm256_mask_cvtph_pd() {
        let src = _mm256_set_pd(10.0, 11.0, 12.0, 13.0);
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_mask_cvtph_pd(src, 0b0101, a);
        let e = _mm256_set_pd(10.0, 2.0, 12.0, 4.0);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    fn test_mm256_maskz_cvtph_pd() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_maskz_cvtph_pd(0b0101, a);
        let e = _mm256_set_pd(0.0, 2.0, 0.0, 4.0);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm512_cvtph_pd() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_cvtph_pd(a);
        let e = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm512_mask_cvtph_pd() {
        let src = _mm512_set_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_mask_cvtph_pd(src, 0b01010101, a);
        let e = _mm512_set_pd(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm512_maskz_cvtph_pd() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_maskz_cvtph_pd(0b01010101, a);
        let e = _mm512_set_pd(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm512_cvt_roundph_pd() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm512_mask_cvt_roundph_pd() {
        let src = _mm512_set_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_mask_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(src, 0b01010101, a);
        let e = _mm512_set_pd(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm512_maskz_cvt_roundph_pd() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_maskz_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(0b01010101, a);
        let e = _mm512_set_pd(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm_cvtsh_sd() {
        let a = _mm_setr_pd(2.0, 20.0);
        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let r = _mm_cvtsh_sd(a, b);
        let e = _mm_setr_pd(1.0, 20.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm_mask_cvtsh_sd() {
        let src = _mm_setr_pd(3.0, 11.0);
        let a = _mm_setr_pd(2.0, 20.0);
        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let r = _mm_mask_cvtsh_sd(src, 0, a, b);
        let e = _mm_setr_pd(3.0, 20.0);
        assert_eq_m128d(r, e);
        let r = _mm_mask_cvtsh_sd(src, 1, a, b);
        let e = _mm_setr_pd(1.0, 20.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm_maskz_cvtsh_sd() {
        let a = _mm_setr_pd(2.0, 20.0);
        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let r = _mm_maskz_cvtsh_sd(0, a, b);
        let e = _mm_setr_pd(0.0, 20.0);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_cvtsh_sd(1, a, b);
        let e = _mm_setr_pd(1.0, 20.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm_cvt_roundsh_sd() {
        let a = _mm_setr_pd(2.0, 20.0);
        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let r = _mm_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(a, b);
        let e = _mm_setr_pd(1.0, 20.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm_mask_cvt_roundsh_sd() {
        let src = _mm_setr_pd(3.0, 11.0);
        let a = _mm_setr_pd(2.0, 20.0);
        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let r = _mm_mask_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(src, 0, a, b);
        let e = _mm_setr_pd(3.0, 20.0);
        assert_eq_m128d(r, e);
        let r = _mm_mask_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(src, 1, a, b);
        let e = _mm_setr_pd(1.0, 20.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm_maskz_cvt_roundsh_sd() {
        let a = _mm_setr_pd(2.0, 20.0);
        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let r = _mm_maskz_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(0, a, b);
        let e = _mm_setr_pd(0.0, 20.0);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(1, a, b);
        let e = _mm_setr_pd(1.0, 20.0);
        assert_eq_m128d(r, e);
    }

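    // `_mm_cvtsh_h` and its wider variants simply return element 0 of the
    // input vector, so the contents of the remaining lanes do not affect the
    // result.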
    #[simd_test(enable = "avx512fp16")]
    fn test_mm_cvtsh_h() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvtsh_h(a);
        assert_eq!(r, 1.0);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm256_cvtsh_h() {
        let a = _mm256_setr_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm256_cvtsh_h(a);
        assert_eq!(r, 1.0);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm512_cvtsh_h() {
        let a = _mm512_setr_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let r = _mm512_cvtsh_h(a);
        assert_eq!(r, 1.0);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm_cvtsi128_si16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm_cvtsi128_si16(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm_cvtsi16_si128() {
        let a = 1;
        let r = _mm_cvtsi16_si128(a);
        let e = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
}