use crate::arch::asm;
use crate::core_arch::{simd::*, x86::*};
use crate::intrinsics::{fmaf16, simd::*};
use crate::ptr;

/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_ph)
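///
/// # Example (illustrative)
///
/// A minimal sketch, assuming a nightly toolchain with the `f16` and
/// `stdarch_x86_avx512_f16` features enabled and a CPU with `avx512fp16`
/// (not compiled as a doctest). Note that `e0` ends up in the lowest lane:
///
/// ```ignore
/// let v = _mm_set_ph(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0);
/// let mut out = [0.0f16; 8];
/// unsafe { _mm_storeu_ph(out.as_mut_ptr(), v) };
/// assert_eq!(out, [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]);
/// ```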
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_set_ph(
    e7: f16,
    e6: f16,
    e5: f16,
    e4: f16,
    e3: f16,
    e2: f16,
    e1: f16,
    e0: f16,
) -> __m128h {
    __m128h([e0, e1, e2, e3, e4, e5, e6, e7])
}

/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_set_ph(
    e15: f16,
    e14: f16,
    e13: f16,
    e12: f16,
    e11: f16,
    e10: f16,
    e9: f16,
    e8: f16,
    e7: f16,
    e6: f16,
    e5: f16,
    e4: f16,
    e3: f16,
    e2: f16,
    e1: f16,
    e0: f16,
) -> __m256h {
    __m256h([
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    ])
}

/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_set_ph(
    e31: f16,
    e30: f16,
    e29: f16,
    e28: f16,
    e27: f16,
    e26: f16,
    e25: f16,
    e24: f16,
    e23: f16,
    e22: f16,
    e21: f16,
    e20: f16,
    e19: f16,
    e18: f16,
    e17: f16,
    e16: f16,
    e15: f16,
    e14: f16,
    e13: f16,
    e12: f16,
    e11: f16,
    e10: f16,
    e9: f16,
    e8: f16,
    e7: f16,
    e6: f16,
    e5: f16,
    e4: f16,
    e3: f16,
    e2: f16,
    e1: f16,
    e0: f16,
) -> __m512h {
    __m512h([
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
        e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
    ])
}

/// Copy half-precision (16-bit) floating-point element a to the lower element of dst, and zero
/// the upper 7 elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sh)
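///
/// # Example (illustrative)
///
/// A minimal sketch under the same assumptions as the other examples in this
/// module (nightly `f16`/`stdarch_x86_avx512_f16`, CPU with `avx512fp16`):
///
/// ```ignore
/// let v = _mm_set_sh(1.5);
/// let mut out = [9.0f16; 8];
/// unsafe { _mm_storeu_ph(out.as_mut_ptr(), v) };
/// assert_eq!(out, [1.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
/// ```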
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_set_sh(a: f16) -> __m128h {
    __m128h([a, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
}

/// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_set1_ph(a: f16) -> __m128h {
    unsafe { transmute(f16x8::splat(a)) }
}

/// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_set1_ph(a: f16) -> __m256h {
    unsafe { transmute(f16x16::splat(a)) }
}

/// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_set1_ph(a: f16) -> __m512h {
    unsafe { transmute(f16x32::splat(a)) }
}

/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_ph)
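///
/// # Example (illustrative)
///
/// A hedged sketch showing that `_mm_setr_ph` takes its arguments in memory
/// order, i.e. reversed relative to `_mm_set_ph` (the comparison intrinsic
/// additionally requires `avx512vl`):
///
/// ```ignore
/// let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
/// let b = _mm_set_ph(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0);
/// let k = _mm_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
/// assert_eq!(k, 0xff); // all eight lanes are equal
/// ```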
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_setr_ph(
    e0: f16,
    e1: f16,
    e2: f16,
    e3: f16,
    e4: f16,
    e5: f16,
    e6: f16,
    e7: f16,
) -> __m128h {
    __m128h([e0, e1, e2, e3, e4, e5, e6, e7])
}

/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_setr_ph(
    e0: f16,
    e1: f16,
    e2: f16,
    e3: f16,
    e4: f16,
    e5: f16,
    e6: f16,
    e7: f16,
    e8: f16,
    e9: f16,
    e10: f16,
    e11: f16,
    e12: f16,
    e13: f16,
    e14: f16,
    e15: f16,
) -> __m256h {
    __m256h([
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    ])
}

/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setr_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_setr_ph(
    e0: f16,
    e1: f16,
    e2: f16,
    e3: f16,
    e4: f16,
    e5: f16,
    e6: f16,
    e7: f16,
    e8: f16,
    e9: f16,
    e10: f16,
    e11: f16,
    e12: f16,
    e13: f16,
    e14: f16,
    e15: f16,
    e16: f16,
    e17: f16,
    e18: f16,
    e19: f16,
    e20: f16,
    e21: f16,
    e22: f16,
    e23: f16,
    e24: f16,
    e25: f16,
    e26: f16,
    e27: f16,
    e28: f16,
    e29: f16,
    e30: f16,
    e31: f16,
) -> __m512h {
    __m512h([
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
        e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
    ])
}

/// Return vector of type __m128h with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_setzero_ph() -> __m128h {
    unsafe { transmute(f16x8::ZERO) }
}

/// Return vector of type __m256h with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setzero_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_setzero_ph() -> __m256h {
    f16x16::ZERO.as_m256h()
}

/// Return vector of type __m512h with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setzero_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_setzero_ph() -> __m512h {
    f16x32::ZERO.as_m512h()
}

/// Return vector of type `__m128h` with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`](crate::mem::MaybeUninit).
/// In practice, this is typically equivalent to [`mem::zeroed`](crate::mem::zeroed).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_undefined_ph() -> __m128h {
    f16x8::ZERO.as_m128h()
}

/// Return vector of type `__m256h` with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`](crate::mem::MaybeUninit).
/// In practice, this is typically equivalent to [`mem::zeroed`](crate::mem::zeroed).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_undefined_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_undefined_ph() -> __m256h {
    f16x16::ZERO.as_m256h()
}

/// Return vector of type `__m512h` with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`](crate::mem::MaybeUninit).
/// In practice, this is typically equivalent to [`mem::zeroed`](crate::mem::zeroed).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_undefined_ph() -> __m512h {
    f16x32::ZERO.as_m512h()
}

/// Cast vector of type `__m128d` to type `__m128h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ph)
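///
/// # Example (illustrative)
///
/// A minimal sketch of a round-trip bit-cast; no value conversion happens,
/// only the type is reinterpreted:
///
/// ```ignore
/// let d = _mm_set1_pd(1.0);
/// let h: __m128h = _mm_castpd_ph(d);
/// let d2: __m128d = _mm_castph_pd(h);
/// // `d` and `d2` contain the same 128 bits.
/// ```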
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_castpd_ph(a: __m128d) -> __m128h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256d` to type `__m256h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castpd_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castpd_ph(a: __m256d) -> __m256h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m512d` to type `__m512h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castpd_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castpd_ph(a: __m512d) -> __m512h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m128h` to type `__m128d`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_pd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_castph_pd(a: __m128h) -> __m128d {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256h` to type `__m256d`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_pd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castph_pd(a: __m256h) -> __m256d {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m512h` to type `__m512d`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_pd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castph_pd(a: __m512h) -> __m512d {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m128` to type `__m128h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_castps_ph(a: __m128) -> __m128h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256` to type `__m256h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castps_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castps_ph(a: __m256) -> __m256h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m512` to type `__m512h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castps_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castps_ph(a: __m512) -> __m512h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m128h` to type `__m128`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_ps)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_castph_ps(a: __m128h) -> __m128 {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256h` to type `__m256`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_ps)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castph_ps(a: __m256h) -> __m256 {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m512h` to type `__m512`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_ps)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castph_ps(a: __m512h) -> __m512 {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m128i` to type `__m128h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_castsi128_ph(a: __m128i) -> __m128h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256i` to type `__m256h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castsi256_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castsi256_ph(a: __m256i) -> __m256h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m512i` to type `__m512h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castsi512_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castsi512_ph(a: __m512i) -> __m512h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m128h` to type `__m128i`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_si128)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_castph_si128(a: __m128h) -> __m128i {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256h` to type `__m256i`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_si256)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castph_si256(a: __m256h) -> __m256i {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m512h` to type `__m512i`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_si512)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castph_si512(a: __m512h) -> __m512i {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256h` to type `__m128h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph256_ph128)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castph256_ph128(a: __m256h) -> __m128h {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
}

/// Cast vector of type `__m512h` to type `__m128h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph512_ph128)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castph512_ph128(a: __m512h) -> __m128h {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
}

/// Cast vector of type `__m512h` to type `__m256h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph512_ph256)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castph512_ph256(a: __m512h) -> __m256h {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) }
}

/// Cast vector of type `__m128h` to type `__m256h`. The upper 8 elements of the result are undefined.
/// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction,
/// but most of the time it does not generate any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph128_ph256)
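///
/// # Example (illustrative)
///
/// A sketch of widening a 128-bit vector; the upper lanes are unspecified, so
/// prefer [`_mm256_zextph128_ph256`] when they must be zero:
///
/// ```ignore
/// let lo = _mm_set1_ph(2.0);
/// let wide = _mm256_castph128_ph256(lo);
/// // The low 8 lanes of `wide` hold 2.0; the upper 8 lanes hold arbitrary values.
/// ```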
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castph128_ph256(a: __m128h) -> __m256h {
    unsafe {
        simd_shuffle!(
            a,
            _mm_undefined_ph(),
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]
        )
    }
}

/// Cast vector of type `__m128h` to type `__m512h`. The upper 24 elements of the result are undefined.
/// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction,
/// but most of the time it does not generate any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph128_ph512)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castph128_ph512(a: __m128h) -> __m512h {
    unsafe {
        simd_shuffle!(
            a,
            _mm_undefined_ph(),
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
                8, 8, 8, 8
            ]
        )
    }
}

/// Cast vector of type `__m256h` to type `__m512h`. The upper 16 elements of the result are undefined.
/// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction,
/// but most of the time it does not generate any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph256_ph512)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castph256_ph512(a: __m256h) -> __m512h {
    unsafe {
        simd_shuffle!(
            a,
            _mm256_undefined_ph(),
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16,
                16, 16, 16, 16, 16, 16, 16, 16, 16
            ]
        )
    }
}

/// Cast vector of type `__m128h` to type `__m256h`. The upper 8 elements of the result are zeroed.
/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate
/// any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zextph128_ph256)
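///
/// # Example (illustrative)
///
/// A minimal sketch, assuming the usual nightly features and an `avx512fp16`
/// CPU; the upper half of the result is guaranteed to be zero:
///
/// ```ignore
/// let lo = _mm_set1_ph(3.0);
/// let wide = _mm256_zextph128_ph256(lo);
/// let mut out = [1.0f16; 16];
/// unsafe { _mm256_storeu_ph(out.as_mut_ptr(), wide) };
/// assert!(out[8..].iter().all(|&x| x == 0.0));
/// ```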
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_zextph128_ph256(a: __m128h) -> __m256h {
    unsafe {
        simd_shuffle!(
            a,
            _mm_setzero_ph(),
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]
        )
    }
}

/// Cast vector of type `__m256h` to type `__m512h`. The upper 16 elements of the result are zeroed.
/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate
/// any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextph256_ph512)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_zextph256_ph512(a: __m256h) -> __m512h {
    unsafe {
        simd_shuffle!(
            a,
            _mm256_setzero_ph(),
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16,
                16, 16, 16, 16, 16, 16, 16, 16, 16
            ]
        )
    }
}

/// Cast vector of type `__m128h` to type `__m512h`. The upper 24 elements of the result are zeroed.
/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate
/// any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextph128_ph512)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_zextph128_ph512(a: __m128h) -> __m512h {
    unsafe {
        simd_shuffle!(
            a,
            _mm_setzero_ph(),
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
                8, 8, 8, 8
            ]
        )
    }
}

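// Shared helper for the `_mm*_cmp_ph_mask` intrinsics below. Both arms emit a
// `vcmpph` whose comparison predicate comes from the caller's `IMM5` const
// generic; the second arm additionally applies a writemask register.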
macro_rules! cmp_asm { // FIXME: use LLVM intrinsics
    ($mask_type: ty, $reg: ident, $a: expr, $b: expr) => {{
        let dst: $mask_type;
        asm!(
            "vcmpph {k}, {a}, {b}, {imm8}",
            k = lateout(kreg) dst,
            a = in($reg) $a,
            b = in($reg) $b,
            imm8 = const IMM5,
            options(pure, nomem, nostack)
        );
        dst
    }};
    ($mask_type: ty, $mask: expr, $reg: ident, $a: expr, $b: expr) => {{
        let dst: $mask_type;
        asm!(
            "vcmpph {k} {{ {mask} }}, {a}, {b}, {imm8}",
            k = lateout(kreg) dst,
            mask = in(kreg) $mask,
            a = in($reg) $a,
            b = in($reg) $b,
            imm8 = const IMM5,
            options(pure, nomem, nostack)
        );
        dst
    }};
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ph_mask)
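///
/// # Example (illustrative)
///
/// A hedged sketch: bit `i` of the returned mask is set when lane `i`
/// satisfies the predicate (here `_CMP_LT_OS`, ordered less-than):
///
/// ```ignore
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
/// let k = _mm_cmp_ph_mask::<_CMP_LT_OS>(a, b);
/// assert_eq!(k, 0b1111_1100); // only lanes 2..=7 satisfy a < b
/// ```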
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cmp_ph_mask<const IMM5: i32>(a: __m128h, b: __m128h) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask8, xmm_reg, a, b)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k using zeromask k (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cmp_ph_mask<const IMM5: i32>(k1: __mmask8, a: __m128h, b: __m128h) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask8, k1, xmm_reg, a, b)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_cmp_ph_mask<const IMM5: i32>(a: __m256h, b: __m256h) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask16, ymm_reg, a, b)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k using zeromask k (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_cmp_ph_mask<const IMM5: i32>(
    k1: __mmask16,
    a: __m256h,
    b: __m256h,
) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask16, k1, ymm_reg, a, b)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_cmp_ph_mask<const IMM5: i32>(a: __m512h, b: __m512h) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask32, zmm_reg, a, b)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k using zeromask k (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_cmp_ph_mask<const IMM5: i32>(
    k1: __mmask32,
    a: __m512h,
    b: __m512h,
) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask32, k1, zmm_reg, a, b)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k.
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_round_ph_mask)
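///
/// # Example (illustrative)
///
/// A minimal sketch; passing `_MM_FROUND_NO_EXC` suppresses exception
/// signalling (SAE), while the comparison result itself is unchanged:
///
/// ```ignore
/// let a = _mm512_set1_ph(1.0);
/// let b = _mm512_set1_ph(2.0);
/// let k = _mm512_cmp_round_ph_mask::<_CMP_LE_OS, _MM_FROUND_NO_EXC>(a, b);
/// assert_eq!(k, u32::MAX); // all 32 lanes satisfy 1.0 <= 2.0
/// ```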
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2, 3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_cmp_round_ph_mask<const IMM5: i32, const SAE: i32>(
    a: __m512h,
    b: __m512h,
) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_sae!(SAE);
        if SAE == _MM_FROUND_NO_EXC {
            let dst: __mmask32;
            asm!(
                "vcmpph {k}, {a}, {b}, {{sae}}, {imm8}",
                k = lateout(kreg) dst,
                a = in(zmm_reg) a,
                b = in(zmm_reg) b,
                imm8 = const IMM5,
                options(pure, nomem, nostack)
            );
            dst
        } else {
            cmp_asm!(__mmask32, zmm_reg, a, b)
        }
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k using zeromask k (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_round_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(3, 4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_cmp_round_ph_mask<const IMM5: i32, const SAE: i32>(
    k1: __mmask32,
    a: __m512h,
    b: __m512h,
) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_sae!(SAE);
        if SAE == _MM_FROUND_NO_EXC {
            let dst: __mmask32;
            asm!(
                "vcmpph {k} {{{k1}}}, {a}, {b}, {{sae}}, {imm8}",
                k = lateout(kreg) dst,
                k1 = in(kreg) k1,
                a = in(zmm_reg) a,
                b = in(zmm_reg) b,
                imm8 = const IMM5,
                options(pure, nomem, nostack)
            );
            dst
        } else {
            cmp_asm!(__mmask32, k1, zmm_reg, a, b)
        }
    }
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the result in mask vector k. Exceptions can be suppressed by
/// passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_sh_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2, 3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cmp_round_sh_mask<const IMM5: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __mmask8 {
    static_assert_uimm_bits!(IMM5, 5);
    static_assert_sae!(SAE);
    _mm_mask_cmp_round_sh_mask::<IMM5, SAE>(0xff, a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the result in mask vector k using zeromask k1. Exceptions can be
/// suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_sh_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(3, 4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cmp_round_sh_mask<const IMM5: i32, const SAE: i32>(
    k1: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_sae!(SAE);
        vcmpsh(a, b, IMM5, k1, SAE)
    }
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the result in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sh_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cmp_sh_mask<const IMM5: i32>(a: __m128h, b: __m128h) -> __mmask8 {
    static_assert_uimm_bits!(IMM5, 5);
    _mm_cmp_round_sh_mask::<IMM5, _MM_FROUND_CUR_DIRECTION>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the result in mask vector k using zeromask k1.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_sh_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cmp_sh_mask<const IMM5: i32>(k1: __mmask8, a: __m128h, b: __m128h) -> __mmask8 {
    static_assert_uimm_bits!(IMM5, 5);
    _mm_mask_cmp_round_sh_mask::<IMM5, _MM_FROUND_CUR_DIRECTION>(k1, a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and return the boolean result (0 or 1).
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2, 3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comi_round_sh<const IMM5: i32, const SAE: i32>(a: __m128h, b: __m128h) -> i32 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_sae!(SAE);
        vcomish(a, b, IMM5, SAE)
    }
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and return the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comi_sh<const IMM5: i32>(a: __m128h, b: __m128h) -> i32 {
    static_assert_uimm_bits!(IMM5, 5);
    _mm_comi_round_sh::<IMM5, _MM_FROUND_CUR_DIRECTION>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for equality, and return
/// the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sh)
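///
/// # Example (illustrative)
///
/// A minimal sketch comparing only the lowest lane:
///
/// ```ignore
/// assert_eq!(_mm_comieq_sh(_mm_set_sh(1.0), _mm_set_sh(1.0)), 1);
/// assert_eq!(_mm_comieq_sh(_mm_set_sh(1.0), _mm_set_sh(2.0)), 0);
/// ```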
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comieq_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_EQ_OS>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than-or-equal,
/// and return the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comige_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_GE_OS>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than, and return
/// the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comigt_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_GT_OS>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than-or-equal, and
/// return the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comile_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_LE_OS>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than, and return
/// the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comilt_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_LT_OS>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for not-equal, and return
/// the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comineq_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_NEQ_OS>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for equality, and
/// return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_ucomieq_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_EQ_OQ>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than-or-equal,
/// and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_ucomige_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_GE_OQ>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than, and return
/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_ucomigt_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_GT_OQ>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than-or-equal, and
/// return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_ucomile_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_LE_OQ>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than, and return
/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_ucomilt_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_LT_OQ>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for not-equal, and return
/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_ucomineq_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_NEQ_OQ>(a, b)
}

/// Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address must be aligned to 16 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_ph)
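///
/// # Example (illustrative)
///
/// A hedged sketch; the helper struct only exists to guarantee the 16-byte
/// alignment this aligned load requires:
///
/// ```ignore
/// #[repr(align(16))]
/// struct Aligned([f16; 8]);
///
/// let data = Aligned([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]);
/// let v = unsafe { _mm_load_ph(data.0.as_ptr()) };
/// ```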
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_load_ph(mem_addr: *const f16) -> __m128h {
    *mem_addr.cast()
}

/// Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address must be aligned to 32 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_load_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm256_load_ph(mem_addr: *const f16) -> __m256h {
    *mem_addr.cast()
}

/// Load 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address must be aligned to 64 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_load_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm512_load_ph(mem_addr: *const f16) -> __m512h {
    *mem_addr.cast()
}

/// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector,
/// and zero the upper elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_load_sh(mem_addr: *const f16) -> __m128h {
    _mm_set_sh(*mem_addr)
}

/// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector
/// using writemask k (the element is copied from src when mask bit 0 is not set), and zero the upper elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_mask_load_sh(src: __m128h, k: __mmask8, mem_addr: *const f16) -> __m128h {
    let mut dst: __m128h = src;
    asm!(
        vpl!("vmovsh {dst}{{{k}}}"),
        dst = inout(xmm_reg) dst,
        k = in(kreg) k,
        p = in(reg) mem_addr,
        options(pure, readonly, nostack, preserves_flags)
    );
    dst
}

/// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector
/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and zero the upper elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_maskz_load_sh(k: __mmask8, mem_addr: *const f16) -> __m128h {
    let mut dst: __m128h;
    asm!(
        vpl!("vmovsh {dst}{{{k}}}{{z}}"),
        dst = out(xmm_reg) dst,
        k = in(kreg) k,
        p = in(reg) mem_addr,
        options(pure, readonly, nostack, preserves_flags)
    );
    dst
}

/// Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address does not need to be aligned to any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_ph)
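///
/// # Example (illustrative)
///
/// A minimal sketch; unlike [`_mm_load_ph`], any pointer to eight consecutive
/// `f16` values is acceptable:
///
/// ```ignore
/// let data = [0.5f16; 8];
/// let v = unsafe { _mm_loadu_ph(data.as_ptr()) };
/// ```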
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_loadu_ph(mem_addr: *const f16) -> __m128h {
    ptr::read_unaligned(mem_addr.cast())
}

/// Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address does not need to be aligned to any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm256_loadu_ph(mem_addr: *const f16) -> __m256h {
    ptr::read_unaligned(mem_addr.cast())
}

/// Load 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address does not need to be aligned to any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm512_loadu_ph(mem_addr: *const f16) -> __m512h {
    ptr::read_unaligned(mem_addr.cast())
}

/// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst
/// using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper
/// 7 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_move_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_move_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe {
        let mut mov: f16 = simd_extract!(src, 0);
        if (k & 1) != 0 {
            mov = simd_extract!(b, 0);
        }
        simd_insert!(a, 0, mov)
    }
}

/// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst
/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed
/// elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_move_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_move_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe {
        let mut mov: f16 = 0.;
        if (k & 1) != 0 {
            mov = simd_extract!(b, 0);
        }
        simd_insert!(a, 0, mov)
    }
}

/// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst,
/// and copy the upper 7 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sh)
1234 | #[inline ] |
1235 | #[target_feature (enable = "avx512fp16" )] |
1236 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1237 | pub fn _mm_move_sh(a: __m128h, b: __m128h) -> __m128h { |
1238 | unsafe { |
1239 | let mov: f16 = simd_extract!(b, 0); |
1240 | simd_insert!(a, 0, mov) |
1241 | } |
1242 | } |
1243 | |
1244 | /// Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from a into memory. |
1245 | /// The address must be aligned to 16 bytes or a general-protection exception may be generated. |
1246 | /// |
1247 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_ph) |
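///
/// A minimal sketch of an aligned store; the `Aligned` wrapper is an illustrative helper
/// used only to guarantee the 16-byte alignment this intrinsic requires:
///
/// ```ignore
/// #[repr(align(16))]
/// struct Aligned([f16; 8]);
///
/// let mut out = Aligned([0.0; 8]);
/// let v = _mm_set1_ph(2.0);
/// // SAFETY: `out` is 16-byte aligned and valid for writing 8 `f16` values.
/// unsafe { _mm_store_ph(out.0.as_mut_ptr(), v) };
/// ```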
1248 | #[inline ] |
1249 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1250 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1251 | pub unsafe fn _mm_store_ph(mem_addr: *mut f16, a: __m128h) { |
1252 | *mem_addr.cast() = a; |
1253 | } |
1254 | |
1255 | /// Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from a into memory. |
1256 | /// The address must be aligned to 32 bytes or a general-protection exception may be generated. |
1257 | /// |
1258 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_store_ph) |
1259 | #[inline ] |
1260 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1261 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1262 | pub unsafe fn _mm256_store_ph(mem_addr: *mut f16, a: __m256h) { |
1263 | *mem_addr.cast() = a; |
1264 | } |
1265 | |
1266 | /// Store 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from a into memory. |
1267 | /// The address must be aligned to 64 bytes or a general-protection exception may be generated. |
1268 | /// |
1269 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_store_ph) |
1270 | #[inline ] |
1271 | #[target_feature (enable = "avx512fp16" )] |
1272 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1273 | pub unsafe fn _mm512_store_ph(mem_addr: *mut f16, a: __m512h) { |
1274 | *mem_addr.cast() = a; |
1275 | } |
1276 | |
1277 | /// Store the lower half-precision (16-bit) floating-point element from a into memory. |
1278 | /// |
1279 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_sh) |
1280 | #[inline ] |
1281 | #[target_feature (enable = "avx512fp16" )] |
1282 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1283 | pub unsafe fn _mm_store_sh(mem_addr: *mut f16, a: __m128h) { |
1284 | *mem_addr = simd_extract!(a, 0); |
1285 | } |
1286 | |
/// Store the lower half-precision (16-bit) floating-point element from a into memory using writemask k
/// (the element is stored only when mask bit 0 is set).
1288 | /// |
1289 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sh) |
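///
/// A small sketch (values are illustrative): the scalar is written only when mask bit 0 is set.
///
/// ```ignore
/// let mut out: f16 = 0.0;
/// let a = _mm_set_sh(3.5);
/// // SAFETY: `out` is valid for writing one `f16`.
/// unsafe { _mm_mask_store_sh(&mut out, 0b1, a) }; // stores 3.5
/// unsafe { _mm_mask_store_sh(&mut out, 0b0, a) }; // leaves `out` unchanged
/// ```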
1290 | #[inline ] |
1291 | #[target_feature (enable = "avx512fp16" )] |
1292 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1293 | pub unsafe fn _mm_mask_store_sh(mem_addr: *mut f16, k: __mmask8, a: __m128h) { |
1294 | asm!( |
1295 | vps!("vmovdqu16" , "{{{k}}}, {src}" ), |
1296 | p = in(reg) mem_addr, |
1297 | k = in(kreg) k, |
1298 | src = in(xmm_reg) a, |
1299 | options(nostack, preserves_flags) |
1300 | ); |
1301 | } |
1302 | |
1303 | /// Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from a into memory. |
1304 | /// The address does not need to be aligned to any particular boundary. |
1305 | /// |
1306 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_ph) |
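///
/// A round-trip sketch with an unaligned buffer (values are illustrative; note that element 0
/// of the stored array is the lowest lane, i.e. `e0` of `_mm_set_ph`):
///
/// ```ignore
/// let v = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
/// let mut out: [f16; 8] = [0.0; 8];
/// // SAFETY: `out` is valid for writing 8 `f16` values; no alignment is required.
/// unsafe { _mm_storeu_ph(out.as_mut_ptr(), v) };
/// assert_eq!(out[0], 1.0);
/// ```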
1307 | #[inline ] |
1308 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1309 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1310 | pub unsafe fn _mm_storeu_ph(mem_addr: *mut f16, a: __m128h) { |
ptr::write_unaligned(mem_addr.cast(), a);
1312 | } |
1313 | |
1314 | /// Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from a into memory. |
1315 | /// The address does not need to be aligned to any particular boundary. |
1316 | /// |
1317 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu_ph) |
1318 | #[inline ] |
1319 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1320 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1321 | pub unsafe fn _mm256_storeu_ph(mem_addr: *mut f16, a: __m256h) { |
ptr::write_unaligned(mem_addr.cast(), a);
1323 | } |
1324 | |
1325 | /// Store 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from a into memory. |
1326 | /// The address does not need to be aligned to any particular boundary. |
1327 | /// |
1328 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ph) |
1329 | #[inline ] |
1330 | #[target_feature (enable = "avx512fp16" )] |
1331 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1332 | pub unsafe fn _mm512_storeu_ph(mem_addr: *mut f16, a: __m512h) { |
ptr::write_unaligned(mem_addr.cast(), a);
1334 | } |
1335 | |
1336 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. |
1337 | /// |
1338 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_ph) |
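///
/// A minimal sketch of a lane-wise addition (values are illustrative; the lanes are read back
/// with `_mm_storeu_ph` purely for inspection):
///
/// ```ignore
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
/// let sum = _mm_add_ph(a, b);
/// let mut out: [f16; 8] = [0.0; 8];
/// unsafe { _mm_storeu_ph(out.as_mut_ptr(), sum) };
/// assert_eq!(out, [2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]);
/// ```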
1339 | #[inline ] |
1340 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1341 | #[cfg_attr (test, assert_instr(vaddph))] |
1342 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1343 | pub fn _mm_add_ph(a: __m128h, b: __m128h) -> __m128h { |
unsafe { simd_add(a, b) }
1345 | } |
1346 | |
1347 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1348 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1349 | /// |
1350 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_ph) |
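///
/// A sketch of the writemask behavior (mask and values are illustrative): lanes whose mask bit
/// is clear are copied from `src` rather than computed.
///
/// ```ignore
/// let src = _mm_set1_ph(-1.0);
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set1_ph(2.0);
/// // Only lanes 0 and 2 receive `a + b`; the rest are copied from `src`.
/// let r = _mm_mask_add_ph(src, 0b0000_0101, a, b);
/// let mut out: [f16; 8] = [0.0; 8];
/// unsafe { _mm_storeu_ph(out.as_mut_ptr(), r) };
/// assert_eq!(out, [3.0, -1.0, 3.0, -1.0, -1.0, -1.0, -1.0, -1.0]);
/// ```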
1351 | #[inline ] |
1352 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1353 | #[cfg_attr (test, assert_instr(vaddph))] |
1354 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1355 | pub fn _mm_mask_add_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1356 | unsafe { |
1357 | let r: __m128h = _mm_add_ph(a, b); |
simd_select_bitmask(k, r, src)
1359 | } |
1360 | } |
1361 | |
1362 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1363 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1364 | /// |
1365 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_ph) |
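///
/// A sketch of the zeromask behavior (mask and values are illustrative): lanes whose mask bit
/// is clear are zeroed instead of being taken from a source vector.
///
/// ```ignore
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set1_ph(2.0);
/// let r = _mm_maskz_add_ph(0b0000_0011, a, b);
/// let mut out: [f16; 8] = [0.0; 8];
/// unsafe { _mm_storeu_ph(out.as_mut_ptr(), r) };
/// assert_eq!(out, [3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
/// ```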
1366 | #[inline ] |
1367 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1368 | #[cfg_attr (test, assert_instr(vaddph))] |
1369 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1370 | pub fn _mm_maskz_add_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1371 | unsafe { |
1372 | let r: __m128h = _mm_add_ph(a, b); |
simd_select_bitmask(k, r, _mm_setzero_ph())
1374 | } |
1375 | } |
1376 | |
1377 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. |
1378 | /// |
1379 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_ph) |
1380 | #[inline ] |
1381 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1382 | #[cfg_attr (test, assert_instr(vaddph))] |
1383 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1384 | pub fn _mm256_add_ph(a: __m256h, b: __m256h) -> __m256h { |
unsafe { simd_add(a, b) }
1386 | } |
1387 | |
1388 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1389 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1390 | /// |
1391 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_add_ph) |
1392 | #[inline ] |
1393 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1394 | #[cfg_attr (test, assert_instr(vaddph))] |
1395 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1396 | pub fn _mm256_mask_add_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
1397 | unsafe { |
1398 | let r: __m256h = _mm256_add_ph(a, b); |
simd_select_bitmask(k, r, src)
1400 | } |
1401 | } |
1402 | |
1403 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1404 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1405 | /// |
1406 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_add_ph) |
1407 | #[inline ] |
1408 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1409 | #[cfg_attr (test, assert_instr(vaddph))] |
1410 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1411 | pub fn _mm256_maskz_add_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
1412 | unsafe { |
1413 | let r: __m256h = _mm256_add_ph(a, b); |
simd_select_bitmask(k, r, _mm256_setzero_ph())
1415 | } |
1416 | } |
1417 | |
1418 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. |
1419 | /// |
1420 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_ph) |
1421 | #[inline ] |
1422 | #[target_feature (enable = "avx512fp16" )] |
1423 | #[cfg_attr (test, assert_instr(vaddph))] |
1424 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1425 | pub fn _mm512_add_ph(a: __m512h, b: __m512h) -> __m512h { |
unsafe { simd_add(a, b) }
1427 | } |
1428 | |
1429 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1430 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1431 | /// |
1432 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_ph) |
1433 | #[inline ] |
1434 | #[target_feature (enable = "avx512fp16" )] |
1435 | #[cfg_attr (test, assert_instr(vaddph))] |
1436 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1437 | pub fn _mm512_mask_add_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
1438 | unsafe { |
1439 | let r: __m512h = _mm512_add_ph(a, b); |
simd_select_bitmask(k, r, src)
1441 | } |
1442 | } |
1443 | |
1444 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1445 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1446 | /// |
1447 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_ph) |
1448 | #[inline ] |
1449 | #[target_feature (enable = "avx512fp16" )] |
1450 | #[cfg_attr (test, assert_instr(vaddph))] |
1451 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1452 | pub fn _mm512_maskz_add_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
1453 | unsafe { |
1454 | let r: __m512h = _mm512_add_ph(a, b); |
simd_select_bitmask(k, r, _mm512_setzero_ph())
1456 | } |
1457 | } |
1458 | |
1459 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. |
1460 | /// Rounding is done according to the rounding parameter, which can be one of: |
1461 | /// |
1462 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1463 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1464 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1465 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1466 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1467 | /// |
1468 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_round_ph) |
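///
/// A sketch of supplying the rounding mode as a const generic (values are illustrative and
/// `_mm512_set1_ph` is used only to build the inputs; the chosen mode matches the default
/// round-to-nearest behavior):
///
/// ```ignore
/// let a = _mm512_set1_ph(1.0);
/// let b = _mm512_set1_ph(2.0);
/// let r = _mm512_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
/// ```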
1469 | #[inline ] |
1470 | #[target_feature (enable = "avx512fp16" )] |
1471 | #[cfg_attr (test, assert_instr(vaddph, ROUNDING = 8))] |
1472 | #[rustc_legacy_const_generics (2)] |
1473 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1474 | pub fn _mm512_add_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
1475 | unsafe { |
1476 | static_assert_rounding!(ROUNDING); |
1477 | vaddph(a, b, ROUNDING) |
1478 | } |
1479 | } |
1480 | |
1481 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1482 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1483 | /// Rounding is done according to the rounding parameter, which can be one of: |
1484 | /// |
1485 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1486 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1487 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1488 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1489 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1490 | /// |
1491 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_round_ph) |
1492 | #[inline ] |
1493 | #[target_feature (enable = "avx512fp16" )] |
1494 | #[cfg_attr (test, assert_instr(vaddph, ROUNDING = 8))] |
1495 | #[rustc_legacy_const_generics (4)] |
1496 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1497 | pub fn _mm512_mask_add_round_ph<const ROUNDING: i32>( |
1498 | src: __m512h, |
1499 | k: __mmask32, |
1500 | a: __m512h, |
1501 | b: __m512h, |
1502 | ) -> __m512h { |
1503 | unsafe { |
1504 | static_assert_rounding!(ROUNDING); |
1505 | let r: __m512h = _mm512_add_round_ph::<ROUNDING>(a, b); |
simd_select_bitmask(k, r, src)
1507 | } |
1508 | } |
1509 | |
1510 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1511 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1512 | /// Rounding is done according to the rounding parameter, which can be one of: |
1513 | /// |
1514 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1515 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1516 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1518 | /// |
1519 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_round_ph) |
1520 | #[inline ] |
1521 | #[target_feature (enable = "avx512fp16" )] |
1522 | #[cfg_attr (test, assert_instr(vaddph, ROUNDING = 8))] |
1523 | #[rustc_legacy_const_generics (3)] |
1524 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1525 | pub fn _mm512_maskz_add_round_ph<const ROUNDING: i32>( |
1526 | k: __mmask32, |
1527 | a: __m512h, |
1528 | b: __m512h, |
1529 | ) -> __m512h { |
1530 | unsafe { |
1531 | static_assert_rounding!(ROUNDING); |
1532 | let r: __m512h = _mm512_add_round_ph::<ROUNDING>(a, b); |
simd_select_bitmask(k, r, _mm512_setzero_ph())
1534 | } |
1535 | } |
1536 | |
1537 | /// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
1538 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
1539 | /// Rounding is done according to the rounding parameter, which can be one of: |
1540 | /// |
1541 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1542 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1543 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1544 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1545 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1546 | /// |
1547 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_round_sh) |
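///
/// A sketch of the scalar form (values are illustrative): only lane 0 is computed, while
/// lanes 1..=7 are copied from `a`.
///
/// ```ignore
/// let a = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
/// let b = _mm_set_sh(10.0);
/// let r = _mm_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
/// let mut out: [f16; 8] = [0.0; 8];
/// unsafe { _mm_storeu_ph(out.as_mut_ptr(), r) };
/// assert_eq!(out, [11.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
/// ```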
1548 | #[inline ] |
1549 | #[target_feature (enable = "avx512fp16" )] |
1550 | #[cfg_attr (test, assert_instr(vaddsh, ROUNDING = 8))] |
1551 | #[rustc_legacy_const_generics (2)] |
1552 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1553 | pub fn _mm_add_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
1554 | static_assert_rounding!(ROUNDING); |
_mm_mask_add_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
1556 | } |
1557 | |
1558 | /// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
1559 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
1560 | /// writemask k (the element is copied from src when mask bit 0 is not set). |
1561 | /// Rounding is done according to the rounding parameter, which can be one of: |
1562 | /// |
1563 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1564 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1565 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1566 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1567 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1568 | /// |
1569 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_round_sh) |
1570 | #[inline ] |
1571 | #[target_feature (enable = "avx512fp16" )] |
1572 | #[cfg_attr (test, assert_instr(vaddsh, ROUNDING = 8))] |
1573 | #[rustc_legacy_const_generics (4)] |
1574 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1575 | pub fn _mm_mask_add_round_sh<const ROUNDING: i32>( |
1576 | src: __m128h, |
1577 | k: __mmask8, |
1578 | a: __m128h, |
1579 | b: __m128h, |
1580 | ) -> __m128h { |
1581 | unsafe { |
1582 | static_assert_rounding!(ROUNDING); |
1583 | vaddsh(a, b, src, k, ROUNDING) |
1584 | } |
1585 | } |
1586 | |
1587 | /// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
1588 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
1589 | /// zeromask k (the element is zeroed out when mask bit 0 is not set). |
1590 | /// Rounding is done according to the rounding parameter, which can be one of: |
1591 | /// |
1592 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1593 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1594 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1595 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1596 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1597 | /// |
1598 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_round_sh) |
1599 | #[inline ] |
1600 | #[target_feature (enable = "avx512fp16" )] |
1601 | #[cfg_attr (test, assert_instr(vaddsh, ROUNDING = 8))] |
1602 | #[rustc_legacy_const_generics (3)] |
1603 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1604 | pub fn _mm_maskz_add_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1605 | static_assert_rounding!(ROUNDING); |
_mm_mask_add_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
1607 | } |
1608 | |
1609 | /// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
1610 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
1611 | /// |
1612 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sh) |
1613 | #[inline ] |
1614 | #[target_feature (enable = "avx512fp16" )] |
1615 | #[cfg_attr (test, assert_instr(vaddsh))] |
1616 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1617 | pub fn _mm_add_sh(a: __m128h, b: __m128h) -> __m128h { |
1618 | _mm_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b) |
1619 | } |
1620 | |
1621 | /// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
1622 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
1623 | /// writemask k (the element is copied from src when mask bit 0 is not set). |
1624 | /// |
1625 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_sh) |
1626 | #[inline ] |
1627 | #[target_feature (enable = "avx512fp16" )] |
1628 | #[cfg_attr (test, assert_instr(vaddsh))] |
1629 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1630 | pub fn _mm_mask_add_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1631 | _mm_mask_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
1632 | } |
1633 | |
1634 | /// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
1635 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
1636 | /// zeromask k (the element is zeroed out when mask bit 0 is not set). |
1637 | /// |
1638 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_sh) |
1639 | #[inline ] |
1640 | #[target_feature (enable = "avx512fp16" )] |
1641 | #[cfg_attr (test, assert_instr(vaddsh))] |
1642 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1643 | pub fn _mm_maskz_add_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1644 | _mm_maskz_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b) |
1645 | } |
1646 | |
1647 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst. |
1648 | /// |
1649 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_ph) |
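///
/// A minimal sketch (values are illustrative): the subtraction is `a - b`, lane by lane.
///
/// ```ignore
/// let a = _mm_set1_ph(5.0);
/// let b = _mm_set1_ph(2.0);
/// let r = _mm_sub_ph(a, b); // every lane is 3.0
/// ```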
1650 | #[inline ] |
1651 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1652 | #[cfg_attr (test, assert_instr(vsubph))] |
1653 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1654 | pub fn _mm_sub_ph(a: __m128h, b: __m128h) -> __m128h { |
unsafe { simd_sub(a, b) }
1656 | } |
1657 | |
1658 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using |
1659 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1660 | /// |
1661 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_ph) |
1662 | #[inline ] |
1663 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1664 | #[cfg_attr (test, assert_instr(vsubph))] |
1665 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1666 | pub fn _mm_mask_sub_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1667 | unsafe { |
1668 | let r: __m128h = _mm_sub_ph(a, b); |
simd_select_bitmask(k, r, src)
1670 | } |
1671 | } |
1672 | |
1673 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using |
1674 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1675 | /// |
1676 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_ph) |
1677 | #[inline ] |
1678 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1679 | #[cfg_attr (test, assert_instr(vsubph))] |
1680 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1681 | pub fn _mm_maskz_sub_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1682 | unsafe { |
1683 | let r: __m128h = _mm_sub_ph(a, b); |
simd_select_bitmask(k, r, _mm_setzero_ph())
1685 | } |
1686 | } |
1687 | |
1688 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst. |
1689 | /// |
1690 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_ph) |
1691 | #[inline ] |
1692 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1693 | #[cfg_attr (test, assert_instr(vsubph))] |
1694 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1695 | pub fn _mm256_sub_ph(a: __m256h, b: __m256h) -> __m256h { |
unsafe { simd_sub(a, b) }
1697 | } |
1698 | |
1699 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using |
1700 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1701 | /// |
1702 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sub_ph) |
1703 | #[inline ] |
1704 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1705 | #[cfg_attr (test, assert_instr(vsubph))] |
1706 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1707 | pub fn _mm256_mask_sub_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
1708 | unsafe { |
1709 | let r: __m256h = _mm256_sub_ph(a, b); |
simd_select_bitmask(k, r, src)
1711 | } |
1712 | } |
1713 | |
1714 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using |
1715 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1716 | /// |
1717 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sub_ph) |
1718 | #[inline ] |
1719 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1720 | #[cfg_attr (test, assert_instr(vsubph))] |
1721 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1722 | pub fn _mm256_maskz_sub_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
1723 | unsafe { |
1724 | let r: __m256h = _mm256_sub_ph(a, b); |
simd_select_bitmask(k, r, _mm256_setzero_ph())
1726 | } |
1727 | } |
1728 | |
1729 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst. |
1730 | /// |
1731 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_ph) |
1732 | #[inline ] |
1733 | #[target_feature (enable = "avx512fp16" )] |
1734 | #[cfg_attr (test, assert_instr(vsubph))] |
1735 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1736 | pub fn _mm512_sub_ph(a: __m512h, b: __m512h) -> __m512h { |
unsafe { simd_sub(a, b) }
1738 | } |
1739 | |
1740 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using |
1741 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1742 | /// |
1743 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_ph) |
1744 | #[inline ] |
1745 | #[target_feature (enable = "avx512fp16" )] |
1746 | #[cfg_attr (test, assert_instr(vsubph))] |
1747 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1748 | pub fn _mm512_mask_sub_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
1749 | unsafe { |
1750 | let r: __m512h = _mm512_sub_ph(a, b); |
simd_select_bitmask(k, r, src)
1752 | } |
1753 | } |
1754 | |
1755 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using |
1756 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1757 | /// |
1758 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_ph) |
1759 | #[inline ] |
1760 | #[target_feature (enable = "avx512fp16" )] |
1761 | #[cfg_attr (test, assert_instr(vsubph))] |
1762 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1763 | pub fn _mm512_maskz_sub_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
1764 | unsafe { |
1765 | let r: __m512h = _mm512_sub_ph(a, b); |
simd_select_bitmask(k, r, _mm512_setzero_ph())
1767 | } |
1768 | } |
1769 | |
1770 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst. |
1771 | /// Rounding is done according to the rounding parameter, which can be one of: |
1772 | /// |
1773 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1774 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1775 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1776 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1777 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1778 | /// |
1779 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_round_ph) |
1780 | #[inline ] |
1781 | #[target_feature (enable = "avx512fp16" )] |
1782 | #[cfg_attr (test, assert_instr(vsubph, ROUNDING = 8))] |
1783 | #[rustc_legacy_const_generics (2)] |
1784 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1785 | pub fn _mm512_sub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
1786 | unsafe { |
1787 | static_assert_rounding!(ROUNDING); |
1788 | vsubph(a, b, ROUNDING) |
1789 | } |
1790 | } |
1791 | |
1792 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using |
1793 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1794 | /// Rounding is done according to the rounding parameter, which can be one of: |
1795 | /// |
1796 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1797 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1798 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1799 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1800 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1801 | /// |
1802 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_round_ph) |
1803 | #[inline ] |
1804 | #[target_feature (enable = "avx512fp16" )] |
1805 | #[cfg_attr (test, assert_instr(vsubph, ROUNDING = 8))] |
1806 | #[rustc_legacy_const_generics (4)] |
1807 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1808 | pub fn _mm512_mask_sub_round_ph<const ROUNDING: i32>( |
1809 | src: __m512h, |
1810 | k: __mmask32, |
1811 | a: __m512h, |
1812 | b: __m512h, |
1813 | ) -> __m512h { |
1814 | unsafe { |
1815 | static_assert_rounding!(ROUNDING); |
1816 | let r: __m512h = _mm512_sub_round_ph::<ROUNDING>(a, b); |
simd_select_bitmask(k, r, src)
1818 | } |
1819 | } |
1820 | |
1821 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using |
1822 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1823 | /// Rounding is done according to the rounding parameter, which can be one of: |
1824 | /// |
1825 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1826 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1827 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1828 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1829 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1830 | /// |
1831 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_round_ph) |
1832 | #[inline ] |
1833 | #[target_feature (enable = "avx512fp16" )] |
1834 | #[cfg_attr (test, assert_instr(vsubph, ROUNDING = 8))] |
1835 | #[rustc_legacy_const_generics (3)] |
1836 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1837 | pub fn _mm512_maskz_sub_round_ph<const ROUNDING: i32>( |
1838 | k: __mmask32, |
1839 | a: __m512h, |
1840 | b: __m512h, |
1841 | ) -> __m512h { |
1842 | unsafe { |
1843 | static_assert_rounding!(ROUNDING); |
1844 | let r: __m512h = _mm512_sub_round_ph::<ROUNDING>(a, b); |
simd_select_bitmask(k, r, _mm512_setzero_ph())
1846 | } |
1847 | } |
1848 | |
1849 | /// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the |
1850 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
1851 | /// Rounding is done according to the rounding parameter, which can be one of: |
1852 | /// |
1853 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1854 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1855 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1856 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1857 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1858 | /// |
1859 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_round_sh) |
1860 | #[inline ] |
1861 | #[target_feature (enable = "avx512fp16" )] |
1862 | #[cfg_attr (test, assert_instr(vsubsh, ROUNDING = 8))] |
1863 | #[rustc_legacy_const_generics (2)] |
1864 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1865 | pub fn _mm_sub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
1866 | static_assert_rounding!(ROUNDING); |
_mm_mask_sub_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
1868 | } |
1869 | |
1870 | /// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the |
1871 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
1872 | /// writemask k (the element is copied from src when mask bit 0 is not set). |
1873 | /// Rounding is done according to the rounding parameter, which can be one of: |
1874 | /// |
1875 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1876 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1877 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1878 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1879 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1880 | /// |
1881 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_round_sh) |
1882 | #[inline ] |
1883 | #[target_feature (enable = "avx512fp16" )] |
1884 | #[cfg_attr (test, assert_instr(vsubsh, ROUNDING = 8))] |
1885 | #[rustc_legacy_const_generics (4)] |
1886 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1887 | pub fn _mm_mask_sub_round_sh<const ROUNDING: i32>( |
1888 | src: __m128h, |
1889 | k: __mmask8, |
1890 | a: __m128h, |
1891 | b: __m128h, |
1892 | ) -> __m128h { |
1893 | unsafe { |
1894 | static_assert_rounding!(ROUNDING); |
1895 | vsubsh(a, b, src, k, ROUNDING) |
1896 | } |
1897 | } |
1898 | |
1899 | /// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the |
1900 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
1901 | /// zeromask k (the element is zeroed out when mask bit 0 is not set). |
1902 | /// Rounding is done according to the rounding parameter, which can be one of: |
1903 | /// |
1904 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1905 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1906 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1907 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1908 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1909 | /// |
1910 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_round_sh) |
1911 | #[inline ] |
1912 | #[target_feature (enable = "avx512fp16" )] |
1913 | #[cfg_attr (test, assert_instr(vsubsh, ROUNDING = 8))] |
1914 | #[rustc_legacy_const_generics (3)] |
1915 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1916 | pub fn _mm_maskz_sub_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1917 | static_assert_rounding!(ROUNDING); |
_mm_mask_sub_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
1919 | } |
1920 | |
1921 | /// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the |
1922 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
1923 | /// |
1924 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sh) |
1925 | #[inline ] |
1926 | #[target_feature (enable = "avx512fp16" )] |
1927 | #[cfg_attr (test, assert_instr(vsubsh))] |
1928 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1929 | pub fn _mm_sub_sh(a: __m128h, b: __m128h) -> __m128h { |
1930 | _mm_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b) |
1931 | } |
1932 | |
1933 | /// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the |
1934 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
1935 | /// writemask k (the element is copied from src when mask bit 0 is not set). |
1936 | /// |
1937 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_sh) |
1938 | #[inline ] |
1939 | #[target_feature (enable = "avx512fp16" )] |
1940 | #[cfg_attr (test, assert_instr(vsubsh))] |
1941 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1942 | pub fn _mm_mask_sub_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1943 | _mm_mask_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
1944 | } |
1945 | |
1946 | /// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the |
1947 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
1948 | /// zeromask k (the element is zeroed out when mask bit 0 is not set). |
1949 | /// |
1950 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_sh) |
1951 | #[inline ] |
1952 | #[target_feature (enable = "avx512fp16" )] |
1953 | #[cfg_attr (test, assert_instr(vsubsh))] |
1954 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1955 | pub fn _mm_maskz_sub_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1956 | _mm_maskz_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b) |
1957 | } |
1958 | |
1959 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. |
1960 | /// |
1961 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_ph) |
1962 | #[inline ] |
1963 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1964 | #[cfg_attr (test, assert_instr(vmulph))] |
1965 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1966 | pub fn _mm_mul_ph(a: __m128h, b: __m128h) -> __m128h { |
unsafe { simd_mul(a, b) }
1968 | } |
1969 | |
1970 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1971 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1972 | /// |
1973 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_ph) |
1974 | #[inline ] |
1975 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1976 | #[cfg_attr (test, assert_instr(vmulph))] |
1977 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1978 | pub fn _mm_mask_mul_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1979 | unsafe { |
1980 | let r: __m128h = _mm_mul_ph(a, b); |
simd_select_bitmask(k, r, src)
1982 | } |
1983 | } |
1984 | |
1985 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1986 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1987 | /// |
1988 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_ph) |
1989 | #[inline ] |
1990 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1991 | #[cfg_attr (test, assert_instr(vmulph))] |
1992 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1993 | pub fn _mm_maskz_mul_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1994 | unsafe { |
1995 | let r: __m128h = _mm_mul_ph(a, b); |
simd_select_bitmask(k, r, _mm_setzero_ph())
1997 | } |
1998 | } |
1999 | |
2000 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. |
2001 | /// |
2002 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_ph) |
2003 | #[inline ] |
2004 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2005 | #[cfg_attr (test, assert_instr(vmulph))] |
2006 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2007 | pub fn _mm256_mul_ph(a: __m256h, b: __m256h) -> __m256h { |
unsafe { simd_mul(a, b) }
2009 | } |
2010 | |
2011 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
2012 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
2013 | /// |
2014 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_ph) |
2015 | #[inline ] |
2016 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2017 | #[cfg_attr (test, assert_instr(vmulph))] |
2018 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2019 | pub fn _mm256_mask_mul_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
2020 | unsafe { |
2021 | let r: __m256h = _mm256_mul_ph(a, b); |
simd_select_bitmask(k, r, src)
2023 | } |
2024 | } |
2025 | |
2026 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
2027 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2028 | /// |
2029 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_ph) |
2030 | #[inline ] |
2031 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2032 | #[cfg_attr (test, assert_instr(vmulph))] |
2033 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2034 | pub fn _mm256_maskz_mul_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
2035 | unsafe { |
2036 | let r: __m256h = _mm256_mul_ph(a, b); |
simd_select_bitmask(k, r, _mm256_setzero_ph())
2038 | } |
2039 | } |
2040 | |
2041 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. |
2042 | /// |
2043 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_ph) |
2044 | #[inline ] |
2045 | #[target_feature (enable = "avx512fp16" )] |
2046 | #[cfg_attr (test, assert_instr(vmulph))] |
2047 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2048 | pub fn _mm512_mul_ph(a: __m512h, b: __m512h) -> __m512h { |
unsafe { simd_mul(a, b) }
2050 | } |
2051 | |
2052 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
2053 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
2054 | /// |
2055 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_ph) |
2056 | #[inline ] |
2057 | #[target_feature (enable = "avx512fp16" )] |
2058 | #[cfg_attr (test, assert_instr(vmulph))] |
2059 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2060 | pub fn _mm512_mask_mul_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
2061 | unsafe { |
2062 | let r: __m512h = _mm512_mul_ph(a, b); |
simd_select_bitmask(k, r, src)
2064 | } |
2065 | } |
2066 | |
2067 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
2068 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2069 | /// |
2070 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_ph) |
2071 | #[inline ] |
2072 | #[target_feature (enable = "avx512fp16" )] |
2073 | #[cfg_attr (test, assert_instr(vmulph))] |
2074 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2075 | pub fn _mm512_maskz_mul_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
2076 | unsafe { |
2077 | let r: __m512h = _mm512_mul_ph(a, b); |
simd_select_bitmask(k, r, _mm512_setzero_ph())
2079 | } |
2080 | } |
2081 | |
2082 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. |
2083 | /// Rounding is done according to the rounding parameter, which can be one of: |
2084 | /// |
2085 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2086 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2087 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2088 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2089 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2090 | /// |
2091 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_ph) |
2092 | #[inline ] |
2093 | #[target_feature (enable = "avx512fp16" )] |
2094 | #[cfg_attr (test, assert_instr(vmulph, ROUNDING = 8))] |
2095 | #[rustc_legacy_const_generics (2)] |
2096 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2097 | pub fn _mm512_mul_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
2098 | unsafe { |
2099 | static_assert_rounding!(ROUNDING); |
2100 | vmulph(a, b, ROUNDING) |
2101 | } |
2102 | } |
2103 | |
2104 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
2105 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
2106 | /// Rounding is done according to the rounding parameter, which can be one of: |
2107 | /// |
2108 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2109 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2110 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2111 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2112 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2113 | /// |
2114 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_ph) |
2115 | #[inline ] |
2116 | #[target_feature (enable = "avx512fp16" )] |
2117 | #[cfg_attr (test, assert_instr(vmulph, ROUNDING = 8))] |
2118 | #[rustc_legacy_const_generics (4)] |
2119 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2120 | pub fn _mm512_mask_mul_round_ph<const ROUNDING: i32>( |
2121 | src: __m512h, |
2122 | k: __mmask32, |
2123 | a: __m512h, |
2124 | b: __m512h, |
2125 | ) -> __m512h { |
2126 | unsafe { |
2127 | static_assert_rounding!(ROUNDING); |
2128 | let r: __m512h = _mm512_mul_round_ph::<ROUNDING>(a, b); |
simd_select_bitmask(k, r, src)
2130 | } |
2131 | } |
2132 | |
2133 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
2134 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2135 | /// Rounding is done according to the rounding parameter, which can be one of: |
2136 | /// |
2137 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2138 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2139 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2140 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2141 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2142 | /// |
2143 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_ph) |
2144 | #[inline ] |
2145 | #[target_feature (enable = "avx512fp16" )] |
2146 | #[cfg_attr (test, assert_instr(vmulph, ROUNDING = 8))] |
2147 | #[rustc_legacy_const_generics (3)] |
2148 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2149 | pub fn _mm512_maskz_mul_round_ph<const ROUNDING: i32>( |
2150 | k: __mmask32, |
2151 | a: __m512h, |
2152 | b: __m512h, |
2153 | ) -> __m512h { |
2154 | unsafe { |
2155 | static_assert_rounding!(ROUNDING); |
2156 | let r: __m512h = _mm512_mul_round_ph::<ROUNDING>(a, b); |
simd_select_bitmask(k, r, _mm512_setzero_ph())
2158 | } |
2159 | } |
2160 | |
2161 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
2162 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
2163 | /// Rounding is done according to the rounding parameter, which can be one of: |
2164 | /// |
2165 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2166 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2167 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2168 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2169 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2170 | /// |
2171 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_round_sh) |
2172 | #[inline ] |
2173 | #[target_feature (enable = "avx512fp16" )] |
2174 | #[cfg_attr (test, assert_instr(vmulsh, ROUNDING = 8))] |
2175 | #[rustc_legacy_const_generics (2)] |
2176 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2177 | pub fn _mm_mul_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
2178 | static_assert_rounding!(ROUNDING); |
_mm_mask_mul_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
2180 | } |
2181 | |
2182 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
2183 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
2184 | /// writemask k (the element is copied from src when mask bit 0 is not set). |
2185 | /// Rounding is done according to the rounding parameter, which can be one of: |
2186 | /// |
2187 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2188 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2189 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2190 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2191 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2192 | /// |
2193 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_round_sh) |
2194 | #[inline ] |
2195 | #[target_feature (enable = "avx512fp16" )] |
2196 | #[cfg_attr (test, assert_instr(vmulsh, ROUNDING = 8))] |
2197 | #[rustc_legacy_const_generics (4)] |
2198 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2199 | pub fn _mm_mask_mul_round_sh<const ROUNDING: i32>( |
2200 | src: __m128h, |
2201 | k: __mmask8, |
2202 | a: __m128h, |
2203 | b: __m128h, |
2204 | ) -> __m128h { |
2205 | unsafe { |
2206 | static_assert_rounding!(ROUNDING); |
2207 | vmulsh(a, b, src, k, ROUNDING) |
2208 | } |
2209 | } |
2210 | |
2211 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
2212 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
2213 | /// zeromask k (the element is zeroed out when mask bit 0 is not set). |
2214 | /// Rounding is done according to the rounding parameter, which can be one of: |
2215 | /// |
2216 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2217 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2218 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2219 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2220 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2221 | /// |
2222 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_round_sh) |
2223 | #[inline ] |
2224 | #[target_feature (enable = "avx512fp16" )] |
2225 | #[cfg_attr (test, assert_instr(vmulsh, ROUNDING = 8))] |
2226 | #[rustc_legacy_const_generics (3)] |
2227 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2228 | pub fn _mm_maskz_mul_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2229 | static_assert_rounding!(ROUNDING); |
    _mm_mask_mul_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
2231 | } |
2232 | |
2233 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
2234 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
2235 | /// |
2236 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sh) |
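///
/// An illustrative sketch (not from Intel's documentation), assuming a nightly toolchain with
/// the `f16` and `stdarch_x86_avx512_f16` features and an `avx512fp16`-capable CPU:
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn mul_lower() -> __m128h {
///     let a = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.5);
///     let b = _mm_set_sh(2.0);
///     // Lane 0 of the result is 1.5 * 2.0 = 3.0; lanes 1..=7 are copied from `a`.
///     _mm_mul_sh(a, b)
/// }
/// ```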
2237 | #[inline ] |
2238 | #[target_feature (enable = "avx512fp16" )] |
2239 | #[cfg_attr (test, assert_instr(vmulsh))] |
2240 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2241 | pub fn _mm_mul_sh(a: __m128h, b: __m128h) -> __m128h { |
2242 | _mm_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b) |
2243 | } |
2244 | |
2245 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
2246 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
2247 | /// writemask k (the element is copied from src when mask bit 0 is not set). |
2248 | /// |
2249 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_sh) |
2250 | #[inline ] |
2251 | #[target_feature (enable = "avx512fp16" )] |
2252 | #[cfg_attr (test, assert_instr(vmulsh))] |
2253 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2254 | pub fn _mm_mask_mul_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2255 | _mm_mask_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
2256 | } |
2257 | |
2258 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
2259 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
2260 | /// zeromask k (the element is zeroed out when mask bit 0 is not set). |
2261 | /// |
2262 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_sh) |
2263 | #[inline ] |
2264 | #[target_feature (enable = "avx512fp16" )] |
2265 | #[cfg_attr (test, assert_instr(vmulsh))] |
2266 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2267 | pub fn _mm_maskz_mul_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2268 | _mm_maskz_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b) |
2269 | } |
2270 | |
2271 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst. |
2272 | /// |
2273 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_ph) |
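///
/// An illustrative sketch (not from Intel's documentation), assuming a nightly toolchain with
/// the `f16` and `stdarch_x86_avx512_f16` features and an `avx512fp16`-capable CPU:
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn div_all_lanes() -> __m128h {
///     let a = _mm_set1_ph(6.0);
///     let b = _mm_set1_ph(3.0);
///     // Every one of the 8 lanes of the result is 6.0 / 3.0 = 2.0.
///     _mm_div_ph(a, b)
/// }
/// ```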
2274 | #[inline ] |
2275 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2276 | #[cfg_attr (test, assert_instr(vdivph))] |
2277 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2278 | pub fn _mm_div_ph(a: __m128h, b: __m128h) -> __m128h { |
    unsafe { simd_div(a, b) }
2280 | } |
2281 | |
2282 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using |
2283 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
2284 | /// |
2285 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_ph) |
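///
/// An illustrative sketch of per-lane masking (not from Intel's documentation), assuming a
/// nightly toolchain with the `f16` and `stdarch_x86_avx512_f16` features and `avx512fp16`
/// support:
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn masked_div() -> __m128h {
///     let a = _mm_set1_ph(6.0);
///     let b = _mm_set1_ph(3.0);
///     let src = _mm_set1_ph(0.5);
///     // Only the even lanes (mask 0b0101_0101) receive 6.0 / 3.0 = 2.0;
///     // the odd lanes are copied from `src` (0.5).
///     _mm_mask_div_ph(src, 0b0101_0101, a, b)
/// }
/// ```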
2286 | #[inline ] |
2287 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2288 | #[cfg_attr (test, assert_instr(vdivph))] |
2289 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2290 | pub fn _mm_mask_div_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2291 | unsafe { |
        let r = _mm_div_ph(a, b);
        simd_select_bitmask(k, r, src)
2294 | } |
2295 | } |
2296 | |
2297 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using |
2298 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2299 | /// |
2300 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_ph) |
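///
/// An illustrative sketch of zero-masking (not from Intel's documentation), assuming nightly
/// with the `f16` and `stdarch_x86_avx512_f16` features and `avx512fp16` support:
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn zero_masked_div() -> __m128h {
///     let a = _mm_set1_ph(6.0);
///     let b = _mm_set1_ph(3.0);
///     // Lanes 0..=3 (mask 0b0000_1111) receive 6.0 / 3.0 = 2.0; lanes 4..=7 are zeroed.
///     _mm_maskz_div_ph(0b0000_1111, a, b)
/// }
/// ```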
2301 | #[inline ] |
2302 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2303 | #[cfg_attr (test, assert_instr(vdivph))] |
2304 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2305 | pub fn _mm_maskz_div_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2306 | unsafe { |
        let r = _mm_div_ph(a, b);
        simd_select_bitmask(k, r, _mm_setzero_ph())
2309 | } |
2310 | } |
2311 | |
2312 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst. |
2313 | /// |
2314 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_div_ph) |
2315 | #[inline ] |
2316 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2317 | #[cfg_attr (test, assert_instr(vdivph))] |
2318 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2319 | pub fn _mm256_div_ph(a: __m256h, b: __m256h) -> __m256h { |
    unsafe { simd_div(a, b) }
2321 | } |
2322 | |
2323 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using |
2324 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
2325 | /// |
2326 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_div_ph) |
2327 | #[inline ] |
2328 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2329 | #[cfg_attr (test, assert_instr(vdivph))] |
2330 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2331 | pub fn _mm256_mask_div_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
2332 | unsafe { |
        let r = _mm256_div_ph(a, b);
        simd_select_bitmask(k, r, src)
2335 | } |
2336 | } |
2337 | |
2338 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using |
2339 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2340 | /// |
2341 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_div_ph) |
2342 | #[inline ] |
2343 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2344 | #[cfg_attr (test, assert_instr(vdivph))] |
2345 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2346 | pub fn _mm256_maskz_div_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
2347 | unsafe { |
        let r = _mm256_div_ph(a, b);
        simd_select_bitmask(k, r, _mm256_setzero_ph())
2350 | } |
2351 | } |
2352 | |
2353 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst. |
2354 | /// |
2355 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_ph) |
2356 | #[inline ] |
2357 | #[target_feature (enable = "avx512fp16" )] |
2358 | #[cfg_attr (test, assert_instr(vdivph))] |
2359 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2360 | pub fn _mm512_div_ph(a: __m512h, b: __m512h) -> __m512h { |
    unsafe { simd_div(a, b) }
2362 | } |
2363 | |
2364 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using |
2365 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
2366 | /// |
2367 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_ph) |
2368 | #[inline ] |
2369 | #[target_feature (enable = "avx512fp16" )] |
2370 | #[cfg_attr (test, assert_instr(vdivph))] |
2371 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2372 | pub fn _mm512_mask_div_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
2373 | unsafe { |
        let r = _mm512_div_ph(a, b);
        simd_select_bitmask(k, r, src)
2376 | } |
2377 | } |
2378 | |
2379 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using |
2380 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2381 | /// |
2382 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_ph) |
2383 | #[inline ] |
2384 | #[target_feature (enable = "avx512fp16" )] |
2385 | #[cfg_attr (test, assert_instr(vdivph))] |
2386 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2387 | pub fn _mm512_maskz_div_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
2388 | unsafe { |
        let r = _mm512_div_ph(a, b);
        simd_select_bitmask(k, r, _mm512_setzero_ph())
2391 | } |
2392 | } |
2393 | |
2394 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst. |
2395 | /// Rounding is done according to the rounding parameter, which can be one of: |
2396 | /// |
2397 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2398 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2399 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2400 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2401 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2402 | /// |
2403 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_round_ph) |
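///
/// An illustrative sketch (not from Intel's documentation), assuming nightly with the `f16` and
/// `stdarch_x86_avx512_f16` features and `avx512fp16` support; `_mm512_set1_ph` is used here on
/// the assumption that it is available alongside the other `set1` intrinsics:
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn div_truncating() -> __m512h {
///     let a = _mm512_set1_ph(1.0);
///     let b = _mm512_set1_ph(3.0);
///     // All 32 lanes are divided with round-toward-zero and suppressed exceptions.
///     _mm512_div_round_ph::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b)
/// }
/// ```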
2404 | #[inline ] |
2405 | #[target_feature (enable = "avx512fp16" )] |
2406 | #[cfg_attr (test, assert_instr(vdivph, ROUNDING = 8))] |
2407 | #[rustc_legacy_const_generics (2)] |
2408 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2409 | pub fn _mm512_div_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
2410 | unsafe { |
2411 | static_assert_rounding!(ROUNDING); |
2412 | vdivph(a, b, ROUNDING) |
2413 | } |
2414 | } |
2415 | |
2416 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using |
2417 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
2418 | /// Rounding is done according to the rounding parameter, which can be one of: |
2419 | /// |
2420 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2421 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2422 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2423 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2424 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2425 | /// |
2426 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_round_ph) |
2427 | #[inline ] |
2428 | #[target_feature (enable = "avx512fp16" )] |
2429 | #[cfg_attr (test, assert_instr(vdivph, ROUNDING = 8))] |
2430 | #[rustc_legacy_const_generics (4)] |
2431 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2432 | pub fn _mm512_mask_div_round_ph<const ROUNDING: i32>( |
2433 | src: __m512h, |
2434 | k: __mmask32, |
2435 | a: __m512h, |
2436 | b: __m512h, |
2437 | ) -> __m512h { |
2438 | unsafe { |
2439 | static_assert_rounding!(ROUNDING); |
        let r = _mm512_div_round_ph::<ROUNDING>(a, b);
        simd_select_bitmask(k, r, src)
2442 | } |
2443 | } |
2444 | |
2445 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using |
2446 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2447 | /// Rounding is done according to the rounding parameter, which can be one of: |
2448 | /// |
2449 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2450 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2451 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2452 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2453 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2454 | /// |
2455 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_round_ph) |
2456 | #[inline ] |
2457 | #[target_feature (enable = "avx512fp16" )] |
2458 | #[cfg_attr (test, assert_instr(vdivph, ROUNDING = 8))] |
2459 | #[rustc_legacy_const_generics (3)] |
2460 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2461 | pub fn _mm512_maskz_div_round_ph<const ROUNDING: i32>( |
2462 | k: __mmask32, |
2463 | a: __m512h, |
2464 | b: __m512h, |
2465 | ) -> __m512h { |
2466 | unsafe { |
2467 | static_assert_rounding!(ROUNDING); |
        let r = _mm512_div_round_ph::<ROUNDING>(a, b);
        simd_select_bitmask(k, r, _mm512_setzero_ph())
2470 | } |
2471 | } |
2472 | |
2473 | /// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the |
2474 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
2475 | /// Rounding is done according to the rounding parameter, which can be one of: |
2476 | /// |
2477 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2478 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2479 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2480 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2481 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2482 | /// |
2483 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_round_sh) |
2484 | #[inline ] |
2485 | #[target_feature (enable = "avx512fp16" )] |
2486 | #[cfg_attr (test, assert_instr(vdivsh, ROUNDING = 8))] |
2487 | #[rustc_legacy_const_generics (2)] |
2488 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2489 | pub fn _mm_div_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
2490 | static_assert_rounding!(ROUNDING); |
    _mm_mask_div_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
2492 | } |
2493 | |
2494 | /// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the |
2495 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
2496 | /// writemask k (the element is copied from src when mask bit 0 is not set). |
2497 | /// Rounding is done according to the rounding parameter, which can be one of: |
2498 | /// |
2499 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2500 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2501 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2502 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2503 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2504 | /// |
2505 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_round_sh) |
2506 | #[inline ] |
2507 | #[target_feature (enable = "avx512fp16" )] |
2508 | #[cfg_attr (test, assert_instr(vdivsh, ROUNDING = 8))] |
2509 | #[rustc_legacy_const_generics (4)] |
2510 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2511 | pub fn _mm_mask_div_round_sh<const ROUNDING: i32>( |
2512 | src: __m128h, |
2513 | k: __mmask8, |
2514 | a: __m128h, |
2515 | b: __m128h, |
2516 | ) -> __m128h { |
2517 | unsafe { |
2518 | static_assert_rounding!(ROUNDING); |
2519 | vdivsh(a, b, src, k, ROUNDING) |
2520 | } |
2521 | } |
2522 | |
2523 | /// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the |
2524 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
2525 | /// zeromask k (the element is zeroed out when mask bit 0 is not set). |
2526 | /// Rounding is done according to the rounding parameter, which can be one of: |
2527 | /// |
2528 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2529 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2530 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2531 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2532 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2533 | /// |
2534 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_round_sh) |
2535 | #[inline ] |
2536 | #[target_feature (enable = "avx512fp16" )] |
2537 | #[cfg_attr (test, assert_instr(vdivsh, ROUNDING = 8))] |
2538 | #[rustc_legacy_const_generics (3)] |
2539 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2540 | pub fn _mm_maskz_div_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2541 | static_assert_rounding!(ROUNDING); |
    _mm_mask_div_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
2543 | } |
2544 | |
2545 | /// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the |
2546 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
2547 | /// |
2548 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sh) |
2549 | #[inline ] |
2550 | #[target_feature (enable = "avx512fp16" )] |
2551 | #[cfg_attr (test, assert_instr(vdivsh))] |
2552 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2553 | pub fn _mm_div_sh(a: __m128h, b: __m128h) -> __m128h { |
2554 | _mm_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b) |
2555 | } |
2556 | |
2557 | /// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the |
2558 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
2559 | /// writemask k (the element is copied from src when mask bit 0 is not set). |
2560 | /// |
2561 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_sh) |
2562 | #[inline ] |
2563 | #[target_feature (enable = "avx512fp16" )] |
2564 | #[cfg_attr (test, assert_instr(vdivsh))] |
2565 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2566 | pub fn _mm_mask_div_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2567 | _mm_mask_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
2568 | } |
2569 | |
2570 | /// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the |
2571 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
2572 | /// zeromask k (the element is zeroed out when mask bit 0 is not set). |
2573 | /// |
2574 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_sh) |
2575 | #[inline ] |
2576 | #[target_feature (enable = "avx512fp16" )] |
2577 | #[cfg_attr (test, assert_instr(vdivsh))] |
2578 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2579 | pub fn _mm_maskz_div_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2580 | _mm_maskz_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b) |
2581 | } |
2582 | |
2583 | /// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is |
2584 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
2585 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2586 | /// |
2587 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pch) |
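///
/// An illustrative sketch of the complex-number lane layout (not from Intel's documentation),
/// assuming nightly with the `f16` and `stdarch_x86_avx512_f16` features and `avx512fp16`
/// support:
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn complex_mul() -> __m128h {
///     // Each adjacent pair of lanes is one complex number (real, imaginary):
///     // a = [1 + 2i, 3 + 4i, 1 + 2i, 3 + 4i], b = [5 + 6i, 7 + 8i, 5 + 6i, 7 + 8i].
///     let a = _mm_set_ph(4.0, 3.0, 2.0, 1.0, 4.0, 3.0, 2.0, 1.0);
///     let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 8.0, 7.0, 6.0, 5.0);
///     // The first complex product is (1 + 2i) * (5 + 6i) = -7 + 16i.
///     _mm_mul_pch(a, b)
/// }
/// ```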
2588 | #[inline ] |
2589 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2590 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2591 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2592 | pub fn _mm_mul_pch(a: __m128h, b: __m128h) -> __m128h { |
    _mm_mask_mul_pch(_mm_undefined_ph(), 0xff, a, b)
2594 | } |
2595 | |
2596 | /// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element |
2597 | /// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2598 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2599 | /// |
2600 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_pch) |
2601 | #[inline ] |
2602 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2603 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2604 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2605 | pub fn _mm_mask_mul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
    unsafe { transmute(vfmulcph_128(transmute(a), transmute(b), transmute(src), k)) }
2607 | } |
2608 | |
2609 | /// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element |
2610 | /// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2611 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2612 | /// |
2613 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_pch) |
2614 | #[inline ] |
2615 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2616 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2617 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2618 | pub fn _mm_maskz_mul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
    _mm_mask_mul_pch(_mm_setzero_ph(), k, a, b)
2620 | } |
2621 | |
2622 | /// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is |
2623 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
2624 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2625 | /// |
2626 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_pch) |
2627 | #[inline ] |
2628 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2629 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2630 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2631 | pub fn _mm256_mul_pch(a: __m256h, b: __m256h) -> __m256h { |
    _mm256_mask_mul_pch(_mm256_undefined_ph(), 0xff, a, b)
2633 | } |
2634 | |
2635 | /// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element |
2636 | /// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2637 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2638 | /// |
2639 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_pch) |
2640 | #[inline ] |
2641 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2642 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2643 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2644 | pub fn _mm256_mask_mul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h { |
    unsafe { transmute(vfmulcph_256(transmute(a), transmute(b), transmute(src), k)) }
2646 | } |
2647 | |
2648 | /// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element |
2649 | /// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2650 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2651 | /// |
2652 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_pch) |
2653 | #[inline ] |
2654 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2655 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2656 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2657 | pub fn _mm256_maskz_mul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h { |
    _mm256_mask_mul_pch(_mm256_setzero_ph(), k, a, b)
2659 | } |
2660 | |
2661 | /// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is |
2662 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
2663 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2664 | /// |
2665 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_pch) |
2666 | #[inline ] |
2667 | #[target_feature (enable = "avx512fp16" )] |
2668 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2669 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2670 | pub fn _mm512_mul_pch(a: __m512h, b: __m512h) -> __m512h { |
    _mm512_mask_mul_pch(_mm512_undefined_ph(), 0xffff, a, b)
2672 | } |
2673 | |
2674 | /// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element |
2675 | /// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2676 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2677 | /// |
2678 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_pch) |
2679 | #[inline ] |
2680 | #[target_feature (enable = "avx512fp16" )] |
2681 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2682 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2683 | pub fn _mm512_mask_mul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h { |
2684 | _mm512_mask_mul_round_pch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
2685 | } |
2686 | |
2687 | /// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element |
2688 | /// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2689 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2690 | /// |
2691 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_pch) |
2692 | #[inline ] |
2693 | #[target_feature (enable = "avx512fp16" )] |
2694 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2695 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2696 | pub fn _mm512_maskz_mul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h { |
    _mm512_mask_mul_pch(_mm512_setzero_ph(), k, a, b)
2698 | } |
2699 | |
2700 | /// Multiply the packed complex numbers in a and b, and store the results in dst. Each complex number is |
2701 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
2702 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2703 | /// |
2704 | /// Rounding is done according to the rounding parameter, which can be one of: |
2705 | /// |
2706 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2707 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2708 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2709 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2710 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2711 | /// |
2712 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_pch) |
2713 | #[inline ] |
2714 | #[target_feature (enable = "avx512fp16" )] |
2715 | #[cfg_attr (test, assert_instr(vfmulcph, ROUNDING = 8))] |
2716 | #[rustc_legacy_const_generics (2)] |
2717 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2718 | pub fn _mm512_mul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
2719 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_mul_round_pch::<ROUNDING>(_mm512_undefined_ph(), 0xffff, a, b)
2721 | } |
2722 | |
2723 | /// Multiply the packed complex numbers in a and b, and store the results in dst using writemask k (the element |
2724 | /// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2725 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2726 | /// |
2727 | /// Rounding is done according to the rounding parameter, which can be one of: |
2728 | /// |
2729 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2730 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2731 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2732 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2733 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2734 | /// |
2735 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_pch) |
2736 | #[inline ] |
2737 | #[target_feature (enable = "avx512fp16" )] |
2738 | #[cfg_attr (test, assert_instr(vfmulcph, ROUNDING = 8))] |
2739 | #[rustc_legacy_const_generics (4)] |
2740 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2741 | pub fn _mm512_mask_mul_round_pch<const ROUNDING: i32>( |
2742 | src: __m512h, |
2743 | k: __mmask16, |
2744 | a: __m512h, |
2745 | b: __m512h, |
2746 | ) -> __m512h { |
2747 | unsafe { |
2748 | static_assert_rounding!(ROUNDING); |
        transmute(vfmulcph_512(
            transmute(a),
            transmute(b),
            transmute(src),
            k,
            ROUNDING,
        ))
2756 | } |
2757 | } |
2758 | |
2759 | /// Multiply the packed complex numbers in a and b, and store the results in dst using zeromask k (the element |
2760 | /// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2761 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2762 | /// |
2763 | /// Rounding is done according to the rounding parameter, which can be one of: |
2764 | /// |
2765 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2766 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2767 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2768 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2769 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2770 | /// |
2771 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_pch) |
2772 | #[inline ] |
2773 | #[target_feature (enable = "avx512fp16" )] |
2774 | #[cfg_attr (test, assert_instr(vfmulcph, ROUNDING = 8))] |
2775 | #[rustc_legacy_const_generics (3)] |
2776 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2777 | pub fn _mm512_maskz_mul_round_pch<const ROUNDING: i32>( |
2778 | k: __mmask16, |
2779 | a: __m512h, |
2780 | b: __m512h, |
2781 | ) -> __m512h { |
2782 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_mul_round_pch::<ROUNDING>(_mm512_setzero_ph(), k, a, b)
2784 | } |
2785 | |
2786 | /// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst, |
2787 | /// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is |
2788 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
2789 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2790 | /// |
2791 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sch) |
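///
/// An illustrative sketch (not from Intel's documentation), assuming nightly with the `f16` and
/// `stdarch_x86_avx512_f16` features and `avx512fp16` support:
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn complex_mul_lower() -> __m128h {
///     // The lower complex number of `a` is 1 + 2i, of `b` is 3 + 4i.
///     let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0);
///     let b = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 3.0);
///     // The lower complex product is (1 + 2i) * (3 + 4i) = -5 + 10i;
///     // the upper 6 lanes are copied from `a`.
///     _mm_mul_sch(a, b)
/// }
/// ```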
2792 | #[inline ] |
2793 | #[target_feature (enable = "avx512fp16" )] |
2794 | #[cfg_attr (test, assert_instr(vfmulcsh))] |
2795 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2796 | pub fn _mm_mul_sch(a: __m128h, b: __m128h) -> __m128h { |
    _mm_mask_mul_sch(f16x8::ZERO.as_m128h(), 0xff, a, b)
2798 | } |
2799 | |
2800 | /// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using |
2801 | /// writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 6 packed |
2802 | /// elements from a to the upper elements of dst. Each complex number is composed of two adjacent |
2803 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2804 | /// |
2805 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_sch) |
2806 | #[inline ] |
2807 | #[target_feature (enable = "avx512fp16" )] |
2808 | #[cfg_attr (test, assert_instr(vfmulcsh))] |
2809 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2810 | pub fn _mm_mask_mul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2811 | _mm_mask_mul_round_sch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
2812 | } |
2813 | |
2814 | /// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using |
2815 | /// zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements |
2816 | /// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision |
2817 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2818 | /// |
2819 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_sch) |
2820 | #[inline ] |
2821 | #[target_feature (enable = "avx512fp16" )] |
2822 | #[cfg_attr (test, assert_instr(vfmulcsh))] |
2823 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2824 | pub fn _mm_maskz_mul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
    _mm_mask_mul_sch(f16x8::ZERO.as_m128h(), k, a, b)
2826 | } |
2827 | |
2828 | /// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst, |
2829 | /// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is |
2830 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
2831 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2832 | /// |
2833 | /// Rounding is done according to the rounding parameter, which can be one of: |
2834 | /// |
2835 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2836 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2837 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2838 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2839 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2840 | /// |
2841 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_round_sch) |
2842 | #[inline ] |
2843 | #[target_feature (enable = "avx512fp16" )] |
2844 | #[cfg_attr (test, assert_instr(vfmulcsh, ROUNDING = 8))] |
2845 | #[rustc_legacy_const_generics (2)] |
2846 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2847 | pub fn _mm_mul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
2848 | static_assert_rounding!(ROUNDING); |
    _mm_mask_mul_round_sch::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
2850 | } |
2851 | |
2852 | /// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using |
2853 | /// writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 6 packed |
2854 | /// elements from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision |
2855 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2856 | /// |
2857 | /// Rounding is done according to the rounding parameter, which can be one of: |
2858 | /// |
2859 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2860 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2861 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2862 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2863 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2864 | /// |
2865 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_round_sch) |
2866 | #[inline ] |
2867 | #[target_feature (enable = "avx512fp16" )] |
2868 | #[cfg_attr (test, assert_instr(vfmulcsh, ROUNDING = 8))] |
2869 | #[rustc_legacy_const_generics (4)] |
2870 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2871 | pub fn _mm_mask_mul_round_sch<const ROUNDING: i32>( |
2872 | src: __m128h, |
2873 | k: __mmask8, |
2874 | a: __m128h, |
2875 | b: __m128h, |
2876 | ) -> __m128h { |
2877 | unsafe { |
2878 | static_assert_rounding!(ROUNDING); |
        transmute(vfmulcsh(
            transmute(a),
            transmute(b),
            transmute(src),
            k,
            ROUNDING,
        ))
2886 | } |
2887 | } |
2888 | |
2889 | /// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using |
2890 | /// zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements |
2891 | /// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision |
2892 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2893 | /// |
2894 | /// Rounding is done according to the rounding parameter, which can be one of: |
2895 | /// |
2896 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2897 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2898 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2899 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2900 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2901 | /// |
2902 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_round_sch) |
2903 | #[inline ] |
2904 | #[target_feature (enable = "avx512fp16" )] |
2905 | #[cfg_attr (test, assert_instr(vfmulcsh, ROUNDING = 8))] |
2906 | #[rustc_legacy_const_generics (3)] |
2907 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2908 | pub fn _mm_maskz_mul_round_sch<const ROUNDING: i32>( |
2909 | k: __mmask8, |
2910 | a: __m128h, |
2911 | b: __m128h, |
2912 | ) -> __m128h { |
2913 | static_assert_rounding!(ROUNDING); |
    _mm_mask_mul_round_sch::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
2915 | } |
2916 | |
2917 | /// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is |
2918 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
2919 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2920 | /// |
2921 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_pch) |
2922 | #[inline ] |
2923 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2924 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2925 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2926 | pub fn _mm_fmul_pch(a: __m128h, b: __m128h) -> __m128h { |
2927 | _mm_mul_pch(a, b) |
2928 | } |
2929 | |
2930 | /// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element |
2931 | /// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2932 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2933 | /// |
2934 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_pch) |
2935 | #[inline ] |
2936 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2937 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2938 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2939 | pub fn _mm_mask_fmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2940 | _mm_mask_mul_pch(src, k, a, b) |
2941 | } |
2942 | |
2943 | /// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element |
2944 | /// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision |
2945 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2946 | /// |
2947 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_pch) |
2948 | #[inline ] |
2949 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2950 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2951 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2952 | pub fn _mm_maskz_fmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2953 | _mm_maskz_mul_pch(k, a, b) |
2954 | } |
2955 | |
2956 | /// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is |
2957 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
2958 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2959 | /// |
2960 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmul_pch) |
2961 | #[inline ] |
2962 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2963 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2964 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2965 | pub fn _mm256_fmul_pch(a: __m256h, b: __m256h) -> __m256h { |
2966 | _mm256_mul_pch(a, b) |
2967 | } |
2968 | |
2969 | /// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element |
2970 | /// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision |
2971 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2972 | /// |
2973 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmul_pch) |
2974 | #[inline ] |
2975 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2976 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2977 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2978 | pub fn _mm256_mask_fmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h { |
2979 | _mm256_mask_mul_pch(src, k, a, b) |
2980 | } |
2981 | |
2982 | /// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element |
2983 | /// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision |
2984 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2985 | /// |
2986 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmul_pch) |
2987 | #[inline ] |
2988 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2989 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2990 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2991 | pub fn _mm256_maskz_fmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h { |
2992 | _mm256_maskz_mul_pch(k, a, b) |
2993 | } |
2994 | |
2995 | /// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is composed |
2996 | /// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2997 | /// |
2998 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmul_pch) |
2999 | #[inline ] |
3000 | #[target_feature (enable = "avx512fp16" )] |
3001 | #[cfg_attr (test, assert_instr(vfmulcph))] |
3002 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3003 | pub fn _mm512_fmul_pch(a: __m512h, b: __m512h) -> __m512h { |
3004 | _mm512_mul_pch(a, b) |
3005 | } |
3006 | |
3007 | /// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element |
3008 | /// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision |
3009 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3010 | /// |
3011 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmul_pch) |
3012 | #[inline ] |
3013 | #[target_feature (enable = "avx512fp16" )] |
3014 | #[cfg_attr (test, assert_instr(vfmulcph))] |
3015 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3016 | pub fn _mm512_mask_fmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h { |
3017 | _mm512_mask_mul_pch(src, k, a, b) |
3018 | } |
3019 | |
3020 | /// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element |
3021 | /// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision |
3022 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3023 | /// |
3024 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmul_pch) |
3025 | #[inline ] |
3026 | #[target_feature (enable = "avx512fp16" )] |
3027 | #[cfg_attr (test, assert_instr(vfmulcph))] |
3028 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3029 | pub fn _mm512_maskz_fmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h { |
3030 | _mm512_maskz_mul_pch(k, a, b) |
3031 | } |
3032 | |
3033 | /// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is composed |
3034 | /// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3035 | /// Rounding is done according to the rounding parameter, which can be one of: |
3036 | /// |
3037 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3038 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3039 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3040 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3041 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3042 | /// |
3043 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmul_round_pch) |
3044 | #[inline ] |
3045 | #[target_feature (enable = "avx512fp16" )] |
3046 | #[cfg_attr (test, assert_instr(vfmulcph, ROUNDING = 8))] |
3047 | #[rustc_legacy_const_generics (2)] |
3048 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3049 | pub fn _mm512_fmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
3050 | static_assert_rounding!(ROUNDING); |
3051 | _mm512_mul_round_pch::<ROUNDING>(a, b) |
3052 | } |
3053 | |
3054 | /// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element |
3055 | /// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision |
3056 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3057 | /// Rounding is done according to the rounding parameter, which can be one of: |
3058 | /// |
3059 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3060 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3061 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3062 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3063 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3064 | /// |
3065 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmul_round_pch) |
3066 | #[inline ] |
3067 | #[target_feature (enable = "avx512fp16" )] |
3068 | #[cfg_attr (test, assert_instr(vfmulcph, ROUNDING = 8))] |
3069 | #[rustc_legacy_const_generics (4)] |
3070 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3071 | pub fn _mm512_mask_fmul_round_pch<const ROUNDING: i32>( |
3072 | src: __m512h, |
3073 | k: __mmask16, |
3074 | a: __m512h, |
3075 | b: __m512h, |
3076 | ) -> __m512h { |
3077 | static_assert_rounding!(ROUNDING); |
3078 | _mm512_mask_mul_round_pch::<ROUNDING>(src, k, a, b) |
3079 | } |
3080 | |
3081 | /// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element |
3082 | /// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision |
3083 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3084 | /// Rounding is done according to the rounding parameter, which can be one of: |
3085 | /// |
3086 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3087 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3088 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3089 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3090 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3091 | /// |
3092 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmul_round_pch) |
3093 | #[inline ] |
3094 | #[target_feature (enable = "avx512fp16" )] |
3095 | #[cfg_attr (test, assert_instr(vfmulcph, ROUNDING = 8))] |
3096 | #[rustc_legacy_const_generics (3)] |
3097 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3098 | pub fn _mm512_maskz_fmul_round_pch<const ROUNDING: i32>( |
3099 | k: __mmask16, |
3100 | a: __m512h, |
3101 | b: __m512h, |
3102 | ) -> __m512h { |
3103 | static_assert_rounding!(ROUNDING); |
3104 | _mm512_maskz_mul_round_pch::<ROUNDING>(k, a, b) |
3105 | } |
3106 | |
3107 | /// Multiply the lower complex numbers in a and b, and store the results in dst. Each complex number is |
3108 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
3109 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3110 | /// |
3111 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_sch) |
3112 | #[inline ] |
3113 | #[target_feature (enable = "avx512fp16" )] |
3114 | #[cfg_attr (test, assert_instr(vfmulcsh))] |
3115 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3116 | pub fn _mm_fmul_sch(a: __m128h, b: __m128h) -> __m128h { |
3117 | _mm_mul_sch(a, b) |
3118 | } |
3119 | |
3120 | /// Multiply the lower complex numbers in a and b, and store the results in dst using writemask k (the element |
3121 | /// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision |
3122 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3123 | /// |
3124 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_sch) |
3125 | #[inline ] |
3126 | #[target_feature (enable = "avx512fp16" )] |
3127 | #[cfg_attr (test, assert_instr(vfmulcsh))] |
3128 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3129 | pub fn _mm_mask_fmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
3130 | _mm_mask_mul_sch(src, k, a, b) |
3131 | } |
3132 | |
3133 | /// Multiply the lower complex numbers in a and b, and store the results in dst using zeromask k (the element |
3134 | /// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision |
3135 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3136 | /// |
3137 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_sch) |
3138 | #[inline ] |
3139 | #[target_feature (enable = "avx512fp16" )] |
3140 | #[cfg_attr (test, assert_instr(vfmulcsh))] |
3141 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3142 | pub fn _mm_maskz_fmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
3143 | _mm_maskz_mul_sch(k, a, b) |
3144 | } |
3145 | |
3146 | /// Multiply the lower complex numbers in a and b, and store the results in dst. Each complex number is composed |
3147 | /// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3148 | /// |
3149 | /// Rounding is done according to the rounding parameter, which can be one of: |
3150 | /// |
3151 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3152 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3153 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3154 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3155 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3156 | /// |
3157 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_round_sch) |
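///
/// A minimal usage sketch (illustrative only, not a doctest; assumes `a` and `b` are `__m128h`
/// values and that `avx512fp16` is available at runtime):
///
/// ```ignore
/// // Multiply the low complex numbers, truncating toward zero and suppressing exceptions.
/// let r = _mm_fmul_round_sch::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
/// ```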
3158 | #[inline ] |
3159 | #[target_feature (enable = "avx512fp16" )] |
3160 | #[cfg_attr (test, assert_instr(vfmulcsh, ROUNDING = 8))] |
3161 | #[rustc_legacy_const_generics (2)] |
3162 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3163 | pub fn _mm_fmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
3164 | static_assert_rounding!(ROUNDING); |
3165 | _mm_mul_round_sch::<ROUNDING>(a, b) |
3166 | } |
3167 | |
3168 | /// Multiply the lower complex numbers in a and b, and store the results in dst using writemask k (the element |
3169 | /// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision |
3170 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3171 | /// |
3172 | /// Rounding is done according to the rounding parameter, which can be one of: |
3173 | /// |
3174 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3175 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3176 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3177 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3178 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3179 | /// |
3180 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_round_sch) |
3181 | #[inline ] |
3182 | #[target_feature (enable = "avx512fp16" )] |
3183 | #[cfg_attr (test, assert_instr(vfmulcsh, ROUNDING = 8))] |
3184 | #[rustc_legacy_const_generics (4)] |
3185 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3186 | pub fn _mm_mask_fmul_round_sch<const ROUNDING: i32>( |
3187 | src: __m128h, |
3188 | k: __mmask8, |
3189 | a: __m128h, |
3190 | b: __m128h, |
3191 | ) -> __m128h { |
3192 | static_assert_rounding!(ROUNDING); |
3193 | _mm_mask_mul_round_sch::<ROUNDING>(src, k, a, b) |
3194 | } |
3195 | |
3196 | /// Multiply the lower complex numbers in a and b, and store the results in dst using zeromask k (the element |
3197 | /// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision |
3198 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3199 | /// |
3200 | /// Rounding is done according to the rounding parameter, which can be one of: |
3201 | /// |
3202 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3203 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3204 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3205 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3206 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3207 | /// |
3208 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_round_sch) |
3209 | #[inline ] |
3210 | #[target_feature (enable = "avx512fp16" )] |
3211 | #[cfg_attr (test, assert_instr(vfmulcsh, ROUNDING = 8))] |
3212 | #[rustc_legacy_const_generics (3)] |
3213 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3214 | pub fn _mm_maskz_fmul_round_sch<const ROUNDING: i32>( |
3215 | k: __mmask8, |
3216 | a: __m128h, |
3217 | b: __m128h, |
3218 | ) -> __m128h { |
3219 | static_assert_rounding!(ROUNDING); |
3220 | _mm_maskz_mul_round_sch::<ROUNDING>(k, a, b) |
3221 | } |
3222 | |
3223 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3224 | /// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
3225 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3226 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3227 | /// |
3228 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_pch) |
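///
/// A minimal usage sketch (illustrative only, not a doctest; assumes `avx512fp16` and
/// `avx512vl` are available at runtime):
///
/// ```ignore
/// let a = _mm_setr_ph(1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0); // first pair encodes 1 + 2i
/// let b = _mm_setr_ph(3.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0); // first pair encodes 3 + 4i
/// let r = _mm_cmul_pch(a, b); // first pair holds (1 + 2i) * (3 - 4i) = 11 + 2i
/// ```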
3229 | #[inline ] |
3230 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3231 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3232 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3233 | pub fn _mm_cmul_pch(a: __m128h, b: __m128h) -> __m128h { |
_mm_mask_cmul_pch(_mm_undefined_ph(), 0xff, a, b)
3235 | } |
3236 | |
3237 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3238 | /// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). |
3239 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3240 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3241 | /// |
3242 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_pch) |
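///
/// A minimal usage sketch (illustrative only, not a doctest; assumes `src`, `a` and `b` are
/// `__m128h` values and that `avx512fp16` and `avx512vl` are available at runtime). Each mask
/// bit selects one complex pair (two `f16` lanes):
///
/// ```ignore
/// // Compute only the first complex product; pairs 1..=3 are copied from `src`.
/// let r = _mm_mask_cmul_pch(src, 0b0001, a, b);
/// ```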
3243 | #[inline ] |
3244 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3245 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3246 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3247 | pub fn _mm_mask_cmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
unsafe { transmute(vfcmulcph_128(transmute(a), transmute(b), transmute(src), k)) }
3249 | } |
3250 | |
3251 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3252 | /// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). |
3253 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3254 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3255 | /// |
3256 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_pch) |
3257 | #[inline ] |
3258 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3259 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3260 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3261 | pub fn _mm_maskz_cmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
_mm_mask_cmul_pch(_mm_setzero_ph(), k, a, b)
3263 | } |
3264 | |
3265 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3266 | /// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
3267 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3268 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3269 | /// |
3270 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmul_pch) |
3271 | #[inline ] |
3272 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3273 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3274 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3275 | pub fn _mm256_cmul_pch(a: __m256h, b: __m256h) -> __m256h { |
_mm256_mask_cmul_pch(_mm256_undefined_ph(), 0xff, a, b)
3277 | } |
3278 | |
3279 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3280 | /// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). |
3281 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3282 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3283 | /// |
3284 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmul_pch) |
3285 | #[inline ] |
3286 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3287 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3288 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3289 | pub fn _mm256_mask_cmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h { |
unsafe { transmute(vfcmulcph_256(transmute(a), transmute(b), transmute(src), k)) }
3291 | } |
3292 | |
3293 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3294 | /// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). |
3295 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3296 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3297 | /// |
3298 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cmul_pch) |
3299 | #[inline ] |
3300 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3301 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3302 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3303 | pub fn _mm256_maskz_cmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h { |
_mm256_mask_cmul_pch(_mm256_setzero_ph(), k, a, b)
3305 | } |
3306 | |
3307 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3308 | /// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
3309 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3310 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3311 | /// |
3312 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmul_pch) |
3313 | #[inline ] |
3314 | #[target_feature (enable = "avx512fp16" )] |
3315 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3316 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3317 | pub fn _mm512_cmul_pch(a: __m512h, b: __m512h) -> __m512h { |
_mm512_mask_cmul_pch(_mm512_undefined_ph(), 0xffff, a, b)
3319 | } |
3320 | |
3321 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3322 | /// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). |
3323 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3324 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3325 | /// |
3326 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmul_pch) |
3327 | #[inline ] |
3328 | #[target_feature (enable = "avx512fp16" )] |
3329 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3330 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3331 | pub fn _mm512_mask_cmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h { |
3332 | _mm512_mask_cmul_round_pch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
3333 | } |
3334 | |
3335 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3336 | /// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). |
3337 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3338 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3339 | /// |
3340 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cmul_pch) |
3341 | #[inline ] |
3342 | #[target_feature (enable = "avx512fp16" )] |
3343 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3344 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3345 | pub fn _mm512_maskz_cmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h { |
_mm512_mask_cmul_pch(_mm512_setzero_ph(), k, a, b)
3347 | } |
3348 | |
3349 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3350 | /// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
3351 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3352 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3353 | /// |
3354 | /// Rounding is done according to the rounding parameter, which can be one of: |
3355 | /// |
3356 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3357 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3358 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3359 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3360 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3361 | /// |
3362 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmul_round_pch) |
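///
/// A minimal usage sketch (illustrative only, not a doctest; assumes `a` and `b` are `__m512h`
/// values and that `avx512fp16` is available at runtime):
///
/// ```ignore
/// let r = _mm512_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
/// ```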
3363 | #[inline ] |
3364 | #[target_feature (enable = "avx512fp16" )] |
3365 | #[cfg_attr (test, assert_instr(vfcmulcph, ROUNDING = 8))] |
3366 | #[rustc_legacy_const_generics (2)] |
3367 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3368 | pub fn _mm512_cmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
3369 | static_assert_rounding!(ROUNDING); |
_mm512_mask_cmul_round_pch::<ROUNDING>(_mm512_undefined_ph(), 0xffff, a, b)
3371 | } |
3372 | |
3373 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3374 | /// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). |
3375 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3376 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3377 | /// |
3378 | /// Rounding is done according to the rounding parameter, which can be one of: |
3379 | /// |
3380 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3381 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3382 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3383 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3384 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3385 | /// |
3386 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmul_round_pch) |
3387 | #[inline ] |
3388 | #[target_feature (enable = "avx512fp16" )] |
3389 | #[cfg_attr (test, assert_instr(vfcmulcph, ROUNDING = 8))] |
3390 | #[rustc_legacy_const_generics (4)] |
3391 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3392 | pub fn _mm512_mask_cmul_round_pch<const ROUNDING: i32>( |
3393 | src: __m512h, |
3394 | k: __mmask16, |
3395 | a: __m512h, |
3396 | b: __m512h, |
3397 | ) -> __m512h { |
3398 | unsafe { |
3399 | static_assert_rounding!(ROUNDING); |
transmute(vfcmulcph_512(
transmute(a),
transmute(b),
transmute(src),
3404 | k, |
3405 | ROUNDING, |
3406 | )) |
3407 | } |
3408 | } |
3409 | |
3410 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3411 | /// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). |
3412 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3413 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3414 | /// |
3415 | /// Rounding is done according to the rounding parameter, which can be one of: |
3416 | /// |
3417 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3418 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3419 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3420 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3421 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3422 | /// |
3423 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cmul_round_pch) |
3424 | #[inline ] |
3425 | #[target_feature (enable = "avx512fp16" )] |
3426 | #[cfg_attr (test, assert_instr(vfcmulcph, ROUNDING = 8))] |
3427 | #[rustc_legacy_const_generics (3)] |
3428 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3429 | pub fn _mm512_maskz_cmul_round_pch<const ROUNDING: i32>( |
3430 | k: __mmask16, |
3431 | a: __m512h, |
3432 | b: __m512h, |
3433 | ) -> __m512h { |
3434 | static_assert_rounding!(ROUNDING); |
_mm512_mask_cmul_round_pch::<ROUNDING>(_mm512_setzero_ph(), k, a, b)
3436 | } |
3437 | |
3438 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3439 | /// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3441 | /// |
3442 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_sch) |
3443 | #[inline ] |
3444 | #[target_feature (enable = "avx512fp16" )] |
3445 | #[cfg_attr (test, assert_instr(vfcmulcsh))] |
3446 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3447 | pub fn _mm_cmul_sch(a: __m128h, b: __m128h) -> __m128h { |
_mm_mask_cmul_sch(f16x8::ZERO.as_m128h(), 0xff, a, b)
3449 | } |
3450 | |
3451 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3452 | /// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set). |
3453 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate
/// `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3455 | /// |
3456 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_sch) |
3457 | #[inline ] |
3458 | #[target_feature (enable = "avx512fp16" )] |
3459 | #[cfg_attr (test, assert_instr(vfcmulcsh))] |
3460 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3461 | pub fn _mm_mask_cmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
3462 | _mm_mask_cmul_round_sch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
3463 | } |
3464 | |
3465 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3466 | /// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set). |
3467 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate
/// `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3469 | /// |
3470 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_sch) |
3471 | #[inline ] |
3472 | #[target_feature (enable = "avx512fp16" )] |
3473 | #[cfg_attr (test, assert_instr(vfcmulcsh))] |
3474 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3475 | pub fn _mm_maskz_cmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
_mm_mask_cmul_sch(f16x8::ZERO.as_m128h(), k, a, b)
3477 | } |
3478 | |
3479 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3480 | /// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3482 | /// |
3483 | /// Rounding is done according to the rounding parameter, which can be one of: |
3484 | /// |
3485 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3486 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3487 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3488 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3489 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3490 | /// |
3491 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_round_sch) |
3492 | #[inline ] |
3493 | #[target_feature (enable = "avx512fp16" )] |
3494 | #[cfg_attr (test, assert_instr(vfcmulcsh, ROUNDING = 8))] |
3495 | #[rustc_legacy_const_generics (2)] |
3496 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3497 | pub fn _mm_cmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
3498 | static_assert_rounding!(ROUNDING); |
_mm_mask_cmul_round_sch::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
3500 | } |
3501 | |
3502 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3503 | /// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set). |
3504 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3505 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3506 | /// |
3507 | /// Rounding is done according to the rounding parameter, which can be one of: |
3508 | /// |
3509 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3510 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3511 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3512 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3513 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3514 | /// |
3515 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_round_sch) |
3516 | #[inline ] |
3517 | #[target_feature (enable = "avx512fp16" )] |
3518 | #[cfg_attr (test, assert_instr(vfcmulcsh, ROUNDING = 8))] |
3519 | #[rustc_legacy_const_generics (4)] |
3520 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3521 | pub fn _mm_mask_cmul_round_sch<const ROUNDING: i32>( |
3522 | src: __m128h, |
3523 | k: __mmask8, |
3524 | a: __m128h, |
3525 | b: __m128h, |
3526 | ) -> __m128h { |
3527 | unsafe { |
3528 | static_assert_rounding!(ROUNDING); |
transmute(vfcmulcsh(
transmute(a),
transmute(b),
transmute(src),
3533 | k, |
3534 | ROUNDING, |
3535 | )) |
3536 | } |
3537 | } |
3538 | |
3539 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3540 | /// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set). |
3541 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3542 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3543 | /// |
3544 | /// Rounding is done according to the rounding parameter, which can be one of: |
3545 | /// |
3546 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3547 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3548 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3549 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3550 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3551 | /// |
3552 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_round_sch) |
3553 | #[inline ] |
3554 | #[target_feature (enable = "avx512fp16" )] |
3555 | #[cfg_attr (test, assert_instr(vfcmulcsh, ROUNDING = 8))] |
3556 | #[rustc_legacy_const_generics (3)] |
3557 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3558 | pub fn _mm_maskz_cmul_round_sch<const ROUNDING: i32>( |
3559 | k: __mmask8, |
3560 | a: __m128h, |
3561 | b: __m128h, |
3562 | ) -> __m128h { |
3563 | static_assert_rounding!(ROUNDING); |
_mm_mask_cmul_round_sch::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
3565 | } |
3566 | |
3567 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3568 | /// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
3569 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3570 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3571 | /// |
3572 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_pch) |
3573 | #[inline ] |
3574 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3575 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3576 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3577 | pub fn _mm_fcmul_pch(a: __m128h, b: __m128h) -> __m128h { |
3578 | _mm_cmul_pch(a, b) |
3579 | } |
3580 | |
3581 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3582 | /// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). |
3583 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3584 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3585 | /// |
3586 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_pch) |
3587 | #[inline ] |
3588 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3589 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3590 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3591 | pub fn _mm_mask_fcmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
3592 | _mm_mask_cmul_pch(src, k, a, b) |
3593 | } |
3594 | |
3595 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3596 | /// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). |
3597 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3598 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3599 | /// |
3600 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_pch) |
3601 | #[inline ] |
3602 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3603 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3604 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3605 | pub fn _mm_maskz_fcmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
3606 | _mm_maskz_cmul_pch(k, a, b) |
3607 | } |
3608 | |
3609 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3610 | /// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
3611 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3612 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3613 | /// |
3614 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fcmul_pch) |
3615 | #[inline ] |
3616 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3617 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3618 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3619 | pub fn _mm256_fcmul_pch(a: __m256h, b: __m256h) -> __m256h { |
3620 | _mm256_cmul_pch(a, b) |
3621 | } |
3622 | |
3623 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3624 | /// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). |
3625 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3626 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3627 | /// |
3628 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fcmul_pch) |
3629 | #[inline ] |
3630 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3631 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3632 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3633 | pub fn _mm256_mask_fcmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h { |
3634 | _mm256_mask_cmul_pch(src, k, a, b) |
3635 | } |
3636 | |
3637 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3638 | /// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). |
3639 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3640 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3641 | /// |
3642 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fcmul_pch) |
3643 | #[inline ] |
3644 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3645 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3646 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3647 | pub fn _mm256_maskz_fcmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h { |
3648 | _mm256_maskz_cmul_pch(k, a, b) |
3649 | } |
3650 | |
3651 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3652 | /// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
3653 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3654 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3655 | /// |
3656 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmul_pch) |
3657 | #[inline ] |
3658 | #[target_feature (enable = "avx512fp16" )] |
3659 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3660 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3661 | pub fn _mm512_fcmul_pch(a: __m512h, b: __m512h) -> __m512h { |
3662 | _mm512_cmul_pch(a, b) |
3663 | } |
3664 | |
3665 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3666 | /// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). |
3667 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3668 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3669 | /// |
3670 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmul_pch) |
3671 | #[inline ] |
3672 | #[target_feature (enable = "avx512fp16" )] |
3673 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3674 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3675 | pub fn _mm512_mask_fcmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h { |
3676 | _mm512_mask_cmul_pch(src, k, a, b) |
3677 | } |
3678 | |
3679 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3680 | /// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). |
3681 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3682 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3683 | /// |
3684 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmul_pch) |
3685 | #[inline ] |
3686 | #[target_feature (enable = "avx512fp16" )] |
3687 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3688 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3689 | pub fn _mm512_maskz_fcmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h { |
3690 | _mm512_maskz_cmul_pch(k, a, b) |
3691 | } |
3692 | |
3693 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3694 | /// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3696 | /// |
3697 | /// Rounding is done according to the rounding parameter, which can be one of: |
3698 | /// |
3699 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3700 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3701 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3702 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3703 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3704 | /// |
3705 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmul_round_pch) |
3706 | #[inline ] |
3707 | #[target_feature (enable = "avx512fp16" )] |
3708 | #[cfg_attr (test, assert_instr(vfcmulcph, ROUNDING = 8))] |
3709 | #[rustc_legacy_const_generics (2)] |
3710 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3711 | pub fn _mm512_fcmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
3712 | static_assert_rounding!(ROUNDING); |
3713 | _mm512_cmul_round_pch::<ROUNDING>(a, b) |
3714 | } |
3715 | |
3716 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3717 | /// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). |
3718 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3719 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3720 | /// |
3721 | /// Rounding is done according to the rounding parameter, which can be one of: |
3722 | /// |
3723 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3724 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3725 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3726 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3727 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3728 | /// |
3729 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmul_round_pch) |
3730 | #[inline ] |
3731 | #[target_feature (enable = "avx512fp16" )] |
3732 | #[cfg_attr (test, assert_instr(vfcmulcph, ROUNDING = 8))] |
3733 | #[rustc_legacy_const_generics (4)] |
3734 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3735 | pub fn _mm512_mask_fcmul_round_pch<const ROUNDING: i32>( |
3736 | src: __m512h, |
3737 | k: __mmask16, |
3738 | a: __m512h, |
3739 | b: __m512h, |
3740 | ) -> __m512h { |
3741 | static_assert_rounding!(ROUNDING); |
3742 | _mm512_mask_cmul_round_pch::<ROUNDING>(src, k, a, b) |
3743 | } |
3744 | |
3745 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3746 | /// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). |
3747 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3748 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3749 | /// |
3750 | /// Rounding is done according to the rounding parameter, which can be one of: |
3751 | /// |
3752 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3753 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3754 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3755 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3756 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3757 | /// |
3758 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmul_round_pch) |
3759 | #[inline ] |
3760 | #[target_feature (enable = "avx512fp16" )] |
3761 | #[cfg_attr (test, assert_instr(vfcmulcph, ROUNDING = 8))] |
3762 | #[rustc_legacy_const_generics (3)] |
3763 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3764 | pub fn _mm512_maskz_fcmul_round_pch<const ROUNDING: i32>( |
3765 | k: __mmask16, |
3766 | a: __m512h, |
3767 | b: __m512h, |
3768 | ) -> __m512h { |
3769 | static_assert_rounding!(ROUNDING); |
3770 | _mm512_maskz_cmul_round_pch::<ROUNDING>(k, a, b) |
3771 | } |
3772 | |
3773 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3774 | /// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
3775 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3776 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3777 | /// |
3778 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_sch) |
3779 | #[inline ] |
3780 | #[target_feature (enable = "avx512fp16" )] |
3781 | #[cfg_attr (test, assert_instr(vfcmulcsh))] |
3782 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3783 | pub fn _mm_fcmul_sch(a: __m128h, b: __m128h) -> __m128h { |
3784 | _mm_cmul_sch(a, b) |
3785 | } |
3786 | |
3787 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3788 | /// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set). |
3789 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3790 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3791 | /// |
3792 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_sch) |
3793 | #[inline ] |
3794 | #[target_feature (enable = "avx512fp16" )] |
3795 | #[cfg_attr (test, assert_instr(vfcmulcsh))] |
3796 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3797 | pub fn _mm_mask_fcmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
3798 | _mm_mask_cmul_sch(src, k, a, b) |
3799 | } |
3800 | |
3801 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3802 | /// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set). |
3803 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3804 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3805 | /// |
3806 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_sch) |
3807 | #[inline ] |
3808 | #[target_feature (enable = "avx512fp16" )] |
3809 | #[cfg_attr (test, assert_instr(vfcmulcsh))] |
3810 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3811 | pub fn _mm_maskz_fcmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
3812 | _mm_maskz_cmul_sch(k, a, b) |
3813 | } |
3814 | |
3815 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3816 | /// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3818 | /// |
3819 | /// Rounding is done according to the rounding parameter, which can be one of: |
3820 | /// |
3821 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3822 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3823 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3824 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3825 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3826 | /// |
3827 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_round_sch) |
3828 | #[inline ] |
3829 | #[target_feature (enable = "avx512fp16" )] |
3830 | #[cfg_attr (test, assert_instr(vfcmulcsh, ROUNDING = 8))] |
3831 | #[rustc_legacy_const_generics (2)] |
3832 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3833 | pub fn _mm_fcmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
3834 | static_assert_rounding!(ROUNDING); |
3835 | _mm_cmul_round_sch::<ROUNDING>(a, b) |
3836 | } |
3837 | |
3838 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3839 | /// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set). |
3840 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3841 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3842 | /// |
3843 | /// Rounding is done according to the rounding parameter, which can be one of: |
3844 | /// |
3845 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3846 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3847 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3848 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3849 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3850 | /// |
3851 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_round_sch) |
3852 | #[inline ] |
3853 | #[target_feature (enable = "avx512fp16" )] |
3854 | #[cfg_attr (test, assert_instr(vfcmulcsh, ROUNDING = 8))] |
3855 | #[rustc_legacy_const_generics (4)] |
3856 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3857 | pub fn _mm_mask_fcmul_round_sch<const ROUNDING: i32>( |
3858 | src: __m128h, |
3859 | k: __mmask8, |
3860 | a: __m128h, |
3861 | b: __m128h, |
3862 | ) -> __m128h { |
3863 | static_assert_rounding!(ROUNDING); |
3864 | _mm_mask_cmul_round_sch::<ROUNDING>(src, k, a, b) |
3865 | } |
3866 | |
3867 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3868 | /// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set). |
3869 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3870 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3871 | /// |
3872 | /// Rounding is done according to the rounding parameter, which can be one of: |
3873 | /// |
3874 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3875 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3876 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3877 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3878 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3879 | /// |
3880 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_round_sch) |
3881 | #[inline ] |
3882 | #[target_feature (enable = "avx512fp16" )] |
3883 | #[cfg_attr (test, assert_instr(vfcmulcsh, ROUNDING = 8))] |
3884 | #[rustc_legacy_const_generics (3)] |
3885 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3886 | pub fn _mm_maskz_fcmul_round_sch<const ROUNDING: i32>( |
3887 | k: __mmask8, |
3888 | a: __m128h, |
3889 | b: __m128h, |
3890 | ) -> __m128h { |
3891 | static_assert_rounding!(ROUNDING); |
3892 | _mm_maskz_cmul_round_sch::<ROUNDING>(k, a, b) |
3893 | } |
3894 | |
3895 | /// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing |
3896 | /// the results in dst. |
3897 | /// |
3898 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_ph) |
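///
/// A minimal usage sketch (illustrative only, not a doctest; assumes `avx512fp16` and
/// `avx512vl` are available at runtime). The implementation below simply clears the sign bit of
/// each 16-bit lane:
///
/// ```ignore
/// let v = _mm_setr_ph(-1.5, 2.0, -0.0, 3.5, -4.0, 0.25, -6.0, 7.0);
/// let r = _mm_abs_ph(v); // [1.5, 2.0, 0.0, 3.5, 4.0, 0.25, 6.0, 7.0]
/// ```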
3899 | #[inline ] |
3900 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3901 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3902 | pub fn _mm_abs_ph(v2: __m128h) -> __m128h { |
unsafe { transmute(_mm_and_si128(transmute(v2), _mm_set1_epi16(i16::MAX))) }
3904 | } |
3905 | |
3906 | /// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing |
3907 | /// the result in dst. |
3908 | /// |
3909 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_ph) |
3910 | #[inline ] |
3911 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3912 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3913 | pub fn _mm256_abs_ph(v2: __m256h) -> __m256h { |
unsafe { transmute(_mm256_and_si256(transmute(v2), _mm256_set1_epi16(i16::MAX))) }
3915 | } |
3916 | |
3917 | /// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing |
3918 | /// the result in dst. |
3919 | /// |
3920 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_ph) |
3921 | #[inline ] |
3922 | #[target_feature (enable = "avx512fp16" )] |
3923 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3924 | pub fn _mm512_abs_ph(v2: __m512h) -> __m512h { |
unsafe { transmute(_mm512_and_si512(transmute(v2), _mm512_set1_epi16(i16::MAX))) }
3926 | } |
3927 | |
3928 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex |
3929 | /// number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines |
3930 | /// the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate |
3931 | /// `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3932 | /// |
3933 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_conj_pch) |
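///
/// A minimal usage sketch (illustrative only, not a doctest; assumes `avx512fp16` and
/// `avx512vl` are available at runtime). Conjugation only flips the sign bit of the imaginary
/// (odd-indexed) lane of each pair:
///
/// ```ignore
/// let a = _mm_setr_ph(1.0, 2.0, 3.0, -4.0, 0.5, 0.25, -1.0, -2.0); // 1+2i, 3-4i, 0.5+0.25i, -1-2i
/// let r = _mm_conj_pch(a); // 1-2i, 3+4i, 0.5-0.25i, -1+2i
/// ```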
3934 | #[inline ] |
3935 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3936 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3937 | pub fn _mm_conj_pch(a: __m128h) -> __m128h { |
unsafe { transmute(_mm_xor_si128(transmute(a), _mm_set1_epi32(i32::MIN))) }
3939 | } |
3940 | |
3941 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k |
3942 | /// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two |
3943 | /// adjacent half-precision (16-bit) floating-point elements, which defines the complex number |
3944 | /// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3945 | /// |
3946 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_conj_pch) |
3947 | #[inline ] |
3948 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3949 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3950 | pub fn _mm_mask_conj_pch(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { |
3951 | unsafe { |
let r: __m128 = transmute(_mm_conj_pch(a));
transmute(simd_select_bitmask(k, r, transmute(src)))
3954 | } |
3955 | } |
3956 | |
3957 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k |
3958 | /// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent |
3959 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3960 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3961 | /// |
3962 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_conj_pch) |
3963 | #[inline ] |
3964 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3965 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3966 | pub fn _mm_maskz_conj_pch(k: __mmask8, a: __m128h) -> __m128h { |
_mm_mask_conj_pch(_mm_setzero_ph(), k, a)
3968 | } |
3969 | |
3970 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex number |
3971 | /// is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
3972 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3973 | /// |
3974 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_conj_pch) |
3975 | #[inline ] |
3976 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3977 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3978 | pub fn _mm256_conj_pch(a: __m256h) -> __m256h { |
unsafe { transmute(_mm256_xor_si256(transmute(a), _mm256_set1_epi32(i32::MIN))) }
3980 | } |
3981 | |
3982 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k |
3983 | /// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two |
3984 | /// adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3985 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3986 | /// |
3987 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_conj_pch) |
3988 | #[inline ] |
3989 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3990 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3991 | pub fn _mm256_mask_conj_pch(src: __m256h, k: __mmask8, a: __m256h) -> __m256h { |
3992 | unsafe { |
let r: __m256 = transmute(_mm256_conj_pch(a));
transmute(simd_select_bitmask(k, r, transmute(src)))
3995 | } |
3996 | } |
3997 | |
3998 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k |
3999 | /// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent |
4000 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4001 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4002 | /// |
4003 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_conj_pch) |
4004 | #[inline ] |
4005 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4006 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4007 | pub fn _mm256_maskz_conj_pch(k: __mmask8, a: __m256h) -> __m256h { |
_mm256_mask_conj_pch(_mm256_setzero_ph(), k, a)
4009 | } |
4010 | |
4011 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex number |
4012 | /// is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
4013 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4014 | /// |
4015 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_conj_pch) |
4016 | #[inline ] |
4017 | #[target_feature (enable = "avx512fp16" )] |
4018 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4019 | pub fn _mm512_conj_pch(a: __m512h) -> __m512h { |
    unsafe { transmute(_mm512_xor_si512(transmute(a), _mm512_set1_epi32(i32::MIN))) }
4021 | } |
4022 | |
4023 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k |
4024 | /// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two |
4025 | /// adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4026 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4027 | /// |
4028 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_conj_pch) |
4029 | #[inline ] |
4030 | #[target_feature (enable = "avx512fp16" )] |
4031 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4032 | pub fn _mm512_mask_conj_pch(src: __m512h, k: __mmask16, a: __m512h) -> __m512h { |
4033 | unsafe { |
        let r: __m512 = transmute(_mm512_conj_pch(a));
        transmute(simd_select_bitmask(k, r, transmute(src)))
4036 | } |
4037 | } |
4038 | |
4039 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k |
4040 | /// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent |
4041 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4042 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4043 | /// |
4044 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_conj_pch) |
4045 | #[inline ] |
4046 | #[target_feature (enable = "avx512fp16" )] |
4047 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4048 | pub fn _mm512_maskz_conj_pch(k: __mmask16, a: __m512h) -> __m512h { |
    _mm512_mask_conj_pch(_mm512_setzero_ph(), k, a)
4050 | } |
4051 | |
4052 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4053 | /// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
4054 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4055 | /// |
4056 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_pch) |
4057 | #[inline ] |
4058 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4059 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4060 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4061 | pub fn _mm_fmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
    _mm_mask3_fmadd_pch(a, b, c, 0xff)
4063 | } |
4064 | |
4065 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4066 | /// and store the results in dst using writemask k (the element is copied from a when the corresponding |
4067 | /// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) |
4068 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4069 | /// |
4070 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_pch) |
4071 | #[inline ] |
4072 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4073 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4074 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4075 | pub fn _mm_mask_fmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
4076 | unsafe { |
        let r: __m128 = transmute(_mm_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
        transmute(simd_select_bitmask(k, r, transmute(a)))
4079 | } |
4080 | } |
4081 | |
4082 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4083 | /// and store the results in dst using writemask k (the element is copied from c when the corresponding |
4084 | /// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) |
4085 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4086 | /// |
4087 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_pch) |
4088 | #[inline ] |
4089 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4090 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4091 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4092 | pub fn _mm_mask3_fmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
4093 | unsafe { |
        transmute(vfmaddcph_mask3_128(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
4100 | } |
4101 | } |
4102 | |
4103 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4104 | /// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask |
4105 | /// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point |
4106 | /// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4107 | /// |
4108 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_pch) |
4109 | #[inline ] |
4110 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4111 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4112 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4113 | pub fn _mm_maskz_fmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
4114 | unsafe { |
        transmute(vfmaddcph_maskz_128(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
4121 | } |
4122 | } |
4123 | |
4124 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4125 | /// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
4126 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4127 | /// |
4128 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_pch) |
4129 | #[inline ] |
4130 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4131 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4132 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4133 | pub fn _mm256_fmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
    _mm256_mask3_fmadd_pch(a, b, c, 0xff)
4135 | } |
4136 | |
4137 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4138 | /// and store the results in dst using writemask k (the element is copied from a when the corresponding mask |
4139 | /// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point |
4140 | /// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4141 | /// |
4142 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmadd_pch) |
4143 | #[inline ] |
4144 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4145 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4146 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4147 | pub fn _mm256_mask_fmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h { |
4148 | unsafe { |
        let r: __m256 = transmute(_mm256_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
        transmute(simd_select_bitmask(k, r, transmute(a)))
4151 | } |
4152 | } |
4153 | |
4154 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4155 | /// and store the results in dst using writemask k (the element is copied from c when the corresponding |
4156 | /// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) |
4157 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4158 | /// |
4159 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmadd_pch) |
4160 | #[inline ] |
4161 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4162 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4163 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4164 | pub fn _mm256_mask3_fmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h { |
4165 | unsafe { |
        transmute(vfmaddcph_mask3_256(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
4172 | } |
4173 | } |
4174 | |
4175 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4176 | /// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask |
4177 | /// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point |
4178 | /// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4179 | /// |
4180 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmadd_pch) |
4181 | #[inline ] |
4182 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4183 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4184 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4185 | pub fn _mm256_maskz_fmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
4186 | unsafe { |
        transmute(vfmaddcph_maskz_256(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
4193 | } |
4194 | } |
4195 | |
4196 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4197 | /// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
4198 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4199 | /// |
4200 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_pch) |
4201 | #[inline ] |
4202 | #[target_feature (enable = "avx512fp16" )] |
4203 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4204 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4205 | pub fn _mm512_fmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
4206 | _mm512_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c) |
4207 | } |
4208 | |
4209 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4210 | /// and store the results in dst using writemask k (the element is copied from a when the corresponding mask |
4211 | /// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point |
4212 | /// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4213 | /// |
4214 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_pch) |
4215 | #[inline ] |
4216 | #[target_feature (enable = "avx512fp16" )] |
4217 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4218 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4219 | pub fn _mm512_mask_fmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h { |
4220 | _mm512_mask_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c) |
4221 | } |
4222 | |
4223 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4224 | /// and store the results in dst using writemask k (the element is copied from c when the corresponding |
4225 | /// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) |
4226 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4227 | /// |
4228 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_pch) |
4229 | #[inline ] |
4230 | #[target_feature (enable = "avx512fp16" )] |
4231 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4232 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4233 | pub fn _mm512_mask3_fmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h { |
4234 | _mm512_mask3_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k) |
4235 | } |
4236 | |
4237 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4238 | /// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask |
4239 | /// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point |
4240 | /// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4241 | /// |
4242 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_pch) |
4243 | #[inline ] |
4244 | #[target_feature (enable = "avx512fp16" )] |
4245 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4246 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4247 | pub fn _mm512_maskz_fmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
4248 | _mm512_maskz_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c) |
4249 | } |
4250 | |
4251 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4252 | /// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
4253 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4254 | /// |
4255 | /// Rounding is done according to the rounding parameter, which can be one of: |
4256 | /// |
4257 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4258 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4259 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4260 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4261 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4262 | /// |
4263 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_round_pch) |
4264 | #[inline ] |
4265 | #[target_feature (enable = "avx512fp16" )] |
4266 | #[cfg_attr (test, assert_instr(vfmaddcph, ROUNDING = 8))] |
4267 | #[rustc_legacy_const_generics (3)] |
4268 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4269 | pub fn _mm512_fmadd_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
4270 | static_assert_rounding!(ROUNDING); |
    _mm512_mask3_fmadd_round_pch::<ROUNDING>(a, b, c, 0xffff)
4272 | } |
4273 | |
4274 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4275 | /// and store the results in dst using writemask k (the element is copied from a when the corresponding mask |
4276 | /// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point |
4277 | /// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4278 | /// |
4279 | /// Rounding is done according to the rounding parameter, which can be one of: |
4280 | /// |
4281 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4282 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4283 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4284 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4285 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4286 | /// |
4287 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_round_pch) |
4288 | #[inline ] |
4289 | #[target_feature (enable = "avx512fp16" )] |
4290 | #[cfg_attr (test, assert_instr(vfmaddcph, ROUNDING = 8))] |
4291 | #[rustc_legacy_const_generics (4)] |
4292 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4293 | pub fn _mm512_mask_fmadd_round_pch<const ROUNDING: i32>( |
4294 | a: __m512h, |
4295 | k: __mmask16, |
4296 | b: __m512h, |
4297 | c: __m512h, |
4298 | ) -> __m512h { |
4299 | unsafe { |
4300 | static_assert_rounding!(ROUNDING); |
        let r: __m512 = transmute(_mm512_mask3_fmadd_round_pch::<ROUNDING>(a, b, c, k)); // using `0xffff` would have been fine here, but this is what Clang does
        transmute(simd_select_bitmask(k, r, transmute(a)))
4303 | } |
4304 | } |
4305 | |
4306 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4307 | /// and store the results in dst using writemask k (the element is copied from c when the corresponding |
4308 | /// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) |
4309 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4310 | /// |
4311 | /// Rounding is done according to the rounding parameter, which can be one of: |
4312 | /// |
4313 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4314 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4315 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4316 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4317 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4318 | /// |
4319 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_round_pch) |
4320 | #[inline ] |
4321 | #[target_feature (enable = "avx512fp16" )] |
4322 | #[cfg_attr (test, assert_instr(vfmaddcph, ROUNDING = 8))] |
4323 | #[rustc_legacy_const_generics (4)] |
4324 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4325 | pub fn _mm512_mask3_fmadd_round_pch<const ROUNDING: i32>( |
4326 | a: __m512h, |
4327 | b: __m512h, |
4328 | c: __m512h, |
4329 | k: __mmask16, |
4330 | ) -> __m512h { |
4331 | unsafe { |
4332 | static_assert_rounding!(ROUNDING); |
        transmute(vfmaddcph_mask3_512(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
            ROUNDING,
        ))
4339 | )) |
4340 | } |
4341 | } |
4342 | |
4343 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4344 | /// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask |
4345 | /// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point |
4346 | /// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4347 | /// |
4348 | /// Rounding is done according to the rounding parameter, which can be one of: |
4349 | /// |
4350 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4351 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4352 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4353 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4354 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4355 | /// |
4356 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_round_pch) |
4357 | #[inline ] |
4358 | #[target_feature (enable = "avx512fp16" )] |
4359 | #[cfg_attr (test, assert_instr(vfmaddcph, ROUNDING = 8))] |
4360 | #[rustc_legacy_const_generics (4)] |
4361 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4362 | pub fn _mm512_maskz_fmadd_round_pch<const ROUNDING: i32>( |
4363 | k: __mmask16, |
4364 | a: __m512h, |
4365 | b: __m512h, |
4366 | c: __m512h, |
4367 | ) -> __m512h { |
4368 | unsafe { |
4369 | static_assert_rounding!(ROUNDING); |
        transmute(vfmaddcph_maskz_512(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
            ROUNDING,
        ))
4376 | )) |
4377 | } |
4378 | } |
4379 | |
4380 | /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and |
4381 | /// store the result in the lower elements of dst, and copy the upper 6 packed elements from a to the |
4382 | /// upper elements of dst. Each complex number is composed of two adjacent half-precision (16-bit) |
4383 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4384 | /// |
4385 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sch) |
4386 | #[inline ] |
4387 | #[target_feature (enable = "avx512fp16" )] |
4388 | #[cfg_attr (test, assert_instr(vfmaddcsh))] |
4389 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4390 | pub fn _mm_fmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
4391 | _mm_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c) |
4392 | } |
4393 | |
4394 | /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and |
4395 | /// store the result in the lower elements of dst using writemask k (elements are copied from a when |
4396 | /// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. |
4397 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, |
4398 | /// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4399 | /// |
4400 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_sch) |
4401 | #[inline ] |
4402 | #[target_feature (enable = "avx512fp16" )] |
4403 | #[cfg_attr (test, assert_instr(vfmaddcsh))] |
4404 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4405 | pub fn _mm_mask_fmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
4406 | _mm_mask_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c) |
4407 | } |
4408 | |
4409 | /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and |
4410 | /// store the result in the lower elements of dst using writemask k (elements are copied from c when |
4411 | /// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. |
4412 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, |
4413 | /// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4414 | /// |
4415 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_sch) |
4416 | #[inline ] |
4417 | #[target_feature (enable = "avx512fp16" )] |
4418 | #[cfg_attr (test, assert_instr(vfmaddcsh))] |
4419 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4420 | pub fn _mm_mask3_fmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
4421 | _mm_mask3_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k) |
4422 | } |
4423 | |
4424 | /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and |
4425 | /// store the result in the lower elements of dst using zeromask k (elements are zeroed out when mask |
4426 | /// bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. Each |
4427 | /// complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
4428 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4429 | /// |
4430 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_sch) |
4431 | #[inline ] |
4432 | #[target_feature (enable = "avx512fp16" )] |
4433 | #[cfg_attr (test, assert_instr(vfmaddcsh))] |
4434 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4435 | pub fn _mm_maskz_fmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
4436 | _mm_maskz_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c) |
4437 | } |
4438 | |
4439 | /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and |
4440 | /// store the result in the lower elements of dst. Each complex number is composed of two adjacent |
4441 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4442 | /// |
4443 | /// Rounding is done according to the rounding parameter, which can be one of: |
4444 | /// |
4445 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4446 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4447 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4448 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4449 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4450 | /// |
4451 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_sch) |
4452 | #[inline ] |
4453 | #[target_feature (enable = "avx512fp16" )] |
4454 | #[cfg_attr (test, assert_instr(vfmaddcsh, ROUNDING = 8))] |
4455 | #[rustc_legacy_const_generics (3)] |
4456 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4457 | pub fn _mm_fmadd_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
4458 | unsafe { |
4459 | static_assert_rounding!(ROUNDING); |
        transmute(vfmaddcsh_mask(
            transmute(a),
            transmute(b),
            transmute(c),
            0xff,
            ROUNDING,
        ))
4466 | )) |
4467 | } |
4468 | } |
4469 | |
4470 | /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and |
4471 | /// store the result in the lower elements of dst using writemask k (elements are copied from a when |
4472 | /// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. |
4473 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, |
4474 | /// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4475 | /// |
4476 | /// Rounding is done according to the rounding parameter, which can be one of: |
4477 | /// |
4478 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4479 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4480 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4481 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4482 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4483 | /// |
4484 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_sch) |
4485 | #[inline ] |
4486 | #[target_feature (enable = "avx512fp16" )] |
4487 | #[cfg_attr (test, assert_instr(vfmaddcsh, ROUNDING = 8))] |
4488 | #[rustc_legacy_const_generics (4)] |
4489 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4490 | pub fn _mm_mask_fmadd_round_sch<const ROUNDING: i32>( |
4491 | a: __m128h, |
4492 | k: __mmask8, |
4493 | b: __m128h, |
4494 | c: __m128h, |
4495 | ) -> __m128h { |
4496 | unsafe { |
4497 | static_assert_rounding!(ROUNDING); |
        let a: __m128 = transmute(a);
        let r: __m128 = vfmaddcsh_mask(a, transmute(b), transmute(c), k, ROUNDING); // using `0xff` would have been fine here, but this is what Clang does
        transmute(_mm_mask_move_ss(a, k, a, r))
4501 | } |
4502 | } |
4503 | |
4504 | /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and |
4505 | /// store the result in the lower elements of dst using writemask k (elements are copied from c when |
4506 | /// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. |
4507 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, |
4508 | /// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4509 | /// |
4510 | /// Rounding is done according to the rounding parameter, which can be one of: |
4511 | /// |
4512 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4513 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4514 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4515 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4516 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4517 | /// |
4518 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_round_sch) |
4519 | #[inline ] |
4520 | #[target_feature (enable = "avx512fp16" )] |
4521 | #[cfg_attr (test, assert_instr(vfmaddcsh, ROUNDING = 8))] |
4522 | #[rustc_legacy_const_generics (4)] |
4523 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4524 | pub fn _mm_mask3_fmadd_round_sch<const ROUNDING: i32>( |
4525 | a: __m128h, |
4526 | b: __m128h, |
4527 | c: __m128h, |
4528 | k: __mmask8, |
4529 | ) -> __m128h { |
4530 | unsafe { |
4531 | static_assert_rounding!(ROUNDING); |
        let c: __m128 = transmute(c);
        let r: __m128 = vfmaddcsh_mask(transmute(a), transmute(b), c, k, ROUNDING);
        transmute(_mm_move_ss(c, r))
4535 | } |
4536 | } |
4537 | |
4538 | /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and |
4539 | /// store the result in the lower elements of dst using zeromask k (elements are zeroed out when mask |
4540 | /// bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. Each |
4541 | /// complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
4542 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4543 | /// |
4544 | /// Rounding is done according to the rounding parameter, which can be one of: |
4545 | /// |
4546 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4547 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4548 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4549 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4550 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4551 | /// |
4552 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_sch) |
4553 | #[inline ] |
4554 | #[target_feature (enable = "avx512fp16" )] |
4555 | #[cfg_attr (test, assert_instr(vfmaddcsh, ROUNDING = 8))] |
4556 | #[rustc_legacy_const_generics (4)] |
4557 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4558 | pub fn _mm_maskz_fmadd_round_sch<const ROUNDING: i32>( |
4559 | k: __mmask8, |
4560 | a: __m128h, |
4561 | b: __m128h, |
4562 | c: __m128h, |
4563 | ) -> __m128h { |
4564 | unsafe { |
4565 | static_assert_rounding!(ROUNDING); |
        transmute(vfmaddcsh_maskz(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
            ROUNDING,
        ))
4572 | )) |
4573 | } |
4574 | } |
4575 | |
4576 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4577 | /// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed |
4578 | /// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number |
4579 | /// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4580 | /// |
4581 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_pch) |
4582 | #[inline ] |
4583 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4584 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4585 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4586 | pub fn _mm_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
    _mm_mask3_fcmadd_pch(a, b, c, 0xff)
4588 | } |
4589 | |
4590 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4591 | /// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is |
4592 | /// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4593 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4594 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4595 | /// |
4596 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_pch) |
4597 | #[inline ] |
4598 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4599 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4600 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4601 | pub fn _mm_mask_fcmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
4602 | unsafe { |
        let r: __m128 = transmute(_mm_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
        transmute(simd_select_bitmask(k, r, transmute(a)))
4605 | } |
4606 | } |
4607 | |
4608 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4609 | /// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is |
4610 | /// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4611 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4612 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4613 | /// |
4614 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_pch) |
4615 | #[inline ] |
4616 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4617 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4618 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4619 | pub fn _mm_mask3_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
4620 | unsafe { |
        transmute(vfcmaddcph_mask3_128(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
4627 | } |
4628 | } |
4629 | |
4630 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4631 | /// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is |
4632 | /// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4633 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4634 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4635 | /// |
4636 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_pch) |
4637 | #[inline ] |
4638 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4639 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4640 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4641 | pub fn _mm_maskz_fcmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
4642 | unsafe { |
        transmute(vfcmaddcph_maskz_128(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
4649 | } |
4650 | } |
4651 | |
4652 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4653 | /// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed |
4654 | /// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number |
4655 | /// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4656 | /// |
4657 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fcmadd_pch) |
4658 | #[inline ] |
4659 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4660 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4661 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4662 | pub fn _mm256_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
    _mm256_mask3_fcmadd_pch(a, b, c, 0xff)
4664 | } |
4665 | |
4666 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4667 | /// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is |
4668 | /// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4669 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4670 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4671 | /// |
4672 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fcmadd_pch) |
4673 | #[inline ] |
4674 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4675 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4676 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4677 | pub fn _mm256_mask_fcmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h { |
4678 | unsafe { |
        let r: __m256 = transmute(_mm256_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
        transmute(simd_select_bitmask(k, r, transmute(a)))
4681 | } |
4682 | } |
4683 | |
4684 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4685 | /// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is |
4686 | /// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4687 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4688 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4689 | /// |
4690 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fcmadd_pch) |
4691 | #[inline ] |
4692 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4693 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4694 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4695 | pub fn _mm256_mask3_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h { |
4696 | unsafe { |
        transmute(vfcmaddcph_mask3_256(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
4703 | } |
4704 | } |
4705 | |
4706 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4707 | /// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is |
4708 | /// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4709 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4710 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4711 | /// |
4712 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fcmadd_pch) |
4713 | #[inline ] |
4714 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4715 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4716 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4717 | pub fn _mm256_maskz_fcmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
4718 | unsafe { |
        transmute(vfcmaddcph_maskz_256(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
4725 | } |
4726 | } |
4727 | |
4728 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4729 | /// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed |
4730 | /// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number |
4731 | /// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4732 | /// |
4733 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_pch) |
4734 | #[inline ] |
4735 | #[target_feature (enable = "avx512fp16" )] |
4736 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4737 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4738 | pub fn _mm512_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
4739 | _mm512_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c) |
4740 | } |
4741 | |
4742 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4743 | /// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is |
4744 | /// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4745 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4746 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4747 | /// |
4748 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_pch) |
4749 | #[inline ] |
4750 | #[target_feature (enable = "avx512fp16" )] |
4751 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4752 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4753 | pub fn _mm512_mask_fcmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h { |
4754 | _mm512_mask_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c) |
4755 | } |
4756 | |
4757 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4758 | /// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is |
4759 | /// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4760 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4761 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4762 | /// |
4763 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fcmadd_pch) |
4764 | #[inline ] |
4765 | #[target_feature (enable = "avx512fp16" )] |
4766 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4767 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4768 | pub fn _mm512_mask3_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h { |
4769 | _mm512_mask3_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k) |
4770 | } |
4771 | |
4772 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4773 | /// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is |
4774 | /// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4775 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4776 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4777 | /// |
4778 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_pch) |
4779 | #[inline ] |
4780 | #[target_feature (enable = "avx512fp16" )] |
4781 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4782 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4783 | pub fn _mm512_maskz_fcmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
4784 | _mm512_maskz_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c) |
4785 | } |
4786 | |
4787 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4788 | /// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed |
4789 | /// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number |
4790 | /// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4791 | /// |
4792 | /// Rounding is done according to the rounding parameter, which can be one of: |
4793 | /// |
4794 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4795 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4796 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4797 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4798 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4799 | /// |
4800 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_round_pch) |
4801 | #[inline ] |
4802 | #[target_feature (enable = "avx512fp16" )] |
4803 | #[cfg_attr (test, assert_instr(vfcmaddcph, ROUNDING = 8))] |
4804 | #[rustc_legacy_const_generics (3)] |
4805 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4806 | pub fn _mm512_fcmadd_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
4807 | static_assert_rounding!(ROUNDING); |
    _mm512_mask3_fcmadd_round_pch::<ROUNDING>(a, b, c, 0xffff)
4809 | } |
4810 | |
4811 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4812 | /// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is |
4813 | /// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4814 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4815 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4816 | /// |
4817 | /// Rounding is done according to the rounding parameter, which can be one of: |
4818 | /// |
4819 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4820 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4821 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4822 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4823 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4824 | /// |
4825 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_round_pch) |
4826 | #[inline ] |
4827 | #[target_feature (enable = "avx512fp16" )] |
4828 | #[cfg_attr (test, assert_instr(vfcmaddcph, ROUNDING = 8))] |
4829 | #[rustc_legacy_const_generics (4)] |
4830 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4831 | pub fn _mm512_mask_fcmadd_round_pch<const ROUNDING: i32>( |
4832 | a: __m512h, |
4833 | k: __mmask16, |
4834 | b: __m512h, |
4835 | c: __m512h, |
4836 | ) -> __m512h { |
4837 | unsafe { |
4838 | static_assert_rounding!(ROUNDING); |
        let r: __m512 = transmute(_mm512_mask3_fcmadd_round_pch::<ROUNDING>(a, b, c, k)); // using `0xffff` would have been fine here, but this is what Clang does
        transmute(simd_select_bitmask(k, r, transmute(a)))
4841 | } |
4842 | } |
4843 | |
4844 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4845 | /// to the corresponding complex numbers in c using writemask k (the element is copied from c when the corresponding |
4846 | /// mask bit is not set), and store the results in dst. Each complex number is composed of two adjacent half-precision |
/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex
4848 | /// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4849 | /// |
4850 | /// Rounding is done according to the rounding parameter, which can be one of: |
4851 | /// |
4852 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4853 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4854 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4855 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4856 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4857 | /// |
4858 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fcmadd_round_pch) |
4859 | #[inline ] |
4860 | #[target_feature (enable = "avx512fp16" )] |
4861 | #[cfg_attr (test, assert_instr(vfcmaddcph, ROUNDING = 8))] |
4862 | #[rustc_legacy_const_generics (4)] |
4863 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4864 | pub fn _mm512_mask3_fcmadd_round_pch<const ROUNDING: i32>( |
4865 | a: __m512h, |
4866 | b: __m512h, |
4867 | c: __m512h, |
4868 | k: __mmask16, |
4869 | ) -> __m512h { |
4870 | unsafe { |
4871 | static_assert_rounding!(ROUNDING); |
        transmute(vfcmaddcph_mask3_512(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
            ROUNDING,
        ))
4878 | )) |
4879 | } |
4880 | } |
4881 | |
4882 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4883 | /// to the corresponding complex numbers in c using zeromask k (the element is zeroed out when the corresponding |
4884 | /// mask bit is not set), and store the results in dst. Each complex number is composed of two adjacent half-precision |
/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex
4886 | /// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4887 | /// |
4888 | /// Rounding is done according to the rounding parameter, which can be one of: |
4889 | /// |
4890 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4891 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4892 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4893 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4894 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4895 | /// |
4896 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_round_pch) |
4897 | #[inline ] |
4898 | #[target_feature (enable = "avx512fp16" )] |
4899 | #[cfg_attr (test, assert_instr(vfcmaddcph, ROUNDING = 8))] |
4900 | #[rustc_legacy_const_generics (4)] |
4901 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4902 | pub fn _mm512_maskz_fcmadd_round_pch<const ROUNDING: i32>( |
4903 | k: __mmask16, |
4904 | a: __m512h, |
4905 | b: __m512h, |
4906 | c: __m512h, |
4907 | ) -> __m512h { |
4908 | unsafe { |
4909 | static_assert_rounding!(ROUNDING); |
        transmute(vfcmaddcph_maskz_512(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
            ROUNDING,
        ))
4916 | )) |
4917 | } |
4918 | } |
4919 | |
4920 | /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, |
4921 | /// accumulate to the lower complex number in c, and store the result in the lower elements of dst, |
4922 | /// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is |
4923 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
4924 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4925 | /// |
4926 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_sch) |
4927 | #[inline ] |
4928 | #[target_feature (enable = "avx512fp16" )] |
4929 | #[cfg_attr (test, assert_instr(vfcmaddcsh))] |
4930 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4931 | pub fn _mm_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
4932 | _mm_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c) |
4933 | } |
4934 | |
4935 | /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, |
4936 | /// accumulate to the lower complex number in c, and store the result in the lower elements of dst using |
4937 | /// writemask k (the element is copied from a when the corresponding mask bit is not set), and copy the upper |
4938 | /// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent |
4939 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4940 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4941 | /// |
4942 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_sch) |
4943 | #[inline ] |
4944 | #[target_feature (enable = "avx512fp16" )] |
4945 | #[cfg_attr (test, assert_instr(vfcmaddcsh))] |
4946 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4947 | pub fn _mm_mask_fcmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
4948 | _mm_mask_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c) |
4949 | } |
4950 | |
4951 | /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, |
4952 | /// accumulate to the lower complex number in c, and store the result in the lower elements of dst using |
4953 | /// writemask k (the element is copied from c when the corresponding mask bit is not set), and copy the upper |
4954 | /// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent |
4955 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4956 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4957 | /// |
4958 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_sch) |
4959 | #[inline ] |
4960 | #[target_feature (enable = "avx512fp16" )] |
4961 | #[cfg_attr (test, assert_instr(vfcmaddcsh))] |
4962 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4963 | pub fn _mm_mask3_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
4964 | _mm_mask3_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k) |
4965 | } |
4966 | |
4967 | /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, |
4968 | /// accumulate to the lower complex number in c, and store the result in the lower elements of dst using |
4969 | /// zeromask k (the element is zeroed out when the corresponding mask bit is not set), and copy the upper |
4970 | /// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent |
4971 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4972 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4973 | /// |
4974 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_sch) |
4975 | #[inline ] |
4976 | #[target_feature (enable = "avx512fp16" )] |
4977 | #[cfg_attr (test, assert_instr(vfcmaddcsh))] |
4978 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4979 | pub fn _mm_maskz_fcmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
4980 | _mm_maskz_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c) |
4981 | } |
4982 | |
4983 | /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, |
4984 | /// accumulate to the lower complex number in c, and store the result in the lower elements of dst, |
4985 | /// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is |
4986 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
4987 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4988 | /// |
4989 | /// Rounding is done according to the rounding parameter, which can be one of: |
4990 | /// |
4991 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4992 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4993 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4994 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4995 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4996 | /// |
4997 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_round_sch) |
4998 | #[inline ] |
4999 | #[target_feature (enable = "avx512fp16" )] |
5000 | #[cfg_attr (test, assert_instr(vfcmaddcsh, ROUNDING = 8))] |
5001 | #[rustc_legacy_const_generics (3)] |
5002 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5003 | pub fn _mm_fcmadd_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
5004 | unsafe { |
5005 | static_assert_rounding!(ROUNDING); |
        transmute(vfcmaddcsh_mask(
            transmute(a),
            transmute(b),
            transmute(c),
            0xff,
5011 | ROUNDING, |
5012 | )) |
5013 | } |
5014 | } |
5015 | |
5016 | /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, |
5017 | /// accumulate to the lower complex number in c, and store the result in the lower elements of dst using |
5018 | /// writemask k (the element is copied from a when the corresponding mask bit is not set), and copy the upper |
5019 | /// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent |
5020 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
5021 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
5022 | /// |
5023 | /// Rounding is done according to the rounding parameter, which can be one of: |
5024 | /// |
5025 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5026 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5027 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5028 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5029 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5030 | /// |
5031 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_round_sch) |
5032 | #[inline ] |
5033 | #[target_feature (enable = "avx512fp16" )] |
5034 | #[cfg_attr (test, assert_instr(vfcmaddcsh, ROUNDING = 8))] |
5035 | #[rustc_legacy_const_generics (4)] |
5036 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5037 | pub fn _mm_mask_fcmadd_round_sch<const ROUNDING: i32>( |
5038 | a: __m128h, |
5039 | k: __mmask8, |
5040 | b: __m128h, |
5041 | c: __m128h, |
5042 | ) -> __m128h { |
5043 | unsafe { |
5044 | static_assert_rounding!(ROUNDING); |
        let a: __m128 = transmute(a);
        let r: __m128 = vfcmaddcsh_mask(a, transmute(b), transmute(c), k, ROUNDING);
        transmute(_mm_mask_move_ss(a, k, a, r))
5048 | } |
5049 | } |
5050 | |
5051 | /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, |
5052 | /// accumulate to the lower complex number in c, and store the result in the lower elements of dst using |
5053 | /// writemask k (the element is copied from c when the corresponding mask bit is not set), and copy the upper |
5054 | /// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent |
5055 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
5056 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
5057 | /// |
5058 | /// Rounding is done according to the rounding parameter, which can be one of: |
5059 | /// |
5060 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5061 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5062 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5063 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5064 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5065 | /// |
5066 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_round_sch) |
5067 | #[inline ] |
5068 | #[target_feature (enable = "avx512fp16" )] |
5069 | #[cfg_attr (test, assert_instr(vfcmaddcsh, ROUNDING = 8))] |
5070 | #[rustc_legacy_const_generics (4)] |
5071 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5072 | pub fn _mm_mask3_fcmadd_round_sch<const ROUNDING: i32>( |
5073 | a: __m128h, |
5074 | b: __m128h, |
5075 | c: __m128h, |
5076 | k: __mmask8, |
5077 | ) -> __m128h { |
5078 | unsafe { |
5079 | static_assert_rounding!(ROUNDING); |
        let c: __m128 = transmute(c);
        let r: __m128 = vfcmaddcsh_mask(transmute(a), transmute(b), c, k, ROUNDING);
        transmute(_mm_move_ss(c, r))
5083 | } |
5084 | } |
5085 | |
5086 | /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, |
5087 | /// accumulate to the lower complex number in c using zeromask k (the element is zeroed out when the corresponding |
5088 | /// mask bit is not set), and store the result in the lower elements of dst, and copy the upper 6 packed elements |
5089 | /// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision (16-bit) |
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex
5091 | /// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
5092 | /// |
5093 | /// Rounding is done according to the rounding parameter, which can be one of: |
5094 | /// |
5095 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5096 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5097 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5098 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5099 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5100 | /// |
5101 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_round_sch) |
5102 | #[inline ] |
5103 | #[target_feature (enable = "avx512fp16" )] |
5104 | #[cfg_attr (test, assert_instr(vfcmaddcsh, ROUNDING = 8))] |
5105 | #[rustc_legacy_const_generics (4)] |
5106 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5107 | pub fn _mm_maskz_fcmadd_round_sch<const ROUNDING: i32>( |
5108 | k: __mmask8, |
5109 | a: __m128h, |
5110 | b: __m128h, |
5111 | c: __m128h, |
5112 | ) -> __m128h { |
5113 | unsafe { |
5114 | static_assert_rounding!(ROUNDING); |
        transmute(vfcmaddcsh_maskz(
            transmute(a),
            transmute(b),
            transmute(c),
5119 | k, |
5120 | ROUNDING, |
5121 | )) |
5122 | } |
5123 | } |
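
// Minimal sketch (hypothetical helper, not used above) of how the masked
// `fcmadd_*_sch` variants treat mask bit 0 for the lower complex pair: `merge` stands
// for the merge source, i.e. the lower pair of `a` for the writemask form, the lower
// pair of `c` for the `mask3` form, or `(0.0, 0.0)` for the zero-masking form. The
// upper lanes of the result are always copied from `a` (or from `c` for `mask3`),
// regardless of the mask.
#[allow(dead_code)]
fn sch_lower_pair_select(k: __mmask8, computed: (f16, f16), merge: (f16, f16)) -> (f16, f16) {
    if k & 1 != 0 { computed } else { merge }
}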
5124 | |
5125 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5126 | /// result to packed elements in c, and store the results in dst. |
5127 | /// |
5128 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_ph) |
5129 | #[inline ] |
5130 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5131 | #[cfg_attr (test, assert_instr(vfmadd))] |
5132 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5133 | pub fn _mm_fmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_fma(a, b, c) }
5135 | } |
5136 | |
5137 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5138 | /// result to packed elements in c, and store the results in dst using writemask k (the element is copied |
5139 | /// from a when the corresponding mask bit is not set). |
5140 | /// |
5141 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_ph) |
5142 | #[inline ] |
5143 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5144 | #[cfg_attr (test, assert_instr(vfmadd))] |
5145 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5146 | pub fn _mm_mask_fmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), a) }
5148 | } |
5149 | |
5150 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5151 | /// result to packed elements in c, and store the results in dst using writemask k (the element is copied |
5152 | /// from c when the corresponding mask bit is not set). |
5153 | /// |
5154 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_ph) |
5155 | #[inline ] |
5156 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5157 | #[cfg_attr (test, assert_instr(vfmadd))] |
5158 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5159 | pub fn _mm_mask3_fmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), c) }
5161 | } |
5162 | |
5163 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5164 | /// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed |
5165 | /// out when the corresponding mask bit is not set). |
5166 | /// |
5167 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_ph) |
5168 | #[inline ] |
5169 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5170 | #[cfg_attr (test, assert_instr(vfmadd))] |
5171 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5172 | pub fn _mm_maskz_fmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), _mm_setzero_ph()) }
5174 | } |
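
// Per-lane sketch (illustrative only) of the mask semantics shared by the masked
// `fmadd_ph` variants above: the writemask form merges from `a`, the `mask3` form
// merges from `c`, and the zeromask form substitutes zero; `merge` stands for that
// per-variant fallback. Shown with unfused f16 arithmetic; the intrinsics compute a
// fused multiply-add over the whole vector with `simd_fma` and then pick lanes with
// `simd_select_bitmask`.
#[allow(dead_code)]
fn fmadd_lane_sketch(k: __mmask8, lane: u32, a: f16, b: f16, c: f16, merge: f16) -> f16 {
    if (k >> lane) & 1 != 0 { a * b + c } else { merge }
}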
5175 | |
5176 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5177 | /// result to packed elements in c, and store the results in dst. |
5178 | /// |
5179 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_ph) |
5180 | #[inline ] |
5181 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5182 | #[cfg_attr (test, assert_instr(vfmadd))] |
5183 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5184 | pub fn _mm256_fmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_fma(a, b, c) }
5186 | } |
5187 | |
5188 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5189 | /// result to packed elements in c, and store the results in dst using writemask k (the element is copied |
5190 | /// from a when the corresponding mask bit is not set). |
5191 | /// |
5192 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmadd_ph) |
5193 | #[inline ] |
5194 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5195 | #[cfg_attr (test, assert_instr(vfmadd))] |
5196 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5197 | pub fn _mm256_mask_fmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), a) }
5199 | } |
5200 | |
5201 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5202 | /// result to packed elements in c, and store the results in dst using writemask k (the element is copied |
5203 | /// from c when the corresponding mask bit is not set). |
5204 | /// |
5205 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmadd_ph) |
5206 | #[inline ] |
5207 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5208 | #[cfg_attr (test, assert_instr(vfmadd))] |
5209 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5210 | pub fn _mm256_mask3_fmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), c) }
5212 | } |
5213 | |
5214 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5215 | /// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed |
5216 | /// out when the corresponding mask bit is not set). |
5217 | /// |
5218 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmadd_ph) |
5219 | #[inline ] |
5220 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5221 | #[cfg_attr (test, assert_instr(vfmadd))] |
5222 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5223 | pub fn _mm256_maskz_fmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), _mm256_setzero_ph()) }
5225 | } |
5226 | |
5227 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5228 | /// result to packed elements in c, and store the results in dst. |
5229 | /// |
5230 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_ph) |
5231 | #[inline ] |
5232 | #[target_feature (enable = "avx512fp16" )] |
5233 | #[cfg_attr (test, assert_instr(vfmadd))] |
5234 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5235 | pub fn _mm512_fmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_fma(a, b, c) }
5237 | } |
5238 | |
5239 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5240 | /// result to packed elements in c, and store the results in dst using writemask k (the element is copied |
5241 | /// from a when the corresponding mask bit is not set). |
5242 | /// |
5243 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_ph) |
5244 | #[inline ] |
5245 | #[target_feature (enable = "avx512fp16" )] |
5246 | #[cfg_attr (test, assert_instr(vfmadd))] |
5247 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5248 | pub fn _mm512_mask_fmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), a) }
5250 | } |
5251 | |
5252 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5253 | /// result to packed elements in c, and store the results in dst using writemask k (the element is copied |
5254 | /// from c when the corresponding mask bit is not set). |
5255 | /// |
5256 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_ph) |
5257 | #[inline ] |
5258 | #[target_feature (enable = "avx512fp16" )] |
5259 | #[cfg_attr (test, assert_instr(vfmadd))] |
5260 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5261 | pub fn _mm512_mask3_fmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), c) }
5263 | } |
5264 | |
5265 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5266 | /// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed |
5267 | /// out when the corresponding mask bit is not set). |
5268 | /// |
5269 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_ph) |
5270 | #[inline ] |
5271 | #[target_feature (enable = "avx512fp16" )] |
5272 | #[cfg_attr (test, assert_instr(vfmadd))] |
5273 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5274 | pub fn _mm512_maskz_fmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), _mm512_setzero_ph()) }
5276 | } |
5277 | |
5278 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5279 | /// result to packed elements in c, and store the results in dst. |
5280 | /// |
5281 | /// Rounding is done according to the rounding parameter, which can be one of: |
5282 | /// |
5283 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5284 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5285 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5286 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5287 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5288 | /// |
5289 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_round_ph) |
5290 | #[inline ] |
5291 | #[target_feature (enable = "avx512fp16" )] |
5292 | #[cfg_attr (test, assert_instr(vfmadd, ROUNDING = 8))] |
5293 | #[rustc_legacy_const_generics (3)] |
5294 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5295 | pub fn _mm512_fmadd_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
5296 | unsafe { |
5297 | static_assert_rounding!(ROUNDING); |
5298 | vfmaddph_512(a, b, c, ROUNDING) |
5299 | } |
5300 | } |
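
// Hypothetical usage sketch for the rounding const generic taken by the `*_round_*`
// intrinsics above: the mode is supplied at compile time and validated by
// `static_assert_rounding!`. The wrapper below exists only for illustration; callers
// normally reach these intrinsics through runtime feature detection.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn fmadd_round_to_zero_sketch(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    // Truncate (round toward zero) and suppress floating-point exceptions.
    _mm512_fmadd_round_ph::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c)
}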
5301 | |
5302 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5303 | /// result to packed elements in c, and store the results in dst using writemask k (the element is copied |
5304 | /// from a when the corresponding mask bit is not set). |
5305 | /// |
5306 | /// Rounding is done according to the rounding parameter, which can be one of: |
5307 | /// |
5308 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5309 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5310 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5311 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5312 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5313 | /// |
5314 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_round_ph) |
5315 | #[inline ] |
5316 | #[target_feature (enable = "avx512fp16" )] |
5317 | #[cfg_attr (test, assert_instr(vfmadd, ROUNDING = 8))] |
5318 | #[rustc_legacy_const_generics (4)] |
5319 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5320 | pub fn _mm512_mask_fmadd_round_ph<const ROUNDING: i32>( |
5321 | a: __m512h, |
5322 | k: __mmask32, |
5323 | b: __m512h, |
5324 | c: __m512h, |
5325 | ) -> __m512h { |
5326 | unsafe { |
5327 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_fmadd_round_ph::<ROUNDING>(a, b, c), a)
5329 | } |
5330 | } |
5331 | |
5332 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5333 | /// result to packed elements in c, and store the results in dst using writemask k (the element is copied |
5334 | /// from c when the corresponding mask bit is not set). |
5335 | /// |
5336 | /// Rounding is done according to the rounding parameter, which can be one of: |
5337 | /// |
5338 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5339 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5340 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5341 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5342 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5343 | /// |
5344 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_round_ph) |
5345 | #[inline ] |
5346 | #[target_feature (enable = "avx512fp16" )] |
5347 | #[cfg_attr (test, assert_instr(vfmadd, ROUNDING = 8))] |
5348 | #[rustc_legacy_const_generics (4)] |
5349 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5350 | pub fn _mm512_mask3_fmadd_round_ph<const ROUNDING: i32>( |
5351 | a: __m512h, |
5352 | b: __m512h, |
5353 | c: __m512h, |
5354 | k: __mmask32, |
5355 | ) -> __m512h { |
5356 | unsafe { |
5357 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_fmadd_round_ph::<ROUNDING>(a, b, c), c)
5359 | } |
5360 | } |
5361 | |
5362 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5363 | /// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed |
5364 | /// out when the corresponding mask bit is not set). |
5365 | /// |
5366 | /// Rounding is done according to the rounding parameter, which can be one of: |
5367 | /// |
5368 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5369 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5370 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5371 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5372 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5373 | /// |
5374 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_round_ph) |
5375 | #[inline ] |
5376 | #[target_feature (enable = "avx512fp16" )] |
5377 | #[cfg_attr (test, assert_instr(vfmadd, ROUNDING = 8))] |
5378 | #[rustc_legacy_const_generics (4)] |
5379 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5380 | pub fn _mm512_maskz_fmadd_round_ph<const ROUNDING: i32>( |
5381 | k: __mmask32, |
5382 | a: __m512h, |
5383 | b: __m512h, |
5384 | c: __m512h, |
5385 | ) -> __m512h { |
5386 | unsafe { |
5387 | static_assert_rounding!(ROUNDING); |
5388 | simd_select_bitmask( |
            k,
            _mm512_fmadd_round_ph::<ROUNDING>(a, b, c),
            _mm512_setzero_ph(),
5392 | ) |
5393 | } |
5394 | } |
5395 | |
5396 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate |
5397 | /// result to the lower element in c. Store the result in the lower element of dst, and copy the upper |
5398 | /// 7 packed elements from a to the upper elements of dst. |
5399 | /// |
5400 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sh) |
5401 | #[inline ] |
5402 | #[target_feature (enable = "avx512fp16" )] |
5403 | #[cfg_attr (test, assert_instr(vfmadd))] |
5404 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5405 | pub fn _mm_fmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
5406 | unsafe { |
5407 | let extracta: f16 = simd_extract!(a, 0); |
5408 | let extractb: f16 = simd_extract!(b, 0); |
5409 | let extractc: f16 = simd_extract!(c, 0); |
        let r: f16 = fmaf16(extracta, extractb, extractc);
5411 | simd_insert!(a, 0, r) |
5412 | } |
5413 | } |
5414 | |
5415 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate |
5416 | /// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element |
5417 | /// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the |
5418 | /// upper elements of dst. |
5419 | /// |
5420 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_sh) |
5421 | #[inline ] |
5422 | #[target_feature (enable = "avx512fp16" )] |
5423 | #[cfg_attr (test, assert_instr(vfmadd))] |
5424 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5425 | pub fn _mm_mask_fmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
5426 | unsafe { |
5427 | let mut fmadd: f16 = simd_extract!(a, 0); |
5428 | if k & 1 != 0 { |
5429 | let extractb: f16 = simd_extract!(b, 0); |
5430 | let extractc: f16 = simd_extract!(c, 0); |
            fmadd = fmaf16(fmadd, extractb, extractc);
5432 | } |
5433 | simd_insert!(a, 0, fmadd) |
5434 | } |
5435 | } |
5436 | |
5437 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate |
5438 | /// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element |
5439 | /// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the |
5440 | /// upper elements of dst. |
5441 | /// |
5442 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_sh) |
5443 | #[inline ] |
5444 | #[target_feature (enable = "avx512fp16" )] |
5445 | #[cfg_attr (test, assert_instr(vfmadd))] |
5446 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5447 | pub fn _mm_mask3_fmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
5448 | unsafe { |
5449 | let mut fmadd: f16 = simd_extract!(c, 0); |
5450 | if k & 1 != 0 { |
5451 | let extracta: f16 = simd_extract!(a, 0); |
5452 | let extractb: f16 = simd_extract!(b, 0); |
            fmadd = fmaf16(extracta, extractb, fmadd);
5454 | } |
5455 | simd_insert!(c, 0, fmadd) |
5456 | } |
5457 | } |
5458 | |
5459 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate |
5460 | /// result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element |
5461 | /// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the |
5462 | /// upper elements of dst. |
5463 | /// |
5464 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_sh) |
5465 | #[inline ] |
5466 | #[target_feature (enable = "avx512fp16" )] |
5467 | #[cfg_attr (test, assert_instr(vfmadd))] |
5468 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5469 | pub fn _mm_maskz_fmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
5470 | unsafe { |
5471 | let mut fmadd: f16 = 0.0; |
5472 | if k & 1 != 0 { |
5473 | let extracta: f16 = simd_extract!(a, 0); |
5474 | let extractb: f16 = simd_extract!(b, 0); |
5475 | let extractc: f16 = simd_extract!(c, 0); |
            fmadd = fmaf16(extracta, extractb, extractc);
5477 | } |
5478 | simd_insert!(a, 0, fmadd) |
5479 | } |
5480 | } |
5481 | |
5482 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate |
5483 | /// result to the lower element in c. Store the result in the lower element of dst, and copy the upper |
5484 | /// 7 packed elements from a to the upper elements of dst. |
5485 | /// |
5486 | /// Rounding is done according to the rounding parameter, which can be one of: |
5487 | /// |
5488 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5489 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5490 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5491 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5492 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5493 | /// |
5494 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_sh) |
5495 | #[inline ] |
5496 | #[target_feature (enable = "avx512fp16" )] |
5497 | #[cfg_attr (test, assert_instr(vfmadd, ROUNDING = 8))] |
5498 | #[rustc_legacy_const_generics (3)] |
5499 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5500 | pub fn _mm_fmadd_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
5501 | unsafe { |
5502 | static_assert_rounding!(ROUNDING); |
5503 | let extracta: f16 = simd_extract!(a, 0); |
5504 | let extractb: f16 = simd_extract!(b, 0); |
5505 | let extractc: f16 = simd_extract!(c, 0); |
        let r: f16 = vfmaddsh(extracta, extractb, extractc, ROUNDING);
5507 | simd_insert!(a, 0, r) |
5508 | } |
5509 | } |
5510 | |
5511 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate |
5512 | /// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element |
5513 | /// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the |
5514 | /// upper elements of dst. |
5515 | /// |
5516 | /// Rounding is done according to the rounding parameter, which can be one of: |
5517 | /// |
5518 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5519 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5520 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5521 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5522 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5523 | /// |
5524 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_sh) |
5525 | #[inline ] |
5526 | #[target_feature (enable = "avx512fp16" )] |
5527 | #[cfg_attr (test, assert_instr(vfmadd, ROUNDING = 8))] |
5528 | #[rustc_legacy_const_generics (4)] |
5529 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5530 | pub fn _mm_mask_fmadd_round_sh<const ROUNDING: i32>( |
5531 | a: __m128h, |
5532 | k: __mmask8, |
5533 | b: __m128h, |
5534 | c: __m128h, |
5535 | ) -> __m128h { |
5536 | unsafe { |
5537 | static_assert_rounding!(ROUNDING); |
5538 | let mut fmadd: f16 = simd_extract!(a, 0); |
5539 | if k & 1 != 0 { |
5540 | let extractb: f16 = simd_extract!(b, 0); |
5541 | let extractc: f16 = simd_extract!(c, 0); |
            fmadd = vfmaddsh(fmadd, extractb, extractc, ROUNDING);
5543 | } |
5544 | simd_insert!(a, 0, fmadd) |
5545 | } |
5546 | } |
5547 | |
5548 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate |
5549 | /// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element |
5550 | /// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the |
5551 | /// upper elements of dst. |
5552 | /// |
5553 | /// Rounding is done according to the rounding parameter, which can be one of: |
5554 | /// |
5555 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5556 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5557 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5558 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5559 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5560 | /// |
5561 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_round_sh) |
5562 | #[inline ] |
5563 | #[target_feature (enable = "avx512fp16" )] |
5564 | #[cfg_attr (test, assert_instr(vfmadd, ROUNDING = 8))] |
5565 | #[rustc_legacy_const_generics (4)] |
5566 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5567 | pub fn _mm_mask3_fmadd_round_sh<const ROUNDING: i32>( |
5568 | a: __m128h, |
5569 | b: __m128h, |
5570 | c: __m128h, |
5571 | k: __mmask8, |
5572 | ) -> __m128h { |
5573 | unsafe { |
5574 | static_assert_rounding!(ROUNDING); |
5575 | let mut fmadd: f16 = simd_extract!(c, 0); |
5576 | if k & 1 != 0 { |
5577 | let extracta: f16 = simd_extract!(a, 0); |
5578 | let extractb: f16 = simd_extract!(b, 0); |
            fmadd = vfmaddsh(extracta, extractb, fmadd, ROUNDING);
5580 | } |
5581 | simd_insert!(c, 0, fmadd) |
5582 | } |
5583 | } |
5584 | |
5585 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate |
5586 | /// result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element |
5587 | /// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the |
5588 | /// upper elements of dst. |
5589 | /// |
5590 | /// Rounding is done according to the rounding parameter, which can be one of: |
5591 | /// |
5592 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5593 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5594 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5595 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5596 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5597 | /// |
5598 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_sh) |
5599 | #[inline ] |
5600 | #[target_feature (enable = "avx512fp16" )] |
5601 | #[cfg_attr (test, assert_instr(vfmadd, ROUNDING = 8))] |
5602 | #[rustc_legacy_const_generics (4)] |
5603 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5604 | pub fn _mm_maskz_fmadd_round_sh<const ROUNDING: i32>( |
5605 | k: __mmask8, |
5606 | a: __m128h, |
5607 | b: __m128h, |
5608 | c: __m128h, |
5609 | ) -> __m128h { |
5610 | unsafe { |
5611 | static_assert_rounding!(ROUNDING); |
5612 | let mut fmadd: f16 = 0.0; |
5613 | if k & 1 != 0 { |
5614 | let extracta: f16 = simd_extract!(a, 0); |
5615 | let extractb: f16 = simd_extract!(b, 0); |
5616 | let extractc: f16 = simd_extract!(c, 0); |
            fmadd = vfmaddsh(extracta, extractb, extractc, ROUNDING);
5618 | } |
5619 | simd_insert!(a, 0, fmadd) |
5620 | } |
5621 | } |
5622 | |
5623 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5624 | /// in c from the intermediate result, and store the results in dst. |
5626 | /// |
5627 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_ph) |
5628 | #[inline ] |
5629 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5630 | #[cfg_attr (test, assert_instr(vfmsub))] |
5631 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5632 | pub fn _mm_fmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_fma(a, b, simd_neg(c)) }
5634 | } |
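
// Scalar sketch (illustrative only) of the identity used throughout the `fmsub`
// family: a fused multiply-subtract is the fused multiply-add with `c` negated, i.e.
// `a * b - c` evaluated with a single rounding, which the vector code expresses as
// `simd_fma(a, b, simd_neg(c))`. Shown unfused here.
#[allow(dead_code)]
fn fmsub_lane_sketch(a: f16, b: f16, c: f16) -> f16 {
    a * b - c
}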
5635 | |
5636 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5637 | /// in c from the intermediate result, and store the results in dst using writemask k (the element is copied |
5638 | /// from a when the corresponding mask bit is not set). |
5639 | /// |
5640 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_ph) |
5641 | #[inline ] |
5642 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5643 | #[cfg_attr (test, assert_instr(vfmsub))] |
5644 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5645 | pub fn _mm_mask_fmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), a) }
5647 | } |
5648 | |
5649 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5650 | /// in c from the intermediate result, and store the results in dst using writemask k (the element is copied |
5651 | /// from c when the corresponding mask bit is not set). |
5652 | /// |
5653 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_ph) |
5654 | #[inline ] |
5655 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5656 | #[cfg_attr (test, assert_instr(vfmsub))] |
5657 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5658 | pub fn _mm_mask3_fmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), c) }
5660 | } |
5661 | |
5662 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5663 | /// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed |
5664 | /// out when the corresponding mask bit is not set). |
5665 | /// |
5666 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_ph) |
5667 | #[inline ] |
5668 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5669 | #[cfg_attr (test, assert_instr(vfmsub))] |
5670 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5671 | pub fn _mm_maskz_fmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), _mm_setzero_ph()) }
5673 | } |
5674 | |
5675 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5676 | /// in c from the intermediate result, and store the results in dst. |
5677 | /// |
5678 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsub_ph) |
5679 | #[inline ] |
5680 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5681 | #[cfg_attr (test, assert_instr(vfmsub))] |
5682 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5683 | pub fn _mm256_fmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_fma(a, b, simd_neg(c)) }
5685 | } |
5686 | |
5687 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5688 | /// in c from the intermediate result, and store the results in dst using writemask k (the element is copied |
5689 | /// from a when the corresponding mask bit is not set). |
5690 | /// |
5691 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmsub_ph) |
5692 | #[inline ] |
5693 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5694 | #[cfg_attr (test, assert_instr(vfmsub))] |
5695 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5696 | pub fn _mm256_mask_fmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), a) }
5698 | } |
5699 | |
5700 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5701 | /// in c from the intermediate result, and store the results in dst using writemask k (the element is copied |
5702 | /// from c when the corresponding mask bit is not set). |
5703 | /// |
5704 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmsub_ph) |
5705 | #[inline ] |
5706 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5707 | #[cfg_attr (test, assert_instr(vfmsub))] |
5708 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5709 | pub fn _mm256_mask3_fmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), c) }
5711 | } |
5712 | |
5713 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5714 | /// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed |
5715 | /// out when the corresponding mask bit is not set). |
5716 | /// |
5717 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmsub_ph) |
5718 | #[inline ] |
5719 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5720 | #[cfg_attr (test, assert_instr(vfmsub))] |
5721 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5722 | pub fn _mm256_maskz_fmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), _mm256_setzero_ph()) }
5724 | } |
5725 | |
5726 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5727 | /// in c from the intermediate result, and store the results in dst. |
5728 | /// |
5729 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_ph) |
5730 | #[inline ] |
5731 | #[target_feature (enable = "avx512fp16" )] |
5732 | #[cfg_attr (test, assert_instr(vfmsub))] |
5733 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5734 | pub fn _mm512_fmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_fma(a, b, simd_neg(c)) }
5736 | } |
5737 | |
5738 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5739 | /// in c from the intermediate result, and store the results in dst using writemask k (the element is copied |
5740 | /// from a when the corresponding mask bit is not set). |
5741 | /// |
5742 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_ph) |
5743 | #[inline ] |
5744 | #[target_feature (enable = "avx512fp16" )] |
5745 | #[cfg_attr (test, assert_instr(vfmsub))] |
5746 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5747 | pub fn _mm512_mask_fmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), a) }
5749 | } |
5750 | |
5751 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5752 | /// in c from the intermediate result, and store the results in dst using writemask k (the element is copied |
5753 | /// from c when the corresponding mask bit is not set). |
5754 | /// |
5755 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_ph) |
5756 | #[inline ] |
5757 | #[target_feature (enable = "avx512fp16" )] |
5758 | #[cfg_attr (test, assert_instr(vfmsub))] |
5759 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5760 | pub fn _mm512_mask3_fmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), c) }
5762 | } |
5763 | |
5764 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5765 | /// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed |
5766 | /// out when the corresponding mask bit is not set). |
5767 | /// |
5768 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_ph) |
5769 | #[inline ] |
5770 | #[target_feature (enable = "avx512fp16" )] |
5771 | #[cfg_attr (test, assert_instr(vfmsub))] |
5772 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5773 | pub fn _mm512_maskz_fmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), _mm512_setzero_ph()) }
5775 | } |
5776 | |
5777 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5778 | /// in c from the intermediate result, and store the results in dst. |
5779 | /// |
5780 | /// Rounding is done according to the rounding parameter, which can be one of: |
5781 | /// |
5782 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5783 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5784 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5785 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5786 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5787 | /// |
5788 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_round_ph) |
5789 | #[inline ] |
5790 | #[target_feature (enable = "avx512fp16" )] |
5791 | #[cfg_attr (test, assert_instr(vfmsub, ROUNDING = 8))] |
5792 | #[rustc_legacy_const_generics (3)] |
5793 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5794 | pub fn _mm512_fmsub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
5795 | unsafe { |
5796 | static_assert_rounding!(ROUNDING); |
        vfmaddph_512(a, b, simd_neg(c), ROUNDING)
5798 | } |
5799 | } |
5800 | |
5801 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5802 | /// in c from the intermediate result, and store the results in dst using writemask k (the element is copied |
5803 | /// from a when the corresponding mask bit is not set). |
5804 | /// |
5805 | /// Rounding is done according to the rounding parameter, which can be one of: |
5806 | /// |
5807 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5808 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5809 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5810 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5811 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5812 | /// |
5813 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_round_ph) |
5814 | #[inline ] |
5815 | #[target_feature (enable = "avx512fp16" )] |
5816 | #[cfg_attr (test, assert_instr(vfmsub, ROUNDING = 8))] |
5817 | #[rustc_legacy_const_generics (4)] |
5818 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5819 | pub fn _mm512_mask_fmsub_round_ph<const ROUNDING: i32>( |
5820 | a: __m512h, |
5821 | k: __mmask32, |
5822 | b: __m512h, |
5823 | c: __m512h, |
5824 | ) -> __m512h { |
5825 | unsafe { |
5826 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_fmsub_round_ph::<ROUNDING>(a, b, c), a)
5828 | } |
5829 | } |
5830 | |
5831 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5832 | /// in c from the intermediate result, and store the results in dst using writemask k (the element is copied |
5833 | /// from c when the corresponding mask bit is not set). |
5834 | /// |
5835 | /// Rounding is done according to the rounding parameter, which can be one of: |
5836 | /// |
5837 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5838 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5839 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5840 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5841 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5842 | /// |
5843 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_round_ph) |
5844 | #[inline ] |
5845 | #[target_feature (enable = "avx512fp16" )] |
5846 | #[cfg_attr (test, assert_instr(vfmsub, ROUNDING = 8))] |
5847 | #[rustc_legacy_const_generics (4)] |
5848 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5849 | pub fn _mm512_mask3_fmsub_round_ph<const ROUNDING: i32>( |
5850 | a: __m512h, |
5851 | b: __m512h, |
5852 | c: __m512h, |
5853 | k: __mmask32, |
5854 | ) -> __m512h { |
5855 | unsafe { |
5856 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_fmsub_round_ph::<ROUNDING>(a, b, c), c)
5858 | } |
5859 | } |
5860 | |
5861 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5862 | /// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed |
5863 | /// out when the corresponding mask bit is not set). |
5864 | /// |
5865 | /// Rounding is done according to the rounding parameter, which can be one of: |
5866 | /// |
5867 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5868 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5869 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5870 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5871 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5872 | /// |
5873 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_round_ph) |
5874 | #[inline ] |
5875 | #[target_feature (enable = "avx512fp16" )] |
5876 | #[cfg_attr (test, assert_instr(vfmsub, ROUNDING = 8))] |
5877 | #[rustc_legacy_const_generics (4)] |
5878 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5879 | pub fn _mm512_maskz_fmsub_round_ph<const ROUNDING: i32>( |
5880 | k: __mmask32, |
5881 | a: __m512h, |
5882 | b: __m512h, |
5883 | c: __m512h, |
5884 | ) -> __m512h { |
5885 | unsafe { |
5886 | static_assert_rounding!(ROUNDING); |
5887 | simd_select_bitmask( |
            k,
            _mm512_fmsub_round_ph::<ROUNDING>(a, b, c),
            _mm512_setzero_ph(),
5891 | ) |
5892 | } |
5893 | } |
5894 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper
5897 | /// 7 packed elements from a to the upper elements of dst. |
5898 | /// |
5899 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_sh) |
5900 | #[inline ] |
5901 | #[target_feature (enable = "avx512fp16" )] |
5902 | #[cfg_attr (test, assert_instr(vfmsub))] |
5903 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5904 | pub fn _mm_fmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
5905 | unsafe { |
5906 | let extracta: f16 = simd_extract!(a, 0); |
5907 | let extractb: f16 = simd_extract!(b, 0); |
5908 | let extractc: f16 = simd_extract!(c, 0); |
let r: f16 = fmaf16(extracta, extractb, -extractc);
5910 | simd_insert!(a, 0, r) |
5911 | } |
5912 | } |
5913 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element
5916 | /// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the |
5917 | /// upper elements of dst. |
5918 | /// |
5919 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_sh) |
5920 | #[inline ] |
5921 | #[target_feature (enable = "avx512fp16" )] |
5922 | #[cfg_attr (test, assert_instr(vfmsub))] |
5923 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5924 | pub fn _mm_mask_fmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
5925 | unsafe { |
5926 | let mut fmsub: f16 = simd_extract!(a, 0); |
5927 | if k & 1 != 0 { |
5928 | let extractb: f16 = simd_extract!(b, 0); |
5929 | let extractc: f16 = simd_extract!(c, 0); |
fmsub = fmaf16(fmsub, extractb, -extractc);
5931 | } |
5932 | simd_insert!(a, 0, fmsub) |
5933 | } |
5934 | } |
5935 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element
5938 | /// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the |
5939 | /// upper elements of dst. |
5940 | /// |
5941 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_sh) |
5942 | #[inline ] |
5943 | #[target_feature (enable = "avx512fp16" )] |
5944 | #[cfg_attr (test, assert_instr(vfmsub))] |
5945 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5946 | pub fn _mm_mask3_fmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
5947 | unsafe { |
5948 | let mut fmsub: f16 = simd_extract!(c, 0); |
5949 | if k & 1 != 0 { |
5950 | let extracta: f16 = simd_extract!(a, 0); |
5951 | let extractb: f16 = simd_extract!(b, 0); |
fmsub = fmaf16(extracta, extractb, -fmsub);
5953 | } |
5954 | simd_insert!(c, 0, fmsub) |
5955 | } |
5956 | } |
5957 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element
5960 | /// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the |
5961 | /// upper elements of dst. |
5962 | /// |
5963 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_sh) |
5964 | #[inline ] |
5965 | #[target_feature (enable = "avx512fp16" )] |
5966 | #[cfg_attr (test, assert_instr(vfmsub))] |
5967 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5968 | pub fn _mm_maskz_fmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
5969 | unsafe { |
5970 | let mut fmsub: f16 = 0.0; |
5971 | if k & 1 != 0 { |
5972 | let extracta: f16 = simd_extract!(a, 0); |
5973 | let extractb: f16 = simd_extract!(b, 0); |
5974 | let extractc: f16 = simd_extract!(c, 0); |
fmsub = fmaf16(extracta, extractb, -extractc);
5976 | } |
5977 | simd_insert!(a, 0, fmsub) |
5978 | } |
5979 | } |
5980 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper
5983 | /// 7 packed elements from a to the upper elements of dst. |
5984 | /// |
5985 | /// Rounding is done according to the rounding parameter, which can be one of: |
5986 | /// |
5987 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5988 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5989 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5990 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5991 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5992 | /// |
5993 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_round_sh) |
5994 | #[inline ] |
5995 | #[target_feature (enable = "avx512fp16" )] |
5996 | #[cfg_attr (test, assert_instr(vfmsub, ROUNDING = 8))] |
5997 | #[rustc_legacy_const_generics (3)] |
5998 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5999 | pub fn _mm_fmsub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
6000 | unsafe { |
6001 | static_assert_rounding!(ROUNDING); |
6002 | let extracta: f16 = simd_extract!(a, 0); |
6003 | let extractb: f16 = simd_extract!(b, 0); |
6004 | let extractc: f16 = simd_extract!(c, 0); |
let r: f16 = vfmaddsh(extracta, extractb, -extractc, ROUNDING);
6006 | simd_insert!(a, 0, r) |
6007 | } |
6008 | } |
6009 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element
6012 | /// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the |
6013 | /// upper elements of dst. |
6014 | /// |
6015 | /// Rounding is done according to the rounding parameter, which can be one of: |
6016 | /// |
6017 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6018 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6019 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6020 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6021 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6022 | /// |
6023 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_round_sh) |
6024 | #[inline ] |
6025 | #[target_feature (enable = "avx512fp16" )] |
6026 | #[cfg_attr (test, assert_instr(vfmsub, ROUNDING = 8))] |
6027 | #[rustc_legacy_const_generics (4)] |
6028 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6029 | pub fn _mm_mask_fmsub_round_sh<const ROUNDING: i32>( |
6030 | a: __m128h, |
6031 | k: __mmask8, |
6032 | b: __m128h, |
6033 | c: __m128h, |
6034 | ) -> __m128h { |
6035 | unsafe { |
6036 | static_assert_rounding!(ROUNDING); |
6037 | let mut fmsub: f16 = simd_extract!(a, 0); |
6038 | if k & 1 != 0 { |
6039 | let extractb: f16 = simd_extract!(b, 0); |
6040 | let extractc: f16 = simd_extract!(c, 0); |
fmsub = vfmaddsh(fmsub, extractb, -extractc, ROUNDING);
6042 | } |
6043 | simd_insert!(a, 0, fmsub) |
6044 | } |
6045 | } |
6046 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element
6049 | /// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the |
6050 | /// upper elements of dst. |
6051 | /// |
6052 | /// Rounding is done according to the rounding parameter, which can be one of: |
6053 | /// |
6054 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6055 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6056 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6057 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6058 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6059 | /// |
6060 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_round_sh) |
6061 | #[inline ] |
6062 | #[target_feature (enable = "avx512fp16" )] |
6063 | #[cfg_attr (test, assert_instr(vfmsub, ROUNDING = 8))] |
6064 | #[rustc_legacy_const_generics (4)] |
6065 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6066 | pub fn _mm_mask3_fmsub_round_sh<const ROUNDING: i32>( |
6067 | a: __m128h, |
6068 | b: __m128h, |
6069 | c: __m128h, |
6070 | k: __mmask8, |
6071 | ) -> __m128h { |
6072 | unsafe { |
6073 | static_assert_rounding!(ROUNDING); |
6074 | let mut fmsub: f16 = simd_extract!(c, 0); |
6075 | if k & 1 != 0 { |
6076 | let extracta: f16 = simd_extract!(a, 0); |
6077 | let extractb: f16 = simd_extract!(b, 0); |
fmsub = vfmaddsh(extracta, extractb, -fmsub, ROUNDING);
6079 | } |
6080 | simd_insert!(c, 0, fmsub) |
6081 | } |
6082 | } |
6083 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element
/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
/// upper elements of dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
6089 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_round_sh) |
6090 | #[inline ] |
6091 | #[target_feature (enable = "avx512fp16" )] |
6092 | #[cfg_attr (test, assert_instr(vfmsub, ROUNDING = 8))] |
6093 | #[rustc_legacy_const_generics (4)] |
6094 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6095 | pub fn _mm_maskz_fmsub_round_sh<const ROUNDING: i32>( |
6096 | k: __mmask8, |
6097 | a: __m128h, |
6098 | b: __m128h, |
6099 | c: __m128h, |
6100 | ) -> __m128h { |
6101 | unsafe { |
6102 | static_assert_rounding!(ROUNDING); |
6103 | let mut fmsub: f16 = 0.0; |
6104 | if k & 1 != 0 { |
6105 | let extracta: f16 = simd_extract!(a, 0); |
6106 | let extractb: f16 = simd_extract!(b, 0); |
6107 | let extractc: f16 = simd_extract!(c, 0); |
fmsub = vfmaddsh(extracta, extractb, -extractc, ROUNDING);
6109 | } |
6110 | simd_insert!(a, 0, fmsub) |
6111 | } |
6112 | } |
6113 | |
6114 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6115 | /// result from packed elements in c, and store the results in dst. |
6116 | /// |
6117 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_ph) |
6118 | #[inline ] |
6119 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6120 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6121 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6122 | pub fn _mm_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
unsafe { simd_fma(simd_neg(a), b, c) }
6124 | } |
6125 | |
6126 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6127 | /// result from packed elements in c, and store the results in dst using writemask k (the element is copied |
6128 | /// from a when the corresponding mask bit is not set). |
6129 | /// |
6130 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_ph) |
6131 | #[inline ] |
6132 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6133 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6134 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6135 | pub fn _mm_mask_fnmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), a) }
6137 | } |
6138 | |
6139 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6140 | /// result from packed elements in c, and store the results in dst using writemask k (the element is copied |
6141 | /// from c when the corresponding mask bit is not set). |
6142 | /// |
6143 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_ph) |
6144 | #[inline ] |
6145 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6146 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6147 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6148 | pub fn _mm_mask3_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), c) }
6150 | } |
6151 | |
6152 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6153 | /// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed |
6154 | /// out when the corresponding mask bit is not set). |
6155 | /// |
6156 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_ph) |
6157 | #[inline ] |
6158 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6159 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6160 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6161 | pub fn _mm_maskz_fnmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), _mm_setzero_ph()) }
6163 | } |
6164 | |
6165 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6166 | /// result from packed elements in c, and store the results in dst. |
6167 | /// |
6168 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmadd_ph) |
6169 | #[inline ] |
6170 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6171 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6172 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6173 | pub fn _mm256_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
unsafe { simd_fma(simd_neg(a), b, c) }
6175 | } |
6176 | |
6177 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6178 | /// result from packed elements in c, and store the results in dst using writemask k (the element is copied |
6179 | /// from a when the corresponding mask bit is not set). |
6180 | /// |
6181 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fnmadd_ph) |
6182 | #[inline ] |
6183 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6184 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6185 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6186 | pub fn _mm256_mask_fnmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { |
unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), a) }
6188 | } |
6189 | |
6190 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6191 | /// result from packed elements in c, and store the results in dst using writemask k (the element is copied |
6192 | /// from c when the corresponding mask bit is not set). |
6193 | /// |
6194 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fnmadd_ph) |
6195 | #[inline ] |
6196 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6197 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6198 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6199 | pub fn _mm256_mask3_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { |
unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), c) }
6201 | } |
6202 | |
6203 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6204 | /// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed |
6205 | /// out when the corresponding mask bit is not set). |
6206 | /// |
6207 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fnmadd_ph) |
6208 | #[inline ] |
6209 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6210 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6211 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6212 | pub fn _mm256_maskz_fnmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), _mm256_setzero_ph()) }
6214 | } |
6215 | |
6216 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6217 | /// result from packed elements in c, and store the results in dst. |
6218 | /// |
6219 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_ph) |
6220 | #[inline ] |
6221 | #[target_feature (enable = "avx512fp16" )] |
6222 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6223 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6224 | pub fn _mm512_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
unsafe { simd_fma(simd_neg(a), b, c) }
6226 | } |
6227 | |
6228 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6229 | /// result from packed elements in c, and store the results in dst using writemask k (the element is copied |
6230 | /// from a when the corresponding mask bit is not set). |
6231 | /// |
6232 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_ph) |
6233 | #[inline ] |
6234 | #[target_feature (enable = "avx512fp16" )] |
6235 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6236 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6237 | pub fn _mm512_mask_fnmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { |
unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), a) }
6239 | } |
6240 | |
6241 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6242 | /// result from packed elements in c, and store the results in dst using writemask k (the element is copied |
6243 | /// from c when the corresponding mask bit is not set). |
6244 | /// |
6245 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_ph) |
6246 | #[inline ] |
6247 | #[target_feature (enable = "avx512fp16" )] |
6248 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6249 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6250 | pub fn _mm512_mask3_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { |
unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), c) }
6252 | } |
6253 | |
6254 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6255 | /// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed |
6256 | /// out when the corresponding mask bit is not set). |
6257 | /// |
6258 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_ph) |
6259 | #[inline ] |
6260 | #[target_feature (enable = "avx512fp16" )] |
6261 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6262 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6263 | pub fn _mm512_maskz_fnmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), _mm512_setzero_ph()) }
6265 | } |
6266 | |
6267 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6268 | /// result from packed elements in c, and store the results in dst. |
6269 | /// |
6270 | /// Rounding is done according to the rounding parameter, which can be one of: |
6271 | /// |
6272 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6273 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6274 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6275 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6276 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6277 | /// |
6278 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_round_ph) |
6279 | #[inline ] |
6280 | #[target_feature (enable = "avx512fp16" )] |
6281 | #[cfg_attr (test, assert_instr(vfnmadd, ROUNDING = 8))] |
6282 | #[rustc_legacy_const_generics (3)] |
6283 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6284 | pub fn _mm512_fnmadd_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
6285 | unsafe { |
6286 | static_assert_rounding!(ROUNDING); |
vfmaddph_512(simd_neg(a), b, c, ROUNDING)
6288 | } |
6289 | } |
6290 | |
6291 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6292 | /// result from packed elements in c, and store the results in dst using writemask k (the element is copied |
6293 | /// from a when the corresponding mask bit is not set). |
6294 | /// |
6295 | /// Rounding is done according to the rounding parameter, which can be one of: |
6296 | /// |
6297 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6298 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6299 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6300 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6301 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6302 | /// |
6303 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_round_ph) |
6304 | #[inline ] |
6305 | #[target_feature (enable = "avx512fp16" )] |
6306 | #[cfg_attr (test, assert_instr(vfnmadd, ROUNDING = 8))] |
6307 | #[rustc_legacy_const_generics (4)] |
6308 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6309 | pub fn _mm512_mask_fnmadd_round_ph<const ROUNDING: i32>( |
6310 | a: __m512h, |
6311 | k: __mmask32, |
6312 | b: __m512h, |
6313 | c: __m512h, |
6314 | ) -> __m512h { |
6315 | unsafe { |
6316 | static_assert_rounding!(ROUNDING); |
simd_select_bitmask(k, _mm512_fnmadd_round_ph::<ROUNDING>(a, b, c), a)
6318 | } |
6319 | } |
6320 | |
6321 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6322 | /// result from packed elements in c, and store the results in dst using writemask k (the element is copied |
6323 | /// from c when the corresponding mask bit is not set). |
6324 | /// |
6325 | /// Rounding is done according to the rounding parameter, which can be one of: |
6326 | /// |
6327 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6328 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6329 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6330 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6331 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6332 | /// |
6333 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_round_ph) |
6334 | #[inline ] |
6335 | #[target_feature (enable = "avx512fp16" )] |
6336 | #[cfg_attr (test, assert_instr(vfnmadd, ROUNDING = 8))] |
6337 | #[rustc_legacy_const_generics (4)] |
6338 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6339 | pub fn _mm512_mask3_fnmadd_round_ph<const ROUNDING: i32>( |
6340 | a: __m512h, |
6341 | b: __m512h, |
6342 | c: __m512h, |
6343 | k: __mmask32, |
6344 | ) -> __m512h { |
6345 | unsafe { |
6346 | static_assert_rounding!(ROUNDING); |
simd_select_bitmask(k, _mm512_fnmadd_round_ph::<ROUNDING>(a, b, c), c)
6348 | } |
6349 | } |
6350 | |
6351 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6352 | /// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed |
6353 | /// out when the corresponding mask bit is not set). |
6354 | /// |
6355 | /// Rounding is done according to the rounding parameter, which can be one of: |
6356 | /// |
6357 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6358 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6359 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6360 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6361 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6362 | /// |
6363 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_round_ph) |
6364 | #[inline ] |
6365 | #[target_feature (enable = "avx512fp16" )] |
6366 | #[cfg_attr (test, assert_instr(vfnmadd, ROUNDING = 8))] |
6367 | #[rustc_legacy_const_generics (4)] |
6368 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6369 | pub fn _mm512_maskz_fnmadd_round_ph<const ROUNDING: i32>( |
6370 | k: __mmask32, |
6371 | a: __m512h, |
6372 | b: __m512h, |
6373 | c: __m512h, |
6374 | ) -> __m512h { |
6375 | unsafe { |
6376 | static_assert_rounding!(ROUNDING); |
simd_select_bitmask(k, _mm512_fnmadd_round_ph::<ROUNDING>(a, b, c), _mm512_setzero_ph())
6382 | } |
6383 | } |
6384 | |
6385 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate |
6386 | /// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed |
6387 | /// elements from a to the upper elements of dst. |
6388 | /// |
6389 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_sh) |
6390 | #[inline ] |
6391 | #[target_feature (enable = "avx512fp16" )] |
6392 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6393 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6394 | pub fn _mm_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
6395 | unsafe { |
6396 | let extracta: f16 = simd_extract!(a, 0); |
6397 | let extractb: f16 = simd_extract!(b, 0); |
6398 | let extractc: f16 = simd_extract!(c, 0); |
let r: f16 = fmaf16(-extracta, extractb, extractc);
6400 | simd_insert!(a, 0, r) |
6401 | } |
6402 | } |
6403 | |
6404 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate |
6405 | /// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element |
6406 | /// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
6407 | /// elements of dst. |
6408 | /// |
6409 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_sh) |
6410 | #[inline ] |
6411 | #[target_feature (enable = "avx512fp16" )] |
6412 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6413 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6414 | pub fn _mm_mask_fnmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
6415 | unsafe { |
6416 | let mut fnmadd: f16 = simd_extract!(a, 0); |
6417 | if k & 1 != 0 { |
6418 | let extractb: f16 = simd_extract!(b, 0); |
6419 | let extractc: f16 = simd_extract!(c, 0); |
fnmadd = fmaf16(-fnmadd, extractb, extractc);
6421 | } |
6422 | simd_insert!(a, 0, fnmadd) |
6423 | } |
6424 | } |
6425 | |
6426 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate |
6427 | /// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element |
6428 | /// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper |
6429 | /// elements of dst. |
6430 | /// |
6431 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_sh) |
6432 | #[inline ] |
6433 | #[target_feature (enable = "avx512fp16" )] |
6434 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6435 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6436 | pub fn _mm_mask3_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
6437 | unsafe { |
6438 | let mut fnmadd: f16 = simd_extract!(c, 0); |
6439 | if k & 1 != 0 { |
6440 | let extracta: f16 = simd_extract!(a, 0); |
6441 | let extractb: f16 = simd_extract!(b, 0); |
fnmadd = fmaf16(-extracta, extractb, fnmadd);
6443 | } |
6444 | simd_insert!(c, 0, fnmadd) |
6445 | } |
6446 | } |
6447 | |
6448 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate |
6449 | /// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element |
6450 | /// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
6451 | /// elements of dst. |
6452 | /// |
6453 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_sh) |
6454 | #[inline ] |
6455 | #[target_feature (enable = "avx512fp16" )] |
6456 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6457 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6458 | pub fn _mm_maskz_fnmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
6459 | unsafe { |
6460 | let mut fnmadd: f16 = 0.0; |
6461 | if k & 1 != 0 { |
6462 | let extracta: f16 = simd_extract!(a, 0); |
6463 | let extractb: f16 = simd_extract!(b, 0); |
6464 | let extractc: f16 = simd_extract!(c, 0); |
fnmadd = fmaf16(-extracta, extractb, extractc);
6466 | } |
6467 | simd_insert!(a, 0, fnmadd) |
6468 | } |
6469 | } |
6470 | |
6471 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate |
6472 | /// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed |
6473 | /// elements from a to the upper elements of dst. |
6474 | /// |
6475 | /// Rounding is done according to the rounding parameter, which can be one of: |
6476 | /// |
6477 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6478 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6479 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6480 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6481 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6482 | /// |
6483 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_round_sh) |
6484 | #[inline ] |
6485 | #[target_feature (enable = "avx512fp16" )] |
6486 | #[cfg_attr (test, assert_instr(vfnmadd, ROUNDING = 8))] |
6487 | #[rustc_legacy_const_generics (3)] |
6488 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6489 | pub fn _mm_fnmadd_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
6490 | unsafe { |
6491 | static_assert_rounding!(ROUNDING); |
6492 | let extracta: f16 = simd_extract!(a, 0); |
6493 | let extractb: f16 = simd_extract!(b, 0); |
6494 | let extractc: f16 = simd_extract!(c, 0); |
let r: f16 = vfmaddsh(-extracta, extractb, extractc, ROUNDING);
6496 | simd_insert!(a, 0, r) |
6497 | } |
6498 | } |
6499 | |
6500 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate |
6501 | /// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element |
6502 | /// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
6503 | /// elements of dst. |
6504 | /// |
6505 | /// Rounding is done according to the rounding parameter, which can be one of: |
6506 | /// |
6507 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6508 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6509 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6510 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6511 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6512 | /// |
6513 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_round_sh) |
6514 | #[inline ] |
6515 | #[target_feature (enable = "avx512fp16" )] |
6516 | #[cfg_attr (test, assert_instr(vfnmadd, ROUNDING = 8))] |
6517 | #[rustc_legacy_const_generics (4)] |
6518 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6519 | pub fn _mm_mask_fnmadd_round_sh<const ROUNDING: i32>( |
6520 | a: __m128h, |
6521 | k: __mmask8, |
6522 | b: __m128h, |
6523 | c: __m128h, |
6524 | ) -> __m128h { |
6525 | unsafe { |
6526 | static_assert_rounding!(ROUNDING); |
6527 | let mut fnmadd: f16 = simd_extract!(a, 0); |
6528 | if k & 1 != 0 { |
6529 | let extractb: f16 = simd_extract!(b, 0); |
6530 | let extractc: f16 = simd_extract!(c, 0); |
fnmadd = vfmaddsh(-fnmadd, extractb, extractc, ROUNDING);
6532 | } |
6533 | simd_insert!(a, 0, fnmadd) |
6534 | } |
6535 | } |
6536 | |
6537 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate |
6538 | /// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element |
6539 | /// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper |
6540 | /// elements of dst. |
6541 | /// |
6542 | /// Rounding is done according to the rounding parameter, which can be one of: |
6543 | /// |
6544 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6545 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6546 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6547 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6548 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6549 | /// |
6550 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_round_sh) |
6551 | #[inline ] |
6552 | #[target_feature (enable = "avx512fp16" )] |
6553 | #[cfg_attr (test, assert_instr(vfnmadd, ROUNDING = 8))] |
6554 | #[rustc_legacy_const_generics (4)] |
6555 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6556 | pub fn _mm_mask3_fnmadd_round_sh<const ROUNDING: i32>( |
6557 | a: __m128h, |
6558 | b: __m128h, |
6559 | c: __m128h, |
6560 | k: __mmask8, |
6561 | ) -> __m128h { |
6562 | unsafe { |
6563 | static_assert_rounding!(ROUNDING); |
6564 | let mut fnmadd: f16 = simd_extract!(c, 0); |
6565 | if k & 1 != 0 { |
6566 | let extracta: f16 = simd_extract!(a, 0); |
6567 | let extractb: f16 = simd_extract!(b, 0); |
fnmadd = vfmaddsh(-extracta, extractb, fnmadd, ROUNDING);
6569 | } |
6570 | simd_insert!(c, 0, fnmadd) |
6571 | } |
6572 | } |
6573 | |
6574 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate |
6575 | /// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element |
6576 | /// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
6577 | /// elements of dst. |
6578 | /// |
6579 | /// Rounding is done according to the rounding parameter, which can be one of: |
6580 | /// |
6581 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6582 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6583 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6584 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6585 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6586 | /// |
6587 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_round_sh) |
6588 | #[inline ] |
6589 | #[target_feature (enable = "avx512fp16" )] |
6590 | #[cfg_attr (test, assert_instr(vfnmadd, ROUNDING = 8))] |
6591 | #[rustc_legacy_const_generics (4)] |
6592 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6593 | pub fn _mm_maskz_fnmadd_round_sh<const ROUNDING: i32>( |
6594 | k: __mmask8, |
6595 | a: __m128h, |
6596 | b: __m128h, |
6597 | c: __m128h, |
6598 | ) -> __m128h { |
6599 | unsafe { |
6600 | static_assert_rounding!(ROUNDING); |
6601 | let mut fnmadd: f16 = 0.0; |
6602 | if k & 1 != 0 { |
6603 | let extracta: f16 = simd_extract!(a, 0); |
6604 | let extractb: f16 = simd_extract!(b, 0); |
6605 | let extractc: f16 = simd_extract!(c, 0); |
fnmadd = vfmaddsh(-extracta, extractb, extractc, ROUNDING);
6607 | } |
6608 | simd_insert!(a, 0, fnmadd) |
6609 | } |
6610 | } |
6611 | |
6612 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6613 | /// in c from the negated intermediate result, and store the results in dst. |
6614 | /// |
6615 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_ph) |
6616 | #[inline ] |
6617 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6618 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6619 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6620 | pub fn _mm_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
6622 | } |
6623 | |
6624 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6625 | /// in c from the negated intermediate result, and store the results in dst using writemask k (the element is |
6626 | /// copied from a when the corresponding mask bit is not set). |
6627 | /// |
6628 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_ph) |
6629 | #[inline ] |
6630 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6631 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6632 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6633 | pub fn _mm_mask_fnmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), a) }
6635 | } |
6636 | |
6637 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6638 | /// in c from the negated intermediate result, and store the results in dst using writemask k (the element is |
6639 | /// copied from c when the corresponding mask bit is not set). |
6640 | /// |
6641 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_ph) |
6642 | #[inline ] |
6643 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6644 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6645 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6646 | pub fn _mm_mask3_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), c) }
6648 | } |
6649 | |
6650 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6651 | /// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is |
6652 | /// zeroed out when the corresponding mask bit is not set). |
6653 | /// |
6654 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_ph) |
6655 | #[inline ] |
6656 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6657 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6658 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6659 | pub fn _mm_maskz_fnmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), _mm_setzero_ph()) }
6661 | } |
6662 | |
6663 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6664 | /// in c from the negated intermediate result, and store the results in dst. |
6665 | /// |
6666 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmsub_ph) |
6667 | #[inline ] |
6668 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6669 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6670 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6671 | pub fn _mm256_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
6673 | } |
6674 | |
6675 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6676 | /// in c from the negated intermediate result, and store the results in dst using writemask k (the element is |
6677 | /// copied from a when the corresponding mask bit is not set). |
6678 | /// |
6679 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fnmsub_ph) |
6680 | #[inline ] |
6681 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6682 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6683 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6684 | pub fn _mm256_mask_fnmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { |
unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), a) }
6686 | } |
6687 | |
6688 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6689 | /// in c from the negated intermediate result, and store the results in dst using writemask k (the element is |
6690 | /// copied from c when the corresponding mask bit is not set). |
6691 | /// |
6692 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fnmsub_ph) |
6693 | #[inline ] |
6694 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6695 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6696 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6697 | pub fn _mm256_mask3_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { |
unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), c) }
6699 | } |
6700 | |
6701 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6702 | /// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is |
6703 | /// zeroed out when the corresponding mask bit is not set). |
6704 | /// |
6705 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fnmsub_ph) |
6706 | #[inline ] |
6707 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6708 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6709 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6710 | pub fn _mm256_maskz_fnmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), _mm256_setzero_ph()) }
6712 | } |
6713 | |
6714 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6715 | /// in c from the negated intermediate result, and store the results in dst. |
6716 | /// |
6717 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_ph) |
6718 | #[inline ] |
6719 | #[target_feature (enable = "avx512fp16" )] |
6720 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6721 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6722 | pub fn _mm512_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
6724 | } |
6725 | |
6726 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6727 | /// in c from the negated intermediate result, and store the results in dst using writemask k (the element is |
6728 | /// copied from a when the corresponding mask bit is not set). |
6729 | /// |
6730 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_ph) |
6731 | #[inline ] |
6732 | #[target_feature (enable = "avx512fp16" )] |
6733 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6734 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6735 | pub fn _mm512_mask_fnmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { |
unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), a) }
6737 | } |
6738 | |
6739 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6740 | /// in c from the negated intermediate result, and store the results in dst using writemask k (the element is |
6741 | /// copied from c when the corresponding mask bit is not set). |
6742 | /// |
6743 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_ph) |
6744 | #[inline ] |
6745 | #[target_feature (enable = "avx512fp16" )] |
6746 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6747 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6748 | pub fn _mm512_mask3_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { |
unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), c) }
6750 | } |
6751 | |
6752 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6753 | /// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is |
6754 | /// zeroed out when the corresponding mask bit is not set). |
6755 | /// |
6756 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_ph) |
6757 | #[inline ] |
6758 | #[target_feature (enable = "avx512fp16" )] |
6759 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6760 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6761 | pub fn _mm512_maskz_fnmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), _mm512_setzero_ph()) }
6763 | } |
6764 | |
6765 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6766 | /// in c from the negated intermediate result, and store the results in dst. |
6767 | /// |
6768 | /// Rounding is done according to the rounding parameter, which can be one of: |
6769 | /// |
6770 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6771 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6772 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6773 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6774 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6775 | /// |
6776 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_round_ph) |
6777 | #[inline ] |
6778 | #[target_feature (enable = "avx512fp16" )] |
6779 | #[cfg_attr (test, assert_instr(vfnmsub, ROUNDING = 8))] |
6780 | #[rustc_legacy_const_generics (3)] |
6781 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6782 | pub fn _mm512_fnmsub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
6783 | unsafe { |
6784 | static_assert_rounding!(ROUNDING); |
vfmaddph_512(simd_neg(a), b, simd_neg(c), ROUNDING)
6786 | } |
6787 | } |
6788 | |
6789 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6790 | /// in c from the negated intermediate result, and store the results in dst using writemask k (the element is |
6791 | /// copied from a when the corresponding mask bit is not set). |
6792 | /// |
6793 | /// Rounding is done according to the rounding parameter, which can be one of: |
6794 | /// |
6795 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6796 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6797 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6798 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6799 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6800 | /// |
6801 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_round_ph) |
6802 | #[inline ] |
6803 | #[target_feature (enable = "avx512fp16" )] |
6804 | #[cfg_attr (test, assert_instr(vfnmsub, ROUNDING = 8))] |
6805 | #[rustc_legacy_const_generics (4)] |
6806 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6807 | pub fn _mm512_mask_fnmsub_round_ph<const ROUNDING: i32>( |
6808 | a: __m512h, |
6809 | k: __mmask32, |
6810 | b: __m512h, |
6811 | c: __m512h, |
6812 | ) -> __m512h { |
6813 | unsafe { |
6814 | static_assert_rounding!(ROUNDING); |
simd_select_bitmask(k, _mm512_fnmsub_round_ph::<ROUNDING>(a, b, c), a)
6816 | } |
6817 | } |
6818 | |
6819 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6820 | /// in c from the negated intermediate result, and store the results in dst using writemask k (the element is |
6821 | /// copied from c when the corresponding mask bit is not set). |
6822 | /// |
6823 | /// Rounding is done according to the rounding parameter, which can be one of: |
6824 | /// |
6825 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6826 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6827 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6828 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6829 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6830 | /// |
6831 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_round_ph) |
6832 | #[inline ] |
6833 | #[target_feature (enable = "avx512fp16" )] |
6834 | #[cfg_attr (test, assert_instr(vfnmsub, ROUNDING = 8))] |
6835 | #[rustc_legacy_const_generics (4)] |
6836 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6837 | pub fn _mm512_mask3_fnmsub_round_ph<const ROUNDING: i32>( |
6838 | a: __m512h, |
6839 | b: __m512h, |
6840 | c: __m512h, |
6841 | k: __mmask32, |
6842 | ) -> __m512h { |
6843 | unsafe { |
6844 | static_assert_rounding!(ROUNDING); |
simd_select_bitmask(k, _mm512_fnmsub_round_ph::<ROUNDING>(a, b, c), c)
6846 | } |
6847 | } |
6848 | |
6849 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6850 | /// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is |
6851 | /// zeroed out when the corresponding mask bit is not set). |
6852 | /// |
6853 | /// Rounding is done according to the rounding parameter, which can be one of: |
6854 | /// |
6855 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6856 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6857 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6858 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6859 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6860 | /// |
6861 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_round_ph) |
6862 | #[inline ] |
6863 | #[target_feature (enable = "avx512fp16" )] |
6864 | #[cfg_attr (test, assert_instr(vfnmsub, ROUNDING = 8))] |
6865 | #[rustc_legacy_const_generics (4)] |
6866 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6867 | pub fn _mm512_maskz_fnmsub_round_ph<const ROUNDING: i32>( |
6868 | k: __mmask32, |
6869 | a: __m512h, |
6870 | b: __m512h, |
6871 | c: __m512h, |
6872 | ) -> __m512h { |
6873 | unsafe { |
6874 | static_assert_rounding!(ROUNDING); |
simd_select_bitmask(k, _mm512_fnmsub_round_ph::<ROUNDING>(a, b, c), _mm512_setzero_ph())
6880 | } |
6881 | } |
6882 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
/// in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper 7 packed
6885 | /// elements from a to the upper elements of dst. |
6886 | /// |
6887 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_sh) |
6888 | #[inline ] |
6889 | #[target_feature (enable = "avx512fp16" )] |
6890 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6891 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6892 | pub fn _mm_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
6893 | unsafe { |
6894 | let extracta: f16 = simd_extract!(a, 0); |
6895 | let extractb: f16 = simd_extract!(b, 0); |
6896 | let extractc: f16 = simd_extract!(c, 0); |
6897 |         let r: f16 = fmaf16(-extracta, extractb, -extractc); |
6898 | simd_insert!(a, 0, r) |
6899 | } |
6900 | } |
6901 | |
6902 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element |
6903 | /// in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element |
6904 | /// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
6905 | /// elements of dst. |
6906 | /// |
6907 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_sh) |
6908 | #[inline ] |
6909 | #[target_feature (enable = "avx512fp16" )] |
6910 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6911 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6912 | pub fn _mm_mask_fnmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
6913 | unsafe { |
6914 | let mut fnmsub: f16 = simd_extract!(a, 0); |
6915 | if k & 1 != 0 { |
6916 | let extractb: f16 = simd_extract!(b, 0); |
6917 | let extractc: f16 = simd_extract!(c, 0); |
6918 |             fnmsub = fmaf16(-fnmsub, extractb, -extractc); |
6919 | } |
6920 | simd_insert!(a, 0, fnmsub) |
6921 | } |
6922 | } |
6923 | |
6924 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element |
6925 | /// in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element |
6926 | /// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper |
6927 | /// elements of dst. |
6928 | /// |
6929 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_sh) |
6930 | #[inline ] |
6931 | #[target_feature (enable = "avx512fp16" )] |
6932 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6933 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6934 | pub fn _mm_mask3_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
6935 | unsafe { |
6936 | let mut fnmsub: f16 = simd_extract!(c, 0); |
6937 | if k & 1 != 0 { |
6938 | let extracta: f16 = simd_extract!(a, 0); |
6939 | let extractb: f16 = simd_extract!(b, 0); |
6940 |             fnmsub = fmaf16(-extracta, extractb, -fnmsub); |
6941 | } |
6942 | simd_insert!(c, 0, fnmsub) |
6943 | } |
6944 | } |
6945 | |
6946 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element |
6947 | /// in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element |
6948 | /// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
6949 | /// elements of dst. |
6950 | /// |
6951 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_sh) |
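///
/// # Example
///
/// Illustrative sketch only (assumes nightly with `stdarch_x86_avx512_f16` and runtime
/// `avx512fp16` support), showing how mask bit 0 selects between the computed value and zero:
///
/// ```ignore
/// let a = _mm_set_sh(2.0);
/// let b = _mm_set_sh(3.0);
/// let c = _mm_set_sh(1.0);
/// // mask bit 0 set: lower lane = -(2.0 * 3.0) - 1.0 = -7.0
/// let r_set = unsafe { _mm_maskz_fnmsub_sh(0b1, a, b, c) };
/// // mask bit 0 clear: lower lane is zeroed out
/// let r_clear = unsafe { _mm_maskz_fnmsub_sh(0b0, a, b, c) };
/// ```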
6952 | #[inline ] |
6953 | #[target_feature (enable = "avx512fp16" )] |
6954 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6955 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6956 | pub fn _mm_maskz_fnmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
6957 | unsafe { |
6958 | let mut fnmsub: f16 = 0.0; |
6959 | if k & 1 != 0 { |
6960 | let extracta: f16 = simd_extract!(a, 0); |
6961 | let extractb: f16 = simd_extract!(b, 0); |
6962 | let extractc: f16 = simd_extract!(c, 0); |
6963 |             fnmsub = fmaf16(-extracta, extractb, -extractc); |
6964 | } |
6965 | simd_insert!(a, 0, fnmsub) |
6966 | } |
6967 | } |
6968 | |
6969 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element |
6970 | /// in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper 7 packed |
6971 | /// elements from a to the upper elements of dst. |
6972 | /// |
6973 | /// Rounding is done according to the rounding parameter, which can be one of: |
6974 | /// |
6975 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6976 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6977 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6978 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6979 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6980 | /// |
6981 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_round_sh) |
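///
/// # Example
///
/// Illustrative sketch (not from Intel's documentation) of passing the rounding mode as a
/// const generic; assumes nightly with `stdarch_x86_avx512_f16` and `avx512fp16` support:
///
/// ```ignore
/// let a = _mm_set_sh(2.0);
/// let b = _mm_set_sh(3.0);
/// let c = _mm_set_sh(1.0);
/// // round to nearest and suppress exceptions; lower lane = -(2.0 * 3.0) - 1.0 = -7.0
/// let r = unsafe {
///     _mm_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
/// };
/// ```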
6982 | #[inline ] |
6983 | #[target_feature (enable = "avx512fp16" )] |
6984 | #[cfg_attr (test, assert_instr(vfnmsub, ROUNDING = 8))] |
6985 | #[rustc_legacy_const_generics (3)] |
6986 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6987 | pub fn _mm_fnmsub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
6988 | unsafe { |
6989 | static_assert_rounding!(ROUNDING); |
6990 | let extracta: f16 = simd_extract!(a, 0); |
6991 | let extractb: f16 = simd_extract!(b, 0); |
6992 | let extractc: f16 = simd_extract!(c, 0); |
6993 |         let r: f16 = vfmaddsh(-extracta, extractb, -extractc, ROUNDING); |
6994 | simd_insert!(a, 0, r) |
6995 | } |
6996 | } |
6997 | |
6998 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element |
6999 | /// in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element |
7000 | /// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
7001 | /// elements of dst. |
7002 | /// |
7003 | /// Rounding is done according to the rounding parameter, which can be one of: |
7004 | /// |
7005 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7006 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7007 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7008 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7009 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7010 | /// |
7011 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_round_sh) |
7012 | #[inline ] |
7013 | #[target_feature (enable = "avx512fp16" )] |
7014 | #[cfg_attr (test, assert_instr(vfnmsub, ROUNDING = 8))] |
7015 | #[rustc_legacy_const_generics (4)] |
7016 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7017 | pub fn _mm_mask_fnmsub_round_sh<const ROUNDING: i32>( |
7018 | a: __m128h, |
7019 | k: __mmask8, |
7020 | b: __m128h, |
7021 | c: __m128h, |
7022 | ) -> __m128h { |
7023 | unsafe { |
7024 | static_assert_rounding!(ROUNDING); |
7025 | let mut fnmsub: f16 = simd_extract!(a, 0); |
7026 | if k & 1 != 0 { |
7027 | let extractb: f16 = simd_extract!(b, 0); |
7028 | let extractc: f16 = simd_extract!(c, 0); |
7029 |             fnmsub = vfmaddsh(-fnmsub, extractb, -extractc, ROUNDING); |
7030 | } |
7031 | simd_insert!(a, 0, fnmsub) |
7032 | } |
7033 | } |
7034 | |
7035 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element |
7036 | /// in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element |
7037 | /// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper |
7038 | /// elements of dst. |
7039 | /// |
7040 | /// Rounding is done according to the rounding parameter, which can be one of: |
7041 | /// |
7042 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7043 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7044 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7045 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7046 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7047 | /// |
7048 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_round_sh) |
7049 | #[inline ] |
7050 | #[target_feature (enable = "avx512fp16" )] |
7051 | #[cfg_attr (test, assert_instr(vfnmsub, ROUNDING = 8))] |
7052 | #[rustc_legacy_const_generics (4)] |
7053 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7054 | pub fn _mm_mask3_fnmsub_round_sh<const ROUNDING: i32>( |
7055 | a: __m128h, |
7056 | b: __m128h, |
7057 | c: __m128h, |
7058 | k: __mmask8, |
7059 | ) -> __m128h { |
7060 | unsafe { |
7061 | static_assert_rounding!(ROUNDING); |
7062 | let mut fnmsub: f16 = simd_extract!(c, 0); |
7063 | if k & 1 != 0 { |
7064 | let extracta: f16 = simd_extract!(a, 0); |
7065 | let extractb: f16 = simd_extract!(b, 0); |
7066 |             fnmsub = vfmaddsh(-extracta, extractb, -fnmsub, ROUNDING); |
7067 | } |
7068 | simd_insert!(c, 0, fnmsub) |
7069 | } |
7070 | } |
7071 | |
7072 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element |
7073 | /// in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element |
7074 | /// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
7075 | /// elements of dst. |
7076 | /// |
7077 | /// Rounding is done according to the rounding parameter, which can be one of: |
7078 | /// |
7079 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7080 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7081 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7082 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7083 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7084 | /// |
7085 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_round_sh) |
7086 | #[inline ] |
7087 | #[target_feature (enable = "avx512fp16" )] |
7088 | #[cfg_attr (test, assert_instr(vfnmsub, ROUNDING = 8))] |
7089 | #[rustc_legacy_const_generics (4)] |
7090 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7091 | pub fn _mm_maskz_fnmsub_round_sh<const ROUNDING: i32>( |
7092 | k: __mmask8, |
7093 | a: __m128h, |
7094 | b: __m128h, |
7095 | c: __m128h, |
7096 | ) -> __m128h { |
7097 | unsafe { |
7098 | static_assert_rounding!(ROUNDING); |
7099 | let mut fnmsub: f16 = 0.0; |
7100 | if k & 1 != 0 { |
7101 | let extracta: f16 = simd_extract!(a, 0); |
7102 | let extractb: f16 = simd_extract!(b, 0); |
7103 | let extractc: f16 = simd_extract!(c, 0); |
7104 |             fnmsub = vfmaddsh(-extracta, extractb, -extractc, ROUNDING); |
7105 | } |
7106 | simd_insert!(a, 0, fnmsub) |
7107 | } |
7108 | } |
7109 | |
7110 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7111 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst. |
7112 | /// |
7113 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmaddsub_ph) |
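///
/// # Example
///
/// Illustrative sketch (not from Intel's documentation; assumes nightly with
/// `stdarch_x86_avx512_f16` plus `avx512fp16` and `avx512vl` support):
///
/// ```ignore
/// let a = _mm_set1_ph(2.0);
/// let b = _mm_set1_ph(3.0);
/// let c = _mm_set1_ph(1.0);
/// // even-indexed lanes: 2.0 * 3.0 - 1.0 = 5.0; odd-indexed lanes: 2.0 * 3.0 + 1.0 = 7.0
/// let r = unsafe { _mm_fmaddsub_ph(a, b, c) };
/// ```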
7114 | #[inline ] |
7115 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7116 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7117 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7118 | pub fn _mm_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
7119 | unsafe { vfmaddsubph_128(a, b, c) } |
7120 | } |
7121 | |
7122 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7123 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7124 | /// (the element is copied from a when the corresponding mask bit is not set). |
7125 | /// |
7126 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmaddsub_ph) |
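///
/// # Example
///
/// Illustrative sketch of the writemask behaviour (same assumptions as the example for
/// `_mm_fmaddsub_ph` above):
///
/// ```ignore
/// let a = _mm_set1_ph(2.0);
/// let b = _mm_set1_ph(3.0);
/// let c = _mm_set1_ph(1.0);
/// // only lanes 0 and 1 are computed; lanes 2..=7 are copied from `a`
/// let r = unsafe { _mm_mask_fmaddsub_ph(a, 0b0000_0011, b, c) };
/// ```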
7127 | #[inline ] |
7128 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7129 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7130 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7131 | pub fn _mm_mask_fmaddsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
7132 |     unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), a) } |
7133 | } |
7134 | |
7135 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7136 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7137 | /// (the element is copied from c when the corresponding mask bit is not set). |
7138 | /// |
7139 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmaddsub_ph) |
7140 | #[inline ] |
7141 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7142 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7143 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7144 | pub fn _mm_mask3_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
7145 |     unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), c) } |
7146 | } |
7147 | |
7148 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7149 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k |
7150 | /// (the element is zeroed out when the corresponding mask bit is not set). |
7151 | /// |
7152 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmaddsub_ph) |
7153 | #[inline ] |
7154 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7155 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7156 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7157 | pub fn _mm_maskz_fmaddsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
7158 |     unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), _mm_setzero_ph()) } |
7159 | } |
7160 | |
7161 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7162 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst. |
7163 | /// |
7164 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmaddsub_ph) |
7165 | #[inline ] |
7166 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7167 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7168 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7169 | pub fn _mm256_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
7170 | unsafe { vfmaddsubph_256(a, b, c) } |
7171 | } |
7172 | |
7173 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7174 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7175 | /// (the element is copied from a when the corresponding mask bit is not set). |
7176 | /// |
7177 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmaddsub_ph) |
7178 | #[inline ] |
7179 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7180 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7181 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7182 | pub fn _mm256_mask_fmaddsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { |
7183 |     unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), a) } |
7184 | } |
7185 | |
7186 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7187 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7188 | /// (the element is copied from c when the corresponding mask bit is not set). |
7189 | /// |
7190 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmaddsub_ph) |
7191 | #[inline ] |
7192 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7193 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7194 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7195 | pub fn _mm256_mask3_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { |
7196 |     unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), c) } |
7197 | } |
7198 | |
7199 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7200 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k |
7201 | /// (the element is zeroed out when the corresponding mask bit is not set). |
7202 | /// |
7203 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmaddsub_ph) |
7204 | #[inline ] |
7205 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7206 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7207 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7208 | pub fn _mm256_maskz_fmaddsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
7209 |     unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), _mm256_setzero_ph()) } |
7210 | } |
7211 | |
7212 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7213 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst. |
7214 | /// |
7215 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_ph) |
7216 | #[inline ] |
7217 | #[target_feature (enable = "avx512fp16" )] |
7218 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7219 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7220 | pub fn _mm512_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
7221 | _mm512_fmaddsub_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b, c) |
7222 | } |
7223 | |
7224 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7225 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7226 | /// (the element is copied from a when the corresponding mask bit is not set). |
7227 | /// |
7228 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_ph) |
7229 | #[inline ] |
7230 | #[target_feature (enable = "avx512fp16" )] |
7231 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7232 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7233 | pub fn _mm512_mask_fmaddsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { |
7234 |     unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), a) } |
7235 | } |
7236 | |
7237 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7238 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7239 | /// (the element is copied from c when the corresponding mask bit is not set). |
7240 | /// |
7241 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_ph) |
7242 | #[inline ] |
7243 | #[target_feature (enable = "avx512fp16" )] |
7244 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7245 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7246 | pub fn _mm512_mask3_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { |
7247 |     unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), c) } |
7248 | } |
7249 | |
7250 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7251 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k |
7252 | /// (the element is zeroed out when the corresponding mask bit is not set). |
7253 | /// |
7254 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_ph) |
7255 | #[inline ] |
7256 | #[target_feature (enable = "avx512fp16" )] |
7257 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7258 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7259 | pub fn _mm512_maskz_fmaddsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
7260 |     unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), _mm512_setzero_ph()) } |
7261 | } |
7262 | |
7263 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7264 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst. |
7265 | /// |
7266 | /// Rounding is done according to the rounding parameter, which can be one of: |
7267 | /// |
7268 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7269 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7270 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7271 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7272 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7273 | /// |
7274 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_round_ph) |
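///
/// # Example
///
/// Illustrative sketch (not from Intel's documentation; assumes nightly with
/// `stdarch_x86_avx512_f16` and `avx512fp16` support, and uses `_mm512_set1_ph` from this
/// module):
///
/// ```ignore
/// let a = _mm512_set1_ph(2.0);
/// let b = _mm512_set1_ph(3.0);
/// let c = _mm512_set1_ph(1.0);
/// // truncate toward zero and suppress exceptions across all 32 lanes
/// let r = unsafe {
///     _mm512_fmaddsub_round_ph::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c)
/// };
/// ```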
7275 | #[inline ] |
7276 | #[target_feature (enable = "avx512fp16" )] |
7277 | #[cfg_attr (test, assert_instr(vfmaddsub, ROUNDING = 8))] |
7278 | #[rustc_legacy_const_generics (3)] |
7279 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7280 | pub fn _mm512_fmaddsub_round_ph<const ROUNDING: i32>( |
7281 | a: __m512h, |
7282 | b: __m512h, |
7283 | c: __m512h, |
7284 | ) -> __m512h { |
7285 | unsafe { |
7286 | static_assert_rounding!(ROUNDING); |
7287 | vfmaddsubph_512(a, b, c, ROUNDING) |
7288 | } |
7289 | } |
7290 | |
7291 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7292 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7293 | /// (the element is copied from a when the corresponding mask bit is not set). |
7294 | /// |
7295 | /// Rounding is done according to the rounding parameter, which can be one of: |
7296 | /// |
7297 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7298 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7299 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7300 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7301 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7302 | /// |
7303 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_round_ph) |
7304 | #[inline ] |
7305 | #[target_feature (enable = "avx512fp16" )] |
7306 | #[cfg_attr (test, assert_instr(vfmaddsub, ROUNDING = 8))] |
7307 | #[rustc_legacy_const_generics (4)] |
7308 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7309 | pub fn _mm512_mask_fmaddsub_round_ph<const ROUNDING: i32>( |
7310 | a: __m512h, |
7311 | k: __mmask32, |
7312 | b: __m512h, |
7313 | c: __m512h, |
7314 | ) -> __m512h { |
7315 | unsafe { |
7316 | static_assert_rounding!(ROUNDING); |
7317 |         simd_select_bitmask(k, _mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c), a) |
7318 | } |
7319 | } |
7320 | |
7321 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7322 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7323 | /// (the element is copied from c when the corresponding mask bit is not set). |
7324 | /// |
7325 | /// Rounding is done according to the rounding parameter, which can be one of: |
7326 | /// |
7327 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7328 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7329 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7330 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7331 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7332 | /// |
7333 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_round_ph) |
7334 | #[inline ] |
7335 | #[target_feature (enable = "avx512fp16" )] |
7336 | #[cfg_attr (test, assert_instr(vfmaddsub, ROUNDING = 8))] |
7337 | #[rustc_legacy_const_generics (4)] |
7338 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7339 | pub fn _mm512_mask3_fmaddsub_round_ph<const ROUNDING: i32>( |
7340 | a: __m512h, |
7341 | b: __m512h, |
7342 | c: __m512h, |
7343 | k: __mmask32, |
7344 | ) -> __m512h { |
7345 | unsafe { |
7346 | static_assert_rounding!(ROUNDING); |
7347 |         simd_select_bitmask(k, _mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c), c) |
7348 | } |
7349 | } |
7350 | |
7351 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7352 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k |
7353 | /// (the element is zeroed out when the corresponding mask bit is not set). |
7354 | /// |
7355 | /// Rounding is done according to the rounding parameter, which can be one of: |
7356 | /// |
7357 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7358 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7359 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7360 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7361 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7362 | /// |
7363 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_round_ph) |
7364 | #[inline ] |
7365 | #[target_feature (enable = "avx512fp16" )] |
7366 | #[cfg_attr (test, assert_instr(vfmaddsub, ROUNDING = 8))] |
7367 | #[rustc_legacy_const_generics (4)] |
7368 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7369 | pub fn _mm512_maskz_fmaddsub_round_ph<const ROUNDING: i32>( |
7370 | k: __mmask32, |
7371 | a: __m512h, |
7372 | b: __m512h, |
7373 | c: __m512h, |
7374 | ) -> __m512h { |
7375 | unsafe { |
7376 | static_assert_rounding!(ROUNDING); |
7377 | simd_select_bitmask( |
7378 |             k, |
7379 |             _mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c), |
7380 |             _mm512_setzero_ph(), |
7381 | ) |
7382 | } |
7383 | } |
7384 | |
7385 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7386 | /// and add packed elements in c to/from the intermediate result, and store the results in dst. |
7387 | /// |
7388 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsubadd_ph) |
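///
/// # Example
///
/// Illustrative sketch contrasting this with `_mm_fmaddsub_ph` (assumes nightly with
/// `stdarch_x86_avx512_f16` plus `avx512fp16` and `avx512vl` support):
///
/// ```ignore
/// let a = _mm_set1_ph(2.0);
/// let b = _mm_set1_ph(3.0);
/// let c = _mm_set1_ph(1.0);
/// // even-indexed lanes: 2.0 * 3.0 + 1.0 = 7.0; odd-indexed lanes: 2.0 * 3.0 - 1.0 = 5.0
/// let r = unsafe { _mm_fmsubadd_ph(a, b, c) };
/// ```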
7389 | #[inline ] |
7390 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7391 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7392 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7393 | pub fn _mm_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
7394 |     unsafe { vfmaddsubph_128(a, b, simd_neg(c)) } |
7395 | } |
7396 | |
7397 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7398 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7399 | /// (the element is copied from a when the corresponding mask bit is not set). |
7400 | /// |
7401 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsubadd_ph) |
7402 | #[inline ] |
7403 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7404 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7405 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7406 | pub fn _mm_mask_fmsubadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
7407 |     unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), a) } |
7408 | } |
7409 | |
7410 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7411 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7412 | /// (the element is copied from c when the corresponding mask bit is not set). |
7413 | /// |
7414 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsubadd_ph) |
7415 | #[inline ] |
7416 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7417 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7418 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7419 | pub fn _mm_mask3_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
7420 |     unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), c) } |
7421 | } |
7422 | |
7423 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7424 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k |
7425 | /// (the element is zeroed out when the corresponding mask bit is not set). |
7426 | /// |
7427 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsubadd_ph) |
7428 | #[inline ] |
7429 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7430 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7431 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7432 | pub fn _mm_maskz_fmsubadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
7433 |     unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), _mm_setzero_ph()) } |
7434 | } |
7435 | |
7436 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7437 | /// and add packed elements in c to/from the intermediate result, and store the results in dst. |
7438 | /// |
7439 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsubadd_ph) |
7440 | #[inline ] |
7441 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7442 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7443 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7444 | pub fn _mm256_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
7445 |     unsafe { vfmaddsubph_256(a, b, simd_neg(c)) } |
7446 | } |
7447 | |
7448 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7449 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7450 | /// (the element is copied from a when the corresponding mask bit is not set). |
7451 | /// |
7452 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmsubadd_ph) |
7453 | #[inline ] |
7454 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7455 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7456 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7457 | pub fn _mm256_mask_fmsubadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { |
7458 |     unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), a) } |
7459 | } |
7460 | |
7461 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7462 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7463 | /// (the element is copied from c when the corresponding mask bit is not set). |
7464 | /// |
7465 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmsubadd_ph) |
7466 | #[inline ] |
7467 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7468 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7469 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7470 | pub fn _mm256_mask3_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { |
7471 |     unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), c) } |
7472 | } |
7473 | |
7474 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7475 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k |
7476 | /// (the element is zeroed out when the corresponding mask bit is not set). |
7477 | /// |
7478 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmsubadd_ph) |
7479 | #[inline ] |
7480 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7481 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7482 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7483 | pub fn _mm256_maskz_fmsubadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
7484 |     unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), _mm256_setzero_ph()) } |
7485 | } |
7486 | |
7487 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7488 | /// and add packed elements in c to/from the intermediate result, and store the results in dst. |
7489 | /// |
7490 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_ph) |
7491 | #[inline ] |
7492 | #[target_feature (enable = "avx512fp16" )] |
7493 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7494 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7495 | pub fn _mm512_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
7496 | _mm512_fmsubadd_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b, c) |
7497 | } |
7498 | |
7499 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7500 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7501 | /// (the element is copied from a when the corresponding mask bit is not set). |
7502 | /// |
7503 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_ph) |
7504 | #[inline ] |
7505 | #[target_feature (enable = "avx512fp16" )] |
7506 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7507 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7508 | pub fn _mm512_mask_fmsubadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { |
7509 |     unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), a) } |
7510 | } |
7511 | |
7512 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7513 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7514 | /// (the element is copied from c when the corresponding mask bit is not set). |
7515 | /// |
7516 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_ph) |
7517 | #[inline ] |
7518 | #[target_feature (enable = "avx512fp16" )] |
7519 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7520 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7521 | pub fn _mm512_mask3_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { |
7522 |     unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), c) } |
7523 | } |
7524 | |
7525 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7526 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k |
7527 | /// (the element is zeroed out when the corresponding mask bit is not set). |
7528 | /// |
7529 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_ph) |
7530 | #[inline ] |
7531 | #[target_feature (enable = "avx512fp16" )] |
7532 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7533 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7534 | pub fn _mm512_maskz_fmsubadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
7535 |     unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), _mm512_setzero_ph()) } |
7536 | } |
7537 | |
7538 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7539 | /// and add packed elements in c to/from the intermediate result, and store the results in dst. |
7540 | /// |
7541 | /// Rounding is done according to the rounding parameter, which can be one of: |
7542 | /// |
7543 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7544 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7545 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7546 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7547 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7548 | /// |
7549 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_round_ph) |
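///
/// # Example
///
/// Illustrative sketch (assumes nightly with `stdarch_x86_avx512_f16` and `avx512fp16`
/// support, and `_mm512_set1_ph` from this module):
///
/// ```ignore
/// let a = _mm512_set1_ph(2.0);
/// let b = _mm512_set1_ph(3.0);
/// let c = _mm512_set1_ph(1.0);
/// // round to nearest, suppress exceptions; even-indexed lanes = 7.0, odd-indexed lanes = 5.0
/// let r = unsafe {
///     _mm512_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
/// };
/// ```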
7550 | #[inline ] |
7551 | #[target_feature (enable = "avx512fp16" )] |
7552 | #[cfg_attr (test, assert_instr(vfmsubadd, ROUNDING = 8))] |
7553 | #[rustc_legacy_const_generics (3)] |
7554 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7555 | pub fn _mm512_fmsubadd_round_ph<const ROUNDING: i32>( |
7556 | a: __m512h, |
7557 | b: __m512h, |
7558 | c: __m512h, |
7559 | ) -> __m512h { |
7560 | unsafe { |
7561 | static_assert_rounding!(ROUNDING); |
7562 |         vfmaddsubph_512(a, b, simd_neg(c), ROUNDING) |
7563 | } |
7564 | } |
7565 | |
7566 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7567 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7568 | /// (the element is copied from a when the corresponding mask bit is not set). |
7569 | /// |
7570 | /// Rounding is done according to the rounding parameter, which can be one of: |
7571 | /// |
7572 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7573 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7574 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7575 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7576 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7577 | /// |
7578 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_round_ph) |
7579 | #[inline ] |
7580 | #[target_feature (enable = "avx512fp16" )] |
7581 | #[cfg_attr (test, assert_instr(vfmsubadd, ROUNDING = 8))] |
7582 | #[rustc_legacy_const_generics (4)] |
7583 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7584 | pub fn _mm512_mask_fmsubadd_round_ph<const ROUNDING: i32>( |
7585 | a: __m512h, |
7586 | k: __mmask32, |
7587 | b: __m512h, |
7588 | c: __m512h, |
7589 | ) -> __m512h { |
7590 | unsafe { |
7591 | static_assert_rounding!(ROUNDING); |
7592 |         simd_select_bitmask(k, _mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c), a) |
7593 | } |
7594 | } |
7595 | |
7596 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7597 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7598 | /// (the element is copied from c when the corresponding mask bit is not set). |
7599 | /// |
7600 | /// Rounding is done according to the rounding parameter, which can be one of: |
7601 | /// |
7602 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7603 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7604 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7605 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7606 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7607 | /// |
7608 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_round_ph) |
7609 | #[inline ] |
7610 | #[target_feature (enable = "avx512fp16" )] |
7611 | #[cfg_attr (test, assert_instr(vfmsubadd, ROUNDING = 8))] |
7612 | #[rustc_legacy_const_generics (4)] |
7613 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7614 | pub fn _mm512_mask3_fmsubadd_round_ph<const ROUNDING: i32>( |
7615 | a: __m512h, |
7616 | b: __m512h, |
7617 | c: __m512h, |
7618 | k: __mmask32, |
7619 | ) -> __m512h { |
7620 | unsafe { |
7621 | static_assert_rounding!(ROUNDING); |
7622 |         simd_select_bitmask(k, _mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c), c) |
7623 | } |
7624 | } |
7625 | |
7626 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7627 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k |
7628 | /// (the element is zeroed out when the corresponding mask bit is not set). |
7629 | /// |
7630 | /// Rounding is done according to the rounding parameter, which can be one of: |
7631 | /// |
7632 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7633 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7634 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7635 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7636 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7637 | /// |
7638 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_round_ph) |
7639 | #[inline ] |
7640 | #[target_feature (enable = "avx512fp16" )] |
7641 | #[cfg_attr (test, assert_instr(vfmsubadd, ROUNDING = 8))] |
7642 | #[rustc_legacy_const_generics (4)] |
7643 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7644 | pub fn _mm512_maskz_fmsubadd_round_ph<const ROUNDING: i32>( |
7645 | k: __mmask32, |
7646 | a: __m512h, |
7647 | b: __m512h, |
7648 | c: __m512h, |
7649 | ) -> __m512h { |
7650 | unsafe { |
7651 | static_assert_rounding!(ROUNDING); |
7652 | simd_select_bitmask( |
7653 |             k, |
7654 |             _mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c), |
7655 |             _mm512_setzero_ph(), |
7656 | ) |
7657 | } |
7658 | } |
7659 | |
7660 | /// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a`, and store the results in `dst`. |
7661 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7662 | /// |
7663 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ph) |
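///
/// # Example
///
/// Illustrative sketch (not from Intel's documentation; assumes nightly with
/// `stdarch_x86_avx512_f16` plus `avx512fp16` and `avx512vl` support):
///
/// ```ignore
/// let a = _mm_set1_ph(4.0);
/// // every lane holds an approximation of 1.0 / 4.0 = 0.25
/// let r = unsafe { _mm_rcp_ph(a) };
/// ```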
7664 | #[inline ] |
7665 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7666 | #[cfg_attr (test, assert_instr(vrcpph))] |
7667 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7668 | pub fn _mm_rcp_ph(a: __m128h) -> __m128h { |
7669 |     _mm_mask_rcp_ph(_mm_undefined_ph(), 0xff, a) |
7670 | } |
7671 | |
7672 | /// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a`, and store the results in `dst` |
7673 | /// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set). |
7674 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7675 | /// |
7676 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_ph) |
7677 | #[inline ] |
7678 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7679 | #[cfg_attr (test, assert_instr(vrcpph))] |
7680 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7681 | pub fn _mm_mask_rcp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { |
7682 | unsafe { vrcpph_128(a, src, k) } |
7683 | } |
7684 | |
7685 | /// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a`, and store the results in `dst` |
7686 | /// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set). |
7687 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7688 | /// |
7689 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_ph) |
7690 | #[inline ] |
7691 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7692 | #[cfg_attr (test, assert_instr(vrcpph))] |
7693 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7694 | pub fn _mm_maskz_rcp_ph(k: __mmask8, a: __m128h) -> __m128h { |
7695 |     _mm_mask_rcp_ph(_mm_setzero_ph(), k, a) |
7696 | } |
7697 | |
7698 | /// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a`, and store the results in `dst`. |
7699 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7700 | /// |
7701 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rcp_ph) |
7702 | #[inline ] |
7703 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7704 | #[cfg_attr (test, assert_instr(vrcpph))] |
7705 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7706 | pub fn _mm256_rcp_ph(a: __m256h) -> __m256h { |
7707 |     _mm256_mask_rcp_ph(_mm256_undefined_ph(), 0xffff, a) |
7708 | } |
7709 | |
7710 | /// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a`, and store the results in `dst` |
7711 | /// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set). |
7712 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7713 | /// |
7714 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rcp_ph) |
7715 | #[inline ] |
7716 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7717 | #[cfg_attr (test, assert_instr(vrcpph))] |
7718 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7719 | pub fn _mm256_mask_rcp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { |
7720 | unsafe { vrcpph_256(a, src, k) } |
7721 | } |
7722 | |
7723 | /// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a`, and store the results in `dst` |
7724 | /// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set). |
7725 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7726 | /// |
7727 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rcp_ph) |
7728 | #[inline ] |
7729 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7730 | #[cfg_attr (test, assert_instr(vrcpph))] |
7731 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7732 | pub fn _mm256_maskz_rcp_ph(k: __mmask16, a: __m256h) -> __m256h { |
7733 |     _mm256_mask_rcp_ph(_mm256_setzero_ph(), k, a) |
7734 | } |
7735 | |
7736 | /// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a`, and store the results in `dst`. |
7737 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7738 | /// |
7739 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rcp_ph) |
7740 | #[inline ] |
7741 | #[target_feature (enable = "avx512fp16" )] |
7742 | #[cfg_attr (test, assert_instr(vrcpph))] |
7743 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7744 | pub fn _mm512_rcp_ph(a: __m512h) -> __m512h { |
7745 |     _mm512_mask_rcp_ph(_mm512_undefined_ph(), 0xffffffff, a) |
7746 | } |
7747 | |
7748 | /// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a`, and store the results in `dst` |
7749 | /// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set). |
7750 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7751 | /// |
7752 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rcp_ph) |
7753 | #[inline ] |
7754 | #[target_feature (enable = "avx512fp16" )] |
7755 | #[cfg_attr (test, assert_instr(vrcpph))] |
7756 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7757 | pub fn _mm512_mask_rcp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { |
7758 | unsafe { vrcpph_512(a, src, k) } |
7759 | } |
7760 | |
7761 | /// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a`, and store the results in `dst` |
7762 | /// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set). |
7763 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7764 | /// |
7765 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rcp_ph) |
7766 | #[inline ] |
7767 | #[target_feature (enable = "avx512fp16" )] |
7768 | #[cfg_attr (test, assert_instr(vrcpph))] |
7769 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7770 | pub fn _mm512_maskz_rcp_ph(k: __mmask32, a: __m512h) -> __m512h { |
7771 |     _mm512_mask_rcp_ph(_mm512_setzero_ph(), k, a) |
7772 | } |
7773 | |
7774 | /// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b, |
7775 | /// store the result in the lower element of dst, and copy the upper 7 packed elements from a to the |
7776 | /// upper elements of dst. |
7777 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7778 | /// |
7779 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_sh) |
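///
/// # Example
///
/// Illustrative sketch (assumes nightly with `stdarch_x86_avx512_f16` and `avx512fp16`
/// support):
///
/// ```ignore
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set_sh(8.0);
/// // lower lane approximates 1.0 / 8.0 = 0.125; upper lanes are copied from `a`
/// let r = unsafe { _mm_rcp_sh(a, b) };
/// ```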
7780 | #[inline ] |
7781 | #[target_feature (enable = "avx512fp16" )] |
7782 | #[cfg_attr (test, assert_instr(vrcpsh))] |
7783 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7784 | pub fn _mm_rcp_sh(a: __m128h, b: __m128h) -> __m128h { |
7785 |     _mm_mask_rcp_sh(f16x8::ZERO.as_m128h(), 0xff, a, b) |
7786 | } |
7787 | |
7788 | /// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b, |
7789 | /// store the result in the lower element of dst using writemask k (the element is copied from src when |
7790 | /// mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
7791 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7792 | /// |
7793 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_sh) |
7794 | #[inline ] |
7795 | #[target_feature (enable = "avx512fp16" )] |
7796 | #[cfg_attr (test, assert_instr(vrcpsh))] |
7797 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7798 | pub fn _mm_mask_rcp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
7799 | unsafe { vrcpsh(a, b, src, k) } |
7800 | } |
7801 | |
7802 | /// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b, |
7803 | /// store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 |
7804 | /// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
7805 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7806 | /// |
7807 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_sh) |
7808 | #[inline ] |
7809 | #[target_feature (enable = "avx512fp16" )] |
7810 | #[cfg_attr (test, assert_instr(vrcpsh))] |
7811 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7812 | pub fn _mm_maskz_rcp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
7813 |     _mm_mask_rcp_sh(f16x8::ZERO.as_m128h(), k, a, b) |
7814 | } |
7815 | |
7816 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7817 | /// elements in a, and store the results in dst. |
7818 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7819 | /// |
7820 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ph) |
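///
/// # Example
///
/// Illustrative sketch (assumes nightly with `stdarch_x86_avx512_f16` plus `avx512fp16`
/// and `avx512vl` support):
///
/// ```ignore
/// let a = _mm_set1_ph(16.0);
/// // every lane holds an approximation of 1.0 / sqrt(16.0) = 0.25
/// let r = unsafe { _mm_rsqrt_ph(a) };
/// ```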
7821 | #[inline ] |
7822 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7823 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7824 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7825 | pub fn _mm_rsqrt_ph(a: __m128h) -> __m128h { |
7826 |     _mm_mask_rsqrt_ph(_mm_undefined_ph(), 0xff, a) |
7827 | } |
7828 | |
7829 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7830 | /// elements in a, and store the results in dst using writemask k (elements are copied from src when |
7831 | /// the corresponding mask bit is not set). |
7832 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7833 | /// |
7834 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt_ph) |
7835 | #[inline ] |
7836 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7837 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7838 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7839 | pub fn _mm_mask_rsqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { |
7840 | unsafe { vrsqrtph_128(a, src, k) } |
7841 | } |
7842 | |
7843 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7844 | /// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the |
7845 | /// corresponding mask bit is not set). |
7846 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7847 | /// |
7848 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rsqrt_ph) |
7849 | #[inline ] |
7850 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7851 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7852 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7853 | pub fn _mm_maskz_rsqrt_ph(k: __mmask8, a: __m128h) -> __m128h { |
7854 | _mm_mask_rsqrt_ph(_mm_setzero_ph(), k, a) |
7855 | } |
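
// Illustrative sketch (comments only): with `avx512fp16,avx512vl` enabled, the zeromask
// variant clears every lane whose mask bit is unset:
//
//     let a = _mm_set1_ph(4.0);
//     let r = _mm_maskz_rsqrt_ph(0b0000_0101, a);
//     // lanes 0 and 2 hold roughly 0.5 (1/sqrt(4)); all other lanes are zeroed.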
7856 | |
7857 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7858 | /// elements in a, and store the results in dst. |
7859 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7860 | /// |
7861 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rsqrt_ph) |
7862 | #[inline ] |
7863 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7864 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7865 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7866 | pub fn _mm256_rsqrt_ph(a: __m256h) -> __m256h { |
7867 | _mm256_mask_rsqrt_ph(_mm256_undefined_ph(), 0xffff, a) |
7868 | } |
7869 | |
7870 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7871 | /// elements in a, and store the results in dst using writemask k (elements are copied from src when |
7872 | /// the corresponding mask bit is not set). |
7873 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7874 | /// |
7875 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rsqrt_ph) |
7876 | #[inline ] |
7877 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7878 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7879 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7880 | pub fn _mm256_mask_rsqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { |
7881 | unsafe { vrsqrtph_256(a, src, k) } |
7882 | } |
7883 | |
7884 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7885 | /// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the |
7886 | /// corresponding mask bit is not set). |
7887 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7888 | /// |
7889 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rsqrt_ph) |
7890 | #[inline ] |
7891 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7892 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7893 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7894 | pub fn _mm256_maskz_rsqrt_ph(k: __mmask16, a: __m256h) -> __m256h { |
7895 | _mm256_mask_rsqrt_ph(_mm256_setzero_ph(), k, a) |
7896 | } |
7897 | |
7898 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7899 | /// elements in a, and store the results in dst. |
7900 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7901 | /// |
7902 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rsqrt_ph) |
7903 | #[inline ] |
7904 | #[target_feature (enable = "avx512fp16" )] |
7905 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7906 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7907 | pub fn _mm512_rsqrt_ph(a: __m512h) -> __m512h { |
7908 | _mm512_mask_rsqrt_ph(_mm512_undefined_ph(), 0xffffffff, a) |
7909 | } |
7910 | |
7911 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7912 | /// elements in a, and store the results in dst using writemask k (elements are copied from src when |
7913 | /// the corresponding mask bit is not set). |
7914 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7915 | /// |
7916 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rsqrt_ph) |
7917 | #[inline ] |
7918 | #[target_feature (enable = "avx512fp16" )] |
7919 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7920 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7921 | pub fn _mm512_mask_rsqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { |
7922 | unsafe { vrsqrtph_512(a, src, k) } |
7923 | } |
7924 | |
7925 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7926 | /// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the |
7927 | /// corresponding mask bit is not set). |
7928 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7929 | /// |
7930 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rsqrt_ph) |
7931 | #[inline ] |
7932 | #[target_feature (enable = "avx512fp16" )] |
7933 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7934 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7935 | pub fn _mm512_maskz_rsqrt_ph(k: __mmask32, a: __m512h) -> __m512h { |
7936 | _mm512_mask_rsqrt_ph(_mm512_setzero_ph(), k, a) |
7937 | } |
7938 | |
7939 | /// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point |
7940 | /// element in b, store the result in the lower element of dst, and copy the upper 7 packed elements from a |
7941 | /// to the upper elements of dst. |
7942 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7943 | /// |
7944 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_sh) |
7945 | #[inline ] |
7946 | #[target_feature (enable = "avx512fp16" )] |
7947 | #[cfg_attr (test, assert_instr(vrsqrtsh))] |
7948 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7949 | pub fn _mm_rsqrt_sh(a: __m128h, b: __m128h) -> __m128h { |
7950 | _mm_mask_rsqrt_sh(f16x8::ZERO.as_m128h(), 0xff, a, b) |
7951 | } |
7952 | |
7953 | /// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point |
7954 | /// element in b, store the result in the lower element of dst using writemask k (the element is copied from src |
7955 | /// when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
7956 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7957 | /// |
7958 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt_sh) |
7959 | #[inline ] |
7960 | #[target_feature (enable = "avx512fp16" )] |
7961 | #[cfg_attr (test, assert_instr(vrsqrtsh))] |
7962 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7963 | pub fn _mm_mask_rsqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
7964 | unsafe { vrsqrtsh(a, b, src, k) } |
7965 | } |
7966 | |
7967 | /// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point |
7968 | /// element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when |
7969 | /// mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
7970 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7971 | /// |
7972 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rsqrt_sh) |
7973 | #[inline ] |
7974 | #[target_feature (enable = "avx512fp16" )] |
7975 | #[cfg_attr (test, assert_instr(vrsqrtsh))] |
7976 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7977 | pub fn _mm_maskz_rsqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
7978 | _mm_mask_rsqrt_sh(f16x8::ZERO.as_m128h(), k, a, b) |
7979 | } |
7980 | |
7981 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
7982 | /// results in dst. |
7983 | /// |
7984 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_ph) |
7985 | #[inline ] |
7986 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7987 | #[cfg_attr (test, assert_instr(vsqrtph))] |
7988 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7989 | pub fn _mm_sqrt_ph(a: __m128h) -> __m128h { |
7990 | unsafe { simd_fsqrt(a) } |
7991 | } |
7992 | |
7993 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
7994 | /// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7995 | /// |
7996 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_ph) |
7997 | #[inline ] |
7998 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7999 | #[cfg_attr (test, assert_instr(vsqrtph))] |
8000 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8001 | pub fn _mm_mask_sqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { |
8002 | unsafe { simd_select_bitmask(k, _mm_sqrt_ph(a), src) } |
8003 | } |
8004 | |
8005 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8006 | /// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8007 | /// |
8008 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_ph) |
8009 | #[inline ] |
8010 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8011 | #[cfg_attr (test, assert_instr(vsqrtph))] |
8012 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8013 | pub fn _mm_maskz_sqrt_ph(k: __mmask8, a: __m128h) -> __m128h { |
8014 | unsafe { simd_select_bitmask(k, _mm_sqrt_ph(a), _mm_setzero_ph()) } |
8015 | } |
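
// Illustrative sketch (comments only) contrasting the writemask and zeromask forms:
//
//     let a = _mm_set1_ph(9.0);
//     let src = _mm_set1_ph(-1.0);
//     let m = _mm_mask_sqrt_ph(src, 0b0000_1111, a); // lanes 0..4 = 3.0, lanes 4..8 = -1.0 from `src`
//     let z = _mm_maskz_sqrt_ph(0b0000_1111, a);     // lanes 0..4 = 3.0, lanes 4..8 = 0.0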
8016 | |
8017 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8018 | /// results in dst. |
8019 | /// |
8020 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sqrt_ph) |
8021 | #[inline ] |
8022 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8023 | #[cfg_attr (test, assert_instr(vsqrtph))] |
8024 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8025 | pub fn _mm256_sqrt_ph(a: __m256h) -> __m256h { |
8026 | unsafe { simd_fsqrt(a) } |
8027 | } |
8028 | |
8029 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8030 | /// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8031 | /// |
8032 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sqrt_ph) |
8033 | #[inline ] |
8034 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8035 | #[cfg_attr (test, assert_instr(vsqrtph))] |
8036 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8037 | pub fn _mm256_mask_sqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { |
8038 | unsafe { simd_select_bitmask(k, _mm256_sqrt_ph(a), src) } |
8039 | } |
8040 | |
8041 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8042 | /// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8043 | /// |
8044 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sqrt_ph) |
8045 | #[inline ] |
8046 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8047 | #[cfg_attr (test, assert_instr(vsqrtph))] |
8048 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8049 | pub fn _mm256_maskz_sqrt_ph(k: __mmask16, a: __m256h) -> __m256h { |
8050 | unsafe { simd_select_bitmask(k, _mm256_sqrt_ph(a), _mm256_setzero_ph()) } |
8051 | } |
8052 | |
8053 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8054 | /// results in dst. |
8055 | /// |
8056 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_ph) |
8057 | #[inline ] |
8058 | #[target_feature (enable = "avx512fp16" )] |
8059 | #[cfg_attr (test, assert_instr(vsqrtph))] |
8060 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8061 | pub fn _mm512_sqrt_ph(a: __m512h) -> __m512h { |
8062 | unsafe { simd_fsqrt(a) } |
8063 | } |
8064 | |
8065 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8066 | /// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8067 | /// |
8068 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_ph) |
8069 | #[inline ] |
8070 | #[target_feature (enable = "avx512fp16" )] |
8071 | #[cfg_attr (test, assert_instr(vsqrtph))] |
8072 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8073 | pub fn _mm512_mask_sqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { |
8074 | unsafe { simd_select_bitmask(k, _mm512_sqrt_ph(a), src) } |
8075 | } |
8076 | |
8077 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8078 | /// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8079 | /// |
8080 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_ph) |
8081 | #[inline ] |
8082 | #[target_feature (enable = "avx512fp16" )] |
8083 | #[cfg_attr (test, assert_instr(vsqrtph))] |
8084 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8085 | pub fn _mm512_maskz_sqrt_ph(k: __mmask32, a: __m512h) -> __m512h { |
8086 | unsafe { simd_select_bitmask(k, _mm512_sqrt_ph(a), _mm512_setzero_ph()) } |
8087 | } |
8088 | |
8089 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8090 | /// results in dst. |
8091 | /// Rounding is done according to the rounding parameter, which can be one of: |
8092 | /// |
8093 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
8094 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
8095 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
8096 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
8097 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
8098 | /// |
8099 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_round_ph) |
8100 | #[inline ] |
8101 | #[target_feature (enable = "avx512fp16" )] |
8102 | #[cfg_attr (test, assert_instr(vsqrtph, ROUNDING = 8))] |
8103 | #[rustc_legacy_const_generics (1)] |
8104 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8105 | pub fn _mm512_sqrt_round_ph<const ROUNDING: i32>(a: __m512h) -> __m512h { |
8106 | unsafe { |
8107 | static_assert_rounding!(ROUNDING); |
8108 | vsqrtph_512(a, ROUNDING) |
8109 | } |
8110 | } |
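
// Illustrative sketch (comments only): ROUNDING is a compile-time constant, either
// `_MM_FROUND_CUR_DIRECTION` or a rounding direction OR'd with `_MM_FROUND_NO_EXC`.
// `_mm512_set1_ph` is assumed to be provided elsewhere in this module:
//
//     let a = _mm512_set1_ph(2.0);
//     let r = _mm512_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
//     // every lane holds sqrt(2) rounded to nearest, with exceptions suppressed.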
8111 | |
8112 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8113 | /// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8114 | /// Rounding is done according to the rounding parameter, which can be one of: |
8115 | /// |
8116 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
8117 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
8118 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
8119 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
8120 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
8121 | /// |
8122 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_round_ph) |
8123 | #[inline ] |
8124 | #[target_feature (enable = "avx512fp16" )] |
8125 | #[cfg_attr (test, assert_instr(vsqrtph, ROUNDING = 8))] |
8126 | #[rustc_legacy_const_generics (3)] |
8127 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8128 | pub fn _mm512_mask_sqrt_round_ph<const ROUNDING: i32>( |
8129 | src: __m512h, |
8130 | k: __mmask32, |
8131 | a: __m512h, |
8132 | ) -> __m512h { |
8133 | unsafe { |
8134 | static_assert_rounding!(ROUNDING); |
8135 | simd_select_bitmask(k, _mm512_sqrt_round_ph::<ROUNDING>(a), src) |
8136 | } |
8137 | } |
8138 | |
8139 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8140 | /// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8141 | /// Rounding is done according to the rounding parameter, which can be one of: |
8142 | /// |
8143 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
8144 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
8145 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
8146 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
8147 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
8148 | /// |
8149 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_round_ph) |
8150 | #[inline ] |
8151 | #[target_feature (enable = "avx512fp16" )] |
8152 | #[cfg_attr (test, assert_instr(vsqrtph, ROUNDING = 8))] |
8153 | #[rustc_legacy_const_generics (2)] |
8154 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8155 | pub fn _mm512_maskz_sqrt_round_ph<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512h { |
8156 | unsafe { |
8157 | static_assert_rounding!(ROUNDING); |
8158 | simd_select_bitmask(k, _mm512_sqrt_round_ph::<ROUNDING>(a), _mm512_setzero_ph()) |
8159 | } |
8160 | } |
8161 | |
8162 | /// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store |
8163 | /// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper |
8164 | /// elements of dst. |
8165 | /// |
8166 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sh) |
8167 | #[inline ] |
8168 | #[target_feature (enable = "avx512fp16" )] |
8169 | #[cfg_attr (test, assert_instr(vsqrtsh))] |
8170 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8171 | pub fn _mm_sqrt_sh(a: __m128h, b: __m128h) -> __m128h { |
8172 | _mm_mask_sqrt_sh(f16x8::ZERO.as_m128h(), 0xff, a, b) |
8173 | } |
8174 | |
8175 | /// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store |
8176 | /// the result in the lower element of dst using writemask k (the element is copied from src when mask |
8177 | /// bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
8178 | /// |
8179 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_sh) |
8180 | #[inline ] |
8181 | #[target_feature (enable = "avx512fp16" )] |
8182 | #[cfg_attr (test, assert_instr(vsqrtsh))] |
8183 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8184 | pub fn _mm_mask_sqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8185 | _mm_mask_sqrt_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
8186 | } |
8187 | |
8188 | /// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store |
8189 | /// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 |
8190 | /// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
8191 | /// |
8192 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_sh) |
8193 | #[inline ] |
8194 | #[target_feature (enable = "avx512fp16" )] |
8195 | #[cfg_attr (test, assert_instr(vsqrtsh))] |
8196 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8197 | pub fn _mm_maskz_sqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8198 | _mm_mask_sqrt_sh(f16x8::ZERO.as_m128h(), k, a, b) |
8199 | } |
8200 | |
8201 | /// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store |
8202 | /// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper |
8203 | /// elements of dst. |
8204 | /// Rounding is done according to the rounding parameter, which can be one of: |
8205 | /// |
8206 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
8207 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
8208 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
8209 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
8210 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
8211 | /// |
8212 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_round_sh) |
8213 | #[inline ] |
8214 | #[target_feature (enable = "avx512fp16" )] |
8215 | #[cfg_attr (test, assert_instr(vsqrtsh, ROUNDING = 8))] |
8216 | #[rustc_legacy_const_generics (2)] |
8217 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8218 | pub fn _mm_sqrt_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
8219 | static_assert_rounding!(ROUNDING); |
8220 | _mm_mask_sqrt_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b) |
8221 | } |
8222 | |
8223 | /// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store |
8224 | /// the result in the lower element of dst using writemask k (the element is copied from src when mask |
8225 | /// bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
8226 | /// Rounding is done according to the rounding parameter, which can be one of: |
8227 | /// |
8228 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
8229 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
8230 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
8231 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
8232 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
8233 | /// |
8234 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_round_sh) |
8235 | #[inline ] |
8236 | #[target_feature (enable = "avx512fp16" )] |
8237 | #[cfg_attr (test, assert_instr(vsqrtsh, ROUNDING = 8))] |
8238 | #[rustc_legacy_const_generics (4)] |
8239 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8240 | pub fn _mm_mask_sqrt_round_sh<const ROUNDING: i32>( |
8241 | src: __m128h, |
8242 | k: __mmask8, |
8243 | a: __m128h, |
8244 | b: __m128h, |
8245 | ) -> __m128h { |
8246 | unsafe { |
8247 | static_assert_rounding!(ROUNDING); |
8248 | vsqrtsh(a, b, src, k, ROUNDING) |
8249 | } |
8250 | } |
8251 | |
8252 | /// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store |
8253 | /// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 |
8254 | /// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
8255 | /// Rounding is done according to the rounding parameter, which can be one of: |
8256 | /// |
8257 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
8258 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
8259 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
8260 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
8261 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
8262 | /// |
8263 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_round_sh) |
8264 | #[inline ] |
8265 | #[target_feature (enable = "avx512fp16" )] |
8266 | #[cfg_attr (test, assert_instr(vsqrtsh, ROUNDING = 8))] |
8267 | #[rustc_legacy_const_generics (3)] |
8268 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8269 | pub fn _mm_maskz_sqrt_round_sh<const ROUNDING: i32>( |
8270 | k: __mmask8, |
8271 | a: __m128h, |
8272 | b: __m128h, |
8273 | ) -> __m128h { |
8274 | static_assert_rounding!(ROUNDING); |
8275 | _mm_mask_sqrt_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b) |
8276 | } |
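
// Illustrative sketch (comments only): the scalar rounding forms accept
// `_MM_FROUND_CUR_DIRECTION` to follow `MXCSR.RC`, matching the plain `_sh` forms:
//
//     let a = _mm_set_sh(1.0);
//     let b = _mm_set_sh(16.0);
//     let r = _mm_sqrt_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b);
//     // lane 0 = 4.0; lanes 1..8 are copied from `a` (all zero here, per `_mm_set_sh`).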
8277 | |
8278 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8279 | /// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum |
8280 | /// value when inputs are NaN or signed-zero values. |
8281 | /// |
8282 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_ph) |
8283 | #[inline ] |
8284 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8285 | #[cfg_attr (test, assert_instr(vmaxph))] |
8286 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8287 | pub fn _mm_max_ph(a: __m128h, b: __m128h) -> __m128h { |
8288 | unsafe { vmaxph_128(a, b) } |
8289 | } |
8290 | |
8291 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8292 | /// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8293 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are |
8294 | /// NaN or signed-zero values. |
8295 | /// |
8296 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_ph) |
8297 | #[inline ] |
8298 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8299 | #[cfg_attr (test, assert_instr(vmaxph))] |
8300 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8301 | pub fn _mm_mask_max_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8302 | unsafe { simd_select_bitmask(k, _mm_max_ph(a, b), src) } |
8303 | } |
8304 | |
8305 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8306 | /// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8307 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are |
8308 | /// NaN or signed-zero values. |
8309 | /// |
8310 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_ph) |
8311 | #[inline ] |
8312 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8313 | #[cfg_attr (test, assert_instr(vmaxph))] |
8314 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8315 | pub fn _mm_maskz_max_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8316 | unsafe { simd_select_bitmask(k, _mm_max_ph(a, b), _mm_setzero_ph()) } |
8317 | } |
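
// Illustrative sketch (comments only): like the underlying VMAXPH instruction, these
// intrinsics return the `b` operand whenever either input is NaN, rather than the
// IEEE 754 maximum. `f16::NAN` from the unstable `f16` type is assumed here:
//
//     let nan = _mm_set1_ph(f16::NAN);
//     let one = _mm_set1_ph(1.0);
//     let r = _mm_max_ph(nan, one); // every lane is 1.0 (taken from `b`), not NaN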
8318 | |
8319 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8320 | /// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum |
8321 | /// value when inputs are NaN or signed-zero values. |
8322 | /// |
8323 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_ph) |
8324 | #[inline ] |
8325 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8326 | #[cfg_attr (test, assert_instr(vmaxph))] |
8327 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8328 | pub fn _mm256_max_ph(a: __m256h, b: __m256h) -> __m256h { |
8329 | unsafe { vmaxph_256(a, b) } |
8330 | } |
8331 | |
8332 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8333 | /// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8334 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are |
8335 | /// NaN or signed-zero values. |
8336 | /// |
8337 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_max_ph) |
8338 | #[inline ] |
8339 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8340 | #[cfg_attr (test, assert_instr(vmaxph))] |
8341 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8342 | pub fn _mm256_mask_max_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
8343 | unsafe { simd_select_bitmask(k, _mm256_max_ph(a, b), src) } |
8344 | } |
8345 | |
8346 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8347 | /// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8348 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are |
8349 | /// NaN or signed-zero values. |
8350 | /// |
8351 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_ph) |
8352 | #[inline ] |
8353 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8354 | #[cfg_attr (test, assert_instr(vmaxph))] |
8355 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8356 | pub fn _mm256_maskz_max_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
8357 | unsafe { simd_select_bitmask(k, _mm256_max_ph(a, b), _mm256_setzero_ph()) } |
8358 | } |
8359 | |
8360 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8361 | /// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum |
8362 | /// value when inputs are NaN or signed-zero values. |
8363 | /// |
8364 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_ph) |
8365 | #[inline ] |
8366 | #[target_feature (enable = "avx512fp16" )] |
8367 | #[cfg_attr (test, assert_instr(vmaxph))] |
8368 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8369 | pub fn _mm512_max_ph(a: __m512h, b: __m512h) -> __m512h { |
8370 | _mm512_max_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b) |
8371 | } |
8372 | |
8373 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8374 | /// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8375 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are |
8376 | /// NaN or signed-zero values. |
8377 | /// |
8378 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_ph) |
8379 | #[inline ] |
8380 | #[target_feature (enable = "avx512fp16" )] |
8381 | #[cfg_attr (test, assert_instr(vmaxph))] |
8382 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8383 | pub fn _mm512_mask_max_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
8384 | unsafe { simd_select_bitmask(k, _mm512_max_ph(a, b), src) } |
8385 | } |
8386 | |
8387 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8388 | /// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8389 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are |
8390 | /// NaN or signed-zero values. |
8391 | /// |
8392 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_ph) |
8393 | #[inline ] |
8394 | #[target_feature (enable = "avx512fp16" )] |
8395 | #[cfg_attr (test, assert_instr(vmaxph))] |
8396 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8397 | pub fn _mm512_maskz_max_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
8398 | unsafe { simd_select_bitmask(k, _mm512_max_ph(a, b), _mm512_setzero_ph()) } |
8399 | } |
8400 | |
8401 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8402 | /// values in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
8403 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are |
8404 | /// NaN or signed-zero values. |
8405 | /// |
8406 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_round_ph) |
8407 | #[inline ] |
8408 | #[target_feature (enable = "avx512fp16" )] |
8409 | #[cfg_attr (test, assert_instr(vmaxph, SAE = 8))] |
8410 | #[rustc_legacy_const_generics (2)] |
8411 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8412 | pub fn _mm512_max_round_ph<const SAE: i32>(a: __m512h, b: __m512h) -> __m512h { |
8413 | unsafe { |
8414 | static_assert_sae!(SAE); |
8415 | vmaxph_512(a, b, SAE) |
8416 | } |
8417 | } |
8418 | |
8419 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8420 | /// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8421 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the |
8422 | /// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values. |
8423 | /// |
8424 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_round_ph) |
8425 | #[inline ] |
8426 | #[target_feature (enable = "avx512fp16" )] |
8427 | #[cfg_attr (test, assert_instr(vmaxph, SAE = 8))] |
8428 | #[rustc_legacy_const_generics (4)] |
8429 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8430 | pub fn _mm512_mask_max_round_ph<const SAE: i32>( |
8431 | src: __m512h, |
8432 | k: __mmask32, |
8433 | a: __m512h, |
8434 | b: __m512h, |
8435 | ) -> __m512h { |
8436 | unsafe { |
8437 | static_assert_sae!(SAE); |
8438 | simd_select_bitmask(k, _mm512_max_round_ph::<SAE>(a, b), src) |
8439 | } |
8440 | } |
8441 | |
8442 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8443 | /// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8444 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the |
8445 | /// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values. |
8446 | /// |
8447 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_round_ph) |
8448 | #[inline ] |
8449 | #[target_feature (enable = "avx512fp16" )] |
8450 | #[cfg_attr (test, assert_instr(vmaxph, SAE = 8))] |
8451 | #[rustc_legacy_const_generics (3)] |
8452 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8453 | pub fn _mm512_maskz_max_round_ph<const SAE: i32>(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
8454 | unsafe { |
8455 | static_assert_sae!(SAE); |
8456 | simd_select_bitmask(k, _mm512_max_round_ph::<SAE>(a, b), _mm512_setzero_ph()) |
8457 | } |
8458 | } |
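
// Illustrative sketch (comments only): the `_round` max/min forms take only an SAE
// (suppress-all-exceptions) flag, not a rounding direction, because the comparison
// itself never rounds. `_mm512_set1_ph` is assumed to be provided elsewhere:
//
//     let a = _mm512_set1_ph(1.0);
//     let b = _mm512_set1_ph(2.0);
//     let r = _mm512_max_round_ph::<_MM_FROUND_NO_EXC>(a, b); // every lane is 2.0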
8459 | |
8460 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum |
8461 | /// value in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements |
8462 | /// of dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value |
8463 | /// when inputs are NaN or signed-zero values. |
8464 | /// |
8465 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sh) |
8466 | #[inline ] |
8467 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8468 | #[cfg_attr (test, assert_instr(vmaxsh))] |
8469 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8470 | pub fn _mm_max_sh(a: __m128h, b: __m128h) -> __m128h { |
8471 | _mm_mask_max_sh(_mm_undefined_ph(), 0xff, a, b) |
8472 | } |
8473 | |
8474 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum |
8475 | /// value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 |
8476 | /// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. Does not follow |
8477 | /// the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values. |
8478 | /// |
8479 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_sh) |
8480 | #[inline ] |
8481 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8482 | #[cfg_attr (test, assert_instr(vmaxsh))] |
8483 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8484 | pub fn _mm_mask_max_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8485 | _mm_mask_max_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
8486 | } |
8487 | |
8488 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value |
8489 | /// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and |
8490 | /// copy the upper 7 packed elements from a to the upper elements of dst. Does not follow the IEEE Standard |
8491 | /// for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values. |
8492 | /// |
8493 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_sh) |
8494 | #[inline ] |
8495 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8496 | #[cfg_attr (test, assert_instr(vmaxsh))] |
8497 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8498 | pub fn _mm_maskz_max_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8499 | _mm_mask_max_sh(f16x8::ZERO.as_m128h(), k, a, b) |
8500 | } |
8501 | |
8502 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value |
8503 | /// in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
8504 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the |
8505 | /// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values. |
8506 | /// |
8507 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_round_sh) |
8508 | #[inline ] |
8509 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8510 | #[cfg_attr (test, assert_instr(vmaxsh, SAE = 8))] |
8511 | #[rustc_legacy_const_generics (2)] |
8512 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8513 | pub fn _mm_max_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h { |
8514 | static_assert_sae!(SAE); |
8515 | _mm_mask_max_round_sh::<SAE>(_mm_undefined_ph(), 0xff, a, b) |
8516 | } |
8517 | |
8518 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value |
8519 | /// in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), |
8520 | /// and copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by |
8521 | /// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic |
8522 | /// (IEEE 754) maximum value when inputs are NaN or signed-zero values. |
8523 | /// |
8524 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_round_sh) |
8525 | #[inline ] |
8526 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8527 | #[cfg_attr (test, assert_instr(vmaxsh, SAE = 8))] |
8528 | #[rustc_legacy_const_generics (4)] |
8529 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8530 | pub fn _mm_mask_max_round_sh<const SAE: i32>( |
8531 | src: __m128h, |
8532 | k: __mmask8, |
8533 | a: __m128h, |
8534 | b: __m128h, |
8535 | ) -> __m128h { |
8536 | unsafe { |
8537 | static_assert_sae!(SAE); |
8538 | vmaxsh(a, b, src, k, SAE) |
8539 | } |
8540 | } |
8541 | |
8542 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value |
8543 | /// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and |
8544 | /// copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by |
8545 | /// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic |
8546 | /// (IEEE 754) maximum value when inputs are NaN or signed-zero values. |
8547 | /// |
8548 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_round_sh) |
8549 | #[inline ] |
8550 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8551 | #[cfg_attr (test, assert_instr(vmaxsh, SAE = 8))] |
8552 | #[rustc_legacy_const_generics (3)] |
8553 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8554 | pub fn _mm_maskz_max_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8555 | static_assert_sae!(SAE); |
8556 | _mm_mask_max_round_sh::<SAE>(f16x8::ZERO.as_m128h(), k, a, b) |
8557 | } |
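
// Illustrative sketch (comments only): the scalar `_sh` comparisons operate on lane 0
// only and pass lanes 1..8 of `a` through unchanged:
//
//     let a = _mm_set_ph(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, -3.0);
//     let b = _mm_set_sh(2.0);
//     let r = _mm_max_sh(a, b); // lane 0 = 2.0; lanes 1..8 = 1.0, 2.0, ..., 7.0 from `a`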
8558 | |
8559 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8560 | /// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value |
8561 | /// when inputs are NaN or signed-zero values. |
8562 | /// |
8563 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_ph) |
8564 | #[inline ] |
8565 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8566 | #[cfg_attr (test, assert_instr(vminph))] |
8567 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8568 | pub fn _mm_min_ph(a: __m128h, b: __m128h) -> __m128h { |
8569 | unsafe { vminph_128(a, b) } |
8570 | } |
8571 | |
8572 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8573 | /// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8574 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are |
8575 | /// NaN or signed-zero values. |
8576 | /// |
8577 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_ph) |
8578 | #[inline ] |
8579 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8580 | #[cfg_attr (test, assert_instr(vminph))] |
8581 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8582 | pub fn _mm_mask_min_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8583 | unsafe { simd_select_bitmask(k, _mm_min_ph(a, b), src) } |
8584 | } |
8585 | |
8586 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8587 | /// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8588 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are |
8589 | /// NaN or signed-zero values. |
8590 | /// |
8591 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_ph) |
8592 | #[inline ] |
8593 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8594 | #[cfg_attr (test, assert_instr(vminph))] |
8595 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8596 | pub fn _mm_maskz_min_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8597 | unsafe { simd_select_bitmask(k, _mm_min_ph(a, b), _mm_setzero_ph()) } |
8598 | } |
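
// Illustrative sketch (comments only): the min family mirrors max, including the
// non-IEEE NaN/signed-zero behaviour (the `b` operand wins when either input is NaN):
//
//     let a = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
//     let b = _mm_set1_ph(4.5);
//     let r = _mm_maskz_min_ph(0xff, a, b);
//     // lanes 0..4 hold 1.0, 2.0, 3.0, 4.0 from `a`; lanes 4..8 hold 4.5 from `b`.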
8599 | |
8600 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8601 | /// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value |
8602 | /// when inputs are NaN or signed-zero values. |
8603 | /// |
8604 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_ph) |
8605 | #[inline ] |
8606 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8607 | #[cfg_attr (test, assert_instr(vminph))] |
8608 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8609 | pub fn _mm256_min_ph(a: __m256h, b: __m256h) -> __m256h { |
8610 | unsafe { vminph_256(a, b) } |
8611 | } |
8612 | |
8613 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8614 | /// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8615 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are |
8616 | /// NaN or signed-zero values. |
8617 | /// |
8618 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_min_ph) |
8619 | #[inline ] |
8620 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8621 | #[cfg_attr (test, assert_instr(vminph))] |
8622 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8623 | pub fn _mm256_mask_min_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
8624 | unsafe { simd_select_bitmask(k, _mm256_min_ph(a, b), src) } |
8625 | } |
8626 | |
8627 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8628 | /// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8629 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are |
8630 | /// NaN or signed-zero values. |
8631 | /// |
8632 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_min_ph) |
8633 | #[inline ] |
8634 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8635 | #[cfg_attr (test, assert_instr(vminph))] |
8636 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8637 | pub fn _mm256_maskz_min_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
8638 | unsafe { simd_select_bitmask(k, _mm256_min_ph(a, b), _mm256_setzero_ph()) } |
8639 | } |
8640 | |
8641 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8642 | /// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value |
8643 | /// when inputs are NaN or signed-zero values. |
8644 | /// |
8645 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_ph) |
8646 | #[inline ] |
8647 | #[target_feature (enable = "avx512fp16" )] |
8648 | #[cfg_attr (test, assert_instr(vminph))] |
8649 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8650 | pub fn _mm512_min_ph(a: __m512h, b: __m512h) -> __m512h { |
8651 | _mm512_min_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b) |
8652 | } |
8653 | |
8654 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8655 | /// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8656 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are |
8657 | /// NaN or signed-zero values. |
8658 | /// |
8659 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_ph) |
8660 | #[inline ] |
8661 | #[target_feature (enable = "avx512fp16" )] |
8662 | #[cfg_attr (test, assert_instr(vminph))] |
8663 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8664 | pub fn _mm512_mask_min_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
8665 | unsafe { simd_select_bitmask(k, _mm512_min_ph(a, b), src) } |
8666 | } |
8667 | |
8668 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8669 | /// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8670 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are |
8671 | /// NaN or signed-zero values. |
8672 | /// |
8673 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_ph) |
8674 | #[inline ] |
8675 | #[target_feature (enable = "avx512fp16" )] |
8676 | #[cfg_attr (test, assert_instr(vminph))] |
8677 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8678 | pub fn _mm512_maskz_min_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
8679 | unsafe { simd_select_bitmask(k, _mm512_min_ph(a, b), _mm512_setzero_ph()) } |
8680 | } |
8681 | |
8682 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8683 | /// values in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not |
8684 | /// follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values. |
8685 | /// |
8686 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_round_ph) |
8687 | #[inline ] |
8688 | #[target_feature (enable = "avx512fp16" )] |
8689 | #[cfg_attr (test, assert_instr(vminph, SAE = 8))] |
8690 | #[rustc_legacy_const_generics (2)] |
8691 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8692 | pub fn _mm512_min_round_ph<const SAE: i32>(a: __m512h, b: __m512h) -> __m512h { |
8693 | unsafe { |
8694 | static_assert_sae!(SAE); |
8695 | vminph_512(a, b, SAE) |
8696 | } |
8697 | } |
8698 | |
8699 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8700 | /// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8701 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the |
8702 | /// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values. |
8703 | /// |
8704 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_round_ph) |
8705 | #[inline ] |
8706 | #[target_feature (enable = "avx512fp16" )] |
8707 | #[cfg_attr (test, assert_instr(vminph, SAE = 8))] |
8708 | #[rustc_legacy_const_generics (4)] |
8709 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8710 | pub fn _mm512_mask_min_round_ph<const SAE: i32>( |
8711 | src: __m512h, |
8712 | k: __mmask32, |
8713 | a: __m512h, |
8714 | b: __m512h, |
8715 | ) -> __m512h { |
8716 | unsafe { |
8717 | static_assert_sae!(SAE); |
8718 | simd_select_bitmask(k, _mm512_min_round_ph::<SAE>(a, b), src) |
8719 | } |
8720 | } |
8721 | |
8722 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8723 | /// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8724 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the |
8725 | /// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values. |
8726 | /// |
8727 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_round_ph) |
8728 | #[inline ] |
8729 | #[target_feature (enable = "avx512fp16" )] |
8730 | #[cfg_attr (test, assert_instr(vminph, SAE = 8))] |
8731 | #[rustc_legacy_const_generics (3)] |
8732 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8733 | pub fn _mm512_maskz_min_round_ph<const SAE: i32>(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
8734 | unsafe { |
8735 | static_assert_sae!(SAE); |
8736 | simd_select_bitmask(k, _mm512_min_round_ph::<SAE>(a, b), _mm512_setzero_ph()) |
8737 | } |
8738 | } |
8739 | |
8740 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum |
8741 | /// value in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements |
8742 | /// of dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when |
8743 | /// inputs are NaN or signed-zero values. |
8744 | /// |
8745 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sh) |
8746 | #[inline ] |
8747 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8748 | #[cfg_attr (test, assert_instr(vminsh))] |
8749 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8750 | pub fn _mm_min_sh(a: __m128h, b: __m128h) -> __m128h { |
8751 | _mm_mask_min_sh(_mm_undefined_ph(), 0xff, a, b) |
8752 | } |
8753 | |
8754 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum |
8755 | /// value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 |
8756 | /// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. Does not follow |
8757 | /// the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values. |
8758 | /// |
8759 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_sh) |
8760 | #[inline ] |
8761 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8762 | #[cfg_attr (test, assert_instr(vminsh))] |
8763 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8764 | pub fn _mm_mask_min_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8765 | _mm_mask_min_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
8766 | } |
8767 | |
8768 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value |
8769 | /// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and |
8770 | /// copy the upper 7 packed elements from a to the upper elements of dst. Does not follow the IEEE Standard |
8771 | /// for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values. |
8772 | /// |
8773 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_sh) |
8774 | #[inline ] |
8775 | #[target_feature (enable = "avx512fp16" )] |
8776 | #[cfg_attr (test, assert_instr(vminsh))] |
8777 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8778 | pub fn _mm_maskz_min_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8779 | _mm_mask_min_sh(f16x8::ZERO.as_m128h(), k, a, b) |
8780 | } |
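
// Illustrative sketch, not part of the upstream sources (`min_sh_example` is a hypothetical
// helper): only lane 0 carries the minimum; lanes 1..=7 are copied from `a`.
//
//     #[target_feature(enable = "avx512fp16")]
//     fn min_sh_example() -> __m128h {
//         let a = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 9.0);
//         let b = _mm_set_sh(1.0);
//         // Lane 0: min(9.0, 1.0) = 1.0; lanes 1..=7 stay 2.0, 3.0, ..., 8.0 from `a`.
//         _mm_min_sh(a, b)
//     }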
8781 | |
8782 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value |
8783 | /// in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
8784 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the |
8785 | /// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values. |
8786 | /// |
8787 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_round_sh) |
8788 | #[inline ] |
8789 | #[target_feature (enable = "avx512fp16" )] |
8790 | #[cfg_attr (test, assert_instr(vminsh, SAE = 8))] |
8791 | #[rustc_legacy_const_generics (2)] |
8792 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8793 | pub fn _mm_min_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h { |
8794 | static_assert_sae!(SAE); |
8795 | _mm_mask_min_round_sh::<SAE>(_mm_undefined_ph(), 0xff, a, b) |
8796 | } |
8797 | |
8798 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value |
8799 | /// in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), |
8800 | /// and copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by |
8801 | /// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic |
8802 | /// (IEEE 754) minimum value when inputs are NaN or signed-zero values. |
8803 | /// |
8804 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_round_sh) |
8805 | #[inline ] |
8806 | #[target_feature (enable = "avx512fp16" )] |
8807 | #[cfg_attr (test, assert_instr(vminsh, SAE = 8))] |
8808 | #[rustc_legacy_const_generics (4)] |
8809 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8810 | pub fn _mm_mask_min_round_sh<const SAE: i32>( |
8811 | src: __m128h, |
8812 | k: __mmask8, |
8813 | a: __m128h, |
8814 | b: __m128h, |
8815 | ) -> __m128h { |
8816 | unsafe { |
8817 | static_assert_sae!(SAE); |
8818 | vminsh(a, b, src, k, SAE) |
8819 | } |
8820 | } |
8821 | |
8822 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value |
8823 | /// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and |
8824 | /// copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by |
8825 | /// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic |
8826 | /// (IEEE 754) minimum value when inputs are NaN or signed-zero values. |
8827 | /// |
8828 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_round_sh) |
8829 | #[inline ] |
8830 | #[target_feature (enable = "avx512fp16" )] |
8831 | #[cfg_attr (test, assert_instr(vminsh, SAE = 8))] |
8832 | #[rustc_legacy_const_generics (3)] |
8833 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8834 | pub fn _mm_maskz_min_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8835 | static_assert_sae!(SAE); |
8836 | _mm_mask_min_round_sh::<SAE>(f16x8::ZERO.as_m128h(), k, a, b) |
8837 | } |
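
// Illustrative sketch, not part of the upstream sources (`min_round_sh_example` is a hypothetical
// helper): `static_assert_sae!` only accepts `_MM_FROUND_CUR_DIRECTION` or `_MM_FROUND_NO_EXC`;
// the latter computes the same minimum while suppressing exception reporting.
//
//     #[target_feature(enable = "avx512fp16")]
//     fn min_round_sh_example(a: __m128h, b: __m128h) -> __m128h {
//         _mm_min_round_sh::<_MM_FROUND_NO_EXC>(a, b)
//     }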
8838 | |
8839 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8840 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst. |
8841 | /// This intrinsic essentially calculates `floor(log2(x))` for each element. |
8842 | /// |
8843 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_ph) |
8844 | #[inline ] |
8845 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8846 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8847 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8848 | pub fn _mm_getexp_ph(a: __m128h) -> __m128h { |
8849 | _mm_mask_getexp_ph(_mm_undefined_ph(), 0xff, a) |
8850 | } |
8851 | |
8852 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8853 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k |
8854 | /// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates |
8855 | /// `floor(log2(x))` for each element. |
8856 | /// |
8857 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_ph) |
8858 | #[inline ] |
8859 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8860 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8861 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8862 | pub fn _mm_mask_getexp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { |
8863 | unsafe { vgetexpph_128(a, src, k) } |
8864 | } |
8865 | |
8866 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8867 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask |
8868 | /// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates |
8869 | /// `floor(log2(x))` for each element. |
8870 | /// |
8871 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_ph) |
8872 | #[inline ] |
8873 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8874 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8875 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8876 | pub fn _mm_maskz_getexp_ph(k: __mmask8, a: __m128h) -> __m128h { |
8877 | _mm_mask_getexp_ph(_mm_setzero_ph(), k, a) |
8878 | } |
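
// Illustrative sketch, not part of the upstream sources (`getexp_example` is a hypothetical
// helper): per lane, `getexp` returns the unbiased exponent as an `f16`, i.e. `floor(log2(|x|))`.
//
//     #[target_feature(enable = "avx512fp16,avx512vl")]
//     fn getexp_example() -> __m128h {
//         let a = _mm_set_ph(0.5, 1.0, 2.0, 3.0, 4.0, 8.0, 10.0, 100.0);
//         // Lanes 0..=7 become 6.0, 3.0, 3.0, 2.0, 1.0, 1.0, 0.0, -1.0.
//         _mm_getexp_ph(a)
//     }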
8879 | |
8880 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8881 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst. |
8882 | /// This intrinsic essentially calculates `floor(log2(x))` for each element. |
8883 | /// |
8884 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getexp_ph) |
8885 | #[inline ] |
8886 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8887 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8888 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8889 | pub fn _mm256_getexp_ph(a: __m256h) -> __m256h { |
8890 | _mm256_mask_getexp_ph(_mm256_undefined_ph(), 0xffff, a) |
8891 | } |
8892 | |
8893 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8894 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k |
8895 | /// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates |
8896 | /// `floor(log2(x))` for each element. |
8897 | /// |
8898 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getexp_ph) |
8899 | #[inline ] |
8900 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8901 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8902 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8903 | pub fn _mm256_mask_getexp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { |
8904 | unsafe { vgetexpph_256(a, src, k) } |
8905 | } |
8906 | |
8907 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8908 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask |
8909 | /// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates |
8910 | /// `floor(log2(x))` for each element. |
8911 | /// |
8912 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getexp_ph) |
8913 | #[inline ] |
8914 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8915 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8916 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8917 | pub fn _mm256_maskz_getexp_ph(k: __mmask16, a: __m256h) -> __m256h { |
8918 | _mm256_mask_getexp_ph(_mm256_setzero_ph(), k, a) |
8919 | } |
8920 | |
8921 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8922 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst. |
8923 | /// This intrinsic essentially calculates `floor(log2(x))` for each element. |
8924 | /// |
8925 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_ph) |
8926 | #[inline ] |
8927 | #[target_feature (enable = "avx512fp16" )] |
8928 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8929 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8930 | pub fn _mm512_getexp_ph(a: __m512h) -> __m512h { |
8931 | _mm512_mask_getexp_ph(_mm512_undefined_ph(), 0xffffffff, a) |
8932 | } |
8933 | |
8934 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8935 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k |
8936 | /// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates |
8937 | /// `floor(log2(x))` for each element. |
8938 | /// |
8939 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_ph) |
8940 | #[inline ] |
8941 | #[target_feature (enable = "avx512fp16" )] |
8942 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8943 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8944 | pub fn _mm512_mask_getexp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { |
8945 | _mm512_mask_getexp_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a) |
8946 | } |
8947 | |
8948 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8949 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask |
8950 | /// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates |
8951 | /// `floor(log2(x))` for each element. |
8952 | /// |
8953 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_ph) |
8954 | #[inline ] |
8955 | #[target_feature (enable = "avx512fp16" )] |
8956 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8957 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8958 | pub fn _mm512_maskz_getexp_ph(k: __mmask32, a: __m512h) -> __m512h { |
8959 | _mm512_mask_getexp_ph(_mm512_setzero_ph(), k, a) |
8960 | } |
8961 | |
8962 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8963 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst. |
8964 | /// This intrinsic essentially calculates `floor(log2(x))` for each element. Exceptions can be suppressed |
8965 | /// by passing _MM_FROUND_NO_EXC in the sae parameter. |
8966 | /// |
8967 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_round_ph) |
8968 | #[inline ] |
8969 | #[target_feature (enable = "avx512fp16" )] |
8970 | #[cfg_attr (test, assert_instr(vgetexpph, SAE = 8))] |
8971 | #[rustc_legacy_const_generics (1)] |
8972 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8973 | pub fn _mm512_getexp_round_ph<const SAE: i32>(a: __m512h) -> __m512h { |
8974 | static_assert_sae!(SAE); |
8975 | _mm512_mask_getexp_round_ph::<SAE>(_mm512_undefined_ph(), 0xffffffff, a) |
8976 | } |
8977 | |
8978 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8979 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k |
8980 | /// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates |
8981 | /// `floor(log2(x))` for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
8982 | /// |
8983 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_round_ph) |
8984 | #[inline ] |
8985 | #[target_feature (enable = "avx512fp16" )] |
8986 | #[cfg_attr (test, assert_instr(vgetexpph, SAE = 8))] |
8987 | #[rustc_legacy_const_generics (3)] |
8988 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8989 | pub fn _mm512_mask_getexp_round_ph<const SAE: i32>( |
8990 | src: __m512h, |
8991 | k: __mmask32, |
8992 | a: __m512h, |
8993 | ) -> __m512h { |
8994 | unsafe { |
8995 | static_assert_sae!(SAE); |
8996 | vgetexpph_512(a, src, k, SAE) |
8997 | } |
8998 | } |
8999 | |
9000 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
9001 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask |
9002 | /// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates |
9003 | /// `floor(log2(x))` for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
9004 | /// |
9005 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_round_ph) |
9006 | #[inline ] |
9007 | #[target_feature (enable = "avx512fp16" )] |
9008 | #[cfg_attr (test, assert_instr(vgetexpph, SAE = 8))] |
9009 | #[rustc_legacy_const_generics (2)] |
9010 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9011 | pub fn _mm512_maskz_getexp_round_ph<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512h { |
9012 | static_assert_sae!(SAE); |
9013 | _mm512_mask_getexp_round_ph::<SAE>(_mm512_setzero_ph(), k, a) |
9014 | } |
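
// Illustrative sketch, not part of the upstream sources (`getexp_round_example` is a hypothetical
// helper): the `_round` variants only add the SAE parameter; a zero-masked, exception-suppressed
// exponent extraction looks like this.
//
//     #[target_feature(enable = "avx512fp16")]
//     fn getexp_round_example(k: __mmask32, a: __m512h) -> __m512h {
//         _mm512_maskz_getexp_round_ph::<_MM_FROUND_NO_EXC>(k, a)
//     }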
9015 | |
9016 | /// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision |
9017 | /// (16-bit) floating-point number representing the integer exponent, store the result in the lower element |
9018 | /// of dst, and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially |
9019 | /// calculates `floor(log2(x))` for the lower element. |
9020 | /// |
9021 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_sh) |
9022 | #[inline ] |
9023 | #[target_feature (enable = "avx512fp16" )] |
9024 | #[cfg_attr (test, assert_instr(vgetexpsh))] |
9025 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9026 | pub fn _mm_getexp_sh(a: __m128h, b: __m128h) -> __m128h { |
9027 | _mm_mask_getexp_sh(f16x8::ZERO.as_m128h(), 0xff, a, b) |
9028 | } |
9029 | |
9030 | /// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision |
9031 | /// (16-bit) floating-point number representing the integer exponent, store the result in the lower element |
9032 | /// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 7 |
9033 | /// packed elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` |
9034 | /// for the lower element. |
9035 | /// |
9036 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_sh) |
9037 | #[inline ] |
9038 | #[target_feature (enable = "avx512fp16" )] |
9039 | #[cfg_attr (test, assert_instr(vgetexpsh))] |
9040 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9041 | pub fn _mm_mask_getexp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
9042 | _mm_mask_getexp_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
9043 | } |
9044 | |
9045 | /// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision |
9046 | /// (16-bit) floating-point number representing the integer exponent, store the result in the lower element |
9047 | /// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed |
9048 | /// elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` for the |
9049 | /// lower element. |
9050 | /// |
9051 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_sh) |
9052 | #[inline ] |
9053 | #[target_feature (enable = "avx512fp16" )] |
9054 | #[cfg_attr (test, assert_instr(vgetexpsh))] |
9055 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9056 | pub fn _mm_maskz_getexp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
9057 | _mm_mask_getexp_sh(f16x8::ZERO.as_m128h(), k, a, b) |
9058 | } |
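
// Illustrative sketch, not part of the upstream sources (`getexp_sh_example` is a hypothetical
// helper): the exponent is taken from `b`, while the upper seven lanes come from `a`.
//
//     #[target_feature(enable = "avx512fp16")]
//     fn getexp_sh_example(a: __m128h) -> __m128h {
//         let b = _mm_set_sh(8.0);
//         // Lane 0 becomes 3.0 (= floor(log2(8.0))); lanes 1..=7 are copied from `a`.
//         _mm_getexp_sh(a, b)
//     }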
9059 | |
9060 | /// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision |
9061 | /// (16-bit) floating-point number representing the integer exponent, store the result in the lower element |
9062 | /// of dst, and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially |
9063 | /// calculates `floor(log2(x))` for the lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
9064 | /// in the sae parameter |
9065 | /// in the sae parameter. |
9066 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_round_sh) |
9067 | #[inline ] |
9068 | #[target_feature (enable = "avx512fp16" )] |
9069 | #[cfg_attr (test, assert_instr(vgetexpsh, SAE = 8))] |
9070 | #[rustc_legacy_const_generics (2)] |
9071 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9072 | pub fn _mm_getexp_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h { |
9073 | static_assert_sae!(SAE); |
9074 | _mm_mask_getexp_round_sh::<SAE>(f16x8::ZERO.as_m128h(), 0xff, a, b) |
9075 | } |
9076 | |
9077 | /// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision |
9078 | /// (16-bit) floating-point number representing the integer exponent, store the result in the lower element |
9079 | /// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 7 |
9080 | /// packed elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` |
9081 | /// for the lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
9082 | /// |
9083 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_round_sh) |
9084 | #[inline ] |
9085 | #[target_feature (enable = "avx512fp16" )] |
9086 | #[cfg_attr (test, assert_instr(vgetexpsh, SAE = 8))] |
9087 | #[rustc_legacy_const_generics (4)] |
9088 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9089 | pub fn _mm_mask_getexp_round_sh<const SAE: i32>( |
9090 | src: __m128h, |
9091 | k: __mmask8, |
9092 | a: __m128h, |
9093 | b: __m128h, |
9094 | ) -> __m128h { |
9095 | unsafe { |
9096 | static_assert_sae!(SAE); |
9097 | vgetexpsh(a, b, src, k, SAE) |
9098 | } |
9099 | } |
9100 | |
9101 | /// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision |
9102 | /// (16-bit) floating-point number representing the integer exponent, store the result in the lower element |
9103 | /// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed |
9104 | /// elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` for the |
9105 | /// lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
9106 | /// |
9107 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_round_sh) |
9108 | #[inline ] |
9109 | #[target_feature (enable = "avx512fp16" )] |
9110 | #[cfg_attr (test, assert_instr(vgetexpsh, SAE = 8))] |
9111 | #[rustc_legacy_const_generics (3)] |
9112 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9113 | pub fn _mm_maskz_getexp_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
9114 | static_assert_sae!(SAE); |
9115 | _mm_mask_getexp_round_sh::<SAE>(f16x8::ZERO.as_m128h(), k, a, b) |
9116 | } |
9117 | |
9118 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9119 | /// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends |
9120 | /// on the interval range defined by norm and the sign depends on sign and the source sign. |
9121 | /// |
9122 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9123 | /// |
9124 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9125 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9126 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9127 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9128 | /// |
9129 | /// The sign is determined by sc which can take the following values: |
9130 | /// |
9131 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9132 | /// _MM_MANT_SIGN_zero // sign = 0 |
9133 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9134 | /// |
9135 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_ph) |
9136 | #[inline ] |
9137 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9138 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9139 | #[rustc_legacy_const_generics (1, 2)] |
9140 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9141 | pub fn _mm_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>( |
9142 | a: __m128h, |
9143 | ) -> __m128h { |
9144 | static_assert_uimm_bits!(NORM, 4); |
9145 | static_assert_uimm_bits!(SIGN, 2); |
9146 | _mm_mask_getmant_ph::<NORM, SIGN>(_mm_undefined_ph(), 0xff, a) |
9147 | } |
9148 | |
9149 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9150 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
9151 | /// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined |
9152 | /// by norm and the sign depends on sign and the source sign. |
9153 | /// |
9154 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9155 | /// |
9156 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9157 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9158 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9159 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9160 | /// |
9161 | /// The sign is determined by sc which can take the following values: |
9162 | /// |
9163 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9164 | /// _MM_MANT_SIGN_zero // sign = 0 |
9165 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9166 | /// |
9167 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_ph) |
9168 | #[inline ] |
9169 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9170 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9171 | #[rustc_legacy_const_generics (3, 4)] |
9172 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9173 | pub fn _mm_mask_getmant_ph< |
9174 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9175 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9176 | >( |
9177 | src: __m128h, |
9178 | k: __mmask8, |
9179 | a: __m128h, |
9180 | ) -> __m128h { |
9181 | unsafe { |
9182 | static_assert_uimm_bits!(NORM, 4); |
9183 | static_assert_uimm_bits!(SIGN, 2); |
9184 | vgetmantph_128(a, (SIGN << 2) | NORM, src, k) |
9185 | } |
9186 | } |
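
// Illustrative sketch, not part of the upstream sources (`getmant_example` is a hypothetical
// helper): the two const generics are packed into a single immediate as `(SIGN << 2) | NORM`
// (as in the body above) before reaching VGETMANTPH. A call that normalizes into [1, 2) and
// clears the sign:
//
//     #[target_feature(enable = "avx512fp16,avx512vl")]
//     fn getmant_example() -> __m128h {
//         let a = _mm_set1_ph(-12.0);
//         // -12.0 = -1.5 * 2^3, so every lane becomes 1.5.
//         _mm_getmant_ph::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_zero>(a)
//     }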
9187 | |
9188 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9189 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
9190 | /// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined |
9191 | /// by norm and the sign depends on sign and the source sign. |
9192 | /// |
9193 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9194 | /// |
9195 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9196 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9197 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9198 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9199 | /// |
9200 | /// The sign is determined by sc which can take the following values: |
9201 | /// |
9202 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9203 | /// _MM_MANT_SIGN_zero // sign = 0 |
9204 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9205 | /// |
9206 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_ph) |
9207 | #[inline ] |
9208 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9209 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9210 | #[rustc_legacy_const_generics (2, 3)] |
9211 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9212 | pub fn _mm_maskz_getmant_ph< |
9213 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9214 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9215 | >( |
9216 | k: __mmask8, |
9217 | a: __m128h, |
9218 | ) -> __m128h { |
9219 | static_assert_uimm_bits!(NORM, 4); |
9220 | static_assert_uimm_bits!(SIGN, 2); |
9221 | _mm_mask_getmant_ph::<NORM, SIGN>(_mm_setzero_ph(), k, a) |
9222 | } |
9223 | |
9224 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9225 | /// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends |
9226 | /// on the interval range defined by norm and the sign depends on sign and the source sign. |
9227 | /// |
9228 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9229 | /// |
9230 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9231 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9232 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9233 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9234 | /// |
9235 | /// The sign is determined by sc which can take the following values: |
9236 | /// |
9237 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9238 | /// _MM_MANT_SIGN_zero // sign = 0 |
9239 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9240 | /// |
9241 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getmant_ph) |
9242 | #[inline ] |
9243 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9244 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9245 | #[rustc_legacy_const_generics (1, 2)] |
9246 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9247 | pub fn _mm256_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>( |
9248 | a: __m256h, |
9249 | ) -> __m256h { |
9250 | static_assert_uimm_bits!(NORM, 4); |
9251 | static_assert_uimm_bits!(SIGN, 2); |
9252 | _mm256_mask_getmant_ph::<NORM, SIGN>(_mm256_undefined_ph(), 0xffff, a) |
9253 | } |
9254 | |
9255 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9256 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
9257 | /// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined |
9258 | /// by norm and the sign depends on sign and the source sign. |
9259 | /// |
9260 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9261 | /// |
9262 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9263 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9264 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9265 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9266 | /// |
9267 | /// The sign is determined by sc which can take the following values: |
9268 | /// |
9269 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9270 | /// _MM_MANT_SIGN_zero // sign = 0 |
9271 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9272 | /// |
9273 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getmant_ph) |
9274 | #[inline ] |
9275 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9276 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9277 | #[rustc_legacy_const_generics (3, 4)] |
9278 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9279 | pub fn _mm256_mask_getmant_ph< |
9280 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9281 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9282 | >( |
9283 | src: __m256h, |
9284 | k: __mmask16, |
9285 | a: __m256h, |
9286 | ) -> __m256h { |
9287 | unsafe { |
9288 | static_assert_uimm_bits!(NORM, 4); |
9289 | static_assert_uimm_bits!(SIGN, 2); |
9290 | vgetmantph_256(a, (SIGN << 2) | NORM, src, k) |
9291 | } |
9292 | } |
9293 | |
9294 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9295 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
9296 | /// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined |
9297 | /// by norm and the sign depends on sign and the source sign. |
9298 | /// |
9299 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9300 | /// |
9301 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9302 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9303 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9304 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9305 | /// |
9306 | /// The sign is determined by sc which can take the following values: |
9307 | /// |
9308 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9309 | /// _MM_MANT_SIGN_zero // sign = 0 |
9310 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9311 | /// |
9312 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getmant_ph) |
9313 | #[inline ] |
9314 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9315 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9316 | #[rustc_legacy_const_generics (2, 3)] |
9317 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9318 | pub fn _mm256_maskz_getmant_ph< |
9319 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9320 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9321 | >( |
9322 | k: __mmask16, |
9323 | a: __m256h, |
9324 | ) -> __m256h { |
9325 | static_assert_uimm_bits!(NORM, 4); |
9326 | static_assert_uimm_bits!(SIGN, 2); |
9327 | _mm256_mask_getmant_ph::<NORM, SIGN>(_mm256_setzero_ph(), k, a) |
9328 | } |
9329 | |
9330 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9331 | /// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends |
9332 | /// on the interval range defined by norm and the sign depends on sign and the source sign. |
9333 | /// |
9334 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9335 | /// |
9336 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9337 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9338 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9339 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9340 | /// |
9341 | /// The sign is determined by sc which can take the following values: |
9342 | /// |
9343 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9344 | /// _MM_MANT_SIGN_zero // sign = 0 |
9345 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9346 | /// |
9347 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_ph) |
9348 | #[inline ] |
9349 | #[target_feature (enable = "avx512fp16" )] |
9350 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9351 | #[rustc_legacy_const_generics (1, 2)] |
9352 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9353 | pub fn _mm512_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>( |
9354 | a: __m512h, |
9355 | ) -> __m512h { |
9356 | static_assert_uimm_bits!(NORM, 4); |
9357 | static_assert_uimm_bits!(SIGN, 2); |
9358 | _mm512_mask_getmant_ph::<NORM, SIGN>(_mm512_undefined_ph(), 0xffffffff, a) |
9359 | } |
9360 | |
9361 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9362 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
9363 | /// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined |
9364 | /// by norm and the sign depends on sign and the source sign. |
9365 | /// |
9366 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9367 | /// |
9368 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9369 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9370 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9371 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9372 | /// |
9373 | /// The sign is determined by sc which can take the following values: |
9374 | /// |
9375 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9376 | /// _MM_MANT_SIGN_zero // sign = 0 |
9377 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9378 | /// |
9379 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_ph) |
9380 | #[inline ] |
9381 | #[target_feature (enable = "avx512fp16" )] |
9382 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9383 | #[rustc_legacy_const_generics (3, 4)] |
9384 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9385 | pub fn _mm512_mask_getmant_ph< |
9386 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9387 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9388 | >( |
9389 | src: __m512h, |
9390 | k: __mmask32, |
9391 | a: __m512h, |
9392 | ) -> __m512h { |
9393 | static_assert_uimm_bits!(NORM, 4); |
9394 | static_assert_uimm_bits!(SIGN, 2); |
9395 | _mm512_mask_getmant_round_ph::<NORM, SIGN, _MM_FROUND_CUR_DIRECTION>(src, k, a) |
9396 | } |
9397 | |
9398 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9399 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
9400 | /// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined |
9401 | /// by norm and the sign depends on sign and the source sign. |
9402 | /// |
9403 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9404 | /// |
9405 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9406 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9407 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9408 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9409 | /// |
9410 | /// The sign is determined by sc which can take the following values: |
9411 | /// |
9412 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9413 | /// _MM_MANT_SIGN_zero // sign = 0 |
9414 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9415 | /// |
9416 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_ph) |
9417 | #[inline ] |
9418 | #[target_feature (enable = "avx512fp16" )] |
9419 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9420 | #[rustc_legacy_const_generics (2, 3)] |
9421 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9422 | pub fn _mm512_maskz_getmant_ph< |
9423 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9424 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9425 | >( |
9426 | k: __mmask32, |
9427 | a: __m512h, |
9428 | ) -> __m512h { |
9429 | static_assert_uimm_bits!(NORM, 4); |
9430 | static_assert_uimm_bits!(SIGN, 2); |
9431 | _mm512_mask_getmant_ph::<NORM, SIGN>(_mm512_setzero_ph(), k, a) |
9432 | } |
9433 | |
9434 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9435 | /// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends |
9436 | /// on the interval range defined by norm and the sign depends on sign and the source sign. Exceptions can |
9437 | /// be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
9438 | /// |
9439 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9440 | /// |
9441 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9442 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9443 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9444 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9445 | /// |
9446 | /// The sign is determined by sc which can take the following values: |
9447 | /// |
9448 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9449 | /// _MM_MANT_SIGN_zero // sign = 0 |
9450 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9453 | /// |
9454 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_round_ph) |
9455 | #[inline ] |
9456 | #[target_feature (enable = "avx512fp16" )] |
9457 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))] |
9458 | #[rustc_legacy_const_generics (1, 2, 3)] |
9459 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9460 | pub fn _mm512_getmant_round_ph< |
9461 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9462 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9463 | const SAE: i32, |
9464 | >( |
9465 | a: __m512h, |
9466 | ) -> __m512h { |
9467 | static_assert_uimm_bits!(NORM, 4); |
9468 | static_assert_uimm_bits!(SIGN, 2); |
9469 | static_assert_sae!(SAE); |
9470 | _mm512_mask_getmant_round_ph::<NORM, SIGN, SAE>(_mm512_undefined_ph(), 0xffffffff, a) |
9471 | } |
9472 | |
9473 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9474 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
9475 | /// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined |
9476 | /// by norm and the sign depends on sign and the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
9477 | /// in the sae parameter. |
9478 | /// |
9479 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9480 | /// |
9481 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9482 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9483 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9484 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9485 | /// |
9486 | /// The sign is determined by sc which can take the following values: |
9487 | /// |
9488 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9489 | /// _MM_MANT_SIGN_zero // sign = 0 |
9490 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9493 | /// |
9494 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_round_ph) |
9495 | #[inline ] |
9496 | #[target_feature (enable = "avx512fp16" )] |
9497 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))] |
9498 | #[rustc_legacy_const_generics (3, 4, 5)] |
9499 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9500 | pub fn _mm512_mask_getmant_round_ph< |
9501 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9502 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9503 | const SAE: i32, |
9504 | >( |
9505 | src: __m512h, |
9506 | k: __mmask32, |
9507 | a: __m512h, |
9508 | ) -> __m512h { |
9509 | unsafe { |
9510 | static_assert_uimm_bits!(NORM, 4); |
9511 | static_assert_uimm_bits!(SIGN, 2); |
9512 | static_assert_sae!(SAE); |
9513 | vgetmantph_512(a, (SIGN << 2) | NORM, src, k, SAE) |
9514 | } |
9515 | } |
9516 | |
9517 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9518 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
9519 | /// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined |
9520 | /// by norm and the sign depends on sign and the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
9521 | /// in the sae parameter. |
9522 | /// |
9523 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9524 | /// |
9525 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9526 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9527 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9528 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9529 | /// |
9530 | /// The sign is determined by sc which can take the following values: |
9531 | /// |
9532 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9533 | /// _MM_MANT_SIGN_zero // sign = 0 |
9534 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9537 | /// |
9538 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_round_ph) |
9539 | #[inline ] |
9540 | #[target_feature (enable = "avx512fp16" )] |
9541 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))] |
9542 | #[rustc_legacy_const_generics (2, 3, 4)] |
9543 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9544 | pub fn _mm512_maskz_getmant_round_ph< |
9545 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9546 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9547 | const SAE: i32, |
9548 | >( |
9549 | k: __mmask32, |
9550 | a: __m512h, |
9551 | ) -> __m512h { |
9552 | static_assert_uimm_bits!(NORM, 4); |
9553 | static_assert_uimm_bits!(SIGN, 2); |
9554 | static_assert_sae!(SAE); |
9555 | _mm512_mask_getmant_round_ph::<NORM, SIGN, SAE>(_mm512_setzero_ph(), k, a) |
9556 | } |
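
// Illustrative sketch, not part of the upstream sources (`getmant_round_example` is a
// hypothetical helper): the `_round` form simply appends the SAE const generic to the
// NORM/SIGN pair.
//
//     #[target_feature(enable = "avx512fp16")]
//     fn getmant_round_example(a: __m512h) -> __m512h {
//         // Normalize into [0.5, 1), keep the source sign, and suppress exceptions.
//         _mm512_getmant_round_ph::<_MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src, _MM_FROUND_NO_EXC>(a)
//     }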
9557 | |
9558 | /// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store |
9559 | /// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper |
9560 | /// elements of dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends |
9561 | /// on the interval range defined by norm and the sign depends on sign and the source sign. |
9562 | /// |
9563 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9564 | /// |
9565 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9566 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9567 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9568 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9569 | /// |
9570 | /// The sign is determined by sc which can take the following values: |
9571 | /// |
9572 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9573 | /// _MM_MANT_SIGN_zero // sign = 0 |
9574 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9575 | /// |
9576 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_sh) |
9577 | #[inline ] |
9578 | #[target_feature (enable = "avx512fp16" )] |
9579 | #[cfg_attr (test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))] |
9580 | #[rustc_legacy_const_generics (2, 3)] |
9581 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9582 | pub fn _mm_getmant_sh<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>( |
9583 | a: __m128h, |
9584 | b: __m128h, |
9585 | ) -> __m128h { |
9586 | static_assert_uimm_bits!(NORM, 4); |
9587 | static_assert_uimm_bits!(SIGN, 2); |
9588 | _mm_mask_getmant_sh::<NORM, SIGN>(f16x8::ZERO.as_m128h(), 0xff, a, b) |
9589 | } |
9590 | |
9591 | /// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store |
9592 | /// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), |
9593 | /// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates |
9594 | /// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and |
9595 | /// the source sign. |
9596 | /// |
9597 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9598 | /// |
9599 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9600 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9601 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9602 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9603 | /// |
9604 | /// The sign is determined by sc which can take the following values: |
9605 | /// |
9606 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9607 | /// _MM_MANT_SIGN_zero // sign = 0 |
9608 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9609 | /// |
9610 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_sh) |
9611 | #[inline ] |
9612 | #[target_feature (enable = "avx512fp16" )] |
9613 | #[cfg_attr (test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))] |
9614 | #[rustc_legacy_const_generics (4, 5)] |
9615 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9616 | pub fn _mm_mask_getmant_sh< |
9617 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9618 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9619 | >( |
9620 | src: __m128h, |
9621 | k: __mmask8, |
9622 | a: __m128h, |
9623 | b: __m128h, |
9624 | ) -> __m128h { |
9625 | static_assert_uimm_bits!(NORM, 4); |
9626 | static_assert_uimm_bits!(SIGN, 2); |
9627 | _mm_mask_getmant_round_sh::<NORM, SIGN, _MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
9628 | } |
9629 | |
9630 | /// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store |
9631 | /// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), |
9632 | /// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates |
9633 | /// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and |
9634 | /// the source sign. |
9635 | /// |
9636 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9637 | /// |
9638 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9639 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9640 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9641 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9642 | /// |
9643 | /// The sign is determined by sc which can take the following values: |
9644 | /// |
9645 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9646 | /// _MM_MANT_SIGN_zero // sign = 0 |
9647 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9648 | /// |
9649 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_sh) |
9650 | #[inline ] |
9651 | #[target_feature (enable = "avx512fp16" )] |
9652 | #[cfg_attr (test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))] |
9653 | #[rustc_legacy_const_generics (3, 4)] |
9654 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9655 | pub fn _mm_maskz_getmant_sh< |
9656 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9657 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9658 | >( |
9659 | k: __mmask8, |
9660 | a: __m128h, |
9661 | b: __m128h, |
9662 | ) -> __m128h { |
9663 | static_assert_uimm_bits!(NORM, 4); |
9664 | static_assert_uimm_bits!(SIGN, 2); |
9665 | _mm_mask_getmant_sh::<NORM, SIGN>(f16x8::ZERO.as_m128h(), k, a, b) |
9666 | } |
9667 | |
9668 | /// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store |
9669 | /// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper |
9670 | /// elements of dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends |
9671 | /// on the interval range defined by norm and the sign depends on sign and the source sign. Exceptions can |
9672 | /// be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
9673 | /// |
9674 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9675 | /// |
9676 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9677 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9678 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9679 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9680 | /// |
9681 | /// The sign is determined by sc which can take the following values: |
9682 | /// |
9683 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9684 | /// _MM_MANT_SIGN_zero // sign = 0 |
9685 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9688 | /// |
9689 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_round_sh) |
9690 | #[inline ] |
9691 | #[target_feature (enable = "avx512fp16" )] |
9692 | #[cfg_attr (test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))] |
9693 | #[rustc_legacy_const_generics (2, 3, 4)] |
9694 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9695 | pub fn _mm_getmant_round_sh< |
9696 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9697 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9698 | const SAE: i32, |
9699 | >( |
9700 | a: __m128h, |
9701 | b: __m128h, |
9702 | ) -> __m128h { |
9703 | static_assert_uimm_bits!(NORM, 4); |
9704 | static_assert_uimm_bits!(SIGN, 2); |
9705 | static_assert_sae!(SAE); |
9706 | _mm_mask_getmant_round_sh::<NORM, SIGN, SAE>(f16x8::ZERO.as_m128h(), 0xff, a, b) |
9707 | } |
9708 | |
9709 | /// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store |
9710 | /// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), |
9711 | /// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates |
9712 | /// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and |
9713 | /// the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
9714 | /// |
9715 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9716 | /// |
9717 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9718 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9719 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9720 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9721 | /// |
9722 | /// The sign is determined by sc which can take the following values: |
9723 | /// |
9724 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9725 | /// _MM_MANT_SIGN_zero // sign = 0 |
9726 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9729 | /// |
9730 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_round_sh) |
9731 | #[inline ] |
9732 | #[target_feature (enable = "avx512fp16" )] |
9733 | #[cfg_attr (test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))] |
9734 | #[rustc_legacy_const_generics (4, 5, 6)] |
9735 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9736 | pub fn _mm_mask_getmant_round_sh< |
9737 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9738 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9739 | const SAE: i32, |
9740 | >( |
9741 | src: __m128h, |
9742 | k: __mmask8, |
9743 | a: __m128h, |
9744 | b: __m128h, |
9745 | ) -> __m128h { |
9746 | unsafe { |
9747 | static_assert_uimm_bits!(NORM, 4); |
9748 | static_assert_uimm_bits!(SIGN, 2); |
9749 | static_assert_sae!(SAE); |
9750 | vgetmantsh(a, b, (SIGN << 2) | NORM, src, k, SAE) |
9751 | } |
9752 | } |
9753 | |
9754 | /// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store |
9755 | /// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), |
9756 | /// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates |
9757 | /// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and |
9758 | /// the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
9759 | /// |
9760 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9761 | /// |
9762 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9763 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9764 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9765 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9766 | /// |
9767 | /// The sign is determined by sc which can take the following values: |
9768 | /// |
9769 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9770 | /// _MM_MANT_SIGN_zero // sign = 0 |
9771 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9774 | /// |
9775 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_round_sh) |
9776 | #[inline ] |
9777 | #[target_feature (enable = "avx512fp16" )] |
9778 | #[cfg_attr (test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))] |
9779 | #[rustc_legacy_const_generics (3, 4, 5)] |
9780 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9781 | pub fn _mm_maskz_getmant_round_sh< |
9782 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9783 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9784 | const SAE: i32, |
9785 | >( |
9786 | k: __mmask8, |
9787 | a: __m128h, |
9788 | b: __m128h, |
9789 | ) -> __m128h { |
9790 | static_assert_uimm_bits!(NORM, 4); |
9791 | static_assert_uimm_bits!(SIGN, 2); |
9792 | static_assert_sae!(SAE); |
9793 | _mm_mask_getmant_round_sh::<NORM, SIGN, SAE>(f16x8::ZERO.as_m128h(), k, a, b)
9794 | } |
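
// A minimal illustrative sketch (hypothetical helper, not part of the intrinsic set), assuming the
// `_MM_MANT_*` constants from the AVX-512 mantissa enums are in scope: extract the [1, 2) mantissa
// of the lower lane with the source sign kept and exceptions suppressed. For a lower lane of
// 12.0 = 1.5 * 2^3 the result's lower lane is 1.5; lanes 1..7 are copied from `a`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn mantissa_of_lower_lane(a: __m128h, b: __m128h) -> __m128h {
    _mm_maskz_getmant_round_sh::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_NO_EXC>(0b1, a, b)
}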
9795 | |
9796 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9797 | /// specified by imm8, and store the results in dst. |
9798 | /// |
9799 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9800 | /// |
9801 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9802 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9803 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9804 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9805 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9806 | /// |
9807 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_ph) |
9808 | #[inline ] |
9809 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9810 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9811 | #[rustc_legacy_const_generics (1)] |
9812 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9813 | pub fn _mm_roundscale_ph<const IMM8: i32>(a: __m128h) -> __m128h { |
9814 | static_assert_uimm_bits!(IMM8, 8); |
9815 | _mm_mask_roundscale_ph::<IMM8>(_mm_undefined_ph(), 0xff, a)
9816 | } |
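
// Illustrative sketch (hypothetical helper, not part of the intrinsic set): VRNDSCALEPH's IMM8
// packs the number of kept fraction bits M into bits [7:4] and the rounding mode into bits [3:0];
// each lane becomes round(x * 2^M) * 2^-M. With IMM8 = 0x12 (M = 1, round up), a lane holding 1.2
// becomes 1.5.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn round_up_to_halves(a: __m128h) -> __m128h {
    _mm_roundscale_ph::<0x12>(a)
}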
9817 | |
9818 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9819 | /// specified by imm8, and store the results in dst using writemask k (elements are copied from src when |
9820 | /// the corresponding mask bit is not set). |
9821 | /// |
9822 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9823 | /// |
9824 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9825 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9826 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9827 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9828 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9829 | /// |
9830 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_ph) |
9831 | #[inline ] |
9832 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9833 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9834 | #[rustc_legacy_const_generics (3)] |
9835 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9836 | pub fn _mm_mask_roundscale_ph<const IMM8: i32>(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { |
9837 | unsafe { |
9838 | static_assert_uimm_bits!(IMM8, 8); |
9839 | vrndscaleph_128(a, IMM8, src, k) |
9840 | } |
9841 | } |
9842 | |
9843 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9844 | /// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
9845 | /// mask bit is not set). |
9846 | /// |
9847 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9848 | /// |
9849 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9850 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9851 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9852 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9853 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9854 | /// |
9855 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_ph) |
9856 | #[inline ] |
9857 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9858 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9859 | #[rustc_legacy_const_generics (2)] |
9860 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9861 | pub fn _mm_maskz_roundscale_ph<const IMM8: i32>(k: __mmask8, a: __m128h) -> __m128h { |
9862 | static_assert_uimm_bits!(IMM8, 8); |
9863 | _mm_mask_roundscale_ph::<IMM8>(_mm_setzero_ph(), k, a)
9864 | } |
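
// Illustrative sketch (hypothetical helper, not part of the intrinsic set): with a zeromask, only
// lanes whose mask bit is set receive the rounded value. Here the low four lanes are truncated to
// integers (IMM8 = 0x03: M = 0, round toward zero) and the high four lanes are zeroed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn truncate_low_half(a: __m128h) -> __m128h {
    _mm_maskz_roundscale_ph::<0x03>(0b0000_1111, a)
}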
9865 | |
9866 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9867 | /// specified by imm8, and store the results in dst. |
9868 | /// |
9869 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9870 | /// |
9871 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9872 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9873 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9874 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9875 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9876 | /// |
9877 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_roundscale_ph) |
9878 | #[inline ] |
9879 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9880 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9881 | #[rustc_legacy_const_generics (1)] |
9882 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9883 | pub fn _mm256_roundscale_ph<const IMM8: i32>(a: __m256h) -> __m256h { |
9884 | static_assert_uimm_bits!(IMM8, 8); |
9885 | _mm256_mask_roundscale_ph::<IMM8>(_mm256_undefined_ph(), 0xffff, a)
9886 | } |
9887 | |
9888 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9889 | /// specified by imm8, and store the results in dst using writemask k (elements are copied from src when |
9890 | /// the corresponding mask bit is not set). |
9891 | /// |
9892 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9893 | /// |
9894 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9895 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9896 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9897 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9898 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9899 | /// |
9900 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_roundscale_ph) |
9901 | #[inline ] |
9902 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9903 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9904 | #[rustc_legacy_const_generics (3)] |
9905 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9906 | pub fn _mm256_mask_roundscale_ph<const IMM8: i32>( |
9907 | src: __m256h, |
9908 | k: __mmask16, |
9909 | a: __m256h, |
9910 | ) -> __m256h { |
9911 | unsafe { |
9912 | static_assert_uimm_bits!(IMM8, 8); |
9913 | vrndscaleph_256(a, IMM8, src, k) |
9914 | } |
9915 | } |
9916 | |
9917 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9918 | /// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
9919 | /// mask bit is not set). |
9920 | /// |
9921 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9922 | /// |
9923 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9924 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9925 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9926 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9927 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9928 | /// |
9929 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_roundscale_ph) |
9930 | #[inline ] |
9931 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9932 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9933 | #[rustc_legacy_const_generics (2)] |
9934 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9935 | pub fn _mm256_maskz_roundscale_ph<const IMM8: i32>(k: __mmask16, a: __m256h) -> __m256h { |
9936 | static_assert_uimm_bits!(IMM8, 8); |
9937 | _mm256_mask_roundscale_ph::<IMM8>(_mm256_setzero_ph(), k, a)
9938 | } |
9939 | |
9940 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9941 | /// specified by imm8, and store the results in dst. |
9942 | /// |
9943 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9944 | /// |
9945 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9946 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9947 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9948 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9949 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9950 | /// |
9951 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_ph) |
9952 | #[inline ] |
9953 | #[target_feature (enable = "avx512fp16" )] |
9954 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9955 | #[rustc_legacy_const_generics (1)] |
9956 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9957 | pub fn _mm512_roundscale_ph<const IMM8: i32>(a: __m512h) -> __m512h { |
9958 | static_assert_uimm_bits!(IMM8, 8); |
9959 | _mm512_mask_roundscale_ph::<IMM8>(_mm512_undefined_ph(), 0xffffffff, a)
9960 | } |
9961 | |
9962 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9963 | /// specified by imm8, and store the results in dst using writemask k (elements are copied from src when |
9964 | /// the corresponding mask bit is not set). |
9965 | /// |
9966 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9967 | /// |
9968 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9969 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9970 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9971 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9972 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9973 | /// |
9974 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_ph) |
9975 | #[inline ] |
9976 | #[target_feature (enable = "avx512fp16" )] |
9977 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9978 | #[rustc_legacy_const_generics (3)] |
9979 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9980 | pub fn _mm512_mask_roundscale_ph<const IMM8: i32>( |
9981 | src: __m512h, |
9982 | k: __mmask32, |
9983 | a: __m512h, |
9984 | ) -> __m512h { |
9985 | static_assert_uimm_bits!(IMM8, 8); |
9986 | _mm512_mask_roundscale_round_ph::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a) |
9987 | } |
9988 | |
9989 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9990 | /// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
9991 | /// mask bit is not set). |
9992 | /// |
9993 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9994 | /// |
9995 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9996 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9997 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9998 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9999 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10000 | /// |
10001 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_ph) |
10002 | #[inline ] |
10003 | #[target_feature (enable = "avx512fp16" )] |
10004 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
10005 | #[rustc_legacy_const_generics (2)] |
10006 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10007 | pub fn _mm512_maskz_roundscale_ph<const IMM8: i32>(k: __mmask32, a: __m512h) -> __m512h { |
10008 | static_assert_uimm_bits!(IMM8, 8); |
10009 | _mm512_mask_roundscale_ph::<IMM8>(_mm512_setzero_ph(), k, a)
10010 | } |
10011 | |
10012 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
10013 | /// specified by imm8, and store the results in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
10014 | /// in the sae parameter |
10015 | /// |
10016 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10017 | /// |
10018 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10019 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10020 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10021 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10022 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10023 | /// |
10024 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_round_ph) |
10025 | #[inline ] |
10026 | #[target_feature (enable = "avx512fp16" )] |
10027 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))] |
10028 | #[rustc_legacy_const_generics (1, 2)] |
10029 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10030 | pub fn _mm512_roundscale_round_ph<const IMM8: i32, const SAE: i32>(a: __m512h) -> __m512h { |
10031 | static_assert_uimm_bits!(IMM8, 8); |
10032 | static_assert_sae!(SAE); |
10033 | _mm512_mask_roundscale_round_ph::<IMM8, SAE>(_mm512_undefined_ph(), 0xffffffff, a)
10034 | } |
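
// Illustrative sketch (hypothetical helper, not part of the intrinsic set): the SAE parameter of
// the `_round` variants accepts only _MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC. Here all 32
// lanes are rounded to integers (IMM8 = 0, nearest-even) with exceptions suppressed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn round_to_int_suppressed(a: __m512h) -> __m512h {
    _mm512_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(a)
}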
10035 | |
10036 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
10037 | /// specified by imm8, and store the results in dst using writemask k (elements are copied from src when |
10038 | /// the corresponding mask bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
10039 | /// in the sae parameter |
10040 | /// |
10041 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10042 | /// |
10043 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10044 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10045 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10046 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10047 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10048 | /// |
10049 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_round_ph) |
10050 | #[inline ] |
10051 | #[target_feature (enable = "avx512fp16" )] |
10052 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))] |
10053 | #[rustc_legacy_const_generics (3, 4)] |
10054 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10055 | pub fn _mm512_mask_roundscale_round_ph<const IMM8: i32, const SAE: i32>( |
10056 | src: __m512h, |
10057 | k: __mmask32, |
10058 | a: __m512h, |
10059 | ) -> __m512h { |
10060 | unsafe { |
10061 | static_assert_uimm_bits!(IMM8, 8); |
10062 | static_assert_sae!(SAE); |
10063 | vrndscaleph_512(a, IMM8, src, k, SAE) |
10064 | } |
10065 | } |
10066 | |
10067 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
10068 | /// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
10069 | /// mask bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
10070 | /// |
10071 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10072 | /// |
10073 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10074 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10075 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10076 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10077 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10078 | /// |
10079 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_round_ph) |
10080 | #[inline ] |
10081 | #[target_feature (enable = "avx512fp16" )] |
10082 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))] |
10083 | #[rustc_legacy_const_generics (2, 3)] |
10084 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10085 | pub fn _mm512_maskz_roundscale_round_ph<const IMM8: i32, const SAE: i32>( |
10086 | k: __mmask32, |
10087 | a: __m512h, |
10088 | ) -> __m512h { |
10089 | static_assert_uimm_bits!(IMM8, 8); |
10090 | static_assert_sae!(SAE); |
10091 | _mm512_mask_roundscale_round_ph::<IMM8, SAE>(_mm512_setzero_ph(), k, a)
10092 | } |
10093 | |
10094 | /// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits |
10095 | /// specified by imm8, store the result in the lower element of dst, and copy the upper 7 packed elements |
10096 | /// from a to the upper elements of dst. |
10097 | /// |
10098 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10099 | /// |
10100 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10101 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10102 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10103 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10104 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10105 | /// |
10106 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_sh) |
10107 | #[inline ] |
10108 | #[target_feature (enable = "avx512fp16" )] |
10109 | #[cfg_attr (test, assert_instr(vrndscalesh, IMM8 = 0))] |
10110 | #[rustc_legacy_const_generics (2)] |
10111 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10112 | pub fn _mm_roundscale_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> __m128h { |
10113 | static_assert_uimm_bits!(IMM8, 8); |
10114 | _mm_mask_roundscale_sh::<IMM8>(f16x8::ZERO.as_m128h(), 0xff, a, b)
10115 | } |
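
// Illustrative sketch (hypothetical helper, not part of the intrinsic set): the scalar form rounds
// only lane 0 of `b` and copies lanes 1..7 of the result from `a`. IMM8 = 0x01 (M = 0, round down)
// floors the lower element.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn floor_lower_lane(a: __m128h, b: __m128h) -> __m128h {
    _mm_roundscale_sh::<0x01>(a, b)
}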
10116 | |
10117 | /// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits |
10118 | /// specified by imm8, store the result in the lower element of dst using writemask k (the element is copied |
10119 | /// from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
10120 | /// |
10121 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10122 | /// |
10123 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10124 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10125 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10126 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10127 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10128 | /// |
10129 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_sh) |
10130 | #[inline ] |
10131 | #[target_feature (enable = "avx512fp16" )] |
10132 | #[cfg_attr (test, assert_instr(vrndscalesh, IMM8 = 0))] |
10133 | #[rustc_legacy_const_generics (4)] |
10134 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10135 | pub fn _mm_mask_roundscale_sh<const IMM8: i32>( |
10136 | src: __m128h, |
10137 | k: __mmask8, |
10138 | a: __m128h, |
10139 | b: __m128h, |
10140 | ) -> __m128h { |
10141 | static_assert_uimm_bits!(IMM8, 8); |
10142 | _mm_mask_roundscale_round_sh::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
10143 | } |
10144 | |
10145 | /// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits |
10146 | /// specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed |
10147 | /// out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
10148 | /// |
10149 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10150 | /// |
10151 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10152 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10153 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10154 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10155 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10156 | /// |
10157 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_sh) |
10158 | #[inline ] |
10159 | #[target_feature (enable = "avx512fp16" )] |
10160 | #[cfg_attr (test, assert_instr(vrndscalesh, IMM8 = 0))] |
10161 | #[rustc_legacy_const_generics (3)] |
10162 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10163 | pub fn _mm_maskz_roundscale_sh<const IMM8: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
10164 | static_assert_uimm_bits!(IMM8, 8); |
10165 | _mm_mask_roundscale_sh::<IMM8>(f16x8::ZERO.as_m128h(), k, a, b)
10166 | } |
10167 | |
10168 | /// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits |
10169 | /// specified by imm8, store the result in the lower element of dst, and copy the upper 7 packed elements |
10170 | /// from a to the upper elements of dst. |
10171 | /// |
10172 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10173 | /// |
10174 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10175 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10176 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10177 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10178 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10179 | /// |
10180 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
10181 | /// |
10182 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_round_sh) |
10183 | #[inline ] |
10184 | #[target_feature (enable = "avx512fp16" )] |
10185 | #[cfg_attr (test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))] |
10186 | #[rustc_legacy_const_generics (2, 3)] |
10187 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10188 | pub fn _mm_roundscale_round_sh<const IMM8: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __m128h { |
10189 | static_assert_uimm_bits!(IMM8, 8); |
10190 | static_assert_sae!(SAE); |
10191 | _mm_mask_roundscale_round_sh::<IMM8, SAE>(f16x8::ZERO.as_m128h(), 0xff, a, b)
10192 | } |
10193 | |
10194 | /// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits |
10195 | /// specified by imm8, store the result in the lower element of dst using writemask k (the element is copied |
10196 | /// from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
10197 | /// |
10198 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10199 | /// |
10200 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10201 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10202 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10203 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10204 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10205 | /// |
10206 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
10207 | /// |
10208 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_round_sh) |
10209 | #[inline ] |
10210 | #[target_feature (enable = "avx512fp16" )] |
10211 | #[cfg_attr (test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))] |
10212 | #[rustc_legacy_const_generics (4, 5)] |
10213 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10214 | pub fn _mm_mask_roundscale_round_sh<const IMM8: i32, const SAE: i32>( |
10215 | src: __m128h, |
10216 | k: __mmask8, |
10217 | a: __m128h, |
10218 | b: __m128h, |
10219 | ) -> __m128h { |
10220 | unsafe { |
10221 | static_assert_uimm_bits!(IMM8, 8); |
10222 | static_assert_sae!(SAE); |
10223 | vrndscalesh(a, b, src, k, IMM8, SAE) |
10224 | } |
10225 | } |
10226 | |
10227 | /// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits |
10228 | /// specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed |
10229 | /// out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
10230 | /// |
10231 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10232 | /// |
10233 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10234 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10235 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10236 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10237 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10238 | /// |
10239 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
10240 | /// |
10241 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_round_sh) |
10242 | #[inline ] |
10243 | #[target_feature (enable = "avx512fp16" )] |
10244 | #[cfg_attr (test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))] |
10245 | #[rustc_legacy_const_generics (3, 4)] |
10246 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10247 | pub fn _mm_maskz_roundscale_round_sh<const IMM8: i32, const SAE: i32>( |
10248 | k: __mmask8, |
10249 | a: __m128h, |
10250 | b: __m128h, |
10251 | ) -> __m128h { |
10252 | static_assert_uimm_bits!(IMM8, 8); |
10253 | static_assert_sae!(SAE); |
10254 | _mm_mask_roundscale_round_sh::<IMM8, SAE>(f16x8::ZERO.as_m128h(), k, a, b)
10255 | } |
10256 | |
10257 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10258 | /// the results in dst. |
10259 | /// |
10260 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_ph) |
10261 | #[inline ] |
10262 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10263 | #[cfg_attr (test, assert_instr(vscalefph))] |
10264 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10265 | pub fn _mm_scalef_ph(a: __m128h, b: __m128h) -> __m128h { |
10266 | _mm_mask_scalef_ph(_mm_undefined_ph(), 0xff, a, b)
10267 | } |
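
// Illustrative sketch (hypothetical helper, not part of the intrinsic set): VSCALEFPH computes
// a * 2^floor(b) per lane, so multiplying every lane by a power of two is a matter of broadcasting
// the exponent into `b`. Here each lane of `a` is scaled by 2^3 = 8.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn scale_by_eight(a: __m128h) -> __m128h {
    _mm_scalef_ph(a, _mm_set1_ph(3.0))
}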
10268 | |
10269 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10270 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
10271 | /// |
10272 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_ph) |
10273 | #[inline ] |
10274 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10275 | #[cfg_attr (test, assert_instr(vscalefph))] |
10276 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10277 | pub fn _mm_mask_scalef_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
10278 | unsafe { vscalefph_128(a, b, src, k) } |
10279 | } |
10280 | |
10281 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10282 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
10283 | /// |
10284 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_ph) |
10285 | #[inline ] |
10286 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10287 | #[cfg_attr (test, assert_instr(vscalefph))] |
10288 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10289 | pub fn _mm_maskz_scalef_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
10290 | _mm_mask_scalef_ph(_mm_setzero_ph(), k, a, b)
10291 | } |
10292 | |
10293 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10294 | /// the results in dst. |
10295 | /// |
10296 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_scalef_ph) |
10297 | #[inline ] |
10298 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10299 | #[cfg_attr (test, assert_instr(vscalefph))] |
10300 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10301 | pub fn _mm256_scalef_ph(a: __m256h, b: __m256h) -> __m256h { |
10302 | _mm256_mask_scalef_ph(_mm256_undefined_ph(), 0xffff, a, b)
10303 | } |
10304 | |
10305 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10306 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
10307 | /// |
10308 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_scalef_ph) |
10309 | #[inline ] |
10310 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10311 | #[cfg_attr (test, assert_instr(vscalefph))] |
10312 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10313 | pub fn _mm256_mask_scalef_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
10314 | unsafe { vscalefph_256(a, b, src, k) } |
10315 | } |
10316 | |
10317 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10318 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
10319 | /// |
10320 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_scalef_ph) |
10321 | #[inline ] |
10322 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10323 | #[cfg_attr (test, assert_instr(vscalefph))] |
10324 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10325 | pub fn _mm256_maskz_scalef_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
10326 | _mm256_mask_scalef_ph(_mm256_setzero_ph(), k, a, b)
10327 | } |
10328 | |
10329 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10330 | /// the results in dst. |
10331 | /// |
10332 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_ph) |
10333 | #[inline ] |
10334 | #[target_feature (enable = "avx512fp16" )] |
10335 | #[cfg_attr (test, assert_instr(vscalefph))] |
10336 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10337 | pub fn _mm512_scalef_ph(a: __m512h, b: __m512h) -> __m512h { |
10338 | _mm512_mask_scalef_ph(_mm512_undefined_ph(), 0xffffffff, a, b)
10339 | } |
10340 | |
10341 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10342 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
10343 | /// |
10344 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_ph) |
10345 | #[inline ] |
10346 | #[target_feature (enable = "avx512fp16" )] |
10347 | #[cfg_attr (test, assert_instr(vscalefph))] |
10348 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10349 | pub fn _mm512_mask_scalef_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
10350 | _mm512_mask_scalef_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
10351 | } |
10352 | |
10353 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10354 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
10355 | /// |
10356 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_ph) |
10357 | #[inline ] |
10358 | #[target_feature (enable = "avx512fp16" )] |
10359 | #[cfg_attr (test, assert_instr(vscalefph))] |
10360 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10361 | pub fn _mm512_maskz_scalef_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
10362 | _mm512_mask_scalef_ph(_mm512_setzero_ph(), k, a, b)
10363 | } |
10364 | |
10365 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10366 | /// the results in dst. |
10367 | /// |
10368 | /// Rounding is done according to the rounding parameter, which can be one of: |
10369 | /// |
10370 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
10371 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
10372 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
10373 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
10374 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10375 | /// |
10376 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_round_ph) |
10377 | #[inline ] |
10378 | #[target_feature (enable = "avx512fp16" )] |
10379 | #[cfg_attr (test, assert_instr(vscalefph, ROUNDING = 8))] |
10380 | #[rustc_legacy_const_generics (2)] |
10381 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10382 | pub fn _mm512_scalef_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
10383 | static_assert_rounding!(ROUNDING); |
10384 | _mm512_mask_scalef_round_ph::<ROUNDING>(_mm512_undefined_ph(), 0xffffffff, a, b)
10385 | } |
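
// Illustrative sketch (hypothetical helper, not part of the intrinsic set): the ROUNDING parameter
// of the `_round` variants is a rounding direction OR-ed with _MM_FROUND_NO_EXC (or
// _MM_FROUND_CUR_DIRECTION on its own). Here the 512-bit scale truncates with exceptions
// suppressed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn scale_truncating(a: __m512h, b: __m512h) -> __m512h {
    _mm512_scalef_round_ph::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b)
}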
10386 | |
10387 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10388 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
10389 | /// |
10390 | /// Rounding is done according to the rounding parameter, which can be one of: |
10391 | /// |
10392 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
10393 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
10394 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
10395 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
10396 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10397 | /// |
10398 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_round_ph) |
10399 | #[inline ] |
10400 | #[target_feature (enable = "avx512fp16" )] |
10401 | #[cfg_attr (test, assert_instr(vscalefph, ROUNDING = 8))] |
10402 | #[rustc_legacy_const_generics (4)] |
10403 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10404 | pub fn _mm512_mask_scalef_round_ph<const ROUNDING: i32>( |
10405 | src: __m512h, |
10406 | k: __mmask32, |
10407 | a: __m512h, |
10408 | b: __m512h, |
10409 | ) -> __m512h { |
10410 | unsafe { |
10411 | static_assert_rounding!(ROUNDING); |
10412 | vscalefph_512(a, b, src, k, ROUNDING) |
10413 | } |
10414 | } |
10415 | |
10416 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10417 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
10418 | /// |
10419 | /// Rounding is done according to the rounding parameter, which can be one of: |
10420 | /// |
10421 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
10422 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
10423 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
10424 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
10425 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10426 | /// |
10427 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_round_ph) |
10428 | #[inline ] |
10429 | #[target_feature (enable = "avx512fp16" )] |
10430 | #[cfg_attr (test, assert_instr(vscalefph, ROUNDING = 8))] |
10431 | #[rustc_legacy_const_generics (3)] |
10432 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10433 | pub fn _mm512_maskz_scalef_round_ph<const ROUNDING: i32>( |
10434 | k: __mmask32, |
10435 | a: __m512h, |
10436 | b: __m512h, |
10437 | ) -> __m512h { |
10438 | static_assert_rounding!(ROUNDING); |
10439 | _mm512_mask_scalef_round_ph::<ROUNDING>(_mm512_setzero_ph(), k, a, b)
10440 | } |
10441 | |
10442 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
10443 | /// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper |
10444 | /// elements of dst. |
10445 | /// |
10446 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_sh) |
10447 | #[inline ] |
10448 | #[target_feature (enable = "avx512fp16" )] |
10449 | #[cfg_attr (test, assert_instr(vscalefsh))] |
10450 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10451 | pub fn _mm_scalef_sh(a: __m128h, b: __m128h) -> __m128h { |
10452 | _mm_mask_scalef_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
10453 | } |
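
// Illustrative sketch (hypothetical helper, not part of the intrinsic set): the scalar form only
// scales lane 0 (by 2^floor(b[0])) and copies lanes 1..7 from `a`. Here the lower element of `a`
// is divided by 16 via an exponent of -4.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn shrink_lower_lane(a: __m128h) -> __m128h {
    _mm_scalef_sh(a, _mm_set_sh(-4.0))
}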
10454 | |
10455 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
10456 | /// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), |
10457 | /// and copy the upper 7 packed elements from a to the upper elements of dst. |
10458 | /// |
10459 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_sh) |
10460 | #[inline ] |
10461 | #[target_feature (enable = "avx512fp16" )] |
10462 | #[cfg_attr (test, assert_instr(vscalefsh))] |
10463 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10464 | pub fn _mm_mask_scalef_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
10465 | _mm_mask_scalef_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
10466 | } |
10467 | |
10468 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
10469 | /// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), |
10470 | /// and copy the upper 7 packed elements from a to the upper elements of dst. |
10471 | /// |
10472 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_sh) |
10473 | #[inline ] |
10474 | #[target_feature (enable = "avx512fp16" )] |
10475 | #[cfg_attr (test, assert_instr(vscalefsh))] |
10476 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10477 | pub fn _mm_maskz_scalef_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
10478 | _mm_mask_scalef_sh(f16x8::ZERO.as_m128h(), k, a, b)
10479 | } |
10480 | |
10481 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
10482 | /// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper |
10483 | /// elements of dst. |
10484 | /// |
10485 | /// Rounding is done according to the rounding parameter, which can be one of: |
10486 | /// |
10487 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
10488 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
10489 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
10490 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
10491 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10492 | /// |
10493 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_round_sh) |
10494 | #[inline ] |
10495 | #[target_feature (enable = "avx512fp16" )] |
10496 | #[cfg_attr (test, assert_instr(vscalefsh, ROUNDING = 8))] |
10497 | #[rustc_legacy_const_generics (2)] |
10498 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10499 | pub fn _mm_scalef_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
10500 | static_assert_rounding!(ROUNDING); |
10501 | _mm_mask_scalef_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
10502 | } |
10503 | |
10504 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
10505 | /// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), |
10506 | /// and copy the upper 7 packed elements from a to the upper elements of dst. |
10507 | /// |
10508 | /// Rounding is done according to the rounding parameter, which can be one of: |
10509 | /// |
10510 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
10511 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
10512 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
10513 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
10514 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10515 | /// |
10516 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_round_sh) |
10517 | #[inline ] |
10518 | #[target_feature (enable = "avx512fp16" )] |
10519 | #[cfg_attr (test, assert_instr(vscalefsh, ROUNDING = 8))] |
10520 | #[rustc_legacy_const_generics (4)] |
10521 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10522 | pub fn _mm_mask_scalef_round_sh<const ROUNDING: i32>( |
10523 | src: __m128h, |
10524 | k: __mmask8, |
10525 | a: __m128h, |
10526 | b: __m128h, |
10527 | ) -> __m128h { |
10528 | unsafe { |
10529 | static_assert_rounding!(ROUNDING); |
10530 | vscalefsh(a, b, src, k, ROUNDING) |
10531 | } |
10532 | } |
10533 | |
10534 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
10535 | /// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), |
10536 | /// and copy the upper 7 packed elements from a to the upper elements of dst. |
10537 | /// |
10538 | /// Rounding is done according to the rounding parameter, which can be one of: |
10539 | /// |
10540 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
10541 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
10542 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
10543 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
10544 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10545 | /// |
10546 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_round_sh) |
10547 | #[inline ] |
10548 | #[target_feature (enable = "avx512fp16" )] |
10549 | #[cfg_attr (test, assert_instr(vscalefsh, ROUNDING = 8))] |
10550 | #[rustc_legacy_const_generics (3)] |
10551 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10552 | pub fn _mm_maskz_scalef_round_sh<const ROUNDING: i32>( |
10553 | k: __mmask8, |
10554 | a: __m128h, |
10555 | b: __m128h, |
10556 | ) -> __m128h { |
10557 | static_assert_rounding!(ROUNDING); |
10558 | _mm_mask_scalef_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
10559 | } |
10560 | |
10561 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10562 | /// number of bits specified by imm8, and store the results in dst. |
10563 | /// |
10564 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10565 | /// |
10566 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10567 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10568 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10569 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10570 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10571 | /// |
10572 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_ph) |
10573 | #[inline ] |
10574 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10575 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10576 | #[rustc_legacy_const_generics (1)] |
10577 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10578 | pub fn _mm_reduce_ph<const IMM8: i32>(a: __m128h) -> __m128h { |
10579 | static_assert_uimm_bits!(IMM8, 8); |
10580 | _mm_mask_reduce_ph::<IMM8>(_mm_undefined_ph(), 0xff, a)
10581 | } |
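
// Illustrative sketch (hypothetical helper, not part of the intrinsic set): VREDUCEPH returns
// x - round(x * 2^M) * 2^-M, i.e. whatever is left after rounding x to M fraction bits. With
// IMM8 = 0x03 (M = 0, truncate) each lane keeps only its fractional part: 2.75 becomes 0.75.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn fractional_part(a: __m128h) -> __m128h {
    _mm_reduce_ph::<0x03>(a)
}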
10582 | |
10583 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10584 | /// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied |
10585 | /// from src when the corresponding mask bit is not set). |
10586 | /// |
10587 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10588 | /// |
10589 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10590 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10591 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10592 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10593 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10594 | /// |
10595 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_ph) |
10596 | #[inline ] |
10597 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10598 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10599 | #[rustc_legacy_const_generics (3)] |
10600 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10601 | pub fn _mm_mask_reduce_ph<const IMM8: i32>(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { |
10602 | unsafe { |
10603 | static_assert_uimm_bits!(IMM8, 8); |
10604 | vreduceph_128(a, IMM8, src, k) |
10605 | } |
10606 | } |
10607 | |
10608 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10609 | /// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed |
10610 | /// out when the corresponding mask bit is not set). |
10611 | /// |
10612 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10613 | /// |
10614 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10615 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10616 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10617 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10618 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10619 | /// |
10620 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_ph) |
10621 | #[inline ] |
10622 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10623 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10624 | #[rustc_legacy_const_generics (2)] |
10625 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10626 | pub fn _mm_maskz_reduce_ph<const IMM8: i32>(k: __mmask8, a: __m128h) -> __m128h { |
10627 | static_assert_uimm_bits!(IMM8, 8); |
10628 | _mm_mask_reduce_ph::<IMM8>(_mm_setzero_ph(), k, a)
10629 | } |
10630 | |
10631 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10632 | /// number of bits specified by imm8, and store the results in dst. |
10633 | /// |
10634 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10635 | /// |
10636 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10637 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10638 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10639 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10640 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10641 | /// |
10642 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_ph) |
10643 | #[inline ] |
10644 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10645 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10646 | #[rustc_legacy_const_generics (1)] |
10647 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10648 | pub fn _mm256_reduce_ph<const IMM8: i32>(a: __m256h) -> __m256h { |
10649 | static_assert_uimm_bits!(IMM8, 8); |
10650 | _mm256_mask_reduce_ph::<IMM8>(_mm256_undefined_ph(), 0xffff, a)
10651 | } |
10652 | |
10653 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10654 | /// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied |
10655 | /// from src when the corresponding mask bit is not set). |
10656 | /// |
10657 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10658 | /// |
10659 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10660 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10661 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10662 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10663 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10664 | /// |
10665 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_reduce_ph) |
10666 | #[inline ] |
10667 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10668 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10669 | #[rustc_legacy_const_generics (3)] |
10670 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10671 | pub fn _mm256_mask_reduce_ph<const IMM8: i32>(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { |
10672 | unsafe { |
10673 | static_assert_uimm_bits!(IMM8, 8); |
10674 | vreduceph_256(a, IMM8, src, k) |
10675 | } |
10676 | } |
10677 | |
10678 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10679 | /// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed |
10680 | /// out when the corresponding mask bit is not set). |
10681 | /// |
10682 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10683 | /// |
10684 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10685 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10686 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10687 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10688 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10689 | /// |
10690 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_reduce_ph) |
10691 | #[inline ] |
10692 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10693 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10694 | #[rustc_legacy_const_generics (2)] |
10695 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10696 | pub fn _mm256_maskz_reduce_ph<const IMM8: i32>(k: __mmask16, a: __m256h) -> __m256h { |
10697 | static_assert_uimm_bits!(IMM8, 8); |
10698 | _mm256_mask_reduce_ph::<IMM8>(_mm256_setzero_ph(), k, a)
10699 | } |
10700 | |
10701 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10702 | /// number of bits specified by imm8, and store the results in dst. |
10703 | /// |
10704 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10705 | /// |
10706 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10707 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10708 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10709 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10710 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10711 | /// |
10712 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_ph) |
10713 | #[inline ] |
10714 | #[target_feature (enable = "avx512fp16" )] |
10715 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10716 | #[rustc_legacy_const_generics (1)] |
10717 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10718 | pub fn _mm512_reduce_ph<const IMM8: i32>(a: __m512h) -> __m512h { |
10719 | static_assert_uimm_bits!(IMM8, 8); |
10720 | _mm512_mask_reduce_ph::<IMM8>(_mm512_undefined_ph(), 0xffffffff, a)
10721 | } |
10722 | |
10723 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10724 | /// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied |
10725 | /// from src when the corresponding mask bit is not set). |
10726 | /// |
10727 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10728 | /// |
10729 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10730 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10731 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10732 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10733 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10734 | /// |
10735 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_ph) |
10736 | #[inline ] |
10737 | #[target_feature (enable = "avx512fp16" )] |
10738 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10739 | #[rustc_legacy_const_generics (3)] |
10740 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10741 | pub fn _mm512_mask_reduce_ph<const IMM8: i32>(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { |
10742 | static_assert_uimm_bits!(IMM8, 8); |
10743 | _mm512_mask_reduce_round_ph::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a) |
10744 | } |
10745 | |
10746 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10747 | /// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed |
10748 | /// out when the corresponding mask bit is not set). |
10749 | /// |
10750 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10751 | /// |
10752 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10753 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10754 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10755 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10756 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10757 | /// |
10758 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_ph) |
10759 | #[inline ] |
10760 | #[target_feature (enable = "avx512fp16" )] |
10761 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10762 | #[rustc_legacy_const_generics (2)] |
10763 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10764 | pub fn _mm512_maskz_reduce_ph<const IMM8: i32>(k: __mmask32, a: __m512h) -> __m512h { |
10765 | static_assert_uimm_bits!(IMM8, 8); |
10766 | _mm512_mask_reduce_ph::<IMM8>(_mm512_setzero_ph(), k, a)
10767 | } |
10768 | |
10769 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10770 | /// number of bits specified by imm8, and store the results in dst. |
10771 | /// |
10772 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10773 | /// |
10774 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10775 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10776 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10777 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10778 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10779 | /// |
10780 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
10781 | /// |
10782 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_round_ph) |
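///
/// An illustrative sketch (marked `ignore`, so it is not run as a doctest; it assumes runtime
/// support for `avx512fp16`):
///
/// ```ignore
/// // With IMM8 = 0 each element is reduced to the part left after rounding it to an
/// // integer; _MM_FROUND_NO_EXC suppresses floating-point exceptions.
/// let a = _mm512_set1_ph(1.25);
/// let r = _mm512_reduce_round_ph::<0, { _MM_FROUND_NO_EXC }>(a);
/// // every element of r is expected to be 0.25
/// ```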
10783 | #[inline ] |
10784 | #[target_feature (enable = "avx512fp16" )] |
10785 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))] |
10786 | #[rustc_legacy_const_generics (1, 2)] |
10787 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10788 | pub fn _mm512_reduce_round_ph<const IMM8: i32, const SAE: i32>(a: __m512h) -> __m512h { |
10789 | static_assert_uimm_bits!(IMM8, 8); |
10790 | static_assert_sae!(SAE); |
_mm512_mask_reduce_round_ph::<IMM8, SAE>(_mm512_undefined_ph(), 0xffffffff, a)
10792 | } |
10793 | |
10794 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10795 | /// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied |
10796 | /// from src when the corresponding mask bit is not set). |
10797 | /// |
10798 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10799 | /// |
10800 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10801 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10802 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10803 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10804 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10805 | /// |
10806 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
10807 | /// |
10808 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_round_ph) |
10809 | #[inline ] |
10810 | #[target_feature (enable = "avx512fp16" )] |
10811 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))] |
10812 | #[rustc_legacy_const_generics (3, 4)] |
10813 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10814 | pub fn _mm512_mask_reduce_round_ph<const IMM8: i32, const SAE: i32>( |
10815 | src: __m512h, |
10816 | k: __mmask32, |
10817 | a: __m512h, |
10818 | ) -> __m512h { |
10819 | unsafe { |
10820 | static_assert_uimm_bits!(IMM8, 8); |
10821 | static_assert_sae!(SAE); |
10822 | vreduceph_512(a, IMM8, src, k, SAE) |
10823 | } |
10824 | } |
10825 | |
10826 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10827 | /// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed |
10828 | /// out when the corresponding mask bit is not set). |
10829 | /// |
10830 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10831 | /// |
10832 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10833 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10834 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10835 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10836 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10837 | /// |
10838 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
10839 | /// |
10840 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_round_ph) |
10841 | #[inline ] |
10842 | #[target_feature (enable = "avx512fp16" )] |
10843 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))] |
10844 | #[rustc_legacy_const_generics (2, 3)] |
10845 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10846 | pub fn _mm512_maskz_reduce_round_ph<const IMM8: i32, const SAE: i32>( |
10847 | k: __mmask32, |
10848 | a: __m512h, |
10849 | ) -> __m512h { |
10850 | static_assert_uimm_bits!(IMM8, 8); |
10851 | static_assert_sae!(SAE); |
_mm512_mask_reduce_round_ph::<IMM8, SAE>(_mm512_setzero_ph(), k, a)
10853 | } |
10854 | |
10855 | /// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by |
10856 | /// the number of bits specified by imm8, store the result in the lower element of dst, and copy the |
10857 | /// upper 7 packed elements from a to the upper elements of dst. |
10858 | /// |
10859 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10860 | /// |
10861 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10862 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10863 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10864 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10865 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10866 | /// |
10867 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_sh) |
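///
/// A small sketch of the expected behavior (not compiled as a doctest; assumes `avx512fp16`
/// support):
///
/// ```ignore
/// // With IMM8 = 0 the reduced argument is the part left after rounding the value to an
/// // integer, i.e. its fractional part under the selected rounding mode.
/// let a = _mm_set1_ph(9.0);
/// let b = _mm_set_sh(1.25);
/// let r = _mm_reduce_sh::<0>(a, b);
/// // lower element of r: 0.25; upper 7 elements: copied from a (9.0)
/// ```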
10868 | #[inline ] |
10869 | #[target_feature (enable = "avx512fp16" )] |
10870 | #[cfg_attr (test, assert_instr(vreducesh, IMM8 = 0))] |
10871 | #[rustc_legacy_const_generics (2)] |
10872 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10873 | pub fn _mm_reduce_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> __m128h { |
10874 | static_assert_uimm_bits!(IMM8, 8); |
_mm_mask_reduce_sh::<IMM8>(f16x8::ZERO.as_m128h(), 0xff, a, b)
10876 | } |
10877 | |
10878 | /// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by |
10879 | /// the number of bits specified by imm8, store the result in the lower element of dst using writemask k |
10880 | /// (the element is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from |
10881 | /// a to the upper elements of dst. |
10882 | /// |
10883 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10884 | /// |
10885 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10886 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10887 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10888 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10889 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10890 | /// |
10891 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_sh) |
10892 | #[inline ] |
10893 | #[target_feature (enable = "avx512fp16" )] |
10894 | #[cfg_attr (test, assert_instr(vreducesh, IMM8 = 0))] |
10895 | #[rustc_legacy_const_generics (4)] |
10896 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10897 | pub fn _mm_mask_reduce_sh<const IMM8: i32>( |
10898 | src: __m128h, |
10899 | k: __mmask8, |
10900 | a: __m128h, |
10901 | b: __m128h, |
10902 | ) -> __m128h { |
10903 | static_assert_uimm_bits!(IMM8, 8); |
10904 | _mm_mask_reduce_round_sh::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
10905 | } |
10906 | |
10907 | /// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by |
10908 | /// the number of bits specified by imm8, store the result in the lower element of dst using zeromask k |
10909 | /// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a |
10910 | /// to the upper elements of dst. |
10911 | /// |
10912 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10913 | /// |
10914 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10915 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10916 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10917 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10918 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10919 | /// |
10920 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_sh) |
10921 | #[inline ] |
10922 | #[target_feature (enable = "avx512fp16" )] |
10923 | #[cfg_attr (test, assert_instr(vreducesh, IMM8 = 0))] |
10924 | #[rustc_legacy_const_generics (3)] |
10925 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10926 | pub fn _mm_maskz_reduce_sh<const IMM8: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
10927 | static_assert_uimm_bits!(IMM8, 8); |
_mm_mask_reduce_sh::<IMM8>(f16x8::ZERO.as_m128h(), k, a, b)
10929 | } |
10930 | |
10931 | /// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by |
10932 | /// the number of bits specified by imm8, store the result in the lower element of dst, and copy the upper |
10933 | /// 7 packed elements from a to the upper elements of dst. |
10934 | /// |
10935 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10936 | /// |
10937 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10938 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10939 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10940 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10941 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10942 | /// |
10943 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
10944 | /// |
10945 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_round_sh) |
10946 | #[inline ] |
10947 | #[target_feature (enable = "avx512fp16" )] |
10948 | #[cfg_attr (test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))] |
10949 | #[rustc_legacy_const_generics (2, 3)] |
10950 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10951 | pub fn _mm_reduce_round_sh<const IMM8: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __m128h { |
10952 | static_assert_uimm_bits!(IMM8, 8); |
10953 | static_assert_sae!(SAE); |
_mm_mask_reduce_round_sh::<IMM8, SAE>(f16x8::ZERO.as_m128h(), 0xff, a, b)
10955 | } |
10956 | |
10957 | /// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by |
10958 | /// the number of bits specified by imm8, store the result in the lower element of dst using writemask k |
10959 | /// (the element is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a |
10960 | /// to the upper elements of dst. |
10961 | /// |
10962 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10963 | /// |
10964 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10965 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10966 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10967 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10968 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10969 | /// |
10970 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
10971 | /// |
10972 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_round_sh) |
10973 | #[inline ] |
10974 | #[target_feature (enable = "avx512fp16" )] |
10975 | #[cfg_attr (test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))] |
10976 | #[rustc_legacy_const_generics (4, 5)] |
10977 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10978 | pub fn _mm_mask_reduce_round_sh<const IMM8: i32, const SAE: i32>( |
10979 | src: __m128h, |
10980 | k: __mmask8, |
10981 | a: __m128h, |
10982 | b: __m128h, |
10983 | ) -> __m128h { |
10984 | unsafe { |
10985 | static_assert_uimm_bits!(IMM8, 8); |
10986 | static_assert_sae!(SAE); |
10987 | vreducesh(a, b, src, k, IMM8, SAE) |
10988 | } |
10989 | } |
10990 | |
10991 | /// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by |
10992 | /// the number of bits specified by imm8, store the result in the lower element of dst using zeromask k |
10993 | /// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a |
10994 | /// to the upper elements of dst. |
10995 | /// |
10996 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10997 | /// |
10998 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10999 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
11000 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
11001 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
11002 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
11003 | /// |
11004 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
11005 | /// |
11006 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_round_sh) |
11007 | #[inline ] |
11008 | #[target_feature (enable = "avx512fp16" )] |
11009 | #[cfg_attr (test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))] |
11010 | #[rustc_legacy_const_generics (3, 4)] |
11011 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11012 | pub fn _mm_maskz_reduce_round_sh<const IMM8: i32, const SAE: i32>( |
11013 | k: __mmask8, |
11014 | a: __m128h, |
11015 | b: __m128h, |
11016 | ) -> __m128h { |
11017 | static_assert_uimm_bits!(IMM8, 8); |
11018 | static_assert_sae!(SAE); |
_mm_mask_reduce_round_sh::<IMM8, SAE>(f16x8::ZERO.as_m128h(), k, a, b)
11020 | } |
11021 | |
11022 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the |
11023 | /// sum of all elements in a. |
11024 | /// |
11025 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_add_ph) |
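///
/// A minimal usage sketch (not compiled as a doctest; assumes `avx512fp16` and `avx512vl`
/// support at runtime):
///
/// ```ignore
/// let a = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
/// let sum = _mm_reduce_add_ph(a);
/// // sum == 36.0
/// ```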
11026 | #[inline ] |
11027 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11028 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11029 | pub fn _mm_reduce_add_ph(a: __m128h) -> f16 { |
11030 | unsafe { |
11031 | let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]); |
11032 | let a: __m128h = _mm_add_ph(a, b); |
11033 | let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]); |
11034 | let a: __m128h = _mm_add_ph(a, b); |
simd_extract::<_, f16>(a, 0) + simd_extract::<_, f16>(a, 1)
11036 | } |
11037 | } |
11038 | |
11039 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the |
11040 | /// sum of all elements in a. |
11041 | /// |
11042 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_add_ph) |
11043 | #[inline ] |
11044 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11045 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11046 | pub fn _mm256_reduce_add_ph(a: __m256h) -> f16 { |
11047 | unsafe { |
11048 | let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); |
11049 | let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); |
_mm_reduce_add_ph(_mm_add_ph(p, q))
11051 | } |
11052 | } |
11053 | |
11054 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the |
11055 | /// sum of all elements in a. |
11056 | /// |
11057 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_add_ph) |
11058 | #[inline ] |
11059 | #[target_feature (enable = "avx512fp16" )] |
11060 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11061 | pub fn _mm512_reduce_add_ph(a: __m512h) -> f16 { |
11062 | unsafe { |
11063 | let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); |
11064 | let q: __m256h = simd_shuffle!( |
11065 | a, |
11066 | a, |
11067 | [ |
11068 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 |
11069 | ] |
11070 | ); |
_mm256_reduce_add_ph(_mm256_add_ph(p, q))
11072 | } |
11073 | } |
11074 | |
11075 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns |
11076 | /// the product of all elements in a. |
11077 | /// |
11078 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_mul_ph) |
11079 | #[inline ] |
11080 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11081 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11082 | pub fn _mm_reduce_mul_ph(a: __m128h) -> f16 { |
11083 | unsafe { |
11084 | let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]); |
11085 | let a: __m128h = _mm_mul_ph(a, b); |
11086 | let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]); |
11087 | let a: __m128h = _mm_mul_ph(a, b); |
simd_extract::<_, f16>(a, 0) * simd_extract::<_, f16>(a, 1)
11089 | } |
11090 | } |
11091 | |
11092 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns |
11093 | /// the product of all elements in a. |
11094 | /// |
11095 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_mul_ph) |
11096 | #[inline ] |
11097 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11098 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11099 | pub fn _mm256_reduce_mul_ph(a: __m256h) -> f16 { |
11100 | unsafe { |
11101 | let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); |
11102 | let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); |
_mm_reduce_mul_ph(_mm_mul_ph(p, q))
11104 | } |
11105 | } |
11106 | |
11107 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns |
11108 | /// the product of all elements in a. |
11109 | /// |
11110 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_mul_ph) |
11111 | #[inline ] |
11112 | #[target_feature (enable = "avx512fp16" )] |
11113 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
pub fn _mm512_reduce_mul_ph(a: __m512h) -> f16 {
11115 | unsafe { |
11116 | let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); |
11117 | let q: __m256h = simd_shuffle!( |
11118 | a, |
11119 | a, |
11120 | [ |
11121 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 |
11122 | ] |
11123 | ); |
_mm256_reduce_mul_ph(_mm256_mul_ph(p, q))
11125 | } |
11126 | } |
11127 | |
11128 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the |
11129 | /// minimum of all elements in a. |
11130 | /// |
11131 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_min_ph) |
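///
/// Illustrative sketch (not compiled as a doctest; assumes `avx512fp16` and `avx512vl`):
///
/// ```ignore
/// let a = _mm_set_ph(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.5);
/// let min = _mm_reduce_min_ph(a);
/// // min == 0.5
/// ```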
11132 | #[inline ] |
11133 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11134 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11135 | pub fn _mm_reduce_min_ph(a: __m128h) -> f16 { |
11136 | unsafe { |
11137 | let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]); |
11138 | let a: __m128h = _mm_min_ph(a, b); |
11139 | let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]); |
11140 | let a: __m128h = _mm_min_ph(a, b); |
11141 | let b: __m128h = simd_shuffle!(a, a, [1, 0, 2, 3, 4, 5, 6, 7]); |
11142 | simd_extract!(_mm_min_sh(a, b), 0) |
11143 | } |
11144 | } |
11145 | |
11146 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the |
11147 | /// minimum of all elements in a. |
11148 | /// |
11149 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_min_ph) |
11150 | #[inline ] |
11151 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11152 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11153 | pub fn _mm256_reduce_min_ph(a: __m256h) -> f16 { |
11154 | unsafe { |
11155 | let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); |
11156 | let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); |
_mm_reduce_min_ph(_mm_min_ph(p, q))
11158 | } |
11159 | } |
11160 | |
11161 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the |
11162 | /// minimum of all elements in a. |
11163 | /// |
11164 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_ph) |
11165 | #[inline ] |
11166 | #[target_feature (enable = "avx512fp16" )] |
11167 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11168 | pub fn _mm512_reduce_min_ph(a: __m512h) -> f16 { |
11169 | unsafe { |
11170 | let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); |
11171 | let q: __m256h = simd_shuffle!( |
11172 | a, |
11173 | a, |
11174 | [ |
11175 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 |
11176 | ] |
11177 | ); |
_mm256_reduce_min_ph(_mm256_min_ph(p, q))
11179 | } |
11180 | } |
11181 | |
11182 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the |
11183 | /// maximum of all elements in a. |
11184 | /// |
11185 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_max_ph) |
11186 | #[inline ] |
11187 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11188 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11189 | pub fn _mm_reduce_max_ph(a: __m128h) -> f16 { |
11190 | unsafe { |
11191 | let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]); |
11192 | let a: __m128h = _mm_max_ph(a, b); |
11193 | let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]); |
11194 | let a: __m128h = _mm_max_ph(a, b); |
11195 | let b: __m128h = simd_shuffle!(a, a, [1, 0, 2, 3, 4, 5, 6, 7]); |
11196 | simd_extract!(_mm_max_sh(a, b), 0) |
11197 | } |
11198 | } |
11199 | |
11200 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the |
11201 | /// maximum of all elements in a. |
11202 | /// |
11203 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_max_ph) |
11204 | #[inline ] |
11205 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11206 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11207 | pub fn _mm256_reduce_max_ph(a: __m256h) -> f16 { |
11208 | unsafe { |
11209 | let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); |
11210 | let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); |
_mm_reduce_max_ph(_mm_max_ph(p, q))
11212 | } |
11213 | } |
11214 | |
11215 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the |
11216 | /// maximum of all elements in a. |
11217 | /// |
11218 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_ph) |
11219 | #[inline ] |
11220 | #[target_feature (enable = "avx512fp16" )] |
11221 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11222 | pub fn _mm512_reduce_max_ph(a: __m512h) -> f16 { |
11223 | unsafe { |
11224 | let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); |
11225 | let q: __m256h = simd_shuffle!( |
11226 | a, |
11227 | a, |
11228 | [ |
11229 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 |
11230 | ] |
11231 | ); |
_mm256_reduce_max_ph(_mm256_max_ph(p, q))
11233 | } |
11234 | } |
11235 | |
11236 | macro_rules! fpclass_asm { // FIXME: use LLVM intrinsics |
11237 | ($mask_type: ty, $reg: ident, $a: expr) => {{ |
11238 | let dst: $mask_type; |
11239 | asm!( |
11240 | "vfpclassph {k}, {src}, {imm8}" , |
11241 | k = lateout(kreg) dst, |
11242 | src = in($reg) $a, |
11243 | imm8 = const IMM8, |
11244 | options(pure, nomem, nostack) |
11245 | ); |
11246 | dst |
11247 | }}; |
11248 | ($mask_type: ty, $mask: expr, $reg: ident, $a: expr) => {{ |
11249 | let dst: $mask_type; |
11250 | asm!( |
11251 | "vfpclassph {k} {{ {mask} }}, {src}, {imm8}" , |
11252 | k = lateout(kreg) dst, |
11253 | mask = in(kreg) $mask, |
11254 | src = in($reg) $a, |
11255 | imm8 = const IMM8, |
11256 | options(pure, nomem, nostack) |
11257 | ); |
11258 | dst |
11259 | }}; |
11260 | } |
11261 | |
11262 | /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified |
11263 | /// by imm8, and store the results in mask vector k. |
11264 | /// imm can be a combination of: |
11265 | /// |
11266 | /// 0x01 // QNaN |
11267 | /// 0x02 // Positive Zero |
11268 | /// 0x04 // Negative Zero |
11269 | /// 0x08 // Positive Infinity |
11270 | /// 0x10 // Negative Infinity |
11271 | /// 0x20 // Denormal |
11272 | /// 0x40 // Negative |
11273 | /// 0x80 // SNaN |
11274 | /// |
11275 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_ph_mask) |
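///
/// A hedged usage sketch (not compiled as a doctest; assumes `avx512fp16` and `avx512vl`):
///
/// ```ignore
/// // 0x02 | 0x04 flags positive and negative zeros.
/// let a = _mm_set_ph(1.0, -0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.0);
/// let k = _mm_fpclass_ph_mask::<0x06>(a);
/// // element 0 (0.0) and element 6 (-0.0) match, so k == 0b0100_0001
/// ```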
11276 | #[inline ] |
11277 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11278 | #[cfg_attr (test, assert_instr(vfpclassph, IMM8 = 0))] |
11279 | #[rustc_legacy_const_generics (1)] |
11280 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11281 | pub fn _mm_fpclass_ph_mask<const IMM8: i32>(a: __m128h) -> __mmask8 { |
11282 | unsafe { |
11283 | static_assert_uimm_bits!(IMM8, 8); |
11284 | fpclass_asm!(__mmask8, xmm_reg, a) |
11285 | } |
11286 | } |
11287 | |
11288 | /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified |
11289 | /// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the |
11290 | /// corresponding mask bit is not set). |
11291 | /// imm can be a combination of: |
11292 | /// |
11293 | /// 0x01 // QNaN |
11294 | /// 0x02 // Positive Zero |
11295 | /// 0x04 // Negative Zero |
11296 | /// 0x08 // Positive Infinity |
11297 | /// 0x10 // Negative Infinity |
11298 | /// 0x20 // Denormal |
11299 | /// 0x40 // Negative |
11300 | /// 0x80 // SNaN |
11301 | /// |
11302 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_ph_mask) |
11303 | #[inline ] |
11304 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11305 | #[cfg_attr (test, assert_instr(vfpclassph, IMM8 = 0))] |
11306 | #[rustc_legacy_const_generics (2)] |
11307 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11308 | pub fn _mm_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask8, a: __m128h) -> __mmask8 { |
11309 | unsafe { |
11310 | static_assert_uimm_bits!(IMM8, 8); |
11311 | fpclass_asm!(__mmask8, k1, xmm_reg, a) |
11312 | } |
11313 | } |
11314 | |
11315 | /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified |
11316 | /// by imm8, and store the results in mask vector k. |
11317 | /// imm can be a combination of: |
11318 | /// |
11319 | /// 0x01 // QNaN |
11320 | /// 0x02 // Positive Zero |
11321 | /// 0x04 // Negative Zero |
11322 | /// 0x08 // Positive Infinity |
11323 | /// 0x10 // Negative Infinity |
11324 | /// 0x20 // Denormal |
11325 | /// 0x40 // Negative |
11326 | /// 0x80 // SNaN |
11327 | /// |
11328 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fpclass_ph_mask) |
11329 | #[inline ] |
11330 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11331 | #[cfg_attr (test, assert_instr(vfpclassph, IMM8 = 0))] |
11332 | #[rustc_legacy_const_generics (1)] |
11333 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11334 | pub fn _mm256_fpclass_ph_mask<const IMM8: i32>(a: __m256h) -> __mmask16 { |
11335 | unsafe { |
11336 | static_assert_uimm_bits!(IMM8, 8); |
11337 | fpclass_asm!(__mmask16, ymm_reg, a) |
11338 | } |
11339 | } |
11340 | |
11341 | /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified |
11342 | /// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the |
11343 | /// corresponding mask bit is not set). |
11344 | /// imm can be a combination of: |
11345 | /// |
11346 | /// 0x01 // QNaN |
11347 | /// 0x02 // Positive Zero |
11348 | /// 0x04 // Negative Zero |
11349 | /// 0x08 // Positive Infinity |
11350 | /// 0x10 // Negative Infinity |
11351 | /// 0x20 // Denormal |
11352 | /// 0x40 // Negative |
11353 | /// 0x80 // SNaN |
11354 | /// |
11355 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fpclass_ph_mask) |
11356 | #[inline ] |
11357 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11358 | #[cfg_attr (test, assert_instr(vfpclassph, IMM8 = 0))] |
11359 | #[rustc_legacy_const_generics (2)] |
11360 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11361 | pub fn _mm256_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask16, a: __m256h) -> __mmask16 { |
11362 | unsafe { |
11363 | static_assert_uimm_bits!(IMM8, 8); |
11364 | fpclass_asm!(__mmask16, k1, ymm_reg, a) |
11365 | } |
11366 | } |
11367 | |
11368 | /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified |
11369 | /// by imm8, and store the results in mask vector k. |
11370 | /// imm can be a combination of: |
11371 | /// |
11372 | /// 0x01 // QNaN |
11373 | /// 0x02 // Positive Zero |
11374 | /// 0x04 // Negative Zero |
11375 | /// 0x08 // Positive Infinity |
11376 | /// 0x10 // Negative Infinity |
11377 | /// 0x20 // Denormal |
11378 | /// 0x40 // Negative |
11379 | /// 0x80 // SNaN |
11380 | /// |
11381 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fpclass_ph_mask) |
11382 | #[inline ] |
11383 | #[target_feature (enable = "avx512fp16" )] |
11384 | #[cfg_attr (test, assert_instr(vfpclassph, IMM8 = 0))] |
11385 | #[rustc_legacy_const_generics (1)] |
11386 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11387 | pub fn _mm512_fpclass_ph_mask<const IMM8: i32>(a: __m512h) -> __mmask32 { |
11388 | unsafe { |
11389 | static_assert_uimm_bits!(IMM8, 8); |
11390 | fpclass_asm!(__mmask32, zmm_reg, a) |
11391 | } |
11392 | } |
11393 | |
11394 | /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified |
11395 | /// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the |
11396 | /// corresponding mask bit is not set). |
11397 | /// imm can be a combination of: |
11398 | /// |
11399 | /// 0x01 // QNaN |
11400 | /// 0x02 // Positive Zero |
11401 | /// 0x04 // Negative Zero |
11402 | /// 0x08 // Positive Infinity |
11403 | /// 0x10 // Negative Infinity |
11404 | /// 0x20 // Denormal |
11405 | /// 0x40 // Negative |
11406 | /// 0x80 // SNaN |
11407 | /// |
11408 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fpclass_ph_mask) |
11409 | #[inline ] |
11410 | #[target_feature (enable = "avx512fp16" )] |
11411 | #[cfg_attr (test, assert_instr(vfpclassph, IMM8 = 0))] |
11412 | #[rustc_legacy_const_generics (2)] |
11413 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11414 | pub fn _mm512_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask32, a: __m512h) -> __mmask32 { |
11415 | unsafe { |
11416 | static_assert_uimm_bits!(IMM8, 8); |
11417 | fpclass_asm!(__mmask32, k1, zmm_reg, a) |
11418 | } |
11419 | } |
11420 | |
11421 | /// Test the lower half-precision (16-bit) floating-point element in a for special categories specified |
11422 | /// by imm8, and store the result in mask vector k. |
11423 | /// imm can be a combination of: |
11424 | /// |
11425 | /// 0x01 // QNaN |
11426 | /// 0x02 // Positive Zero |
11427 | /// 0x04 // Negative Zero |
11428 | /// 0x08 // Positive Infinity |
11429 | /// 0x10 // Negative Infinity |
11430 | /// 0x20 // Denormal |
11431 | /// 0x40 // Negative |
11432 | /// 0x80 // SNaN |
11433 | /// |
11434 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_sh_mask) |
11435 | #[inline ] |
11436 | #[target_feature (enable = "avx512fp16" )] |
11437 | #[cfg_attr (test, assert_instr(vfpclasssh, IMM8 = 0))] |
11438 | #[rustc_legacy_const_generics (1)] |
11439 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11440 | pub fn _mm_fpclass_sh_mask<const IMM8: i32>(a: __m128h) -> __mmask8 { |
_mm_mask_fpclass_sh_mask::<IMM8>(0xff, a)
11442 | } |
11443 | |
11444 | /// Test the lower half-precision (16-bit) floating-point element in a for special categories specified |
11445 | /// by imm8, and store the result in mask vector k using zeromask k (elements are zeroed out when the |
11446 | /// corresponding mask bit is not set). |
11447 | /// imm can be a combination of: |
11448 | /// |
11449 | /// 0x01 // QNaN |
11450 | /// 0x02 // Positive Zero |
11451 | /// 0x04 // Negative Zero |
11452 | /// 0x08 // Positive Infinity |
11453 | /// 0x10 // Negative Infinity |
11454 | /// 0x20 // Denormal |
11455 | /// 0x40 // Negative |
11456 | /// 0x80 // SNaN |
11457 | /// |
11458 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_sh_mask) |
11459 | #[inline ] |
11460 | #[target_feature (enable = "avx512fp16" )] |
11461 | #[cfg_attr (test, assert_instr(vfpclasssh, IMM8 = 0))] |
11462 | #[rustc_legacy_const_generics (2)] |
11463 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11464 | pub fn _mm_mask_fpclass_sh_mask<const IMM8: i32>(k1: __mmask8, a: __m128h) -> __mmask8 { |
11465 | unsafe { |
11466 | static_assert_uimm_bits!(IMM8, 8); |
vfpclasssh(a, IMM8, k1)
11468 | } |
11469 | } |
11470 | |
11471 | /// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k, |
11472 | /// and store the results in dst. |
11473 | /// |
11474 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_blend_ph) |
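///
/// A minimal sketch of the selection rule (not compiled as a doctest; assumes `avx512fp16`
/// and `avx512vl`):
///
/// ```ignore
/// // Bit i of k selects element i of b when set, element i of a when clear.
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set1_ph(2.0);
/// let r = _mm_mask_blend_ph(0b0000_1111, a, b);
/// // elements 0..=3 are 2.0 (from b), elements 4..=7 are 1.0 (from a)
/// ```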
11475 | #[inline ] |
11476 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11477 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11478 | pub fn _mm_mask_blend_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
unsafe { simd_select_bitmask(k, b, a) }
11480 | } |
11481 | |
11482 | /// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k, |
11483 | /// and store the results in dst. |
11484 | /// |
11485 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_blend_ph) |
11486 | #[inline ] |
11487 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11488 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11489 | pub fn _mm256_mask_blend_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
unsafe { simd_select_bitmask(k, b, a) }
11491 | } |
11492 | |
11493 | /// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k, |
11494 | /// and store the results in dst. |
11495 | /// |
11496 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_ph) |
11497 | #[inline ] |
11498 | #[target_feature (enable = "avx512fp16" )] |
11499 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11500 | pub fn _mm512_mask_blend_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
unsafe { simd_select_bitmask(k, b, a) }
11502 | } |
11503 | |
11504 | /// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector |
11505 | /// and index in idx, and store the results in dst. |
11506 | /// |
11507 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutex2var_ph) |
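///
/// An illustrative sketch (not compiled as a doctest; assumes `avx512fp16`, `avx512vl` and
/// `avx512bw`):
///
/// ```ignore
/// // Each index selects from the 16-element concatenation of a (0..=7) and b (8..=15).
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set1_ph(2.0);
/// let idx = _mm_set_epi16(8, 0, 8, 0, 8, 0, 8, 0);
/// let r = _mm_permutex2var_ph(a, idx, b);
/// // r alternates between elements of b (2.0) and a (1.0)
/// ```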
11508 | #[inline ] |
11509 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11510 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11511 | pub fn _mm_permutex2var_ph(a: __m128h, idx: __m128i, b: __m128h) -> __m128h { |
11512 | _mm_castsi128_ph(_mm_permutex2var_epi16( |
_mm_castph_si128(a),
11514 | idx, |
_mm_castph_si128(b),
11516 | )) |
11517 | } |
11518 | |
11519 | /// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector |
11520 | /// and index in idx, and store the results in dst. |
11521 | /// |
11522 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutex2var_ph) |
11523 | #[inline ] |
11524 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11525 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11526 | pub fn _mm256_permutex2var_ph(a: __m256h, idx: __m256i, b: __m256h) -> __m256h { |
11527 | _mm256_castsi256_ph(_mm256_permutex2var_epi16( |
_mm256_castph_si256(a),
11529 | idx, |
_mm256_castph_si256(b),
11531 | )) |
11532 | } |
11533 | |
11534 | /// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector |
11535 | /// and index in idx, and store the results in dst. |
11536 | /// |
11537 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_ph) |
11538 | #[inline ] |
11539 | #[target_feature (enable = "avx512fp16" )] |
11540 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11541 | pub fn _mm512_permutex2var_ph(a: __m512h, idx: __m512i, b: __m512h) -> __m512h { |
11542 | _mm512_castsi512_ph(_mm512_permutex2var_epi16( |
_mm512_castph_si512(a),
11544 | idx, |
_mm512_castph_si512(b),
11546 | )) |
11547 | } |
11548 | |
11549 | /// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx, |
11550 | /// and store the results in dst. |
11551 | /// |
11552 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutexvar_ph) |
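///
/// A small sketch (not compiled as a doctest; assumes `avx512fp16`, `avx512vl` and
/// `avx512bw`):
///
/// ```ignore
/// // dst[i] = a[idx[i]]; this index pattern reverses the 8 elements of a.
/// let a = _mm_set_ph(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0);
/// let idx = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
/// let r = _mm_permutexvar_ph(idx, a);
/// ```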
11553 | #[inline ] |
11554 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11555 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11556 | pub fn _mm_permutexvar_ph(idx: __m128i, a: __m128h) -> __m128h { |
_mm_castsi128_ph(_mm_permutexvar_epi16(idx, _mm_castph_si128(a)))
11558 | } |
11559 | |
11560 | /// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx, |
11561 | /// and store the results in dst. |
11562 | /// |
11563 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutexvar_ph) |
11564 | #[inline ] |
11565 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11566 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11567 | pub fn _mm256_permutexvar_ph(idx: __m256i, a: __m256h) -> __m256h { |
_mm256_castsi256_ph(_mm256_permutexvar_epi16(idx, _mm256_castph_si256(a)))
11569 | } |
11570 | |
11571 | /// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx, |
11572 | /// and store the results in dst. |
11573 | /// |
11574 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_ph) |
11575 | #[inline ] |
11576 | #[target_feature (enable = "avx512fp16" )] |
11577 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11578 | pub fn _mm512_permutexvar_ph(idx: __m512i, a: __m512h) -> __m512h { |
_mm512_castsi512_ph(_mm512_permutexvar_epi16(idx, _mm512_castph_si512(a)))
11580 | } |
11581 | |
11582 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11583 | /// and store the results in dst. |
11584 | /// |
11585 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_ph) |
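///
/// A minimal usage sketch (not compiled as a doctest; assumes `avx512fp16` and `avx512vl`):
///
/// ```ignore
/// let a = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
/// let r = _mm_cvtepi16_ph(a);
/// // r holds [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]
/// ```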
11586 | #[inline ] |
11587 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11588 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11589 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11590 | pub fn _mm_cvtepi16_ph(a: __m128i) -> __m128h { |
11591 | unsafe { vcvtw2ph_128(a.as_i16x8(), _MM_FROUND_CUR_DIRECTION) } |
11592 | } |
11593 | |
11594 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11595 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11596 | /// mask bit is not set). |
11597 | /// |
11598 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi16_ph) |
11599 | #[inline ] |
11600 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11601 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11602 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11603 | pub fn _mm_mask_cvtepi16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { |
unsafe { simd_select_bitmask(k, _mm_cvtepi16_ph(a), src) }
11605 | } |
11606 | |
11607 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11608 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11609 | /// |
11610 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi16_ph) |
11611 | #[inline ] |
11612 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11613 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11614 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11615 | pub fn _mm_maskz_cvtepi16_ph(k: __mmask8, a: __m128i) -> __m128h { |
_mm_mask_cvtepi16_ph(_mm_setzero_ph(), k, a)
11617 | } |
11618 | |
11619 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11620 | /// and store the results in dst. |
11621 | /// |
11622 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_ph) |
11623 | #[inline ] |
11624 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11625 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11626 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11627 | pub fn _mm256_cvtepi16_ph(a: __m256i) -> __m256h { |
11628 | unsafe { vcvtw2ph_256(a.as_i16x16(), _MM_FROUND_CUR_DIRECTION) } |
11629 | } |
11630 | |
11631 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11632 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11633 | /// mask bit is not set). |
11634 | /// |
11635 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi16_ph) |
11636 | #[inline ] |
11637 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11638 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11639 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11640 | pub fn _mm256_mask_cvtepi16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h { |
unsafe { simd_select_bitmask(k, _mm256_cvtepi16_ph(a), src) }
11642 | } |
11643 | |
11644 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11645 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11646 | /// |
11647 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi16_ph) |
11648 | #[inline ] |
11649 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11650 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11651 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11652 | pub fn _mm256_maskz_cvtepi16_ph(k: __mmask16, a: __m256i) -> __m256h { |
_mm256_mask_cvtepi16_ph(_mm256_setzero_ph(), k, a)
11654 | } |
11655 | |
11656 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11657 | /// and store the results in dst. |
11658 | /// |
11659 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi16_ph) |
11660 | #[inline ] |
11661 | #[target_feature (enable = "avx512fp16" )] |
11662 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11663 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11664 | pub fn _mm512_cvtepi16_ph(a: __m512i) -> __m512h { |
11665 | unsafe { vcvtw2ph_512(a.as_i16x32(), _MM_FROUND_CUR_DIRECTION) } |
11666 | } |
11667 | |
11668 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11669 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11670 | /// mask bit is not set). |
11671 | /// |
11672 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi16_ph) |
11673 | #[inline ] |
11674 | #[target_feature (enable = "avx512fp16" )] |
11675 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11676 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11677 | pub fn _mm512_mask_cvtepi16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h { |
unsafe { simd_select_bitmask(k, _mm512_cvtepi16_ph(a), src) }
11679 | } |
11680 | |
11681 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11682 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11683 | /// |
11684 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi16_ph) |
11685 | #[inline ] |
11686 | #[target_feature (enable = "avx512fp16" )] |
11687 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11688 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11689 | pub fn _mm512_maskz_cvtepi16_ph(k: __mmask32, a: __m512i) -> __m512h { |
_mm512_mask_cvtepi16_ph(_mm512_setzero_ph(), k, a)
11691 | } |
11692 | |
11693 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11694 | /// and store the results in dst. |
11695 | /// |
11696 | /// Rounding is done according to the rounding parameter, which can be one of: |
11697 | /// |
11698 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
11699 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
11700 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
11701 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
11702 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
11703 | /// |
11704 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi16_ph) |
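///
/// A hedged sketch of how the rounding constant is supplied (not compiled as a doctest;
/// assumes `avx512fp16`):
///
/// ```ignore
/// // f16 can represent integers exactly only up to 2048, so 3001 must be rounded;
/// // round-toward-negative-infinity yields 3000.0 in every element.
/// let a = _mm512_set1_epi16(3001);
/// let r = _mm512_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
/// ```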
11705 | #[inline ] |
11706 | #[target_feature (enable = "avx512fp16" )] |
11707 | #[cfg_attr (test, assert_instr(vcvtw2ph, ROUNDING = 8))] |
11708 | #[rustc_legacy_const_generics (1)] |
11709 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11710 | pub fn _mm512_cvt_roundepi16_ph<const ROUNDING: i32>(a: __m512i) -> __m512h { |
11711 | unsafe { |
11712 | static_assert_rounding!(ROUNDING); |
11713 | vcvtw2ph_512(a.as_i16x32(), ROUNDING) |
11714 | } |
11715 | } |
11716 | |
11717 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11718 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11719 | /// mask bit is not set). |
11720 | /// |
11721 | /// Rounding is done according to the rounding parameter, which can be one of: |
11722 | /// |
11723 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
11724 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
11725 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
11726 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
11727 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
11728 | /// |
11729 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi16_ph) |
11730 | #[inline ] |
11731 | #[target_feature (enable = "avx512fp16" )] |
11732 | #[cfg_attr (test, assert_instr(vcvtw2ph, ROUNDING = 8))] |
11733 | #[rustc_legacy_const_generics (3)] |
11734 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11735 | pub fn _mm512_mask_cvt_roundepi16_ph<const ROUNDING: i32>( |
11736 | src: __m512h, |
11737 | k: __mmask32, |
11738 | a: __m512i, |
11739 | ) -> __m512h { |
11740 | unsafe { |
11741 | static_assert_rounding!(ROUNDING); |
simd_select_bitmask(k, _mm512_cvt_roundepi16_ph::<ROUNDING>(a), src)
11743 | } |
11744 | } |
11745 | |
11746 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11747 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11748 | /// |
11749 | /// Rounding is done according to the rounding parameter, which can be one of: |
11750 | /// |
11751 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
11752 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
11753 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
11754 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
11755 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
11756 | /// |
11757 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi16_ph) |
11758 | #[inline ] |
11759 | #[target_feature (enable = "avx512fp16" )] |
11760 | #[cfg_attr (test, assert_instr(vcvtw2ph, ROUNDING = 8))] |
11761 | #[rustc_legacy_const_generics (2)] |
11762 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11763 | pub fn _mm512_maskz_cvt_roundepi16_ph<const ROUNDING: i32>(k: __mmask32, a: __m512i) -> __m512h { |
11764 | static_assert_rounding!(ROUNDING); |
_mm512_mask_cvt_roundepi16_ph::<ROUNDING>(_mm512_setzero_ph(), k, a)
11766 | } |
11767 | |
11768 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11769 | /// and store the results in dst. |
11770 | /// |
11771 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_ph) |
11772 | #[inline ] |
11773 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11774 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11775 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11776 | pub fn _mm_cvtepu16_ph(a: __m128i) -> __m128h { |
11777 | unsafe { vcvtuw2ph_128(a.as_u16x8(), _MM_FROUND_CUR_DIRECTION) } |
11778 | } |
11779 | |
11780 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11781 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11782 | /// mask bit is not set). |
11783 | /// |
11784 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu16_ph) |
11785 | #[inline ] |
11786 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11787 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11788 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11789 | pub fn _mm_mask_cvtepu16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { |
unsafe { simd_select_bitmask(k, _mm_cvtepu16_ph(a), src) }
11791 | } |
11792 | |
11793 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11794 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11795 | /// |
11796 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu16_ph) |
11797 | #[inline ] |
11798 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11799 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11800 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11801 | pub fn _mm_maskz_cvtepu16_ph(k: __mmask8, a: __m128i) -> __m128h { |
_mm_mask_cvtepu16_ph(_mm_setzero_ph(), k, a)
11803 | } |
11804 | |
11805 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11806 | /// and store the results in dst. |
11807 | /// |
11808 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu16_ph) |
11809 | #[inline ] |
11810 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11811 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11812 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11813 | pub fn _mm256_cvtepu16_ph(a: __m256i) -> __m256h { |
11814 | unsafe { vcvtuw2ph_256(a.as_u16x16(), _MM_FROUND_CUR_DIRECTION) } |
11815 | } |
11816 | |
11817 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11818 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11819 | /// mask bit is not set). |
11820 | /// |
11821 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu16_ph) |
11822 | #[inline ] |
11823 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11824 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11825 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11826 | pub fn _mm256_mask_cvtepu16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h { |
unsafe { simd_select_bitmask(k, _mm256_cvtepu16_ph(a), src) }
11828 | } |
11829 | |
11830 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11831 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11832 | /// |
11833 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu16_ph) |
11834 | #[inline ] |
11835 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11836 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11837 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11838 | pub fn _mm256_maskz_cvtepu16_ph(k: __mmask16, a: __m256i) -> __m256h { |
_mm256_mask_cvtepu16_ph(_mm256_setzero_ph(), k, a)
11840 | } |
11841 | |
11842 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11843 | /// and store the results in dst. |
11844 | /// |
11845 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu16_ph) |
11846 | #[inline ] |
11847 | #[target_feature (enable = "avx512fp16" )] |
11848 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11849 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11850 | pub fn _mm512_cvtepu16_ph(a: __m512i) -> __m512h { |
11851 | unsafe { vcvtuw2ph_512(a.as_u16x32(), _MM_FROUND_CUR_DIRECTION) } |
11852 | } |
11853 | |
11854 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11855 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11856 | /// mask bit is not set). |
11857 | /// |
11858 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu16_ph) |
11859 | #[inline ] |
11860 | #[target_feature (enable = "avx512fp16" )] |
11861 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11862 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11863 | pub fn _mm512_mask_cvtepu16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h { |
11864 | unsafe { simd_select_bitmask(k, _mm512_cvtepu16_ph(a), src) } |
11865 | } |
11866 | |
11867 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11868 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11869 | /// |
11870 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu16_ph) |
11871 | #[inline ] |
11872 | #[target_feature (enable = "avx512fp16" )] |
11873 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11874 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11875 | pub fn _mm512_maskz_cvtepu16_ph(k: __mmask32, a: __m512i) -> __m512h { |
11876 | _mm512_mask_cvtepu16_ph(_mm512_setzero_ph(), k, a) |
11877 | } |
11878 | |
11879 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11880 | /// and store the results in dst. |
11881 | /// |
11882 | /// Rounding is done according to the rounding parameter, which can be one of: |
11883 | /// |
11884 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
11885 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
11886 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
11887 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
11888 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
11889 | /// |
11890 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu16_ph) |
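///
/// A sketch of how the rounding mode affects integers that are not exactly representable in
/// `f16`, under the same assumptions as the earlier sketches (AVX512-FP16 plus AVX512BW for
/// `_mm512_set1_epi16`):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512bw")]
/// unsafe fn demo() {
///     // 2049 lies exactly halfway between the representable f16 values 2048.0 and 2050.0.
///     let a = _mm512_set1_epi16(2049);
///     let nearest = _mm512_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
///     let up = _mm512_cvt_roundepu16_ph::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
///     // `nearest` lanes hold 2048.0 (ties round to even); `up` lanes hold 2050.0.
/// }
/// ```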
11891 | #[inline ] |
11892 | #[target_feature (enable = "avx512fp16" )] |
11893 | #[cfg_attr (test, assert_instr(vcvtuw2ph, ROUNDING = 8))] |
11894 | #[rustc_legacy_const_generics (1)] |
11895 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11896 | pub fn _mm512_cvt_roundepu16_ph<const ROUNDING: i32>(a: __m512i) -> __m512h { |
11897 | unsafe { |
11898 | static_assert_rounding!(ROUNDING); |
11899 | vcvtuw2ph_512(a.as_u16x32(), ROUNDING) |
11900 | } |
11901 | } |
11902 | |
11903 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11904 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11905 | /// mask bit is not set). |
11906 | /// |
11907 | /// Rounding is done according to the rounding parameter, which can be one of: |
11908 | /// |
11909 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
11910 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
11911 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
11912 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
11913 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
11914 | /// |
11915 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu16_ph) |
11916 | #[inline ] |
11917 | #[target_feature (enable = "avx512fp16" )] |
11918 | #[cfg_attr (test, assert_instr(vcvtuw2ph, ROUNDING = 8))] |
11919 | #[rustc_legacy_const_generics (3)] |
11920 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11921 | pub fn _mm512_mask_cvt_roundepu16_ph<const ROUNDING: i32>( |
11922 | src: __m512h, |
11923 | k: __mmask32, |
11924 | a: __m512i, |
11925 | ) -> __m512h { |
11926 | unsafe { |
11927 | static_assert_rounding!(ROUNDING); |
11928 | simd_select_bitmask(k, _mm512_cvt_roundepu16_ph::<ROUNDING>(a), src) |
11929 | } |
11930 | } |
11931 | |
11932 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11933 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11934 | /// |
11935 | /// Rounding is done according to the rounding parameter, which can be one of: |
11936 | /// |
11937 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
11938 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
11939 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
11940 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
11941 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
11942 | /// |
11943 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu16_ph) |
11944 | #[inline ] |
11945 | #[target_feature (enable = "avx512fp16" )] |
11946 | #[cfg_attr (test, assert_instr(vcvtuw2ph, ROUNDING = 8))] |
11947 | #[rustc_legacy_const_generics (2)] |
11948 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11949 | pub fn _mm512_maskz_cvt_roundepu16_ph<const ROUNDING: i32>(k: __mmask32, a: __m512i) -> __m512h { |
11950 | static_assert_rounding!(ROUNDING); |
11951 | _mm512_mask_cvt_roundepu16_ph::<ROUNDING>(_mm512_setzero_ph(), k, a) |
11952 | } |
11953 | |
11954 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11955 | /// and store the results in dst. The upper 64 bits of dst are zeroed out. |
11956 | /// |
11957 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ph) |
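///
/// A sketch of the narrowing behaviour, under the same assumptions as the earlier sketches:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// unsafe fn demo() {
///     let a = _mm_set1_epi32(-42);
///     let h = _mm_cvtepi32_ph(a);
///     // Lanes 0..4 of `h` are -42.0; lanes 4..8 (the upper 64 bits) are 0.0.
/// }
/// ```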
11958 | #[inline ] |
11959 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11960 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
11961 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11962 | pub fn _mm_cvtepi32_ph(a: __m128i) -> __m128h { |
11963 | _mm_mask_cvtepi32_ph(_mm_setzero_ph(), 0xff, a) |
11964 | } |
11965 | |
11966 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11967 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11968 | /// mask bit is not set). The upper 64 bits of dst are zeroed out. |
11969 | /// |
11970 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi32_ph) |
11971 | #[inline ] |
11972 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11973 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
11974 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11975 | pub fn _mm_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { |
11976 | unsafe { vcvtdq2ph_128(a.as_i32x4(), src, k) } |
11977 | } |
11978 | |
11979 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11980 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11981 | /// The upper 64 bits of dst are zeroed out. |
11982 | /// |
11983 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi32_ph) |
11984 | #[inline ] |
11985 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11986 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
11987 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11988 | pub fn _mm_maskz_cvtepi32_ph(k: __mmask8, a: __m128i) -> __m128h { |
11989 | _mm_mask_cvtepi32_ph(_mm_setzero_ph(), k, a) |
11990 | } |
11991 | |
11992 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11993 | /// and store the results in dst. |
11994 | /// |
11995 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_ph) |
11996 | #[inline ] |
11997 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11998 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
11999 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12000 | pub fn _mm256_cvtepi32_ph(a: __m256i) -> __m128h { |
12001 | unsafe { vcvtdq2ph_256(a.as_i32x8(), _MM_FROUND_CUR_DIRECTION) } |
12002 | } |
12003 | |
12004 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12005 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12006 | /// mask bit is not set). |
12007 | /// |
12008 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi32_ph) |
12009 | #[inline ] |
12010 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12011 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
12012 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12013 | pub fn _mm256_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h { |
12014 | unsafe { simd_select_bitmask(k, _mm256_cvtepi32_ph(a), src) } |
12015 | } |
12016 | |
12017 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12018 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12019 | /// |
12020 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi32_ph) |
12021 | #[inline ] |
12022 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12023 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
12024 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12025 | pub fn _mm256_maskz_cvtepi32_ph(k: __mmask8, a: __m256i) -> __m128h { |
12026 | _mm256_mask_cvtepi32_ph(_mm_setzero_ph(), k, a) |
12027 | } |
12028 | |
12029 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12030 | /// and store the results in dst. |
12031 | /// |
12032 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi32_ph) |
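///
/// A sketch of the limited range of `f16`, under the same assumptions as the earlier sketches:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// unsafe fn demo() {
///     // The largest finite f16 value is 65504, so 100_000 cannot be represented.
///     let a = _mm512_set1_epi32(100_000);
///     let h = _mm512_cvtepi32_ph(a);
///     // With the default rounding mode every lane of `h` becomes +infinity.
/// }
/// ```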
12033 | #[inline ] |
12034 | #[target_feature (enable = "avx512fp16" )] |
12035 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
12036 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12037 | pub fn _mm512_cvtepi32_ph(a: __m512i) -> __m256h { |
12038 | unsafe { vcvtdq2ph_512(a.as_i32x16(), _MM_FROUND_CUR_DIRECTION) } |
12039 | } |
12040 | |
12041 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12042 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12043 | /// mask bit is not set). |
12044 | /// |
12045 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi32_ph) |
12046 | #[inline ] |
12047 | #[target_feature (enable = "avx512fp16" )] |
12048 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
12049 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12050 | pub fn _mm512_mask_cvtepi32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h { |
12051 | unsafe { simd_select_bitmask(k, _mm512_cvtepi32_ph(a), src) } |
12052 | } |
12053 | |
12054 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12055 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12056 | /// |
12057 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi32_ph) |
12058 | #[inline ] |
12059 | #[target_feature (enable = "avx512fp16" )] |
12060 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
12061 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12062 | pub fn _mm512_maskz_cvtepi32_ph(k: __mmask16, a: __m512i) -> __m256h { |
12063 | _mm512_mask_cvtepi32_ph(f16x16::ZERO.as_m256h(), k, a) |
12064 | } |
12065 | |
12066 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12067 | /// and store the results in dst. |
12068 | /// |
12069 | /// Rounding is done according to the rounding parameter, which can be one of: |
12070 | /// |
12071 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12072 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12073 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12074 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12075 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12076 | /// |
12077 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi32_ph) |
12078 | #[inline ] |
12079 | #[target_feature (enable = "avx512fp16" )] |
12080 | #[cfg_attr (test, assert_instr(vcvtdq2ph, ROUNDING = 8))] |
12081 | #[rustc_legacy_const_generics (1)] |
12082 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12083 | pub fn _mm512_cvt_roundepi32_ph<const ROUNDING: i32>(a: __m512i) -> __m256h { |
12084 | unsafe { |
12085 | static_assert_rounding!(ROUNDING); |
12086 | vcvtdq2ph_512(a.as_i32x16(), ROUNDING) |
12087 | } |
12088 | } |
12089 | |
12090 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12091 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12092 | /// mask bit is not set). |
12093 | /// |
12094 | /// Rounding is done according to the rounding parameter, which can be one of: |
12095 | /// |
12096 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12097 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12098 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12099 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12100 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12101 | /// |
12102 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi32_ph) |
12103 | #[inline ] |
12104 | #[target_feature (enable = "avx512fp16" )] |
12105 | #[cfg_attr (test, assert_instr(vcvtdq2ph, ROUNDING = 8))] |
12106 | #[rustc_legacy_const_generics (3)] |
12107 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12108 | pub fn _mm512_mask_cvt_roundepi32_ph<const ROUNDING: i32>( |
12109 | src: __m256h, |
12110 | k: __mmask16, |
12111 | a: __m512i, |
12112 | ) -> __m256h { |
12113 | unsafe { |
12114 | static_assert_rounding!(ROUNDING); |
12115 | simd_select_bitmask(k, _mm512_cvt_roundepi32_ph::<ROUNDING>(a), src) |
12116 | } |
12117 | } |
12118 | |
12119 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12120 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12121 | /// |
12122 | /// Rounding is done according to the rounding parameter, which can be one of: |
12123 | /// |
12124 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12125 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12126 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12127 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12128 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12129 | /// |
12130 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi32_ph) |
12131 | #[inline ] |
12132 | #[target_feature (enable = "avx512fp16" )] |
12133 | #[cfg_attr (test, assert_instr(vcvtdq2ph, ROUNDING = 8))] |
12134 | #[rustc_legacy_const_generics (2)] |
12135 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12136 | pub fn _mm512_maskz_cvt_roundepi32_ph<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m256h { |
12137 | static_assert_rounding!(ROUNDING); |
12138 | _mm512_mask_cvt_roundepi32_ph::<ROUNDING>(f16x16::ZERO.as_m256h(), k, a) |
12139 | } |
12140 | |
12141 | /// Convert the signed 32-bit integer b to a half-precision (16-bit) floating-point element, store the |
12142 | /// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements |
12143 | /// of dst. |
12144 | /// |
12145 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti32_sh) |
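///
/// A sketch of the scalar behaviour, under the same assumptions as the earlier sketches:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// unsafe fn demo() {
///     let a = _mm_set1_ph(9.0);
///     let r = _mm_cvti32_sh(a, 25);
///     // Lane 0 of `r` is 25.0; lanes 1..8 keep the value 9.0 copied from `a`.
/// }
/// ```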
12146 | #[inline ] |
12147 | #[target_feature (enable = "avx512fp16" )] |
12148 | #[cfg_attr (test, assert_instr(vcvtsi2sh))] |
12149 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12150 | pub fn _mm_cvti32_sh(a: __m128h, b: i32) -> __m128h { |
12151 | unsafe { vcvtsi2sh(a, b, _MM_FROUND_CUR_DIRECTION) } |
12152 | } |
12153 | |
12154 | /// Convert the signed 32-bit integer b to a half-precision (16-bit) floating-point element, store the |
12155 | /// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements |
12156 | /// of dst. |
12157 | /// |
12158 | /// Rounding is done according to the rounding parameter, which can be one of: |
12159 | /// |
12160 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12161 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12162 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12163 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12164 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12165 | /// |
12166 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi32_sh) |
12167 | #[inline ] |
12168 | #[target_feature (enable = "avx512fp16" )] |
12169 | #[cfg_attr (test, assert_instr(vcvtsi2sh, ROUNDING = 8))] |
12170 | #[rustc_legacy_const_generics (2)] |
12171 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12172 | pub fn _mm_cvt_roundi32_sh<const ROUNDING: i32>(a: __m128h, b: i32) -> __m128h { |
12173 | unsafe { |
12174 | static_assert_rounding!(ROUNDING); |
12175 | vcvtsi2sh(a, b, ROUNDING) |
12176 | } |
12177 | } |
12178 | |
12179 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12180 | /// and store the results in dst. The upper 64 bits of dst are zeroed out. |
12181 | /// |
12182 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu32_ph) |
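///
/// A sketch contrasting the unsigned interpretation with `_mm_cvtepi32_ph`, under the same
/// assumptions as the earlier sketches:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// unsafe fn demo() {
///     // The bit pattern 0xFFFF_FFF0 is -16 as i32 but 4_294_967_280 as u32.
///     let a = _mm_set1_epi32(-16);
///     let signed = _mm_cvtepi32_ph(a); // lower four lanes: -16.0
///     let unsigned = _mm_cvtepu32_ph(a); // lower four lanes: +infinity (beyond f16's range)
/// }
/// ```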
12183 | #[inline ] |
12184 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12185 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12186 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12187 | pub fn _mm_cvtepu32_ph(a: __m128i) -> __m128h { |
12188 | _mm_mask_cvtepu32_ph(_mm_setzero_ph(), 0xff, a) |
12189 | } |
12190 | |
12191 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12192 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12193 | /// mask bit is not set). The upper 64 bits of dst are zeroed out. |
12194 | /// |
12195 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu32_ph) |
12196 | #[inline ] |
12197 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12198 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12199 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12200 | pub fn _mm_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { |
12201 | unsafe { vcvtudq2ph_128(a.as_u32x4(), src, k) } |
12202 | } |
12203 | |
12204 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12205 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12206 | /// The upper 64 bits of dst are zeroed out. |
12207 | /// |
12208 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu32_ph) |
12209 | #[inline ] |
12210 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12211 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12212 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12213 | pub fn _mm_maskz_cvtepu32_ph(k: __mmask8, a: __m128i) -> __m128h { |
12214 | _mm_mask_cvtepu32_ph(_mm_setzero_ph(), k, a) |
12215 | } |
12216 | |
12217 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12218 | /// and store the results in dst. |
12219 | /// |
12220 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu32_ph) |
12221 | #[inline ] |
12222 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12223 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12224 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12225 | pub fn _mm256_cvtepu32_ph(a: __m256i) -> __m128h { |
12226 | unsafe { vcvtudq2ph_256(a.as_u32x8(), _MM_FROUND_CUR_DIRECTION) } |
12227 | } |
12228 | |
12229 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12230 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12231 | /// mask bit is not set). |
12232 | /// |
12233 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu32_ph) |
12234 | #[inline ] |
12235 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12236 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12237 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12238 | pub fn _mm256_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h { |
12239 | unsafe { simd_select_bitmask(k, _mm256_cvtepu32_ph(a), src) } |
12240 | } |
12241 | |
12242 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12243 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12244 | /// |
12245 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu32_ph) |
12246 | #[inline ] |
12247 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12248 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12249 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12250 | pub fn _mm256_maskz_cvtepu32_ph(k: __mmask8, a: __m256i) -> __m128h { |
12251 | _mm256_mask_cvtepu32_ph(_mm_setzero_ph(), k, a) |
12252 | } |
12253 | |
12254 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12255 | /// and store the results in dst. |
12256 | /// |
12257 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu32_ph) |
12258 | #[inline ] |
12259 | #[target_feature (enable = "avx512fp16" )] |
12260 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12261 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12262 | pub fn _mm512_cvtepu32_ph(a: __m512i) -> __m256h { |
12263 | unsafe { vcvtudq2ph_512(a.as_u32x16(), _MM_FROUND_CUR_DIRECTION) } |
12264 | } |
12265 | |
12266 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12267 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12268 | /// mask bit is not set). |
12269 | /// |
12270 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu32_ph) |
12271 | #[inline ] |
12272 | #[target_feature (enable = "avx512fp16" )] |
12273 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12274 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12275 | pub fn _mm512_mask_cvtepu32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h { |
12276 | unsafe { simd_select_bitmask(k, _mm512_cvtepu32_ph(a), src) } |
12277 | } |
12278 | |
12279 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12280 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12281 | /// |
12282 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu32_ph) |
12283 | #[inline ] |
12284 | #[target_feature (enable = "avx512fp16" )] |
12285 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12286 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12287 | pub fn _mm512_maskz_cvtepu32_ph(k: __mmask16, a: __m512i) -> __m256h { |
12288 | _mm512_mask_cvtepu32_ph(f16x16::ZERO.as_m256h(), k, a) |
12289 | } |
12290 | |
12291 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12292 | /// and store the results in dst. |
12293 | /// |
12294 | /// Rounding is done according to the rounding parameter, which can be one of: |
12295 | /// |
12296 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12297 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12298 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12299 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12300 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12301 | /// |
12302 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu32_ph) |
12303 | #[inline ] |
12304 | #[target_feature (enable = "avx512fp16" )] |
12305 | #[cfg_attr (test, assert_instr(vcvtudq2ph, ROUNDING = 8))] |
12306 | #[rustc_legacy_const_generics (1)] |
12307 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12308 | pub fn _mm512_cvt_roundepu32_ph<const ROUNDING: i32>(a: __m512i) -> __m256h { |
12309 | unsafe { |
12310 | static_assert_rounding!(ROUNDING); |
12311 | vcvtudq2ph_512(a.as_u32x16(), ROUNDING) |
12312 | } |
12313 | } |
12314 | |
12315 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12316 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12317 | /// mask bit is not set). |
12318 | /// |
12319 | /// Rounding is done according to the rounding parameter, which can be one of: |
12320 | /// |
12321 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12322 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12323 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12324 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12325 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12326 | /// |
12327 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu32_ph) |
12328 | #[inline ] |
12329 | #[target_feature (enable = "avx512fp16" )] |
12330 | #[cfg_attr (test, assert_instr(vcvtudq2ph, ROUNDING = 8))] |
12331 | #[rustc_legacy_const_generics (3)] |
12332 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12333 | pub fn _mm512_mask_cvt_roundepu32_ph<const ROUNDING: i32>( |
12334 | src: __m256h, |
12335 | k: __mmask16, |
12336 | a: __m512i, |
12337 | ) -> __m256h { |
12338 | unsafe { |
12339 | static_assert_rounding!(ROUNDING); |
12340 | simd_select_bitmask(k, _mm512_cvt_roundepu32_ph::<ROUNDING>(a), src) |
12341 | } |
12342 | } |
12343 | |
12344 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12345 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12346 | /// |
12347 | /// Rounding is done according to the rounding parameter, which can be one of: |
12348 | /// |
12349 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12350 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12351 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12352 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12353 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12354 | /// |
12355 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu32_ph) |
12356 | #[inline ] |
12357 | #[target_feature (enable = "avx512fp16" )] |
12358 | #[cfg_attr (test, assert_instr(vcvtudq2ph, ROUNDING = 8))] |
12359 | #[rustc_legacy_const_generics (2)] |
12360 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12361 | pub fn _mm512_maskz_cvt_roundepu32_ph<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m256h { |
12362 | static_assert_rounding!(ROUNDING); |
12363 | _mm512_mask_cvt_roundepu32_ph::<ROUNDING>(f16x16::ZERO.as_m256h(), k, a) |
12364 | } |
12365 | |
12366 | /// Convert the unsigned 32-bit integer b to a half-precision (16-bit) floating-point element, store the |
12367 | /// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements |
12368 | /// of dst. |
12369 | /// |
12370 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu32_sh) |
12371 | #[inline ] |
12372 | #[target_feature (enable = "avx512fp16" )] |
12373 | #[cfg_attr (test, assert_instr(vcvtusi2sh))] |
12374 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12375 | pub fn _mm_cvtu32_sh(a: __m128h, b: u32) -> __m128h { |
12376 | unsafe { vcvtusi2sh(a, b, _MM_FROUND_CUR_DIRECTION) } |
12377 | } |
12378 | |
12379 | /// Convert the unsigned 32-bit integer b to a half-precision (16-bit) floating-point element, store the |
12380 | /// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements |
12381 | /// of dst. |
12382 | /// |
12383 | /// Rounding is done according to the rounding parameter, which can be one of: |
12384 | /// |
12385 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12386 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12387 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12388 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12389 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12390 | /// |
12391 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu32_sh) |
12392 | #[inline ] |
12393 | #[target_feature (enable = "avx512fp16" )] |
12394 | #[cfg_attr (test, assert_instr(vcvtusi2sh, ROUNDING = 8))] |
12395 | #[rustc_legacy_const_generics (2)] |
12396 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12397 | pub fn _mm_cvt_roundu32_sh<const ROUNDING: i32>(a: __m128h, b: u32) -> __m128h { |
12398 | unsafe { |
12399 | static_assert_rounding!(ROUNDING); |
12400 | vcvtusi2sh(a, b, ROUNDING) |
12401 | } |
12402 | } |
12403 | |
12404 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12405 | /// and store the results in dst. The upper 96 bits of dst are zeroed out. |
12406 | /// |
12407 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi64_ph) |
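///
/// A sketch of the 64-bit narrowing, under the same assumptions as the earlier sketches:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// unsafe fn demo() {
///     let a = _mm_set1_epi64x(-7);
///     let h = _mm_cvtepi64_ph(a);
///     // Lanes 0 and 1 of `h` are -7.0; lanes 2..8 (the upper 96 bits) are 0.0.
/// }
/// ```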
12408 | #[inline ] |
12409 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12410 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12411 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12412 | pub fn _mm_cvtepi64_ph(a: __m128i) -> __m128h { |
12413 | _mm_mask_cvtepi64_ph(_mm_setzero_ph(), 0xff, a) |
12414 | } |
12415 | |
12416 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12417 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12418 | /// mask bit is not set). The upper 96 bits of dst are zeroed out. |
12419 | /// |
12420 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi64_ph) |
12421 | #[inline ] |
12422 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12423 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12424 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12425 | pub fn _mm_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { |
12426 | unsafe { vcvtqq2ph_128(a.as_i64x2(), src, k) } |
12427 | } |
12428 | |
12429 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12430 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12431 | /// The upper 96 bits of dst are zeroed out. |
12432 | /// |
12433 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi64_ph) |
12434 | #[inline ] |
12435 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12436 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12437 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12438 | pub fn _mm_maskz_cvtepi64_ph(k: __mmask8, a: __m128i) -> __m128h { |
12439 | _mm_mask_cvtepi64_ph(_mm_setzero_ph(), k, a) |
12440 | } |
12441 | |
12442 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12443 | /// and store the results in dst. The upper 64 bits of dst are zeroed out. |
12444 | /// |
12445 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi64_ph) |
12446 | #[inline ] |
12447 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12448 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12449 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12450 | pub fn _mm256_cvtepi64_ph(a: __m256i) -> __m128h { |
12451 | _mm256_mask_cvtepi64_ph(_mm_setzero_ph(), 0xff, a) |
12452 | } |
12453 | |
12454 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12455 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12456 | /// mask bit is not set). The upper 64 bits of dst are zeroed out. |
12457 | /// |
12458 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi64_ph) |
12459 | #[inline ] |
12460 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12461 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12462 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12463 | pub fn _mm256_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h { |
12464 | unsafe { vcvtqq2ph_256(a.as_i64x4(), src, k) } |
12465 | } |
12466 | |
12467 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12468 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12469 | /// The upper 64 bits of dst are zeroed out. |
12470 | /// |
12471 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi64_ph) |
12472 | #[inline ] |
12473 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12474 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12475 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12476 | pub fn _mm256_maskz_cvtepi64_ph(k: __mmask8, a: __m256i) -> __m128h { |
12477 | _mm256_mask_cvtepi64_ph(_mm_setzero_ph(), k, a) |
12478 | } |
12479 | |
12480 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12481 | /// and store the results in dst. |
12482 | /// |
12483 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi64_ph) |
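///
/// A sketch, under the same assumptions as the earlier sketches:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// unsafe fn demo() {
///     // Eight 64-bit lanes narrow to eight f16 lanes, exactly filling a __m128h.
///     let a = _mm512_set1_epi64(1234);
///     let h = _mm512_cvtepi64_ph(a);
///     // Every lane of `h` is 1234.0, which is exactly representable in f16.
/// }
/// ```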
12484 | #[inline ] |
12485 | #[target_feature (enable = "avx512fp16" )] |
12486 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12487 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12488 | pub fn _mm512_cvtepi64_ph(a: __m512i) -> __m128h { |
12489 | unsafe { vcvtqq2ph_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION) } |
12490 | } |
12491 | |
12492 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12493 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12494 | /// mask bit is not set). |
12495 | /// |
12496 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi64_ph) |
12497 | #[inline ] |
12498 | #[target_feature (enable = "avx512fp16" )] |
12499 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12500 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12501 | pub fn _mm512_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h { |
12502 | unsafe { simd_select_bitmask(k, _mm512_cvtepi64_ph(a), src) } |
12503 | } |
12504 | |
12505 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12506 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12507 | /// |
12508 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi64_ph) |
12509 | #[inline ] |
12510 | #[target_feature (enable = "avx512fp16" )] |
12511 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12512 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12513 | pub fn _mm512_maskz_cvtepi64_ph(k: __mmask8, a: __m512i) -> __m128h { |
12514 | _mm512_mask_cvtepi64_ph(f16x8::ZERO.as_m128h(), k, a) |
12515 | } |
12516 | |
12517 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12518 | /// and store the results in dst. |
12519 | /// |
12520 | /// Rounding is done according to the rounding parameter, which can be one of: |
12521 | /// |
12522 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12523 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12524 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12525 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12526 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12527 | /// |
12528 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi64_ph) |
12529 | #[inline ] |
12530 | #[target_feature (enable = "avx512fp16" )] |
12531 | #[cfg_attr (test, assert_instr(vcvtqq2ph, ROUNDING = 8))] |
12532 | #[rustc_legacy_const_generics (1)] |
12533 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12534 | pub fn _mm512_cvt_roundepi64_ph<const ROUNDING: i32>(a: __m512i) -> __m128h { |
12535 | unsafe { |
12536 | static_assert_rounding!(ROUNDING); |
12537 | vcvtqq2ph_512(a.as_i64x8(), ROUNDING) |
12538 | } |
12539 | } |
12540 | |
12541 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12542 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12543 | /// mask bit is not set). |
12544 | /// |
12545 | /// Rounding is done according to the rounding parameter, which can be one of: |
12546 | /// |
12547 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12548 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12549 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12550 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12551 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12552 | /// |
12553 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi64_ph) |
12554 | #[inline ] |
12555 | #[target_feature (enable = "avx512fp16" )] |
12556 | #[cfg_attr (test, assert_instr(vcvtqq2ph, ROUNDING = 8))] |
12557 | #[rustc_legacy_const_generics (3)] |
12558 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12559 | pub fn _mm512_mask_cvt_roundepi64_ph<const ROUNDING: i32>( |
12560 | src: __m128h, |
12561 | k: __mmask8, |
12562 | a: __m512i, |
12563 | ) -> __m128h { |
12564 | unsafe { |
12565 | static_assert_rounding!(ROUNDING); |
12566 | simd_select_bitmask(k, _mm512_cvt_roundepi64_ph::<ROUNDING>(a), src) |
12567 | } |
12568 | } |
12569 | |
12570 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12571 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12572 | /// |
12573 | /// Rounding is done according to the rounding parameter, which can be one of: |
12574 | /// |
12575 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12576 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12577 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12578 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12579 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12580 | /// |
12581 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi64_ph) |
12582 | #[inline ] |
12583 | #[target_feature (enable = "avx512fp16" )] |
12584 | #[cfg_attr (test, assert_instr(vcvtqq2ph, ROUNDING = 8))] |
12585 | #[rustc_legacy_const_generics (2)] |
12586 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12587 | pub fn _mm512_maskz_cvt_roundepi64_ph<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m128h { |
12588 | static_assert_rounding!(ROUNDING); |
12589 | _mm512_mask_cvt_roundepi64_ph::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a) |
12590 | } |
12591 | |
12592 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12593 | /// and store the results in dst. The upper 96 bits of dst are zeroed out. |
12594 | /// |
12595 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu64_ph) |
12596 | #[inline ] |
12597 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12598 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12599 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12600 | pub fn _mm_cvtepu64_ph(a: __m128i) -> __m128h { |
12601 | _mm_mask_cvtepu64_ph(_mm_setzero_ph(), 0xff, a) |
12602 | } |
12603 | |
12604 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12605 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12606 | /// mask bit is not set). The upper 96 bits of dst are zeroed out. |
12607 | /// |
12608 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu64_ph) |
12609 | #[inline ] |
12610 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12611 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12612 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12613 | pub fn _mm_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { |
12614 | unsafe { vcvtuqq2ph_128(a.as_u64x2(), src, k) } |
12615 | } |
12616 | |
12617 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12618 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12619 | /// The upper 96 bits of dst are zeroed out. |
12620 | /// |
12621 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu64_ph) |
12622 | #[inline ] |
12623 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12624 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12625 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12626 | pub fn _mm_maskz_cvtepu64_ph(k: __mmask8, a: __m128i) -> __m128h { |
12627 | _mm_mask_cvtepu64_ph(_mm_setzero_ph(), k, a) |
12628 | } |
12629 | |
12630 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12631 | /// and store the results in dst. The upper 64 bits of dst are zeroed out. |
12632 | /// |
12633 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu64_ph) |
12634 | #[inline ] |
12635 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12636 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12637 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12638 | pub fn _mm256_cvtepu64_ph(a: __m256i) -> __m128h { |
12639 | _mm256_mask_cvtepu64_ph(_mm_setzero_ph(), 0xff, a) |
12640 | } |
12641 | |
12642 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12643 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12644 | /// mask bit is not set). The upper 64 bits of dst are zeroed out. |
12645 | /// |
12646 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu64_ph) |
12647 | #[inline ] |
12648 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12649 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12650 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12651 | pub fn _mm256_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h { |
12652 | unsafe { vcvtuqq2ph_256(a.as_u64x4(), src, k) } |
12653 | } |
12654 | |
12655 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12656 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12657 | /// The upper 64 bits of dst are zeroed out. |
12658 | /// |
12659 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu64_ph) |
12660 | #[inline ] |
12661 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12662 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12663 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12664 | pub fn _mm256_maskz_cvtepu64_ph(k: __mmask8, a: __m256i) -> __m128h { |
12665 | _mm256_mask_cvtepu64_ph(_mm_setzero_ph(), k, a) |
12666 | } |
12667 | |
12668 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12669 | /// and store the results in dst. |
12670 | /// |
12671 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu64_ph) |
12672 | #[inline ] |
12673 | #[target_feature (enable = "avx512fp16" )] |
12674 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12675 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12676 | pub fn _mm512_cvtepu64_ph(a: __m512i) -> __m128h { |
12677 | unsafe { vcvtuqq2ph_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION) } |
12678 | } |
12679 | |
12680 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12681 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12682 | /// mask bit is not set). |
12683 | /// |
12684 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu64_ph) |
12685 | #[inline ] |
12686 | #[target_feature (enable = "avx512fp16" )] |
12687 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12688 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12689 | pub fn _mm512_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h { |
12690 | unsafe { simd_select_bitmask(k, _mm512_cvtepu64_ph(a), src) } |
12691 | } |
12692 | |
12693 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12694 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12695 | /// |
12696 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu64_ph) |
12697 | #[inline ] |
12698 | #[target_feature (enable = "avx512fp16" )] |
12699 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12700 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12701 | pub fn _mm512_maskz_cvtepu64_ph(k: __mmask8, a: __m512i) -> __m128h { |
12702 | _mm512_mask_cvtepu64_ph(f16x8::ZERO.as_m128h(), k, a) |
12703 | } |
12704 | |
12705 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12706 | /// and store the results in dst. |
12707 | /// |
12708 | /// Rounding is done according to the rounding parameter, which can be one of: |
12709 | /// |
12710 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12711 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12712 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12713 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12714 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12715 | /// |
12716 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu64_ph) |
12717 | #[inline ] |
12718 | #[target_feature (enable = "avx512fp16" )] |
12719 | #[cfg_attr (test, assert_instr(vcvtuqq2ph, ROUNDING = 8))] |
12720 | #[rustc_legacy_const_generics (1)] |
12721 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12722 | pub fn _mm512_cvt_roundepu64_ph<const ROUNDING: i32>(a: __m512i) -> __m128h { |
12723 | unsafe { |
12724 | static_assert_rounding!(ROUNDING); |
12725 | vcvtuqq2ph_512(a.as_u64x8(), ROUNDING) |
12726 | } |
12727 | } |
12728 | |
12729 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12730 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12731 | /// mask bit is not set). |
12732 | /// |
12733 | /// Rounding is done according to the rounding parameter, which can be one of: |
12734 | /// |
12735 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12736 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12737 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12738 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12739 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12740 | /// |
12741 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu64_ph) |
12742 | #[inline ] |
12743 | #[target_feature (enable = "avx512fp16" )] |
12744 | #[cfg_attr (test, assert_instr(vcvtuqq2ph, ROUNDING = 8))] |
12745 | #[rustc_legacy_const_generics (3)] |
12746 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12747 | pub fn _mm512_mask_cvt_roundepu64_ph<const ROUNDING: i32>( |
12748 | src: __m128h, |
12749 | k: __mmask8, |
12750 | a: __m512i, |
12751 | ) -> __m128h { |
12752 | unsafe { |
12753 | static_assert_rounding!(ROUNDING); |
12754 | simd_select_bitmask(k, _mm512_cvt_roundepu64_ph::<ROUNDING>(a), src) |
12755 | } |
12756 | } |
12757 | |
12758 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12759 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12760 | /// |
12761 | /// Rounding is done according to the rounding parameter, which can be one of: |
12762 | /// |
12763 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12764 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12765 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12766 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12767 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12768 | /// |
12769 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu64_ph) |
12770 | #[inline ] |
12771 | #[target_feature (enable = "avx512fp16" )] |
12772 | #[cfg_attr (test, assert_instr(vcvtuqq2ph, ROUNDING = 8))] |
12773 | #[rustc_legacy_const_generics (2)] |
12774 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12775 | pub fn _mm512_maskz_cvt_roundepu64_ph<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m128h { |
12776 | static_assert_rounding!(ROUNDING); |
12777 | _mm512_mask_cvt_roundepu64_ph::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a) |
12778 | } |
12779 | |
12780 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12781 | /// floating-point elements, and store the results in dst. |
12782 | /// |
12783 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxps_ph) |
12784 | #[inline ] |
12785 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12786 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12787 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12788 | pub fn _mm_cvtxps_ph(a: __m128) -> __m128h { |
    _mm_mask_cvtxps_ph(_mm_setzero_ph(), 0xff, a)
12790 | } |
12791 | |
12792 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12793 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst |
12794 | /// when the corresponding mask bit is not set). The upper 64 bits of dst are zeroed out. |
12795 | /// |
12796 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxps_ph) |
12797 | #[inline ] |
12798 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12799 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12800 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12801 | pub fn _mm_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m128) -> __m128h { |
12802 | unsafe { vcvtps2phx_128(a, src, k) } |
12803 | } |
12804 | |
12805 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12806 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
12807 | /// corresponding mask bit is not set). The upper 64 bits of dst are zeroed out. |
12808 | /// |
12809 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxps_ph) |
12810 | #[inline ] |
12811 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12812 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12813 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12814 | pub fn _mm_maskz_cvtxps_ph(k: __mmask8, a: __m128) -> __m128h { |
    _mm_mask_cvtxps_ph(_mm_setzero_ph(), k, a)
12816 | } |
12817 | |
12818 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12819 | /// floating-point elements, and store the results in dst. |
12820 | /// |
12821 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxps_ph) |
12822 | #[inline ] |
12823 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12824 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12825 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12826 | pub fn _mm256_cvtxps_ph(a: __m256) -> __m128h { |
    _mm256_mask_cvtxps_ph(_mm_setzero_ph(), 0xff, a)
12828 | } |
12829 | |
12830 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12831 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst |
12832 | /// when the corresponding mask bit is not set). |
12833 | /// |
12834 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxps_ph) |
12835 | #[inline ] |
12836 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12837 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12838 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12839 | pub fn _mm256_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m256) -> __m128h { |
12840 | unsafe { vcvtps2phx_256(a, src, k) } |
12841 | } |
12842 | |
12843 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12844 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
12845 | /// corresponding mask bit is not set). |
12846 | /// |
12847 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxps_ph) |
12848 | #[inline ] |
12849 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12850 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12851 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12852 | pub fn _mm256_maskz_cvtxps_ph(k: __mmask8, a: __m256) -> __m128h { |
    _mm256_mask_cvtxps_ph(_mm_setzero_ph(), k, a)
12854 | } |
12855 | |
12856 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12857 | /// floating-point elements, and store the results in dst. |
12858 | /// |
12859 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxps_ph) |
12860 | #[inline ] |
12861 | #[target_feature (enable = "avx512fp16" )] |
12862 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12863 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12864 | pub fn _mm512_cvtxps_ph(a: __m512) -> __m256h { |
    _mm512_mask_cvtxps_ph(f16x16::ZERO.as_m256h(), 0xffff, a)
12866 | } |
12867 | |
12868 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12869 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst |
12870 | /// when the corresponding mask bit is not set). |
12871 | /// |
12872 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxps_ph) |
12873 | #[inline ] |
12874 | #[target_feature (enable = "avx512fp16" )] |
12875 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12876 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12877 | pub fn _mm512_mask_cvtxps_ph(src: __m256h, k: __mmask16, a: __m512) -> __m256h { |
12878 | unsafe { vcvtps2phx_512(a, src, k, _MM_FROUND_CUR_DIRECTION) } |
12879 | } |
12880 | |
12881 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12882 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
12883 | /// corresponding mask bit is not set). |
12884 | /// |
12885 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxps_ph) |
12886 | #[inline ] |
12887 | #[target_feature (enable = "avx512fp16" )] |
12888 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12889 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12890 | pub fn _mm512_maskz_cvtxps_ph(k: __mmask16, a: __m512) -> __m256h { |
    _mm512_mask_cvtxps_ph(f16x16::ZERO.as_m256h(), k, a)
12892 | } |
12893 | |
12894 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12895 | /// floating-point elements, and store the results in dst. |
12896 | /// |
12897 | /// Rounding is done according to the rounding parameter, which can be one of: |
12898 | /// |
12899 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12900 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12901 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12902 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12903 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12904 | /// |
12905 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundps_ph) |
12906 | #[inline ] |
12907 | #[target_feature (enable = "avx512fp16" )] |
12908 | #[cfg_attr (test, assert_instr(vcvtps2phx, ROUNDING = 8))] |
12909 | #[rustc_legacy_const_generics (1)] |
12910 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12911 | pub fn _mm512_cvtx_roundps_ph<const ROUNDING: i32>(a: __m512) -> __m256h { |
12912 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_cvtx_roundps_ph::<ROUNDING>(f16x16::ZERO.as_m256h(), 0xffff, a)
12914 | } |
12915 | |
12916 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12917 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst |
12918 | /// when the corresponding mask bit is not set). |
12919 | /// |
12920 | /// Rounding is done according to the rounding parameter, which can be one of: |
12921 | /// |
12922 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12923 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12924 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12925 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12926 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12927 | /// |
12928 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundps_ph) |
12929 | #[inline ] |
12930 | #[target_feature (enable = "avx512fp16" )] |
12931 | #[cfg_attr (test, assert_instr(vcvtps2phx, ROUNDING = 8))] |
12932 | #[rustc_legacy_const_generics (3)] |
12933 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12934 | pub fn _mm512_mask_cvtx_roundps_ph<const ROUNDING: i32>( |
12935 | src: __m256h, |
12936 | k: __mmask16, |
12937 | a: __m512, |
12938 | ) -> __m256h { |
12939 | unsafe { |
12940 | static_assert_rounding!(ROUNDING); |
12941 | vcvtps2phx_512(a, src, k, ROUNDING) |
12942 | } |
12943 | } |
12944 | |
12945 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12946 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
12947 | /// corresponding mask bit is not set). |
12948 | /// |
12949 | /// Rounding is done according to the rounding parameter, which can be one of: |
12950 | /// |
12951 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12952 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12953 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12954 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12955 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12956 | /// |
12957 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundps_ph) |
12958 | #[inline ] |
12959 | #[target_feature (enable = "avx512fp16" )] |
12960 | #[cfg_attr (test, assert_instr(vcvtps2phx, ROUNDING = 8))] |
12961 | #[rustc_legacy_const_generics (2)] |
12962 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12963 | pub fn _mm512_maskz_cvtx_roundps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256h { |
12964 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_cvtx_roundps_ph::<ROUNDING>(f16x16::ZERO.as_m256h(), k, a)
12966 | } |
12967 | |
12968 | /// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit) |
/// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed
12970 | /// elements from a to the upper elements of dst. |
12971 | /// |
12972 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sh) |
12973 | #[inline ] |
12974 | #[target_feature (enable = "avx512fp16" )] |
12975 | #[cfg_attr (test, assert_instr(vcvtss2sh))] |
12976 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12977 | pub fn _mm_cvtss_sh(a: __m128h, b: __m128) -> __m128h { |
    _mm_mask_cvtss_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
12979 | } |
12980 | |
12981 | /// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit) |
/// floating-point element, store the result in the lower element of dst using writemask k (the element
/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
12984 | /// upper elements of dst. |
12985 | /// |
12986 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtss_sh) |
12987 | #[inline ] |
12988 | #[target_feature (enable = "avx512fp16" )] |
12989 | #[cfg_attr (test, assert_instr(vcvtss2sh))] |
12990 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12991 | pub fn _mm_mask_cvtss_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128) -> __m128h { |
12992 | unsafe { vcvtss2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION) } |
12993 | } |
12994 | |
12995 | /// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit) |
/// floating-point element, store the result in the lower element of dst using zeromask k (the element
12997 | /// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
12998 | /// elements of dst. |
12999 | /// |
13000 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtss_sh) |
13001 | #[inline ] |
13002 | #[target_feature (enable = "avx512fp16" )] |
13003 | #[cfg_attr (test, assert_instr(vcvtss2sh))] |
13004 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13005 | pub fn _mm_maskz_cvtss_sh(k: __mmask8, a: __m128h, b: __m128) -> __m128h { |
    _mm_mask_cvtss_sh(f16x8::ZERO.as_m128h(), k, a, b)
13007 | } |
13008 | |
13009 | /// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit) |
/// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed
13011 | /// elements from a to the upper elements of dst. |
13012 | /// |
13013 | /// Rounding is done according to the rounding parameter, which can be one of: |
13014 | /// |
13015 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13016 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13017 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13018 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13019 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13020 | /// |
13021 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_sh) |
13022 | #[inline ] |
13023 | #[target_feature (enable = "avx512fp16" )] |
13024 | #[cfg_attr (test, assert_instr(vcvtss2sh, ROUNDING = 8))] |
13025 | #[rustc_legacy_const_generics (2)] |
13026 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13027 | pub fn _mm_cvt_roundss_sh<const ROUNDING: i32>(a: __m128h, b: __m128) -> __m128h { |
13028 | static_assert_rounding!(ROUNDING); |
    _mm_mask_cvt_roundss_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
13030 | } |
13031 | |
13032 | /// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit) |
/// floating-point element, store the result in the lower element of dst using writemask k (the element
/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
13035 | /// upper elements of dst. |
13036 | /// |
13037 | /// Rounding is done according to the rounding parameter, which can be one of: |
13038 | /// |
13039 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13040 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13041 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13042 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13043 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13044 | /// |
13045 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundss_sh) |
13046 | #[inline ] |
13047 | #[target_feature (enable = "avx512fp16" )] |
13048 | #[cfg_attr (test, assert_instr(vcvtss2sh, ROUNDING = 8))] |
13049 | #[rustc_legacy_const_generics (4)] |
13050 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13051 | pub fn _mm_mask_cvt_roundss_sh<const ROUNDING: i32>( |
13052 | src: __m128h, |
13053 | k: __mmask8, |
13054 | a: __m128h, |
13055 | b: __m128, |
13056 | ) -> __m128h { |
13057 | unsafe { |
13058 | static_assert_rounding!(ROUNDING); |
13059 | vcvtss2sh(a, b, src, k, ROUNDING) |
13060 | } |
13061 | } |
13062 | |
13063 | /// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit) |
/// floating-point element, store the result in the lower element of dst using zeromask k (the element
13065 | /// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
13066 | /// elements of dst. |
13067 | /// |
13068 | /// Rounding is done according to the rounding parameter, which can be one of: |
13069 | /// |
13070 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13071 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13072 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13073 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13074 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13075 | /// |
13076 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundss_sh) |
13077 | #[inline ] |
13078 | #[target_feature (enable = "avx512fp16" )] |
13079 | #[cfg_attr (test, assert_instr(vcvtss2sh, ROUNDING = 8))] |
13080 | #[rustc_legacy_const_generics (3)] |
13081 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13082 | pub fn _mm_maskz_cvt_roundss_sh<const ROUNDING: i32>( |
13083 | k: __mmask8, |
13084 | a: __m128h, |
13085 | b: __m128, |
13086 | ) -> __m128h { |
13087 | static_assert_rounding!(ROUNDING); |
    _mm_mask_cvt_roundss_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
13089 | } |
13090 | |
13091 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13092 | /// floating-point elements, and store the results in dst. The upper 96 bits of dst are zeroed out. |
13093 | /// |
13094 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ph) |
13095 | #[inline ] |
13096 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13097 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13098 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13099 | pub fn _mm_cvtpd_ph(a: __m128d) -> __m128h { |
    _mm_mask_cvtpd_ph(_mm_setzero_ph(), 0xff, a)
13101 | } |
13102 | |
13103 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13104 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst |
13105 | /// when the corresponding mask bit is not set). The upper 96 bits of dst are zeroed out. |
13106 | /// |
13107 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtpd_ph) |
13108 | #[inline ] |
13109 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13110 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13111 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13112 | pub fn _mm_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m128d) -> __m128h { |
13113 | unsafe { vcvtpd2ph_128(a, src, k) } |
13114 | } |
13115 | |
13116 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13117 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
13118 | /// corresponding mask bit is not set). The upper 96 bits of dst are zeroed out. |
13119 | /// |
13120 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtpd_ph) |
13121 | #[inline ] |
13122 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13123 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13124 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13125 | pub fn _mm_maskz_cvtpd_ph(k: __mmask8, a: __m128d) -> __m128h { |
    _mm_mask_cvtpd_ph(_mm_setzero_ph(), k, a)
13127 | } |
13128 | |
13129 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13130 | /// floating-point elements, and store the results in dst. The upper 64 bits of dst are zeroed out. |
13131 | /// |
13132 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtpd_ph) |
13133 | #[inline ] |
13134 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13135 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13136 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13137 | pub fn _mm256_cvtpd_ph(a: __m256d) -> __m128h { |
    _mm256_mask_cvtpd_ph(_mm_setzero_ph(), 0xff, a)
13139 | } |
13140 | |
13141 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13142 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst |
13143 | /// when the corresponding mask bit is not set). The upper 64 bits of dst are zeroed out. |
13144 | /// |
13145 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtpd_ph) |
13146 | #[inline ] |
13147 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13148 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13149 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13150 | pub fn _mm256_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m256d) -> __m128h { |
13151 | unsafe { vcvtpd2ph_256(a, src, k) } |
13152 | } |
13153 | |
13154 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13155 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
13156 | /// corresponding mask bit is not set). The upper 64 bits of dst are zeroed out. |
13157 | /// |
13158 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtpd_ph) |
13159 | #[inline ] |
13160 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13161 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13162 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13163 | pub fn _mm256_maskz_cvtpd_ph(k: __mmask8, a: __m256d) -> __m128h { |
    _mm256_mask_cvtpd_ph(_mm_setzero_ph(), k, a)
13165 | } |
13166 | |
13167 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13168 | /// floating-point elements, and store the results in dst. |
13169 | /// |
13170 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpd_ph) |
13171 | #[inline ] |
13172 | #[target_feature (enable = "avx512fp16" )] |
13173 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13174 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13175 | pub fn _mm512_cvtpd_ph(a: __m512d) -> __m128h { |
    _mm512_mask_cvtpd_ph(f16x8::ZERO.as_m128h(), 0xff, a)
13177 | } |
13178 | |
13179 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13180 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst |
13181 | /// when the corresponding mask bit is not set). |
13182 | /// |
13183 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtpd_ph) |
13184 | #[inline ] |
13185 | #[target_feature (enable = "avx512fp16" )] |
13186 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13187 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13188 | pub fn _mm512_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m512d) -> __m128h { |
13189 | unsafe { vcvtpd2ph_512(a, src, k, _MM_FROUND_CUR_DIRECTION) } |
13190 | } |
13191 | |
13192 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13193 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
13194 | /// corresponding mask bit is not set). |
13195 | /// |
13196 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtpd_ph) |
13197 | #[inline ] |
13198 | #[target_feature (enable = "avx512fp16" )] |
13199 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13200 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13201 | pub fn _mm512_maskz_cvtpd_ph(k: __mmask8, a: __m512d) -> __m128h { |
    _mm512_mask_cvtpd_ph(f16x8::ZERO.as_m128h(), k, a)
13203 | } |
13204 | |
13205 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13206 | /// floating-point elements, and store the results in dst. |
13207 | /// |
13208 | /// Rounding is done according to the rounding parameter, which can be one of: |
13209 | /// |
13210 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13211 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13212 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13213 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13214 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13215 | /// |
13216 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundpd_ph) |
13217 | #[inline ] |
13218 | #[target_feature (enable = "avx512fp16" )] |
13219 | #[cfg_attr (test, assert_instr(vcvtpd2ph, ROUNDING = 8))] |
13220 | #[rustc_legacy_const_generics (1)] |
13221 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13222 | pub fn _mm512_cvt_roundpd_ph<const ROUNDING: i32>(a: __m512d) -> __m128h { |
13223 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_cvt_roundpd_ph::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a)
13225 | } |
13226 | |
13227 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13228 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst |
13229 | /// when the corresponding mask bit is not set). |
13230 | /// |
13231 | /// Rounding is done according to the rounding parameter, which can be one of: |
13232 | /// |
13233 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13234 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13235 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13236 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13237 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13238 | /// |
13239 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundpd_ph) |
13240 | #[inline ] |
13241 | #[target_feature (enable = "avx512fp16" )] |
13242 | #[cfg_attr (test, assert_instr(vcvtpd2ph, ROUNDING = 8))] |
13243 | #[rustc_legacy_const_generics (3)] |
13244 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13245 | pub fn _mm512_mask_cvt_roundpd_ph<const ROUNDING: i32>( |
13246 | src: __m128h, |
13247 | k: __mmask8, |
13248 | a: __m512d, |
13249 | ) -> __m128h { |
13250 | unsafe { |
13251 | static_assert_rounding!(ROUNDING); |
13252 | vcvtpd2ph_512(a, src, k, ROUNDING) |
13253 | } |
13254 | } |
13255 | |
13256 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13257 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
13258 | /// corresponding mask bit is not set). |
13259 | /// |
13260 | /// Rounding is done according to the rounding parameter, which can be one of: |
13261 | /// |
13262 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13263 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13264 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13265 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13266 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13267 | /// |
13268 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundpd_ph) |
13269 | #[inline ] |
13270 | #[target_feature (enable = "avx512fp16" )] |
13271 | #[cfg_attr (test, assert_instr(vcvtpd2ph, ROUNDING = 8))] |
13272 | #[rustc_legacy_const_generics (2)] |
13273 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13274 | pub fn _mm512_maskz_cvt_roundpd_ph<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m128h { |
13275 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_cvt_roundpd_ph::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a)
13277 | } |
13278 | |
13279 | /// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit) |
/// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed
13281 | /// elements from a to the upper elements of dst. |
13282 | /// |
13283 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_sh) |
13284 | #[inline ] |
13285 | #[target_feature (enable = "avx512fp16" )] |
13286 | #[cfg_attr (test, assert_instr(vcvtsd2sh))] |
13287 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13288 | pub fn _mm_cvtsd_sh(a: __m128h, b: __m128d) -> __m128h { |
    _mm_mask_cvtsd_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
13290 | } |
13291 | |
13292 | /// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit) |
/// floating-point element, store the result in the lower element of dst using writemask k (the element
/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
13295 | /// upper elements of dst. |
13296 | /// |
13297 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsd_sh) |
13298 | #[inline ] |
13299 | #[target_feature (enable = "avx512fp16" )] |
13300 | #[cfg_attr (test, assert_instr(vcvtsd2sh))] |
13301 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13302 | pub fn _mm_mask_cvtsd_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128d) -> __m128h { |
13303 | unsafe { vcvtsd2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION) } |
13304 | } |
13305 | |
13306 | /// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit) |
/// floating-point element, store the result in the lower element of dst using zeromask k (the element
13308 | /// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
13309 | /// elements of dst. |
13310 | /// |
13311 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsd_sh) |
13312 | #[inline ] |
13313 | #[target_feature (enable = "avx512fp16" )] |
13314 | #[cfg_attr (test, assert_instr(vcvtsd2sh))] |
13315 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13316 | pub fn _mm_maskz_cvtsd_sh(k: __mmask8, a: __m128h, b: __m128d) -> __m128h { |
    _mm_mask_cvtsd_sh(f16x8::ZERO.as_m128h(), k, a, b)
13318 | } |
13319 | |
13320 | /// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit) |
/// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed
13322 | /// elements from a to the upper elements of dst. |
13323 | /// |
13324 | /// Rounding is done according to the rounding parameter, which can be one of: |
13325 | /// |
13326 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13327 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13328 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13329 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13330 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13331 | /// |
13332 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_sh) |
13333 | #[inline ] |
13334 | #[target_feature (enable = "avx512fp16" )] |
13335 | #[cfg_attr (test, assert_instr(vcvtsd2sh, ROUNDING = 8))] |
13336 | #[rustc_legacy_const_generics (2)] |
13337 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13338 | pub fn _mm_cvt_roundsd_sh<const ROUNDING: i32>(a: __m128h, b: __m128d) -> __m128h { |
13339 | static_assert_rounding!(ROUNDING); |
    _mm_mask_cvt_roundsd_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
13341 | } |
13342 | |
13343 | /// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit) |
/// floating-point element, store the result in the lower element of dst using writemask k (the element
/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
13346 | /// upper elements of dst. |
13347 | /// |
13348 | /// Rounding is done according to the rounding parameter, which can be one of: |
13349 | /// |
13350 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13351 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13352 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13353 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13354 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13355 | /// |
13356 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsd_sh) |
13357 | #[inline ] |
13358 | #[target_feature (enable = "avx512fp16" )] |
13359 | #[cfg_attr (test, assert_instr(vcvtsd2sh, ROUNDING = 8))] |
13360 | #[rustc_legacy_const_generics (4)] |
13361 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13362 | pub fn _mm_mask_cvt_roundsd_sh<const ROUNDING: i32>( |
13363 | src: __m128h, |
13364 | k: __mmask8, |
13365 | a: __m128h, |
13366 | b: __m128d, |
13367 | ) -> __m128h { |
13368 | unsafe { |
13369 | static_assert_rounding!(ROUNDING); |
13370 | vcvtsd2sh(a, b, src, k, ROUNDING) |
13371 | } |
13372 | } |
13373 | |
13374 | /// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit) |
/// floating-point element, store the result in the lower element of dst using zeromask k (the element
13376 | /// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
13377 | /// elements of dst. |
13378 | /// |
13379 | /// Rounding is done according to the rounding parameter, which can be one of: |
13380 | /// |
13381 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13382 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13383 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13384 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13385 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13386 | /// |
13387 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsd_sh) |
13388 | #[inline ] |
13389 | #[target_feature (enable = "avx512fp16" )] |
13390 | #[cfg_attr (test, assert_instr(vcvtsd2sh, ROUNDING = 8))] |
13391 | #[rustc_legacy_const_generics (3)] |
13392 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13393 | pub fn _mm_maskz_cvt_roundsd_sh<const ROUNDING: i32>( |
13394 | k: __mmask8, |
13395 | a: __m128h, |
13396 | b: __m128d, |
13397 | ) -> __m128h { |
13398 | static_assert_rounding!(ROUNDING); |
    _mm_mask_cvt_roundsd_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
13400 | } |
13401 | |
13402 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13403 | /// store the results in dst. |
13404 | /// |
13405 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi16) |
13406 | #[inline ] |
13407 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13408 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13409 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13410 | pub fn _mm_cvtph_epi16(a: __m128h) -> __m128i { |
    _mm_mask_cvtph_epi16(_mm_undefined_si128(), 0xff, a)
13412 | } |
13413 | |
13414 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13415 | /// store the results in dst using writemask k (elements are copied from src when the corresponding |
13416 | /// mask bit is not set). |
13417 | /// |
13418 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi16) |
13419 | #[inline ] |
13420 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13421 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13422 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13423 | pub fn _mm_mask_cvtph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
    unsafe { transmute(vcvtph2w_128(a, src.as_i16x8(), k)) }
13425 | } |
13426 | |
13427 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13428 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
13429 | /// |
13430 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi16) |
13431 | #[inline ] |
13432 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13433 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13434 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13435 | pub fn _mm_maskz_cvtph_epi16(k: __mmask8, a: __m128h) -> __m128i { |
    _mm_mask_cvtph_epi16(_mm_setzero_si128(), k, a)
13437 | } |
13438 | |
13439 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13440 | /// store the results in dst. |
13441 | /// |
13442 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi16) |
13443 | #[inline ] |
13444 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13445 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13446 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13447 | pub fn _mm256_cvtph_epi16(a: __m256h) -> __m256i { |
    _mm256_mask_cvtph_epi16(_mm256_undefined_si256(), 0xffff, a)
13449 | } |
13450 | |
13451 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13452 | /// store the results in dst using writemask k (elements are copied from src when the corresponding |
13453 | /// mask bit is not set). |
13454 | /// |
13455 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi16) |
13456 | #[inline ] |
13457 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13458 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13459 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13460 | pub fn _mm256_mask_cvtph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i { |
    unsafe { transmute(vcvtph2w_256(a, src.as_i16x16(), k)) }
13462 | } |
13463 | |
13464 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13465 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
13466 | /// |
13467 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi16) |
13468 | #[inline ] |
13469 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13470 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13471 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13472 | pub fn _mm256_maskz_cvtph_epi16(k: __mmask16, a: __m256h) -> __m256i { |
    _mm256_mask_cvtph_epi16(_mm256_setzero_si256(), k, a)
13474 | } |
13475 | |
13476 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13477 | /// store the results in dst. |
13478 | /// |
13479 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi16) |
13480 | #[inline ] |
13481 | #[target_feature (enable = "avx512fp16" )] |
13482 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13483 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13484 | pub fn _mm512_cvtph_epi16(a: __m512h) -> __m512i { |
    _mm512_mask_cvtph_epi16(_mm512_undefined_epi32(), 0xffffffff, a)
13486 | } |
13487 | |
13488 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13489 | /// store the results in dst using writemask k (elements are copied from src when the corresponding |
13490 | /// mask bit is not set). |
13491 | /// |
13492 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi16) |
13493 | #[inline ] |
13494 | #[target_feature (enable = "avx512fp16" )] |
13495 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13496 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13497 | pub fn _mm512_mask_cvtph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i { |
13498 | unsafe { |
        transmute(vcvtph2w_512(
13500 | a, |
13501 | src.as_i16x32(), |
13502 | k, |
13503 | _MM_FROUND_CUR_DIRECTION, |
13504 | )) |
13505 | } |
13506 | } |
13507 | |
13508 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13509 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
13510 | /// |
13511 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi16) |
13512 | #[inline ] |
13513 | #[target_feature (enable = "avx512fp16" )] |
13514 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13515 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13516 | pub fn _mm512_maskz_cvtph_epi16(k: __mmask32, a: __m512h) -> __m512i { |
    _mm512_mask_cvtph_epi16(_mm512_setzero_si512(), k, a)
13518 | } |
13519 | |
13520 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13521 | /// store the results in dst. |
13522 | /// |
13523 | /// Rounding is done according to the rounding parameter, which can be one of: |
13524 | /// |
13525 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13526 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13527 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13528 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13529 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13530 | /// |
13531 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi16) |
13532 | #[inline ] |
13533 | #[target_feature (enable = "avx512fp16" )] |
13534 | #[cfg_attr (test, assert_instr(vcvtph2w, ROUNDING = 8))] |
13535 | #[rustc_legacy_const_generics (1)] |
13536 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13537 | pub fn _mm512_cvt_roundph_epi16<const ROUNDING: i32>(a: __m512h) -> __m512i { |
13538 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_cvt_roundph_epi16::<ROUNDING>(_mm512_undefined_epi32(), 0xffffffff, a)
13540 | } |
13541 | |
13542 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13543 | /// store the results in dst using writemask k (elements are copied from src when the corresponding |
13544 | /// mask bit is not set). |
13545 | /// |
13546 | /// Rounding is done according to the rounding parameter, which can be one of: |
13547 | /// |
13548 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13549 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13550 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13551 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13552 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13553 | /// |
13554 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi16) |
13555 | #[inline ] |
13556 | #[target_feature (enable = "avx512fp16" )] |
13557 | #[cfg_attr (test, assert_instr(vcvtph2w, ROUNDING = 8))] |
13558 | #[rustc_legacy_const_generics (3)] |
13559 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13560 | pub fn _mm512_mask_cvt_roundph_epi16<const ROUNDING: i32>( |
13561 | src: __m512i, |
13562 | k: __mmask32, |
13563 | a: __m512h, |
13564 | ) -> __m512i { |
13565 | unsafe { |
13566 | static_assert_rounding!(ROUNDING); |
        transmute(vcvtph2w_512(a, src.as_i16x32(), k, ROUNDING))
13568 | } |
13569 | } |
13570 | |
13571 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13572 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
13573 | /// |
13574 | /// Rounding is done according to the rounding parameter, which can be one of: |
13575 | /// |
13576 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13577 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13578 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13579 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13580 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13581 | /// |
13582 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi16) |
13583 | #[inline ] |
13584 | #[target_feature (enable = "avx512fp16" )] |
13585 | #[cfg_attr (test, assert_instr(vcvtph2w, ROUNDING = 8))] |
13586 | #[rustc_legacy_const_generics (2)] |
13587 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13588 | pub fn _mm512_maskz_cvt_roundph_epi16<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512i { |
13589 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_cvt_roundph_epi16::<ROUNDING>(_mm512_setzero_si512(), k, a)
13591 | } |
13592 | |
13593 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13594 | /// and store the results in dst. |
13595 | /// |
13596 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu16) |
13597 | #[inline ] |
13598 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13599 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13600 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13601 | pub fn _mm_cvtph_epu16(a: __m128h) -> __m128i { |
    _mm_mask_cvtph_epu16(_mm_undefined_si128(), 0xff, a)
13603 | } |
13604 | |
13605 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13606 | /// and store the results in dst using writemask k (elements are copied from src when the corresponding |
13607 | /// mask bit is not set). |
13608 | /// |
13609 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu16) |
13610 | #[inline ] |
13611 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13612 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13613 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13614 | pub fn _mm_mask_cvtph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
    unsafe { transmute(vcvtph2uw_128(a, src.as_u16x8(), k)) }
13616 | } |
13617 | |
13618 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13619 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
13620 | /// |
13621 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu16) |
13622 | #[inline ] |
13623 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13624 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13625 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13626 | pub fn _mm_maskz_cvtph_epu16(k: __mmask8, a: __m128h) -> __m128i { |
    _mm_mask_cvtph_epu16(_mm_setzero_si128(), k, a)
13628 | } |
13629 | |
13630 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13631 | /// and store the results in dst. |
13632 | /// |
13633 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu16) |
13634 | #[inline ] |
13635 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13636 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13637 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13638 | pub fn _mm256_cvtph_epu16(a: __m256h) -> __m256i { |
    _mm256_mask_cvtph_epu16(_mm256_undefined_si256(), 0xffff, a)
13640 | } |
13641 | |
13642 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13643 | /// and store the results in dst using writemask k (elements are copied from src when the corresponding |
13644 | /// mask bit is not set). |
13645 | /// |
13646 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu16) |
13647 | #[inline ] |
13648 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13649 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13650 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13651 | pub fn _mm256_mask_cvtph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i { |
    unsafe { transmute(vcvtph2uw_256(a, src.as_u16x16(), k)) }
13653 | } |
13654 | |
13655 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13656 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
13657 | /// |
13658 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu16) |
13659 | #[inline ] |
13660 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13661 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13662 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13663 | pub fn _mm256_maskz_cvtph_epu16(k: __mmask16, a: __m256h) -> __m256i { |
    _mm256_mask_cvtph_epu16(_mm256_setzero_si256(), k, a)
13665 | } |
13666 | |
13667 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13668 | /// and store the results in dst. |
13669 | /// |
13670 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu16) |
13671 | #[inline ] |
13672 | #[target_feature (enable = "avx512fp16" )] |
13673 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13674 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13675 | pub fn _mm512_cvtph_epu16(a: __m512h) -> __m512i { |
    _mm512_mask_cvtph_epu16(_mm512_undefined_epi32(), 0xffffffff, a)
13677 | } |
13678 | |
13679 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13680 | /// and store the results in dst using writemask k (elements are copied from src when the corresponding |
13681 | /// mask bit is not set). |
13682 | /// |
13683 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu16) |
13684 | #[inline ] |
13685 | #[target_feature (enable = "avx512fp16" )] |
13686 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13687 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13688 | pub fn _mm512_mask_cvtph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i { |
13689 | unsafe { |
        transmute(vcvtph2uw_512(
13691 | a, |
13692 | src.as_u16x32(), |
13693 | k, |
13694 | _MM_FROUND_CUR_DIRECTION, |
13695 | )) |
13696 | } |
13697 | } |
13698 | |
13699 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13700 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
13701 | /// |
13702 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu16) |
13703 | #[inline ] |
13704 | #[target_feature (enable = "avx512fp16" )] |
13705 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13706 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13707 | pub fn _mm512_maskz_cvtph_epu16(k: __mmask32, a: __m512h) -> __m512i { |
_mm512_mask_cvtph_epu16(_mm512_setzero_si512(), k, a)
13709 | } |
13710 | |
13711 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13712 | /// and store the results in dst. |
13713 | /// |
13714 | /// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter. |
13715 | /// |
13716 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu16) |
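///
/// Illustrative sketch (hypothetical values) of supplying the `SAE` const parameter; the
/// conversion still follows the current MXCSR rounding mode, only exceptions are suppressed:
///
/// ```ignore
/// let a = _mm512_set1_ph(2.5);
/// let r = _mm512_cvt_roundph_epu16::<_MM_FROUND_NO_EXC>(a);
/// ```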
13717 | #[inline ] |
13718 | #[target_feature (enable = "avx512fp16" )] |
13719 | #[cfg_attr (test, assert_instr(vcvtph2uw, SAE = 8))] |
13720 | #[rustc_legacy_const_generics (1)] |
13721 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13722 | pub fn _mm512_cvt_roundph_epu16<const SAE: i32>(a: __m512h) -> __m512i { |
13723 | static_assert_sae!(SAE); |
_mm512_mask_cvt_roundph_epu16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a)
13725 | } |
13726 | |
13727 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13728 | /// and store the results in dst using writemask k (elements are copied from src when the corresponding |
13729 | /// mask bit is not set). |
13730 | /// |
13731 | /// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter. |
13732 | /// |
13733 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu16) |
13734 | #[inline ] |
13735 | #[target_feature (enable = "avx512fp16" )] |
13736 | #[cfg_attr (test, assert_instr(vcvtph2uw, SAE = 8))] |
13737 | #[rustc_legacy_const_generics (3)] |
13738 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13739 | pub fn _mm512_mask_cvt_roundph_epu16<const SAE: i32>( |
13740 | src: __m512i, |
13741 | k: __mmask32, |
13742 | a: __m512h, |
13743 | ) -> __m512i { |
13744 | unsafe { |
13745 | static_assert_sae!(SAE); |
transmute(vcvtph2uw_512(a, src.as_u16x32(), k, SAE))
13747 | } |
13748 | } |
13749 | |
13750 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13751 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
13752 | /// |
13753 | /// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter. |
13754 | /// |
13755 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu16) |
13756 | #[inline ] |
13757 | #[target_feature (enable = "avx512fp16" )] |
13758 | #[cfg_attr (test, assert_instr(vcvtph2uw, SAE = 8))] |
13759 | #[rustc_legacy_const_generics (2)] |
13760 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13761 | pub fn _mm512_maskz_cvt_roundph_epu16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i { |
13762 | static_assert_sae!(SAE); |
_mm512_mask_cvt_roundph_epu16::<SAE>(_mm512_setzero_si512(), k, a)
13764 | } |
13765 | |
13766 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13767 | /// truncation, and store the results in dst. |
13768 | /// |
13769 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi16) |
13770 | #[inline ] |
13771 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13772 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13773 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13774 | pub fn _mm_cvttph_epi16(a: __m128h) -> __m128i { |
_mm_mask_cvttph_epi16(_mm_undefined_si128(), 0xff, a)
13776 | } |
13777 | |
13778 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13779 | /// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding |
13780 | /// mask bit is not set). |
13781 | /// |
13782 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi16) |
13783 | #[inline ] |
13784 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13785 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13786 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13787 | pub fn _mm_mask_cvttph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
unsafe { transmute(vcvttph2w_128(a, src.as_i16x8(), k)) }
13789 | } |
13790 | |
13791 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13792 | /// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
13793 | /// mask bit is not set). |
13794 | /// |
13795 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi16) |
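///
/// Illustrative sketch of the zeromask plus truncation behaviour (hypothetical values):
///
/// ```ignore
/// let a = _mm_set1_ph(-1.9);
/// // truncation toward zero yields -1 in the low four lanes; the upper four lanes,
/// // whose mask bits are clear, are zeroed
/// let r = _mm_maskz_cvttph_epi16(0b0000_1111, a);
/// ```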
13796 | #[inline ] |
13797 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13798 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13799 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13800 | pub fn _mm_maskz_cvttph_epi16(k: __mmask8, a: __m128h) -> __m128i { |
_mm_mask_cvttph_epi16(_mm_setzero_si128(), k, a)
13802 | } |
13803 | |
13804 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13805 | /// truncation, and store the results in dst. |
13806 | /// |
13807 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi16) |
13808 | #[inline ] |
13809 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13810 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13811 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13812 | pub fn _mm256_cvttph_epi16(a: __m256h) -> __m256i { |
_mm256_mask_cvttph_epi16(_mm256_undefined_si256(), 0xffff, a)
13814 | } |
13815 | |
13816 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13817 | /// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding |
13818 | /// mask bit is not set). |
13819 | /// |
13820 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi16) |
13821 | #[inline ] |
13822 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13823 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13824 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13825 | pub fn _mm256_mask_cvttph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i { |
unsafe { transmute(vcvttph2w_256(a, src.as_i16x16(), k)) }
13827 | } |
13828 | |
13829 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13830 | /// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
13831 | /// mask bit is not set). |
13832 | /// |
13833 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi16) |
13834 | #[inline ] |
13835 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13836 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13837 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13838 | pub fn _mm256_maskz_cvttph_epi16(k: __mmask16, a: __m256h) -> __m256i { |
_mm256_mask_cvttph_epi16(_mm256_setzero_si256(), k, a)
13840 | } |
13841 | |
13842 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13843 | /// truncation, and store the results in dst. |
13844 | /// |
13845 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi16) |
13846 | #[inline ] |
13847 | #[target_feature (enable = "avx512fp16" )] |
13848 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13849 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13850 | pub fn _mm512_cvttph_epi16(a: __m512h) -> __m512i { |
_mm512_mask_cvttph_epi16(_mm512_undefined_epi32(), 0xffffffff, a)
13852 | } |
13853 | |
13854 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13855 | /// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding |
13856 | /// mask bit is not set). |
13857 | /// |
13858 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi16) |
13859 | #[inline ] |
13860 | #[target_feature (enable = "avx512fp16" )] |
13861 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13862 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13863 | pub fn _mm512_mask_cvttph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i { |
13864 | unsafe { |
transmute(vcvttph2w_512(
13866 | a, |
13867 | src.as_i16x32(), |
13868 | k, |
13869 | _MM_FROUND_CUR_DIRECTION, |
13870 | )) |
13871 | } |
13872 | } |
13873 | |
13874 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13875 | /// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
13876 | /// mask bit is not set). |
13877 | /// |
13878 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi16) |
13879 | #[inline ] |
13880 | #[target_feature (enable = "avx512fp16" )] |
13881 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13882 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13883 | pub fn _mm512_maskz_cvttph_epi16(k: __mmask32, a: __m512h) -> __m512i { |
_mm512_mask_cvttph_epi16(_mm512_setzero_si512(), k, a)
13885 | } |
13886 | |
13887 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13888 | /// truncation, and store the results in dst. |
13889 | /// |
/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
13891 | /// |
13892 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi16) |
13893 | #[inline ] |
13894 | #[target_feature (enable = "avx512fp16" )] |
13895 | #[cfg_attr (test, assert_instr(vcvttph2w, SAE = 8))] |
13896 | #[rustc_legacy_const_generics (1)] |
13897 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13898 | pub fn _mm512_cvtt_roundph_epi16<const SAE: i32>(a: __m512h) -> __m512i { |
13899 | static_assert_sae!(SAE); |
_mm512_mask_cvtt_roundph_epi16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a)
13901 | } |
13902 | |
13903 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13904 | /// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding |
13905 | /// mask bit is not set). |
13906 | /// |
/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
13908 | /// |
13909 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi16) |
13910 | #[inline ] |
13911 | #[target_feature (enable = "avx512fp16" )] |
13912 | #[cfg_attr (test, assert_instr(vcvttph2w, SAE = 8))] |
13913 | #[rustc_legacy_const_generics (3)] |
13914 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13915 | pub fn _mm512_mask_cvtt_roundph_epi16<const SAE: i32>( |
13916 | src: __m512i, |
13917 | k: __mmask32, |
13918 | a: __m512h, |
13919 | ) -> __m512i { |
13920 | unsafe { |
13921 | static_assert_sae!(SAE); |
transmute(vcvttph2w_512(a, src.as_i16x32(), k, SAE))
13923 | } |
13924 | } |
13925 | |
13926 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13927 | /// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
13928 | /// mask bit is not set). |
13929 | /// |
/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
13931 | /// |
13932 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi16) |
13933 | #[inline ] |
13934 | #[target_feature (enable = "avx512fp16" )] |
13935 | #[cfg_attr (test, assert_instr(vcvttph2w, SAE = 8))] |
13936 | #[rustc_legacy_const_generics (2)] |
13937 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13938 | pub fn _mm512_maskz_cvtt_roundph_epi16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i { |
13939 | static_assert_sae!(SAE); |
_mm512_mask_cvtt_roundph_epi16::<SAE>(_mm512_setzero_si512(), k, a)
13941 | } |
13942 | |
13943 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
13944 | /// truncation, and store the results in dst. |
13945 | /// |
13946 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu16) |
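///
/// Illustrative sketch (hypothetical in-range values):
///
/// ```ignore
/// let a = _mm_set_ph(7.9, 6.9, 5.9, 4.9, 3.9, 2.9, 1.9, 0.9);
/// // truncation toward zero: lanes become [0, 1, 2, 3, 4, 5, 6, 7]
/// let r = _mm_cvttph_epu16(a);
/// ```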
13947 | #[inline ] |
13948 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13949 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
13950 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13951 | pub fn _mm_cvttph_epu16(a: __m128h) -> __m128i { |
_mm_mask_cvttph_epu16(_mm_undefined_si128(), 0xff, a)
13953 | } |
13954 | |
13955 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
13956 | /// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding |
13957 | /// mask bit is not set). |
13958 | /// |
13959 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu16) |
13960 | #[inline ] |
13961 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13962 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
13963 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13964 | pub fn _mm_mask_cvttph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
unsafe { transmute(vcvttph2uw_128(a, src.as_u16x8(), k)) }
13966 | } |
13967 | |
13968 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
13969 | /// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
13970 | /// mask bit is not set). |
13971 | /// |
13972 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu16) |
13973 | #[inline ] |
13974 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13975 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
13976 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13977 | pub fn _mm_maskz_cvttph_epu16(k: __mmask8, a: __m128h) -> __m128i { |
_mm_mask_cvttph_epu16(_mm_setzero_si128(), k, a)
13979 | } |
13980 | |
13981 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
13982 | /// truncation, and store the results in dst. |
13983 | /// |
13984 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu16) |
13985 | #[inline ] |
13986 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13987 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
13988 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13989 | pub fn _mm256_cvttph_epu16(a: __m256h) -> __m256i { |
_mm256_mask_cvttph_epu16(_mm256_undefined_si256(), 0xffff, a)
13991 | } |
13992 | |
13993 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
13994 | /// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding |
13995 | /// mask bit is not set). |
13996 | /// |
13997 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu16) |
13998 | #[inline ] |
13999 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14000 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
14001 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14002 | pub fn _mm256_mask_cvttph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i { |
unsafe { transmute(vcvttph2uw_256(a, src.as_u16x16(), k)) }
14004 | } |
14005 | |
14006 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
14007 | /// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
14008 | /// mask bit is not set). |
14009 | /// |
14010 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu16) |
14011 | #[inline ] |
14012 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14013 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
14014 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14015 | pub fn _mm256_maskz_cvttph_epu16(k: __mmask16, a: __m256h) -> __m256i { |
_mm256_mask_cvttph_epu16(_mm256_setzero_si256(), k, a)
14017 | } |
14018 | |
14019 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
14020 | /// truncation, and store the results in dst. |
14021 | /// |
14022 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu16) |
14023 | #[inline ] |
14024 | #[target_feature (enable = "avx512fp16" )] |
14025 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
14026 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14027 | pub fn _mm512_cvttph_epu16(a: __m512h) -> __m512i { |
_mm512_mask_cvttph_epu16(_mm512_undefined_epi32(), 0xffffffff, a)
14029 | } |
14030 | |
14031 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
14032 | /// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding |
14033 | /// mask bit is not set). |
14034 | /// |
14035 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu16) |
14036 | #[inline ] |
14037 | #[target_feature (enable = "avx512fp16" )] |
14038 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
14039 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14040 | pub fn _mm512_mask_cvttph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i { |
14041 | unsafe { |
transmute(vcvttph2uw_512(
14043 | a, |
14044 | src.as_u16x32(), |
14045 | k, |
14046 | _MM_FROUND_CUR_DIRECTION, |
14047 | )) |
14048 | } |
14049 | } |
14050 | |
14051 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
14052 | /// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
14053 | /// mask bit is not set). |
14054 | /// |
14055 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu16) |
14056 | #[inline ] |
14057 | #[target_feature (enable = "avx512fp16" )] |
14058 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
14059 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14060 | pub fn _mm512_maskz_cvttph_epu16(k: __mmask32, a: __m512h) -> __m512i { |
_mm512_mask_cvttph_epu16(_mm512_setzero_si512(), k, a)
14062 | } |
14063 | |
14064 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
14065 | /// truncation, and store the results in dst. |
14066 | /// |
/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
14068 | /// |
14069 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu16) |
14070 | #[inline ] |
14071 | #[target_feature (enable = "avx512fp16" )] |
14072 | #[cfg_attr (test, assert_instr(vcvttph2uw, SAE = 8))] |
14073 | #[rustc_legacy_const_generics (1)] |
14074 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14075 | pub fn _mm512_cvtt_roundph_epu16<const SAE: i32>(a: __m512h) -> __m512i { |
14076 | static_assert_sae!(SAE); |
_mm512_mask_cvtt_roundph_epu16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a)
14078 | } |
14079 | |
14080 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
14081 | /// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding |
14082 | /// mask bit is not set). |
14083 | /// |
/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
14085 | /// |
14086 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu16) |
14087 | #[inline ] |
14088 | #[target_feature (enable = "avx512fp16" )] |
14089 | #[cfg_attr (test, assert_instr(vcvttph2uw, SAE = 8))] |
14090 | #[rustc_legacy_const_generics (3)] |
14091 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14092 | pub fn _mm512_mask_cvtt_roundph_epu16<const SAE: i32>( |
14093 | src: __m512i, |
14094 | k: __mmask32, |
14095 | a: __m512h, |
14096 | ) -> __m512i { |
14097 | unsafe { |
14098 | static_assert_sae!(SAE); |
transmute(vcvttph2uw_512(a, src.as_u16x32(), k, SAE))
14100 | } |
14101 | } |
14102 | |
14103 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
14104 | /// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
14105 | /// mask bit is not set). |
14106 | /// |
/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
14108 | /// |
14109 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu16) |
14110 | #[inline ] |
14111 | #[target_feature (enable = "avx512fp16" )] |
14112 | #[cfg_attr (test, assert_instr(vcvttph2uw, SAE = 8))] |
14113 | #[rustc_legacy_const_generics (2)] |
14114 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14115 | pub fn _mm512_maskz_cvtt_roundph_epu16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i { |
14116 | static_assert_sae!(SAE); |
_mm512_mask_cvtt_roundph_epu16::<SAE>(_mm512_setzero_si512(), k, a)
14118 | } |
14119 | |
14120 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14121 | /// results in dst. |
14122 | /// |
14123 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi32) |
14124 | #[inline ] |
14125 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14126 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14127 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14128 | pub fn _mm_cvtph_epi32(a: __m128h) -> __m128i { |
_mm_mask_cvtph_epi32(_mm_undefined_si128(), 0xff, a)
14130 | } |
14131 | |
14132 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14133 | /// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14134 | /// |
14135 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi32) |
14136 | #[inline ] |
14137 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14138 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14139 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14140 | pub fn _mm_mask_cvtph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
unsafe { transmute(vcvtph2dq_128(a, src.as_i32x4(), k)) }
14142 | } |
14143 | |
14144 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14145 | /// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14146 | /// |
14147 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi32) |
14148 | #[inline ] |
14149 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14150 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14151 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14152 | pub fn _mm_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m128i { |
_mm_mask_cvtph_epi32(_mm_setzero_si128(), k, a)
14154 | } |
14155 | |
14156 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14157 | /// results in dst. |
14158 | /// |
14159 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi32) |
14160 | #[inline ] |
14161 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14162 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14163 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14164 | pub fn _mm256_cvtph_epi32(a: __m128h) -> __m256i { |
_mm256_mask_cvtph_epi32(_mm256_undefined_si256(), 0xff, a)
14166 | } |
14167 | |
14168 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14169 | /// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14170 | /// |
14171 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi32) |
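///
/// Illustrative sketch of the widening, masked conversion (hypothetical values):
///
/// ```ignore
/// let src = _mm256_set1_epi32(-1);
/// let a = _mm_set1_ph(42.0);          // eight f16 lanes widen into eight i32 lanes
/// // only the low four lanes are converted; the upper four keep -1 from `src`
/// let r = _mm256_mask_cvtph_epi32(src, 0b0000_1111, a);
/// ```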
14172 | #[inline ] |
14173 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14174 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14175 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14176 | pub fn _mm256_mask_cvtph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { |
unsafe { transmute(vcvtph2dq_256(a, src.as_i32x8(), k)) }
14178 | } |
14179 | |
14180 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14181 | /// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14182 | /// |
14183 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi32) |
14184 | #[inline ] |
14185 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14186 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14187 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14188 | pub fn _mm256_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m256i { |
_mm256_mask_cvtph_epi32(_mm256_setzero_si256(), k, a)
14190 | } |
14191 | |
14192 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14193 | /// results in dst. |
14194 | /// |
14195 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi32) |
14196 | #[inline ] |
14197 | #[target_feature (enable = "avx512fp16" )] |
14198 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14199 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14200 | pub fn _mm512_cvtph_epi32(a: __m256h) -> __m512i { |
_mm512_mask_cvtph_epi32(_mm512_undefined_epi32(), 0xffff, a)
14202 | } |
14203 | |
14204 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14205 | /// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14206 | /// |
14207 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi32) |
14208 | #[inline ] |
14209 | #[target_feature (enable = "avx512fp16" )] |
14210 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14211 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14212 | pub fn _mm512_mask_cvtph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i { |
14213 | unsafe { |
transmute(vcvtph2dq_512(
14215 | a, |
14216 | src.as_i32x16(), |
14217 | k, |
14218 | _MM_FROUND_CUR_DIRECTION, |
14219 | )) |
14220 | } |
14221 | } |
14222 | |
14223 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14224 | /// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14225 | /// |
14226 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi32) |
14227 | #[inline ] |
14228 | #[target_feature (enable = "avx512fp16" )] |
14229 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14230 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14231 | pub fn _mm512_maskz_cvtph_epi32(k: __mmask16, a: __m256h) -> __m512i { |
_mm512_mask_cvtph_epi32(_mm512_setzero_si512(), k, a)
14233 | } |
14234 | |
14235 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14236 | /// results in dst. |
14237 | /// |
14238 | /// Rounding is done according to the rounding parameter, which can be one of: |
14239 | /// |
14240 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
14241 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
14242 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
14243 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
14244 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
14245 | /// |
14246 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi32) |
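///
/// Illustrative sketch (hypothetical values) of selecting an explicit rounding mode:
///
/// ```ignore
/// let a = _mm256_set1_ph(2.5);
/// // round 2.5 toward negative infinity, giving 2, with exceptions suppressed
/// let r = _mm512_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
/// ```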
14247 | #[inline ] |
14248 | #[target_feature (enable = "avx512fp16" )] |
14249 | #[cfg_attr (test, assert_instr(vcvtph2dq, ROUNDING = 8))] |
14250 | #[rustc_legacy_const_generics (1)] |
14251 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14252 | pub fn _mm512_cvt_roundph_epi32<const ROUNDING: i32>(a: __m256h) -> __m512i { |
14253 | static_assert_rounding!(ROUNDING); |
_mm512_mask_cvt_roundph_epi32::<ROUNDING>(_mm512_undefined_epi32(), 0xffff, a)
14255 | } |
14256 | |
14257 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14258 | /// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14259 | /// |
14260 | /// Rounding is done according to the rounding parameter, which can be one of: |
14261 | /// |
14262 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
14263 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
14264 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
14265 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
14266 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
14267 | /// |
14268 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi32) |
14269 | #[inline ] |
14270 | #[target_feature (enable = "avx512fp16" )] |
14271 | #[cfg_attr (test, assert_instr(vcvtph2dq, ROUNDING = 8))] |
14272 | #[rustc_legacy_const_generics (3)] |
14273 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14274 | pub fn _mm512_mask_cvt_roundph_epi32<const ROUNDING: i32>( |
14275 | src: __m512i, |
14276 | k: __mmask16, |
14277 | a: __m256h, |
14278 | ) -> __m512i { |
14279 | unsafe { |
14280 | static_assert_rounding!(ROUNDING); |
transmute(vcvtph2dq_512(a, src.as_i32x16(), k, ROUNDING))
14282 | } |
14283 | } |
14284 | |
14285 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14286 | /// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14287 | /// |
14288 | /// Rounding is done according to the rounding parameter, which can be one of: |
14289 | /// |
14290 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
14291 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
14292 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
14293 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
14294 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
14295 | /// |
14296 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi32) |
14297 | #[inline ] |
14298 | #[target_feature (enable = "avx512fp16" )] |
14299 | #[cfg_attr (test, assert_instr(vcvtph2dq, ROUNDING = 8))] |
14300 | #[rustc_legacy_const_generics (2)] |
14301 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14302 | pub fn _mm512_maskz_cvt_roundph_epi32<const ROUNDING: i32>(k: __mmask16, a: __m256h) -> __m512i { |
14303 | static_assert_rounding!(ROUNDING); |
_mm512_mask_cvt_roundph_epi32::<ROUNDING>(_mm512_setzero_si512(), k, a)
14305 | } |
14306 | |
14307 | /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer, and store |
14308 | /// the result in dst. |
14309 | /// |
14310 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_i32) |
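///
/// Illustrative sketch (hypothetical value; uses the current MXCSR rounding mode,
/// round-to-nearest-even by default):
///
/// ```ignore
/// let a = _mm_set_sh(6.4);
/// let r: i32 = _mm_cvtsh_i32(a);   // 6 under the default rounding mode
/// ```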
14311 | #[inline ] |
14312 | #[target_feature (enable = "avx512fp16" )] |
14313 | #[cfg_attr (test, assert_instr(vcvtsh2si))] |
14314 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14315 | pub fn _mm_cvtsh_i32(a: __m128h) -> i32 { |
14316 | unsafe { vcvtsh2si32(a, _MM_FROUND_CUR_DIRECTION) } |
14317 | } |
14318 | |
14319 | /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer, and store |
14320 | /// the result in dst. |
14321 | /// |
14322 | /// Rounding is done according to the rounding parameter, which can be one of: |
14323 | /// |
14324 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
14325 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
14326 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
14327 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
14328 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
14329 | /// |
14330 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_i32) |
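///
/// Illustrative sketch (hypothetical value):
///
/// ```ignore
/// // truncate the lower lane toward zero, with exceptions suppressed
/// let r = _mm_cvt_roundsh_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(_mm_set_sh(-3.7));
/// assert_eq!(r, -3);
/// ```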
14331 | #[inline ] |
14332 | #[target_feature (enable = "avx512fp16" )] |
14333 | #[cfg_attr (test, assert_instr(vcvtsh2si, ROUNDING = 8))] |
14334 | #[rustc_legacy_const_generics (1)] |
14335 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14336 | pub fn _mm_cvt_roundsh_i32<const ROUNDING: i32>(a: __m128h) -> i32 { |
14337 | unsafe { |
14338 | static_assert_rounding!(ROUNDING); |
14339 | vcvtsh2si32(a, ROUNDING) |
14340 | } |
14341 | } |
14342 | |
/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store the
14344 | /// results in dst. |
14345 | /// |
14346 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu32) |
14347 | #[inline ] |
14348 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14349 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14350 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14351 | pub fn _mm_cvtph_epu32(a: __m128h) -> __m128i { |
_mm_mask_cvtph_epu32(_mm_undefined_si128(), 0xff, a)
14353 | } |
14354 | |
14355 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14356 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14357 | /// |
14358 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu32) |
14359 | #[inline ] |
14360 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14361 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14362 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14363 | pub fn _mm_mask_cvtph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
unsafe { transmute(vcvtph2udq_128(a, src.as_u32x4(), k)) }
14365 | } |
14366 | |
14367 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14368 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14369 | /// |
14370 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu32) |
14371 | #[inline ] |
14372 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14373 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14374 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14375 | pub fn _mm_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m128i { |
_mm_mask_cvtph_epu32(_mm_setzero_si128(), k, a)
14377 | } |
14378 | |
14379 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14380 | /// the results in dst. |
14381 | /// |
14382 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu32) |
14383 | #[inline ] |
14384 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14385 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14386 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14387 | pub fn _mm256_cvtph_epu32(a: __m128h) -> __m256i { |
_mm256_mask_cvtph_epu32(_mm256_undefined_si256(), 0xff, a)
14389 | } |
14390 | |
14391 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14392 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14393 | /// |
14394 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu32) |
14395 | #[inline ] |
14396 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14397 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14398 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14399 | pub fn _mm256_mask_cvtph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { |
unsafe { transmute(vcvtph2udq_256(a, src.as_u32x8(), k)) }
14401 | } |
14402 | |
14403 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14404 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14405 | /// |
14406 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu32) |
14407 | #[inline ] |
14408 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14409 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14410 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14411 | pub fn _mm256_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m256i { |
_mm256_mask_cvtph_epu32(_mm256_setzero_si256(), k, a)
14413 | } |
14414 | |
14415 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14416 | /// the results in dst. |
14417 | /// |
14418 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu32) |
14419 | #[inline ] |
14420 | #[target_feature (enable = "avx512fp16" )] |
14421 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14422 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14423 | pub fn _mm512_cvtph_epu32(a: __m256h) -> __m512i { |
_mm512_mask_cvtph_epu32(_mm512_undefined_epi32(), 0xffff, a)
14425 | } |
14426 | |
14427 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14428 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14429 | /// |
14430 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu32) |
14431 | #[inline ] |
14432 | #[target_feature (enable = "avx512fp16" )] |
14433 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14434 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14435 | pub fn _mm512_mask_cvtph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i { |
14436 | unsafe { |
transmute(vcvtph2udq_512(
14438 | a, |
14439 | src.as_u32x16(), |
14440 | k, |
14441 | _MM_FROUND_CUR_DIRECTION, |
14442 | )) |
14443 | } |
14444 | } |
14445 | |
14446 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14447 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14448 | /// |
14449 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu32) |
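///
/// Illustrative sketch of the zeromask behaviour on the widening conversion
/// (hypothetical values):
///
/// ```ignore
/// let a = _mm256_set1_ph(9.0);
/// // lanes 0..=7 are converted to 9; lanes 8..=15 are zeroed
/// let r = _mm512_maskz_cvtph_epu32(0x00ff, a);
/// ```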
14450 | #[inline ] |
14451 | #[target_feature (enable = "avx512fp16" )] |
14452 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14453 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14454 | pub fn _mm512_maskz_cvtph_epu32(k: __mmask16, a: __m256h) -> __m512i { |
_mm512_mask_cvtph_epu32(_mm512_setzero_si512(), k, a)
14456 | } |
14457 | |
14458 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14459 | /// the results in dst. |
14460 | /// |
14461 | /// Rounding is done according to the rounding parameter, which can be one of: |
14462 | /// |
14463 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
14464 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
14465 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
14466 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
14467 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
14468 | /// |
14469 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu32) |
14470 | #[inline ] |
14471 | #[target_feature (enable = "avx512fp16" )] |
14472 | #[cfg_attr (test, assert_instr(vcvtph2udq, ROUNDING = 8))] |
14473 | #[rustc_legacy_const_generics (1)] |
14474 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14475 | pub fn _mm512_cvt_roundph_epu32<const ROUNDING: i32>(a: __m256h) -> __m512i { |
14476 | static_assert_rounding!(ROUNDING); |
_mm512_mask_cvt_roundph_epu32::<ROUNDING>(_mm512_undefined_epi32(), 0xffff, a)
14478 | } |
14479 | |
14480 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14481 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14482 | /// |
14483 | /// Rounding is done according to the rounding parameter, which can be one of: |
14484 | /// |
14485 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
14486 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
14487 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
14488 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
14489 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
14490 | /// |
14491 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu32) |
14492 | #[inline ] |
14493 | #[target_feature (enable = "avx512fp16" )] |
14494 | #[cfg_attr (test, assert_instr(vcvtph2udq, ROUNDING = 8))] |
14495 | #[rustc_legacy_const_generics (3)] |
14496 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14497 | pub fn _mm512_mask_cvt_roundph_epu32<const ROUNDING: i32>( |
14498 | src: __m512i, |
14499 | k: __mmask16, |
14500 | a: __m256h, |
14501 | ) -> __m512i { |
14502 | unsafe { |
14503 | static_assert_rounding!(ROUNDING); |
transmute(vcvtph2udq_512(a, src.as_u32x16(), k, ROUNDING))
14505 | } |
14506 | } |
14507 | |
14508 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14509 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14510 | /// |
14511 | /// Rounding is done according to the rounding parameter, which can be one of: |
14512 | /// |
14513 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
14514 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
14515 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
14516 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
14517 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
14518 | /// |
14519 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu32) |
14520 | #[inline ] |
14521 | #[target_feature (enable = "avx512fp16" )] |
14522 | #[cfg_attr (test, assert_instr(vcvtph2udq, ROUNDING = 8))] |
14523 | #[rustc_legacy_const_generics (2)] |
14524 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14525 | pub fn _mm512_maskz_cvt_roundph_epu32<const ROUNDING: i32>(k: __mmask16, a: __m256h) -> __m512i { |
14526 | static_assert_rounding!(ROUNDING); |
_mm512_mask_cvt_roundph_epu32::<ROUNDING>(_mm512_setzero_si512(), k, a)
14528 | } |
14529 | |
14530 | /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer, and store |
14531 | /// the result in dst. |
14532 | /// |
14533 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_u32) |
14534 | #[inline ] |
14535 | #[target_feature (enable = "avx512fp16" )] |
14536 | #[cfg_attr (test, assert_instr(vcvtsh2usi))] |
14537 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14538 | pub fn _mm_cvtsh_u32(a: __m128h) -> u32 { |
14539 | unsafe { vcvtsh2usi32(a, _MM_FROUND_CUR_DIRECTION) } |
14540 | } |
14541 | |
14542 | /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer, and store |
14543 | /// the result in dst. |
14544 | /// |
14545 | /// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter. |
14546 | /// |
14547 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_u32) |
14548 | #[inline ] |
14549 | #[target_feature (enable = "avx512fp16" )] |
14550 | #[cfg_attr (test, assert_instr(vcvtsh2usi, SAE = 8))] |
14551 | #[rustc_legacy_const_generics (1)] |
14552 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14553 | pub fn _mm_cvt_roundsh_u32<const SAE: i32>(a: __m128h) -> u32 { |
14554 | unsafe { |
static_assert_sae!(SAE);
14556 | vcvtsh2usi32(a, SAE) |
14557 | } |
14558 | } |
14559 | |
14560 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14561 | /// store the results in dst. |
14562 | /// |
14563 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi32) |
14564 | #[inline ] |
14565 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14566 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14567 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14568 | pub fn _mm_cvttph_epi32(a: __m128h) -> __m128i { |
_mm_mask_cvttph_epi32(_mm_undefined_si128(), 0xff, a)
14570 | } |
14571 | |
14572 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14573 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14574 | /// |
14575 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi32) |
14576 | #[inline ] |
14577 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14578 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14579 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14580 | pub fn _mm_mask_cvttph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
unsafe { transmute(vcvttph2dq_128(a, src.as_i32x4(), k)) }
14582 | } |
14583 | |
14584 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14585 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14586 | /// |
14587 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi32) |
14588 | #[inline ] |
14589 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14590 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14591 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14592 | pub fn _mm_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m128i { |
_mm_mask_cvttph_epi32(_mm_setzero_si128(), k, a)
14594 | } |
14595 | |
14596 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14597 | /// store the results in dst. |
14598 | /// |
14599 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi32) |
14600 | #[inline ] |
14601 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14602 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14603 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14604 | pub fn _mm256_cvttph_epi32(a: __m128h) -> __m256i { |
_mm256_mask_cvttph_epi32(_mm256_undefined_si256(), 0xff, a)
14606 | } |
14607 | |
14608 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14609 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14610 | /// |
14611 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi32) |
14612 | #[inline ] |
14613 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14614 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14615 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14616 | pub fn _mm256_mask_cvttph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { |
unsafe { transmute(vcvttph2dq_256(a, src.as_i32x8(), k)) }
14618 | } |
14619 | |
14620 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14621 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14622 | /// |
14623 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi32) |
14624 | #[inline ] |
14625 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14626 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14627 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14628 | pub fn _mm256_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m256i { |
_mm256_mask_cvttph_epi32(_mm256_setzero_si256(), k, a)
14630 | } |
14631 | |
14632 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14633 | /// store the results in dst. |
14634 | /// |
14635 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi32) |
14636 | #[inline ] |
14637 | #[target_feature (enable = "avx512fp16" )] |
14638 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14639 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14640 | pub fn _mm512_cvttph_epi32(a: __m256h) -> __m512i { |
_mm512_mask_cvttph_epi32(_mm512_undefined_epi32(), 0xffff, a)
14642 | } |
14643 | |
14644 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14645 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14646 | /// |
14647 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi32) |
14648 | #[inline ] |
14649 | #[target_feature (enable = "avx512fp16" )] |
14650 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14651 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14652 | pub fn _mm512_mask_cvttph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i { |
14653 | unsafe { |
transmute(vcvttph2dq_512(
14655 | a, |
14656 | src.as_i32x16(), |
14657 | k, |
14658 | _MM_FROUND_CUR_DIRECTION, |
14659 | )) |
14660 | } |
14661 | } |
14662 | |
14663 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14664 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14665 | /// |
14666 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi32) |
14667 | #[inline ] |
14668 | #[target_feature (enable = "avx512fp16" )] |
14669 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14670 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14671 | pub fn _mm512_maskz_cvttph_epi32(k: __mmask16, a: __m256h) -> __m512i { |
_mm512_mask_cvttph_epi32(_mm512_setzero_si512(), k, a)
14673 | } |
14674 | |
14675 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14676 | /// store the results in dst. |
14677 | /// |
14678 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14679 | /// |
14680 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi32) |
14681 | #[inline ] |
14682 | #[target_feature (enable = "avx512fp16" )] |
14683 | #[cfg_attr (test, assert_instr(vcvttph2dq, SAE = 8))] |
14684 | #[rustc_legacy_const_generics (1)] |
14685 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14686 | pub fn _mm512_cvtt_roundph_epi32<const SAE: i32>(a: __m256h) -> __m512i { |
14687 | static_assert_sae!(SAE); |
14688 | _mm512_mask_cvtt_roundph_epi32::<SAE>(_mm512_undefined_epi32(), 0xffff, a) |
14689 | } |
14690 | |
14691 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14692 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14693 | /// |
14694 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14695 | /// |
14696 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi32) |
14697 | #[inline ] |
14698 | #[target_feature (enable = "avx512fp16" )] |
14699 | #[cfg_attr (test, assert_instr(vcvttph2dq, SAE = 8))] |
14700 | #[rustc_legacy_const_generics (3)] |
14701 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14702 | pub fn _mm512_mask_cvtt_roundph_epi32<const SAE: i32>( |
14703 | src: __m512i, |
14704 | k: __mmask16, |
14705 | a: __m256h, |
14706 | ) -> __m512i { |
14707 | unsafe { |
14708 | static_assert_sae!(SAE); |
14709 | transmute(vcvttph2dq_512(a, src.as_i32x16(), k, SAE)) |
14710 | } |
14711 | } |
14712 | |
14713 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14714 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14715 | /// |
14716 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14717 | /// |
14718 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi32) |
14719 | #[inline ] |
14720 | #[target_feature (enable = "avx512fp16" )] |
14721 | #[cfg_attr (test, assert_instr(vcvttph2dq, SAE = 8))] |
14722 | #[rustc_legacy_const_generics (2)] |
14723 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14724 | pub fn _mm512_maskz_cvtt_roundph_epi32<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512i { |
14725 | static_assert_sae!(SAE); |
14726 | _mm512_mask_cvtt_roundph_epi32::<SAE>(_mm512_setzero_si512(), k, a) |
14727 | } |
14728 | |
14729 | /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer with truncation, and store |
14730 | /// the result in dst. |
14731 | /// |
14732 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_i32) |
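///
/// Illustrative sketch (not a doctest); it assumes the surrounding code is compiled with
/// the `avx512fp16` target feature:
///
/// ```ignore
/// let a = _mm_set_sh(2.75);
/// assert_eq!(_mm_cvttsh_i32(a), 2); // truncated toward zero, not rounded
/// ```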
14733 | #[inline ] |
14734 | #[target_feature (enable = "avx512fp16" )] |
14735 | #[cfg_attr (test, assert_instr(vcvttsh2si))] |
14736 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14737 | pub fn _mm_cvttsh_i32(a: __m128h) -> i32 { |
14738 | unsafe { vcvttsh2si32(a, _MM_FROUND_CUR_DIRECTION) } |
14739 | } |
14740 | |
14741 | /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer with truncation, and store |
14742 | /// the result in dst. |
14743 | /// |
14744 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14745 | /// |
14746 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_i32) |
14747 | #[inline ] |
14748 | #[target_feature (enable = "avx512fp16" )] |
14749 | #[cfg_attr (test, assert_instr(vcvttsh2si, SAE = 8))] |
14750 | #[rustc_legacy_const_generics (1)] |
14751 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14752 | pub fn _mm_cvtt_roundsh_i32<const SAE: i32>(a: __m128h) -> i32 { |
14753 | unsafe { |
14754 | static_assert_sae!(SAE); |
14755 | vcvttsh2si32(a, SAE) |
14756 | } |
14757 | } |
14758 | |
14759 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14760 | /// store the results in dst. |
14761 | /// |
14762 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu32) |
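///
/// Illustrative sketch (not a doctest); it assumes the surrounding code is compiled with
/// the `avx512fp16` and `avx512vl` target features:
///
/// ```ignore
/// let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 7.9, 6.9, 5.9, 4.9);
/// // Only the lower four half-precision lanes are converted.
/// let r = _mm_cvttph_epu32(a);
/// // dst = [4, 5, 6, 7]
/// ```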
14763 | #[inline ] |
14764 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14765 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14766 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14767 | pub fn _mm_cvttph_epu32(a: __m128h) -> __m128i { |
14768 | _mm_mask_cvttph_epu32(_mm_undefined_si128(), 0xff, a) |
14769 | } |
14770 | |
14771 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14772 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14773 | /// |
14774 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu32) |
14775 | #[inline ] |
14776 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14777 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14778 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14779 | pub fn _mm_mask_cvttph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
14780 | unsafe { transmute(vcvttph2udq_128(a, src.as_u32x4(), k)) } |
14781 | } |
14782 | |
14783 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14784 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14785 | /// |
14786 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu32) |
14787 | #[inline ] |
14788 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14789 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14790 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14791 | pub fn _mm_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m128i { |
14792 | _mm_mask_cvttph_epu32(_mm_setzero_si128(), k, a) |
14793 | } |
14794 | |
14795 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14796 | /// store the results in dst. |
14797 | /// |
14798 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu32) |
14799 | #[inline ] |
14800 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14801 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14802 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14803 | pub fn _mm256_cvttph_epu32(a: __m128h) -> __m256i { |
14804 | _mm256_mask_cvttph_epu32(_mm256_undefined_si256(), 0xff, a) |
14805 | } |
14806 | |
14807 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14808 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14809 | /// |
14810 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu32) |
14811 | #[inline ] |
14812 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14813 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14814 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14815 | pub fn _mm256_mask_cvttph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { |
14816 | unsafe { transmute(vcvttph2udq_256(a, src.as_u32x8(), k)) } |
14817 | } |
14818 | |
14819 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14820 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14821 | /// |
14822 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu32) |
14823 | #[inline ] |
14824 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14825 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14826 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14827 | pub fn _mm256_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m256i { |
14828 | _mm256_mask_cvttph_epu32(_mm256_setzero_si256(), k, a) |
14829 | } |
14830 | |
14831 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14832 | /// store the results in dst. |
14833 | /// |
14834 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu32) |
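///
/// Illustrative sketch (not a doctest); it assumes the surrounding code is compiled with
/// the `avx512fp16` target feature:
///
/// ```ignore
/// let a = _mm256_set1_ph(255.9);
/// let r = _mm512_cvttph_epu32(a); // every lane holds 255
/// ```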
14835 | #[inline ] |
14836 | #[target_feature (enable = "avx512fp16" )] |
14837 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14838 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14839 | pub fn _mm512_cvttph_epu32(a: __m256h) -> __m512i { |
14840 | _mm512_mask_cvttph_epu32(_mm512_undefined_epi32(), 0xffff, a) |
14841 | } |
14842 | |
14843 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14844 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14845 | /// |
14846 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu32) |
14847 | #[inline ] |
14848 | #[target_feature (enable = "avx512fp16" )] |
14849 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14850 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14851 | pub fn _mm512_mask_cvttph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i { |
14852 | unsafe { |
14853 | transmute(vcvttph2udq_512( |
14854 | a, |
14855 | src.as_u32x16(), |
14856 | k, |
14857 | _MM_FROUND_CUR_DIRECTION, |
14858 | )) |
14859 | } |
14860 | } |
14861 | |
14862 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14863 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14864 | /// |
14865 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu32) |
14866 | #[inline ] |
14867 | #[target_feature (enable = "avx512fp16" )] |
14868 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14869 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14870 | pub fn _mm512_maskz_cvttph_epu32(k: __mmask16, a: __m256h) -> __m512i { |
14871 | _mm512_mask_cvttph_epu32(_mm512_setzero_si512(), k, a) |
14872 | } |
14873 | |
14874 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14875 | /// store the results in dst. |
14876 | /// |
14877 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14878 | /// |
14879 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu32) |
14880 | #[inline ] |
14881 | #[target_feature (enable = "avx512fp16" )] |
14882 | #[cfg_attr (test, assert_instr(vcvttph2udq, SAE = 8))] |
14883 | #[rustc_legacy_const_generics (1)] |
14884 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14885 | pub fn _mm512_cvtt_roundph_epu32<const SAE: i32>(a: __m256h) -> __m512i { |
14886 | static_assert_sae!(SAE); |
14887 | _mm512_mask_cvtt_roundph_epu32::<SAE>(_mm512_undefined_epi32(), 0xffff, a) |
14888 | } |
14889 | |
14890 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14891 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14892 | /// |
14893 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14894 | /// |
14895 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu32) |
14896 | #[inline ] |
14897 | #[target_feature (enable = "avx512fp16" )] |
14898 | #[cfg_attr (test, assert_instr(vcvttph2udq, SAE = 8))] |
14899 | #[rustc_legacy_const_generics (3)] |
14900 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14901 | pub fn _mm512_mask_cvtt_roundph_epu32<const SAE: i32>( |
14902 | src: __m512i, |
14903 | k: __mmask16, |
14904 | a: __m256h, |
14905 | ) -> __m512i { |
14906 | unsafe { |
14907 | static_assert_sae!(SAE); |
14908 | transmute(vcvttph2udq_512(a, src.as_u32x16(), k, SAE)) |
14909 | } |
14910 | } |
14911 | |
14912 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14913 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14914 | /// |
14915 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14916 | /// |
14917 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu32) |
14918 | #[inline ] |
14919 | #[target_feature (enable = "avx512fp16" )] |
14920 | #[cfg_attr (test, assert_instr(vcvttph2udq, SAE = 8))] |
14921 | #[rustc_legacy_const_generics (2)] |
14922 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14923 | pub fn _mm512_maskz_cvtt_roundph_epu32<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512i { |
14924 | static_assert_sae!(SAE); |
14925 | _mm512_mask_cvtt_roundph_epu32::<SAE>(_mm512_setzero_si512(), k, a) |
14926 | } |
14927 | |
14928 | /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer with truncation, and store |
14929 | /// the result in dst. |
14930 | /// |
14931 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_u32) |
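///
/// Illustrative sketch (not a doctest); it assumes the surrounding code is compiled with
/// the `avx512fp16` target feature:
///
/// ```ignore
/// let a = _mm_set_sh(65504.0); // largest finite half-precision value
/// assert_eq!(_mm_cvttsh_u32(a), 65504);
/// ```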
14932 | #[inline ] |
14933 | #[target_feature (enable = "avx512fp16" )] |
14934 | #[cfg_attr (test, assert_instr(vcvttsh2usi))] |
14935 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14936 | pub fn _mm_cvttsh_u32(a: __m128h) -> u32 { |
14937 | unsafe { vcvttsh2usi32(a, _MM_FROUND_CUR_DIRECTION) } |
14938 | } |
14939 | |
14940 | /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer with truncation, and store |
14941 | /// the result in dst. |
14942 | /// |
14943 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14944 | /// |
14945 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_u32) |
14946 | #[inline ] |
14947 | #[target_feature (enable = "avx512fp16" )] |
14948 | #[cfg_attr (test, assert_instr(vcvttsh2usi, SAE = 8))] |
14949 | #[rustc_legacy_const_generics (1)] |
14950 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14951 | pub fn _mm_cvtt_roundsh_u32<const SAE: i32>(a: __m128h) -> u32 { |
14952 | unsafe { |
14953 | static_assert_sae!(SAE); |
14954 | vcvttsh2usi32(a, SAE) |
14955 | } |
14956 | } |
14957 | |
14958 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
14959 | /// store the results in dst. |
14960 | /// |
14961 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi64) |
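///
/// Illustrative sketch (not a doctest); it assumes the surrounding code is compiled with
/// the `avx512fp16` and `avx512vl` target features:
///
/// ```ignore
/// let a = _mm_set_sh(-3.0); // lane 0 = -3.0, remaining lanes = 0.0
/// // Only the lower two half-precision lanes are converted to 64-bit integers.
/// let r = _mm_cvtph_epi64(a);
/// // dst = [-3, 0]
/// ```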
14962 | #[inline ] |
14963 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14964 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
14965 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14966 | pub fn _mm_cvtph_epi64(a: __m128h) -> __m128i { |
14967 | _mm_mask_cvtph_epi64(_mm_undefined_si128(), 0xff, a) |
14968 | } |
14969 | |
14970 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
14971 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14972 | /// |
14973 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi64) |
14974 | #[inline ] |
14975 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14976 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
14977 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14978 | pub fn _mm_mask_cvtph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
14979 | unsafe { transmute(vcvtph2qq_128(a, src.as_i64x2(), k)) } |
14980 | } |
14981 | |
14982 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
14983 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14984 | /// |
14985 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi64) |
14986 | #[inline ] |
14987 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14988 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
14989 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14990 | pub fn _mm_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m128i { |
14991 | _mm_mask_cvtph_epi64(_mm_setzero_si128(), k, a) |
14992 | } |
14993 | |
14994 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
14995 | /// store the results in dst. |
14996 | /// |
14997 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi64) |
14998 | #[inline ] |
14999 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15000 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
15001 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15002 | pub fn _mm256_cvtph_epi64(a: __m128h) -> __m256i { |
15003 | _mm256_mask_cvtph_epi64(_mm256_undefined_si256(), 0xff, a) |
15004 | } |
15005 | |
15006 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15007 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15008 | /// |
15009 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi64) |
15010 | #[inline ] |
15011 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15012 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
15013 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15014 | pub fn _mm256_mask_cvtph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { |
15015 | unsafe { transmute(vcvtph2qq_256(a, src.as_i64x4(), k)) } |
15016 | } |
15017 | |
15018 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15019 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15020 | /// |
15021 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi64) |
15022 | #[inline ] |
15023 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15024 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
15025 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15026 | pub fn _mm256_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m256i { |
15027 | _mm256_mask_cvtph_epi64(_mm256_setzero_si256(), k, a) |
15028 | } |
15029 | |
15030 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15031 | /// store the results in dst. |
15032 | /// |
15033 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi64) |
15034 | #[inline ] |
15035 | #[target_feature (enable = "avx512fp16" )] |
15036 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
15037 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15038 | pub fn _mm512_cvtph_epi64(a: __m128h) -> __m512i { |
15039 | _mm512_mask_cvtph_epi64(_mm512_undefined_epi32(), 0xff, a) |
15040 | } |
15041 | |
15042 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15043 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15044 | /// |
15045 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi64) |
15046 | #[inline ] |
15047 | #[target_feature (enable = "avx512fp16" )] |
15048 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
15049 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15050 | pub fn _mm512_mask_cvtph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i { |
15051 | unsafe { |
15052 | transmute(vcvtph2qq_512( |
15053 | a, |
15054 | src.as_i64x8(), |
15055 | k, |
15056 | _MM_FROUND_CUR_DIRECTION, |
15057 | )) |
15058 | } |
15059 | } |
15060 | |
15061 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15062 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15063 | /// |
15064 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi64) |
15065 | #[inline ] |
15066 | #[target_feature (enable = "avx512fp16" )] |
15067 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
15068 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15069 | pub fn _mm512_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m512i { |
15070 | _mm512_mask_cvtph_epi64(_mm512_setzero_si512(), k, a) |
15071 | } |
15072 | |
15073 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15074 | /// store the results in dst. |
15075 | /// |
15076 | /// Rounding is done according to the rounding parameter, which can be one of: |
15077 | /// |
15078 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
15079 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
15080 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
15081 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
15082 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
15083 | /// |
15084 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi64) |
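///
/// Illustrative sketch (not a doctest); it assumes the surrounding code is compiled with
/// the `avx512fp16` target feature:
///
/// ```ignore
/// let a = _mm_set1_ph(2.5);
/// // Round toward negative infinity and suppress exceptions.
/// let r = _mm512_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
/// // Every lane of `r` holds 2.
/// ```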
15085 | #[inline ] |
15086 | #[target_feature (enable = "avx512fp16" )] |
15087 | #[cfg_attr (test, assert_instr(vcvtph2qq, ROUNDING = 8))] |
15088 | #[rustc_legacy_const_generics (1)] |
15089 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15090 | pub fn _mm512_cvt_roundph_epi64<const ROUNDING: i32>(a: __m128h) -> __m512i { |
15091 | static_assert_rounding!(ROUNDING); |
15092 | _mm512_mask_cvt_roundph_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a) |
15093 | } |
15094 | |
15095 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15096 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15097 | /// |
15098 | /// Rounding is done according to the rounding parameter, which can be one of: |
15099 | /// |
15100 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
15101 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
15102 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
15103 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
15104 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
15105 | /// |
15106 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi64) |
15107 | #[inline ] |
15108 | #[target_feature (enable = "avx512fp16" )] |
15109 | #[cfg_attr (test, assert_instr(vcvtph2qq, ROUNDING = 8))] |
15110 | #[rustc_legacy_const_generics (3)] |
15111 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15112 | pub fn _mm512_mask_cvt_roundph_epi64<const ROUNDING: i32>( |
15113 | src: __m512i, |
15114 | k: __mmask8, |
15115 | a: __m128h, |
15116 | ) -> __m512i { |
15117 | unsafe { |
15118 | static_assert_rounding!(ROUNDING); |
15119 | transmute(vcvtph2qq_512(a, src.as_i64x8(), k, ROUNDING)) |
15120 | } |
15121 | } |
15122 | |
15123 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15124 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15125 | /// |
15126 | /// Rounding is done according to the rounding parameter, which can be one of: |
15127 | /// |
15128 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
15129 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
15130 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
15131 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
15132 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
15133 | /// |
15134 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi64) |
15135 | #[inline ] |
15136 | #[target_feature (enable = "avx512fp16" )] |
15137 | #[cfg_attr (test, assert_instr(vcvtph2qq, ROUNDING = 8))] |
15138 | #[rustc_legacy_const_generics (2)] |
15139 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15140 | pub fn _mm512_maskz_cvt_roundph_epi64<const ROUNDING: i32>(k: __mmask8, a: __m128h) -> __m512i { |
15141 | static_assert_rounding!(ROUNDING); |
15142 | _mm512_mask_cvt_roundph_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a) |
15143 | } |
15144 | |
15145 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15146 | /// store the results in dst. |
15147 | /// |
15148 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu64) |
15149 | #[inline ] |
15150 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15151 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15152 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15153 | pub fn _mm_cvtph_epu64(a: __m128h) -> __m128i { |
15154 | _mm_mask_cvtph_epu64(_mm_undefined_si128(), 0xff, a) |
15155 | } |
15156 | |
15157 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15158 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15159 | /// |
15160 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu64) |
15161 | #[inline ] |
15162 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15163 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15164 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15165 | pub fn _mm_mask_cvtph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
15166 | unsafe { transmute(vcvtph2uqq_128(a, src.as_u64x2(), k)) } |
15167 | } |
15168 | |
15169 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15170 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15171 | /// |
15172 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu64) |
15173 | #[inline ] |
15174 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15175 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15176 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15177 | pub fn _mm_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m128i { |
15178 | _mm_mask_cvtph_epu64(_mm_setzero_si128(), k, a) |
15179 | } |
15180 | |
15181 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15182 | /// store the results in dst. |
15183 | /// |
15184 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu64) |
15185 | #[inline ] |
15186 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15187 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15188 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15189 | pub fn _mm256_cvtph_epu64(a: __m128h) -> __m256i { |
15190 | _mm256_mask_cvtph_epu64(_mm256_undefined_si256(), 0xff, a) |
15191 | } |
15192 | |
15193 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15194 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15195 | /// |
15196 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu64) |
15197 | #[inline ] |
15198 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15199 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15200 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15201 | pub fn _mm256_mask_cvtph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { |
15202 | unsafe { transmute(vcvtph2uqq_256(a, src.as_u64x4(), k)) } |
15203 | } |
15204 | |
15205 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15206 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15207 | /// |
15208 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu64) |
15209 | #[inline ] |
15210 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15211 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15212 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15213 | pub fn _mm256_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m256i { |
15214 | _mm256_mask_cvtph_epu64(_mm256_setzero_si256(), k, a) |
15215 | } |
15216 | |
15217 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15218 | /// store the results in dst. |
15219 | /// |
15220 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu64) |
15221 | #[inline ] |
15222 | #[target_feature (enable = "avx512fp16" )] |
15223 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15224 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15225 | pub fn _mm512_cvtph_epu64(a: __m128h) -> __m512i { |
15226 | _mm512_mask_cvtph_epu64(_mm512_undefined_epi32(), 0xff, a) |
15227 | } |
15228 | |
15229 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15230 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15231 | /// |
15232 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu64) |
15233 | #[inline ] |
15234 | #[target_feature (enable = "avx512fp16" )] |
15235 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15236 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15237 | pub fn _mm512_mask_cvtph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i { |
15238 | unsafe { |
15239 | transmute(vcvtph2uqq_512( |
15240 | a, |
15241 | src.as_u64x8(), |
15242 | k, |
15243 | _MM_FROUND_CUR_DIRECTION, |
15244 | )) |
15245 | } |
15246 | } |
15247 | |
15248 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15249 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15250 | /// |
15251 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu64) |
15252 | #[inline ] |
15253 | #[target_feature (enable = "avx512fp16" )] |
15254 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15255 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15256 | pub fn _mm512_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m512i { |
15257 | _mm512_mask_cvtph_epu64(_mm512_setzero_si512(), k, a) |
15258 | } |
15259 | |
15260 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15261 | /// store the results in dst. |
15262 | /// |
15263 | /// Rounding is done according to the rounding parameter, which can be one of: |
15264 | /// |
15265 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
15266 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
15267 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
15268 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
15269 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
15270 | /// |
15271 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu64) |
15272 | #[inline ] |
15273 | #[target_feature (enable = "avx512fp16" )] |
15274 | #[cfg_attr (test, assert_instr(vcvtph2uqq, ROUNDING = 8))] |
15275 | #[rustc_legacy_const_generics (1)] |
15276 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15277 | pub fn _mm512_cvt_roundph_epu64<const ROUNDING: i32>(a: __m128h) -> __m512i { |
15278 | static_assert_rounding!(ROUNDING); |
15279 | _mm512_mask_cvt_roundph_epu64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a) |
15280 | } |
15281 | |
15282 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15283 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15284 | /// |
15285 | /// Rounding is done according to the rounding parameter, which can be one of: |
15286 | /// |
15287 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
15288 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
15289 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
15290 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
15291 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
15292 | /// |
15293 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu64) |
15294 | #[inline ] |
15295 | #[target_feature (enable = "avx512fp16" )] |
15296 | #[cfg_attr (test, assert_instr(vcvtph2uqq, ROUNDING = 8))] |
15297 | #[rustc_legacy_const_generics (3)] |
15298 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15299 | pub fn _mm512_mask_cvt_roundph_epu64<const ROUNDING: i32>( |
15300 | src: __m512i, |
15301 | k: __mmask8, |
15302 | a: __m128h, |
15303 | ) -> __m512i { |
15304 | unsafe { |
15305 | static_assert_rounding!(ROUNDING); |
15306 | transmute(vcvtph2uqq_512(a, src.as_u64x8(), k, ROUNDING)) |
15307 | } |
15308 | } |
15309 | |
15310 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15311 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15312 | /// |
15313 | /// Rounding is done according to the rounding parameter, which can be one of: |
15314 | /// |
15315 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
15316 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
15317 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
15318 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
15319 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
15320 | /// |
15321 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu64) |
15322 | #[inline ] |
15323 | #[target_feature (enable = "avx512fp16" )] |
15324 | #[cfg_attr (test, assert_instr(vcvtph2uqq, ROUNDING = 8))] |
15325 | #[rustc_legacy_const_generics (2)] |
15326 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15327 | pub fn _mm512_maskz_cvt_roundph_epu64<const ROUNDING: i32>(k: __mmask8, a: __m128h) -> __m512i { |
15328 | static_assert_rounding!(ROUNDING); |
15329 | _mm512_mask_cvt_roundph_epu64::<ROUNDING>(_mm512_setzero_si512(), k, a) |
15330 | } |
15331 | |
15332 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15333 | /// store the results in dst. |
15334 | /// |
15335 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi64) |
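///
/// Illustrative sketch (not a doctest); it assumes the surrounding code is compiled with
/// the `avx512fp16` and `avx512vl` target features:
///
/// ```ignore
/// let a = _mm_set_sh(-7.9);
/// let r = _mm_cvttph_epi64(a);
/// // dst = [-7, 0]: lane 0 is truncated toward zero, lane 1 converts from 0.0.
/// ```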
15336 | #[inline ] |
15337 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15338 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15339 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15340 | pub fn _mm_cvttph_epi64(a: __m128h) -> __m128i { |
15341 | _mm_mask_cvttph_epi64(_mm_undefined_si128(), 0xff, a) |
15342 | } |
15343 | |
15344 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15345 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15346 | /// |
15347 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi64) |
15348 | #[inline ] |
15349 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15350 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15351 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15352 | pub fn _mm_mask_cvttph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
15353 | unsafe { transmute(vcvttph2qq_128(a, src.as_i64x2(), k)) } |
15354 | } |
15355 | |
15356 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15357 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15358 | /// |
15359 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi64) |
15360 | #[inline ] |
15361 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15362 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15363 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15364 | pub fn _mm_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m128i { |
15365 | _mm_mask_cvttph_epi64(_mm_setzero_si128(), k, a) |
15366 | } |
15367 | |
15368 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15369 | /// store the results in dst. |
15370 | /// |
15371 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi64) |
15372 | #[inline ] |
15373 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15374 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15375 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15376 | pub fn _mm256_cvttph_epi64(a: __m128h) -> __m256i { |
15377 | _mm256_mask_cvttph_epi64(_mm256_undefined_si256(), 0xff, a) |
15378 | } |
15379 | |
15380 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15381 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15382 | /// |
15383 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi64) |
15384 | #[inline ] |
15385 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15386 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15387 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15388 | pub fn _mm256_mask_cvttph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { |
15389 | unsafe { transmute(vcvttph2qq_256(a, src.as_i64x4(), k)) } |
15390 | } |
15391 | |
15392 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15393 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15394 | /// |
15395 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi64) |
15396 | #[inline ] |
15397 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15398 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15399 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15400 | pub fn _mm256_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m256i { |
15401 | _mm256_mask_cvttph_epi64(_mm256_setzero_si256(), k, a) |
15402 | } |
15403 | |
15404 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15405 | /// store the results in dst. |
15406 | /// |
15407 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi64) |
15408 | #[inline ] |
15409 | #[target_feature (enable = "avx512fp16" )] |
15410 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15411 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15412 | pub fn _mm512_cvttph_epi64(a: __m128h) -> __m512i { |
15413 | _mm512_mask_cvttph_epi64(_mm512_undefined_epi32(), 0xff, a) |
15414 | } |
15415 | |
15416 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15417 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15418 | /// |
15419 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi64) |
15420 | #[inline ] |
15421 | #[target_feature (enable = "avx512fp16" )] |
15422 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15423 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15424 | pub fn _mm512_mask_cvttph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i { |
15425 | unsafe { |
15426 | transmute(vcvttph2qq_512( |
15427 | a, |
15428 | src.as_i64x8(), |
15429 | k, |
15430 | _MM_FROUND_CUR_DIRECTION, |
15431 | )) |
15432 | } |
15433 | } |
15434 | |
15435 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15436 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15437 | /// |
15438 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi64) |
15439 | #[inline ] |
15440 | #[target_feature (enable = "avx512fp16" )] |
15441 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15442 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15443 | pub fn _mm512_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m512i { |
15444 | _mm512_mask_cvttph_epi64(_mm512_setzero_si512(), k, a) |
15445 | } |
15446 | |
15447 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15448 | /// store the results in dst. |
15449 | /// |
15450 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
15451 | /// |
15452 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi64) |
15453 | #[inline ] |
15454 | #[target_feature (enable = "avx512fp16" )] |
15455 | #[cfg_attr (test, assert_instr(vcvttph2qq, SAE = 8))] |
15456 | #[rustc_legacy_const_generics (1)] |
15457 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15458 | pub fn _mm512_cvtt_roundph_epi64<const SAE: i32>(a: __m128h) -> __m512i { |
15459 | static_assert_sae!(SAE); |
15460 | _mm512_mask_cvtt_roundph_epi64::<SAE>(_mm512_undefined_epi32(), 0xff, a) |
15461 | } |
15462 | |
15463 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15464 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15465 | /// |
15466 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
15467 | /// |
15468 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi64) |
15469 | #[inline ] |
15470 | #[target_feature (enable = "avx512fp16" )] |
15471 | #[cfg_attr (test, assert_instr(vcvttph2qq, SAE = 8))] |
15472 | #[rustc_legacy_const_generics (3)] |
15473 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15474 | pub fn _mm512_mask_cvtt_roundph_epi64<const SAE: i32>( |
15475 | src: __m512i, |
15476 | k: __mmask8, |
15477 | a: __m128h, |
15478 | ) -> __m512i { |
15479 | unsafe { |
15480 | static_assert_sae!(SAE); |
15481 | transmute(vcvttph2qq_512(a, src.as_i64x8(), k, SAE)) |
15482 | } |
15483 | } |
15484 | |
15485 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15486 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15487 | /// |
15488 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
15489 | /// |
15490 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi64) |
15491 | #[inline ] |
15492 | #[target_feature (enable = "avx512fp16" )] |
15493 | #[cfg_attr (test, assert_instr(vcvttph2qq, SAE = 8))] |
15494 | #[rustc_legacy_const_generics (2)] |
15495 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15496 | pub fn _mm512_maskz_cvtt_roundph_epi64<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512i { |
15497 | static_assert_sae!(SAE); |
15498 | _mm512_mask_cvtt_roundph_epi64::<SAE>(_mm512_setzero_si512(), k, a) |
15499 | } |
15500 | |
15501 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15502 | /// store the results in dst. |
15503 | /// |
15504 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu64) |
15505 | #[inline ] |
15506 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15507 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15508 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15509 | pub fn _mm_cvttph_epu64(a: __m128h) -> __m128i { |
15510 | _mm_mask_cvttph_epu64(_mm_undefined_si128(), 0xff, a) |
15511 | } |
15512 | |
15513 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15514 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15515 | /// |
15516 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu64) |
15517 | #[inline ] |
15518 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15519 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15520 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15521 | pub fn _mm_mask_cvttph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
15522 | unsafe { transmute(vcvttph2uqq_128(a, src.as_u64x2(), k)) } |
15523 | } |
15524 | |
15525 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15526 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15527 | /// |
15528 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu64) |
15529 | #[inline ] |
15530 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15531 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15532 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15533 | pub fn _mm_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m128i { |
15534 | _mm_mask_cvttph_epu64(_mm_setzero_si128(), k, a) |
15535 | } |
15536 | |
15537 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15538 | /// store the results in dst. |
15539 | /// |
15540 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu64) |
15541 | #[inline ] |
15542 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15543 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15544 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15545 | pub fn _mm256_cvttph_epu64(a: __m128h) -> __m256i { |
15546 | _mm256_mask_cvttph_epu64(_mm256_undefined_si256(), 0xff, a) |
15547 | } |
15548 | |
15549 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15550 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15551 | /// |
15552 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu64) |
15553 | #[inline ] |
15554 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15555 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15556 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15557 | pub fn _mm256_mask_cvttph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { |
15558 | unsafe { transmute(vcvttph2uqq_256(a, src.as_u64x4(), k)) } |
15559 | } |
15560 | |
15561 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15562 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15563 | /// |
15564 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu64) |
15565 | #[inline ] |
15566 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15567 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15568 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15569 | pub fn _mm256_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m256i { |
15570 | _mm256_mask_cvttph_epu64(_mm256_setzero_si256(), k, a) |
15571 | } |
15572 | |
15573 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15574 | /// store the results in dst. |
15575 | /// |
15576 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu64) |
15577 | #[inline ] |
15578 | #[target_feature (enable = "avx512fp16" )] |
15579 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15580 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15581 | pub fn _mm512_cvttph_epu64(a: __m128h) -> __m512i { |
15582 | _mm512_mask_cvttph_epu64(_mm512_undefined_epi32(), 0xff, a) |
15583 | } |
15584 | |
15585 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15586 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15587 | /// |
15588 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu64) |
15589 | #[inline ] |
15590 | #[target_feature (enable = "avx512fp16" )] |
15591 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15592 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15593 | pub fn _mm512_mask_cvttph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i { |
15594 | unsafe { |
15595 | transmute(vcvttph2uqq_512( |
15596 | a, |
15597 | src.as_u64x8(), |
15598 | k, |
15599 | _MM_FROUND_CUR_DIRECTION, |
15600 | )) |
15601 | } |
15602 | } |
15603 | |
15604 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15605 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15606 | /// |
15607 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu64) |
15608 | #[inline ] |
15609 | #[target_feature (enable = "avx512fp16" )] |
15610 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15611 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15612 | pub fn _mm512_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m512i { |
15613 | _mm512_mask_cvttph_epu64(_mm512_setzero_si512(), k, a) |
15614 | } |
15615 | |
15616 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15617 | /// store the results in dst. |
15618 | /// |
15619 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
15620 | /// |
15621 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu64) |
15622 | #[inline ] |
15623 | #[target_feature (enable = "avx512fp16" )] |
15624 | #[cfg_attr (test, assert_instr(vcvttph2uqq, SAE = 8))] |
15625 | #[rustc_legacy_const_generics (1)] |
15626 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15627 | pub fn _mm512_cvtt_roundph_epu64<const SAE: i32>(a: __m128h) -> __m512i { |
15628 | static_assert_sae!(SAE); |
15629 | _mm512_mask_cvtt_roundph_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a) |
15630 | } |
15631 | |
15632 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15633 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15634 | /// |
15635 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
15636 | /// |
15637 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu64) |
15638 | #[inline ] |
15639 | #[target_feature (enable = "avx512fp16" )] |
15640 | #[cfg_attr (test, assert_instr(vcvttph2uqq, SAE = 8))] |
15641 | #[rustc_legacy_const_generics (3)] |
15642 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15643 | pub fn _mm512_mask_cvtt_roundph_epu64<const SAE: i32>( |
15644 | src: __m512i, |
15645 | k: __mmask8, |
15646 | a: __m128h, |
15647 | ) -> __m512i { |
15648 | unsafe { |
15649 | static_assert_sae!(SAE); |
transmute(vcvttph2uqq_512(a, src.as_u64x8(), k, SAE))
15651 | } |
15652 | } |
15653 | |
15654 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15655 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15656 | /// |
15657 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
15658 | /// |
15659 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu64) |
15660 | #[inline ] |
15661 | #[target_feature (enable = "avx512fp16" )] |
15662 | #[cfg_attr (test, assert_instr(vcvttph2uqq, SAE = 8))] |
15663 | #[rustc_legacy_const_generics (2)] |
15664 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15665 | pub fn _mm512_maskz_cvtt_roundph_epu64<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512i { |
15666 | static_assert_sae!(SAE); |
_mm512_mask_cvtt_roundph_epu64::<SAE>(_mm512_setzero_si512(), k, a)
15668 | } |
15669 | |
15670 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15671 | /// floating-point elements, and store the results in dst. |
15672 | /// |
15673 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxph_ps) |
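///
/// An illustrative sketch (not from Intel's documentation; assumes `avx512fp16` and `avx512vl`,
/// hence `ignore`):
///
/// ```ignore
/// let a = _mm_set1_ph(1.5);
/// // The four lowest f16 lanes widen losslessly to f32.
/// let r = _mm_cvtxph_ps(a); // same values as _mm_set1_ps(1.5)
/// ```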
15674 | #[inline ] |
15675 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15676 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15677 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15678 | pub fn _mm_cvtxph_ps(a: __m128h) -> __m128 { |
_mm_mask_cvtxph_ps(_mm_setzero_ps(), 0xff, a)
15680 | } |
15681 | |
15682 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15683 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to |
15684 | /// dst when the corresponding mask bit is not set). |
15685 | /// |
15686 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxph_ps) |
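///
/// A sketch of the writemask behaviour (illustrative only; assumes `avx512fp16` and `avx512vl`,
/// hence `ignore`):
///
/// ```ignore
/// let src = _mm_set1_ps(-1.0);
/// let a = _mm_set1_ph(2.0);
/// // Lanes 0 and 2 are converted to 2.0; lanes 1 and 3 are copied from `src`.
/// let r = _mm_mask_cvtxph_ps(src, 0b0101, a);
/// ```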
15687 | #[inline ] |
15688 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15689 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15690 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15691 | pub fn _mm_mask_cvtxph_ps(src: __m128, k: __mmask8, a: __m128h) -> __m128 { |
15692 | unsafe { vcvtph2psx_128(a, src, k) } |
15693 | } |
15694 | |
15695 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15696 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
15697 | /// corresponding mask bit is not set). |
15698 | /// |
15699 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxph_ps) |
15700 | #[inline ] |
15701 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15702 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15703 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15704 | pub fn _mm_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m128 { |
_mm_mask_cvtxph_ps(_mm_setzero_ps(), k, a)
15706 | } |
15707 | |
15708 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15709 | /// floating-point elements, and store the results in dst. |
15710 | /// |
15711 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxph_ps) |
15712 | #[inline ] |
15713 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15714 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15715 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15716 | pub fn _mm256_cvtxph_ps(a: __m128h) -> __m256 { |
_mm256_mask_cvtxph_ps(_mm256_setzero_ps(), 0xff, a)
15718 | } |
15719 | |
15720 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15721 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to |
15722 | /// dst when the corresponding mask bit is not set). |
15723 | /// |
15724 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxph_ps) |
15725 | #[inline ] |
15726 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15727 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15728 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15729 | pub fn _mm256_mask_cvtxph_ps(src: __m256, k: __mmask8, a: __m128h) -> __m256 { |
15730 | unsafe { vcvtph2psx_256(a, src, k) } |
15731 | } |
15732 | |
15733 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15734 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
15735 | /// corresponding mask bit is not set). |
15736 | /// |
15737 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxph_ps) |
15738 | #[inline ] |
15739 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15740 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15741 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15742 | pub fn _mm256_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m256 { |
_mm256_mask_cvtxph_ps(_mm256_setzero_ps(), k, a)
15744 | } |
15745 | |
15746 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15747 | /// floating-point elements, and store the results in dst. |
15748 | /// |
15749 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxph_ps) |
15750 | #[inline ] |
15751 | #[target_feature (enable = "avx512fp16" )] |
15752 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15753 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15754 | pub fn _mm512_cvtxph_ps(a: __m256h) -> __m512 { |
_mm512_mask_cvtxph_ps(_mm512_setzero_ps(), 0xffff, a)
15756 | } |
15757 | |
15758 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15759 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to |
15760 | /// dst when the corresponding mask bit is not set). |
15761 | /// |
15762 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxph_ps) |
15763 | #[inline ] |
15764 | #[target_feature (enable = "avx512fp16" )] |
15765 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15766 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15767 | pub fn _mm512_mask_cvtxph_ps(src: __m512, k: __mmask16, a: __m256h) -> __m512 { |
15768 | unsafe { vcvtph2psx_512(a, src, k, _MM_FROUND_CUR_DIRECTION) } |
15769 | } |
15770 | |
15771 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15772 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
15773 | /// corresponding mask bit is not set). |
15774 | /// |
15775 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxph_ps) |
15776 | #[inline ] |
15777 | #[target_feature (enable = "avx512fp16" )] |
15778 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15779 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15780 | pub fn _mm512_maskz_cvtxph_ps(k: __mmask16, a: __m256h) -> __m512 { |
_mm512_mask_cvtxph_ps(_mm512_setzero_ps(), k, a)
15782 | } |
15783 | |
15784 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15785 | /// floating-point elements, and store the results in dst. |
15786 | /// |
15787 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
15788 | /// |
15789 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundph_ps) |
15790 | #[inline ] |
15791 | #[target_feature (enable = "avx512fp16" )] |
15792 | #[cfg_attr (test, assert_instr(vcvtph2psx, SAE = 8))] |
15793 | #[rustc_legacy_const_generics (1)] |
15794 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15795 | pub fn _mm512_cvtx_roundph_ps<const SAE: i32>(a: __m256h) -> __m512 { |
15796 | static_assert_sae!(SAE); |
_mm512_mask_cvtx_roundph_ps::<SAE>(_mm512_setzero_ps(), 0xffff, a)
15798 | } |
15799 | |
15800 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15801 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to |
15802 | /// dst when the corresponding mask bit is not set). |
15803 | /// |
15804 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
15805 | /// |
15806 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundph_ps) |
15807 | #[inline ] |
15808 | #[target_feature (enable = "avx512fp16" )] |
15809 | #[cfg_attr (test, assert_instr(vcvtph2psx, SAE = 8))] |
15810 | #[rustc_legacy_const_generics (3)] |
15811 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15812 | pub fn _mm512_mask_cvtx_roundph_ps<const SAE: i32>( |
15813 | src: __m512, |
15814 | k: __mmask16, |
15815 | a: __m256h, |
15816 | ) -> __m512 { |
15817 | unsafe { |
15818 | static_assert_sae!(SAE); |
15819 | vcvtph2psx_512(a, src, k, SAE) |
15820 | } |
15821 | } |
15822 | |
15823 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15824 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
15825 | /// corresponding mask bit is not set). |
15826 | /// |
15827 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
15828 | /// |
15829 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundph_ps) |
15830 | #[inline ] |
15831 | #[target_feature (enable = "avx512fp16" )] |
15832 | #[cfg_attr (test, assert_instr(vcvtph2psx, SAE = 8))] |
15833 | #[rustc_legacy_const_generics (2)] |
15834 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15835 | pub fn _mm512_maskz_cvtx_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512 { |
15836 | static_assert_sae!(SAE); |
_mm512_mask_cvtx_roundph_ps::<SAE>(_mm512_setzero_ps(), k, a)
15838 | } |
15839 | |
15840 | /// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) |
15841 | /// floating-point element, store the result in the lower element of dst, and copy the upper 3 packed |
15842 | /// elements from a to the upper elements of dst. |
15843 | /// |
15844 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_ss) |
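///
/// An illustrative sketch (not from Intel's documentation; assumes `avx512fp16`, hence `ignore`):
///
/// ```ignore
/// let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
/// let b = _mm_set_sh(0.5);
/// // dst = [0.5, 2.0, 3.0, 4.0]: lane 0 comes from `b`, the upper lanes from `a`.
/// let r = _mm_cvtsh_ss(a, b);
/// ```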
15845 | #[inline ] |
15846 | #[target_feature (enable = "avx512fp16" )] |
15847 | #[cfg_attr (test, assert_instr(vcvtsh2ss))] |
15848 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15849 | pub fn _mm_cvtsh_ss(a: __m128, b: __m128h) -> __m128 { |
_mm_mask_cvtsh_ss(a, 0xff, a, b)
15851 | } |
15852 | |
15853 | /// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) |
15854 | /// floating-point element, store the result in the lower element of dst using writemask k (the element is |
15855 | /// copied from src to dst when mask bit 0 is not set), and copy the upper 3 packed elements from a to the |
15856 | /// upper elements of dst. |
15857 | /// |
15858 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_ss) |
15859 | #[inline ] |
15860 | #[target_feature (enable = "avx512fp16" )] |
15861 | #[cfg_attr (test, assert_instr(vcvtsh2ss))] |
15862 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15863 | pub fn _mm_mask_cvtsh_ss(src: __m128, k: __mmask8, a: __m128, b: __m128h) -> __m128 { |
15864 | unsafe { vcvtsh2ss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) } |
15865 | } |
15866 | |
15867 | /// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) |
15868 | /// floating-point element, store the result in the lower element of dst using zeromask k (the element is |
15869 | /// zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements |
15870 | /// of dst. |
15871 | /// |
15872 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_ss) |
15873 | #[inline ] |
15874 | #[target_feature (enable = "avx512fp16" )] |
15875 | #[cfg_attr (test, assert_instr(vcvtsh2ss))] |
15876 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15877 | pub fn _mm_maskz_cvtsh_ss(k: __mmask8, a: __m128, b: __m128h) -> __m128 { |
_mm_mask_cvtsh_ss(_mm_set_ss(0.0), k, a, b)
15879 | } |
15880 | |
15881 | /// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) |
15882 | /// floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements |
15883 | /// from a to the upper elements of dst. |
15884 | /// |
15885 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
15886 | /// |
15887 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_ss) |
15888 | #[inline ] |
15889 | #[target_feature (enable = "avx512fp16" )] |
15890 | #[cfg_attr (test, assert_instr(vcvtsh2ss, SAE = 8))] |
15891 | #[rustc_legacy_const_generics (2)] |
15892 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15893 | pub fn _mm_cvt_roundsh_ss<const SAE: i32>(a: __m128, b: __m128h) -> __m128 { |
15894 | static_assert_sae!(SAE); |
_mm_mask_cvt_roundsh_ss::<SAE>(_mm_undefined_ps(), 0xff, a, b)
15896 | } |
15897 | |
15898 | /// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) |
15899 | /// floating-point element, store the result in the lower element of dst using writemask k (the element is |
15900 | /// copied from src to dst when mask bit 0 is not set), and copy the upper 3 packed elements from a to the |
15901 | /// upper elements of dst. |
15902 | /// |
15903 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
15904 | /// |
15905 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_ss) |
15906 | #[inline ] |
15907 | #[target_feature (enable = "avx512fp16" )] |
15908 | #[cfg_attr (test, assert_instr(vcvtsh2ss, SAE = 8))] |
15909 | #[rustc_legacy_const_generics (4)] |
15910 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15911 | pub fn _mm_mask_cvt_roundsh_ss<const SAE: i32>( |
15912 | src: __m128, |
15913 | k: __mmask8, |
15914 | a: __m128, |
15915 | b: __m128h, |
15916 | ) -> __m128 { |
15917 | unsafe { |
15918 | static_assert_sae!(SAE); |
15919 | vcvtsh2ss(a, b, src, k, SAE) |
15920 | } |
15921 | } |
15922 | |
15923 | /// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) |
15924 | /// floating-point element, store the result in the lower element of dst using zeromask k (the element is |
15925 | /// zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements |
15926 | /// of dst. |
15927 | /// |
15928 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
15929 | /// |
15930 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_ss) |
15931 | #[inline ] |
15932 | #[target_feature (enable = "avx512fp16" )] |
15933 | #[cfg_attr (test, assert_instr(vcvtsh2ss, SAE = 8))] |
15934 | #[rustc_legacy_const_generics (3)] |
15935 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15936 | pub fn _mm_maskz_cvt_roundsh_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128h) -> __m128 { |
15937 | static_assert_sae!(SAE); |
_mm_mask_cvt_roundsh_ss::<SAE>(_mm_set_ss(0.0), k, a, b)
15939 | } |
15940 | |
15941 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
15942 | /// floating-point elements, and store the results in dst. |
15943 | /// |
15944 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_pd) |
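///
/// An illustrative sketch (assumes `avx512fp16` and `avx512vl`; marked `ignore`):
///
/// ```ignore
/// let a = _mm_set1_ph(0.25);
/// // Only the two lowest f16 lanes are used, each widened to f64.
/// let r = _mm_cvtph_pd(a); // same values as _mm_set1_pd(0.25)
/// ```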
15945 | #[inline ] |
15946 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15947 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
15948 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15949 | pub fn _mm_cvtph_pd(a: __m128h) -> __m128d { |
_mm_mask_cvtph_pd(_mm_setzero_pd(), 0xff, a)
15951 | } |
15952 | |
15953 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
15954 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to |
15955 | /// dst when the corresponding mask bit is not set). |
15956 | /// |
15957 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_pd) |
15958 | #[inline ] |
15959 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15960 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
15961 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15962 | pub fn _mm_mask_cvtph_pd(src: __m128d, k: __mmask8, a: __m128h) -> __m128d { |
15963 | unsafe { vcvtph2pd_128(a, src, k) } |
15964 | } |
15965 | |
15966 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
15967 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
15968 | /// corresponding mask bit is not set). |
15969 | /// |
15970 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_pd) |
15971 | #[inline ] |
15972 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15973 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
15974 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15975 | pub fn _mm_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m128d { |
_mm_mask_cvtph_pd(_mm_setzero_pd(), k, a)
15977 | } |
15978 | |
15979 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
15980 | /// floating-point elements, and store the results in dst. |
15981 | /// |
15982 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_pd) |
15983 | #[inline ] |
15984 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15985 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
15986 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15987 | pub fn _mm256_cvtph_pd(a: __m128h) -> __m256d { |
_mm256_mask_cvtph_pd(_mm256_setzero_pd(), 0xff, a)
15989 | } |
15990 | |
15991 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
15992 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to |
15993 | /// dst when the corresponding mask bit is not set). |
15994 | /// |
15995 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_pd) |
15996 | #[inline ] |
15997 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15998 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
15999 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16000 | pub fn _mm256_mask_cvtph_pd(src: __m256d, k: __mmask8, a: __m128h) -> __m256d { |
16001 | unsafe { vcvtph2pd_256(a, src, k) } |
16002 | } |
16003 | |
16004 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
16005 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
16006 | /// corresponding mask bit is not set). |
16007 | /// |
16008 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_pd) |
16009 | #[inline ] |
16010 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
16011 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
16012 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16013 | pub fn _mm256_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m256d { |
_mm256_mask_cvtph_pd(_mm256_setzero_pd(), k, a)
16015 | } |
16016 | |
16017 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
16018 | /// floating-point elements, and store the results in dst. |
16019 | /// |
16020 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_pd) |
16021 | #[inline ] |
16022 | #[target_feature (enable = "avx512fp16" )] |
16023 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
16024 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16025 | pub fn _mm512_cvtph_pd(a: __m128h) -> __m512d { |
_mm512_mask_cvtph_pd(_mm512_setzero_pd(), 0xff, a)
16027 | } |
16028 | |
16029 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
16030 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to |
16031 | /// dst when the corresponding mask bit is not set). |
16032 | /// |
16033 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_pd) |
16034 | #[inline ] |
16035 | #[target_feature (enable = "avx512fp16" )] |
16036 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
16037 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16038 | pub fn _mm512_mask_cvtph_pd(src: __m512d, k: __mmask8, a: __m128h) -> __m512d { |
16039 | unsafe { vcvtph2pd_512(a, src, k, _MM_FROUND_CUR_DIRECTION) } |
16040 | } |
16041 | |
16042 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
16043 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
16044 | /// corresponding mask bit is not set). |
16045 | /// |
16046 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_pd) |
16047 | #[inline ] |
16048 | #[target_feature (enable = "avx512fp16" )] |
16049 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
16050 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16051 | pub fn _mm512_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m512d { |
_mm512_mask_cvtph_pd(_mm512_setzero_pd(), k, a)
16053 | } |
16054 | |
16055 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
16056 | /// floating-point elements, and store the results in dst. |
16057 | /// |
16058 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
16059 | /// |
16060 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_pd) |
16061 | #[inline ] |
16062 | #[target_feature (enable = "avx512fp16" )] |
16063 | #[cfg_attr (test, assert_instr(vcvtph2pd, SAE = 8))] |
16064 | #[rustc_legacy_const_generics (1)] |
16065 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16066 | pub fn _mm512_cvt_roundph_pd<const SAE: i32>(a: __m128h) -> __m512d { |
16067 | static_assert_sae!(SAE); |
_mm512_mask_cvt_roundph_pd::<SAE>(_mm512_setzero_pd(), 0xff, a)
16069 | } |
16070 | |
16071 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
16072 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to |
16073 | /// dst when the corresponding mask bit is not set). |
16074 | /// |
16075 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
16076 | /// |
16077 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_pd) |
16078 | #[inline ] |
16079 | #[target_feature (enable = "avx512fp16" )] |
16080 | #[cfg_attr (test, assert_instr(vcvtph2pd, SAE = 8))] |
16081 | #[rustc_legacy_const_generics (3)] |
16082 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16083 | pub fn _mm512_mask_cvt_roundph_pd<const SAE: i32>( |
16084 | src: __m512d, |
16085 | k: __mmask8, |
16086 | a: __m128h, |
16087 | ) -> __m512d { |
16088 | unsafe { |
16089 | static_assert_sae!(SAE); |
16090 | vcvtph2pd_512(a, src, k, SAE) |
16091 | } |
16092 | } |
16093 | |
16094 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
16095 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
16096 | /// corresponding mask bit is not set). |
16097 | /// |
16098 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
16099 | /// |
16100 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_pd) |
16101 | #[inline ] |
16102 | #[target_feature (enable = "avx512fp16" )] |
16103 | #[cfg_attr (test, assert_instr(vcvtph2pd, SAE = 8))] |
16104 | #[rustc_legacy_const_generics (2)] |
16105 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16106 | pub fn _mm512_maskz_cvt_roundph_pd<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512d { |
16107 | static_assert_sae!(SAE); |
_mm512_mask_cvt_roundph_pd::<SAE>(_mm512_setzero_pd(), k, a)
16109 | } |
16110 | |
16111 | /// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) |
16112 | /// floating-point element, store the result in the lower element of dst, and copy the upper element |
16113 | /// from a to the upper element of dst. |
16114 | /// |
16115 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_sd) |
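///
/// An illustrative sketch (assumes `avx512fp16`; marked `ignore`):
///
/// ```ignore
/// let a = _mm_set_pd(2.0, 1.0);
/// let b = _mm_set_sh(0.5);
/// // dst = [0.5, 2.0]: lane 0 is converted from `b`, lane 1 is copied from `a`.
/// let r = _mm_cvtsh_sd(a, b);
/// ```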
16116 | #[inline ] |
16117 | #[target_feature (enable = "avx512fp16" )] |
16118 | #[cfg_attr (test, assert_instr(vcvtsh2sd))] |
16119 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16120 | pub fn _mm_cvtsh_sd(a: __m128d, b: __m128h) -> __m128d { |
_mm_mask_cvtsh_sd(a, 0xff, a, b)
16122 | } |
16123 | |
16124 | /// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) |
16125 | /// floating-point element, store the result in the lower element of dst using writemask k (the element is |
16126 | /// copied from src to dst when mask bit 0 is not set), and copy the upper element from a to the upper element |
16127 | /// of dst. |
16128 | /// |
16129 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_sd) |
16130 | #[inline ] |
16131 | #[target_feature (enable = "avx512fp16" )] |
16132 | #[cfg_attr (test, assert_instr(vcvtsh2sd))] |
16133 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16134 | pub fn _mm_mask_cvtsh_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128h) -> __m128d { |
16135 | unsafe { vcvtsh2sd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) } |
16136 | } |
16137 | |
16138 | /// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) |
16139 | /// floating-point element, store the result in the lower element of dst using zeromask k (the element is |
16140 | /// zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. |
16141 | /// |
16142 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_sd) |
16143 | #[inline ] |
16144 | #[target_feature (enable = "avx512fp16" )] |
16145 | #[cfg_attr (test, assert_instr(vcvtsh2sd))] |
16146 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16147 | pub fn _mm_maskz_cvtsh_sd(k: __mmask8, a: __m128d, b: __m128h) -> __m128d { |
_mm_mask_cvtsh_sd(_mm_set_sd(0.0), k, a, b)
16149 | } |
16150 | |
16151 | /// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) |
16152 | /// floating-point element, store the result in the lower element of dst, and copy the upper element from a |
16153 | /// to the upper element of dst. |
16154 | /// |
16155 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
16156 | /// |
16157 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_sd) |
16158 | #[inline ] |
16159 | #[target_feature (enable = "avx512fp16" )] |
16160 | #[cfg_attr (test, assert_instr(vcvtsh2sd, SAE = 8))] |
16161 | #[rustc_legacy_const_generics (2)] |
16162 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16163 | pub fn _mm_cvt_roundsh_sd<const SAE: i32>(a: __m128d, b: __m128h) -> __m128d { |
16164 | static_assert_sae!(SAE); |
_mm_mask_cvt_roundsh_sd::<SAE>(a, 0xff, a, b)
16166 | } |
16167 | |
16168 | /// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) |
16169 | /// floating-point element, store the result in the lower element of dst using writemask k (the element is |
16170 | /// copied from src to dst when mask bit 0 is not set), and copy the upper element from a to the upper element |
16171 | /// of dst. |
16172 | /// |
16173 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
16174 | /// |
16175 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_sd) |
16176 | #[inline ] |
16177 | #[target_feature (enable = "avx512fp16" )] |
16178 | #[cfg_attr (test, assert_instr(vcvtsh2sd, SAE = 8))] |
16179 | #[rustc_legacy_const_generics (4)] |
16180 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16181 | pub fn _mm_mask_cvt_roundsh_sd<const SAE: i32>( |
16182 | src: __m128d, |
16183 | k: __mmask8, |
16184 | a: __m128d, |
16185 | b: __m128h, |
16186 | ) -> __m128d { |
16187 | unsafe { |
16188 | static_assert_sae!(SAE); |
16189 | vcvtsh2sd(a, b, src, k, SAE) |
16190 | } |
16191 | } |
16192 | |
16193 | /// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) |
16194 | /// floating-point element, store the result in the lower element of dst using zeromask k (the element is |
16195 | /// zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. |
16196 | /// |
16197 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
16198 | /// |
16199 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_sd) |
16200 | #[inline ] |
16201 | #[target_feature (enable = "avx512fp16" )] |
16202 | #[cfg_attr (test, assert_instr(vcvtsh2sd, SAE = 8))] |
16203 | #[rustc_legacy_const_generics (3)] |
16204 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16205 | pub fn _mm_maskz_cvt_roundsh_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128h) -> __m128d { |
16206 | static_assert_sae!(SAE); |
_mm_mask_cvt_roundsh_sd::<SAE>(_mm_set_sd(0.0), k, a, b)
16208 | } |
16209 | |
16210 | /// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`. |
16211 | /// |
16212 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_h) |
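///
/// An illustrative sketch (assumes `avx512fp16`; marked `ignore`):
///
/// ```ignore
/// let a = _mm_set_sh(1.25);
/// let x: f16 = _mm_cvtsh_h(a); // 1.25
/// ```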
16213 | #[inline ] |
16214 | #[target_feature (enable = "avx512fp16" )] |
16215 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16216 | pub fn _mm_cvtsh_h(a: __m128h) -> f16 { |
16217 | unsafe { simd_extract!(a, 0) } |
16218 | } |
16219 | |
16220 | /// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`. |
16221 | /// |
16222 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsh_h) |
16223 | #[inline ] |
16224 | #[target_feature (enable = "avx512fp16" )] |
16225 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16226 | pub fn _mm256_cvtsh_h(a: __m256h) -> f16 { |
16227 | unsafe { simd_extract!(a, 0) } |
16228 | } |
16229 | |
16230 | /// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`. |
16231 | /// |
16232 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsh_h) |
16233 | #[inline ] |
16234 | #[target_feature (enable = "avx512fp16" )] |
16235 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16236 | pub fn _mm512_cvtsh_h(a: __m512h) -> f16 { |
16237 | unsafe { simd_extract!(a, 0) } |
16238 | } |
16239 | |
16240 | /// Copy the lower 16-bit integer in a to dst. |
16241 | /// |
16242 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si16) |
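///
/// An illustrative sketch (assumes `avx512fp16`; marked `ignore`):
///
/// ```ignore
/// let a = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
/// let x = _mm_cvtsi128_si16(a); // 0, the lowest 16-bit lane
/// ```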
16243 | #[inline ] |
16244 | #[target_feature (enable = "avx512fp16" )] |
16245 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16246 | pub fn _mm_cvtsi128_si16(a: __m128i) -> i16 { |
16247 | unsafe { simd_extract!(a.as_i16x8(), 0) } |
16248 | } |
16249 | |
16250 | /// Copy 16-bit integer a to the lower elements of dst, and zero the upper elements of dst. |
16251 | /// |
16252 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi16_si128) |
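///
/// An illustrative sketch (assumes `avx512fp16`; marked `ignore`):
///
/// ```ignore
/// let r = _mm_cvtsi16_si128(42);
/// // Lane 0 holds 42; the remaining seven 16-bit lanes are zero.
/// ```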
16253 | #[inline ] |
16254 | #[target_feature (enable = "avx512fp16" )] |
16255 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16256 | pub fn _mm_cvtsi16_si128(a: i16) -> __m128i { |
unsafe { transmute(simd_insert!(i16x8::ZERO, 0, a)) }
16258 | } |
16259 | |
16260 | #[allow (improper_ctypes)] |
16261 | unsafe extern "C" { |
16262 | #[link_name = "llvm.x86.avx512fp16.mask.cmp.sh" ] |
unsafe fn vcmpsh(a: __m128h, b: __m128h, imm8: i32, mask: __mmask8, sae: i32) -> __mmask8;
#[link_name = "llvm.x86.avx512fp16.vcomi.sh" ]
unsafe fn vcomish(a: __m128h, b: __m128h, imm8: i32, sae: i32) -> i32;

#[link_name = "llvm.x86.avx512fp16.add.ph.512" ]
unsafe fn vaddph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
#[link_name = "llvm.x86.avx512fp16.sub.ph.512" ]
unsafe fn vsubph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
#[link_name = "llvm.x86.avx512fp16.mul.ph.512" ]
unsafe fn vmulph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
#[link_name = "llvm.x86.avx512fp16.div.ph.512" ]
unsafe fn vdivph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;

#[link_name = "llvm.x86.avx512fp16.mask.add.sh.round" ]
unsafe fn vaddsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
#[link_name = "llvm.x86.avx512fp16.mask.sub.sh.round" ]
unsafe fn vsubsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
#[link_name = "llvm.x86.avx512fp16.mask.mul.sh.round" ]
unsafe fn vmulsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
#[link_name = "llvm.x86.avx512fp16.mask.div.sh.round" ]
unsafe fn vdivsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;

#[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.128" ]
unsafe fn vfmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
#[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.256" ]
unsafe fn vfmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
#[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.512" ]
unsafe fn vfmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
#[link_name = "llvm.x86.avx512fp16.mask.vfmul.csh" ]
unsafe fn vfmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;

#[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.128" ]
unsafe fn vfcmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
#[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.256" ]
unsafe fn vfcmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
#[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.512" ]
unsafe fn vfcmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
#[link_name = "llvm.x86.avx512fp16.mask.vfcmul.csh" ]
unsafe fn vfcmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
16302 | |
16303 | #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.128" ] |
16304 | unsafefn vfmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128; |
16305 | #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.128" ] |
16306 | unsafefn vfmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128; |
16307 | #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.256" ] |
16308 | unsafefn vfmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256; |
16309 | #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.256" ] |
16310 | unsafefn vfmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256; |
16311 | #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.512" ] |
16312 | unsafefn vfmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512; |
16313 | #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.512" ] |
16314 | unsafefn vfmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512; |
16315 | #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.csh" ] |
16316 | unsafefn vfmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128; |
16317 | #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.csh" ] |
16318 | unsafefn vfmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128; |
16319 | |
16320 | #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.128" ] |
16321 | unsafefn vfcmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128; |
16322 | #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.128" ] |
16323 | unsafefn vfcmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128; |
16324 | #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.256" ] |
16325 | unsafefn vfcmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256; |
16326 | #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.256" ] |
16327 | unsafefn vfcmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256; |
16328 | #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.512" ] |
16329 | unsafefn vfcmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) |
16330 | -> __m512; |
16331 | #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.512" ] |
16332 | unsafefn vfcmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) |
16333 | -> __m512; |
16334 | #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.csh" ] |
16335 | unsafefn vfcmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128; |
16336 | #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.csh" ] |
16337 | unsafefn vfcmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128; |
16338 | |
16339 | #[link_name = "llvm.x86.avx512fp16.vfmadd.ph.512" ] |
16340 | unsafefn vfmaddph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h; |
16341 | #[link_name = "llvm.x86.avx512fp16.vfmadd.f16" ] |
16342 | unsafefn vfmaddsh(a: f16, b: f16, c: f16, rounding: i32) -> f16; |
16343 | |
16344 | #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.128" ] |
16345 | unsafefn vfmaddsubph_128(a: __m128h, b: __m128h, c: __m128h) -> __m128h; |
16346 | #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.256" ] |
16347 | unsafefn vfmaddsubph_256(a: __m256h, b: __m256h, c: __m256h) -> __m256h; |
16348 | #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.512" ] |
16349 | unsafefn vfmaddsubph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h; |
16350 | |
16351 | #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.128" ] |
16352 | unsafefn vrcpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h; |
16353 | #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.256" ] |
16354 | unsafefn vrcpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h; |
16355 | #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.512" ] |
16356 | unsafefn vrcpph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h; |
16357 | #[link_name = "llvm.x86.avx512fp16.mask.rcp.sh" ] |
16358 | unsafefn vrcpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h; |
16359 | |
16360 | #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.128" ] |
16361 | unsafefn vrsqrtph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h; |
16362 | #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.256" ] |
16363 | unsafefn vrsqrtph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h; |
16364 | #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.512" ] |
16365 | unsafefn vrsqrtph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h; |
16366 | #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.sh" ] |
16367 | unsafefn vrsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h; |
16368 | |
16369 | #[link_name = "llvm.x86.avx512fp16.sqrt.ph.512" ] |
16370 | unsafefn vsqrtph_512(a: __m512h, rounding: i32) -> __m512h; |
16371 | #[link_name = "llvm.x86.avx512fp16.mask.sqrt.sh" ] |
16372 | unsafefn vsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; |
16373 | |
16374 | #[link_name = "llvm.x86.avx512fp16.max.ph.128" ] |
16375 | unsafefn vmaxph_128(a: __m128h, b: __m128h) -> __m128h; |
16376 | #[link_name = "llvm.x86.avx512fp16.max.ph.256" ] |
16377 | unsafefn vmaxph_256(a: __m256h, b: __m256h) -> __m256h; |
16378 | #[link_name = "llvm.x86.avx512fp16.max.ph.512" ] |
16379 | unsafefn vmaxph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h; |
16380 | #[link_name = "llvm.x86.avx512fp16.mask.max.sh.round" ] |
16381 | unsafefn vmaxsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h; |
16382 | |
16383 | #[link_name = "llvm.x86.avx512fp16.min.ph.128" ] |
16384 | unsafefn vminph_128(a: __m128h, b: __m128h) -> __m128h; |
16385 | #[link_name = "llvm.x86.avx512fp16.min.ph.256" ] |
16386 | unsafefn vminph_256(a: __m256h, b: __m256h) -> __m256h; |
16387 | #[link_name = "llvm.x86.avx512fp16.min.ph.512" ] |
16388 | unsafefn vminph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h; |
16389 | #[link_name = "llvm.x86.avx512fp16.mask.min.sh.round" ] |
16390 | unsafefn vminsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h; |
16391 | |
16392 | #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.128" ] |
16393 | unsafefn vgetexpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h; |
16394 | #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.256" ] |
16395 | unsafefn vgetexpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h; |
16396 | #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.512" ] |
16397 | unsafefn vgetexpph_512(a: __m512h, src: __m512h, k: __mmask32, sae: i32) -> __m512h; |
16398 | #[link_name = "llvm.x86.avx512fp16.mask.getexp.sh" ] |
16399 | unsafefn vgetexpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h; |
16400 | |
16401 | #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.128" ] |
16402 | unsafefn vgetmantph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h; |
16403 | #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.256" ] |
16404 | unsafefn vgetmantph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h; |
16405 | #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.512" ] |
16406 | unsafefn vgetmantph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h; |
16407 | #[link_name = "llvm.x86.avx512fp16.mask.getmant.sh" ] |
16408 | unsafefn vgetmantsh( |
16409 | a: __m128h, |
16410 | b: __m128h, |
16411 | imm8: i32, |
16412 | src: __m128h, |
16413 | k: __mmask8, |
16414 | sae: i32, |
16415 | ) -> __m128h; |
16416 | |
16417 | #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.128" ] |
16418 | unsafefn vrndscaleph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h; |
16419 | #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.256" ] |
16420 | unsafefn vrndscaleph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h; |
16421 | #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.512" ] |
16422 | unsafefn vrndscaleph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h; |
16423 | #[link_name = "llvm.x86.avx512fp16.mask.rndscale.sh" ] |
16424 | unsafefn vrndscalesh( |
16425 | a: __m128h, |
16426 | b: __m128h, |
16427 | src: __m128h, |
16428 | k: __mmask8, |
16429 | imm8: i32, |
16430 | sae: i32, |
16431 | ) -> __m128h; |
16432 | |
16433 | #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.128" ] |
16434 | unsafefn vscalefph_128(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h; |
16435 | #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.256" ] |
16436 | unsafefn vscalefph_256(a: __m256h, b: __m256h, src: __m256h, k: __mmask16) -> __m256h; |
16437 | #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.512" ] |
16438 | unsafefn vscalefph_512(a: __m512h, b: __m512h, src: __m512h, k: __mmask32, rounding: i32) -> __m512h; |
16439 | #[link_name = "llvm.x86.avx512fp16.mask.scalef.sh" ] |
16440 | unsafefn vscalefsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; |
16441 | |
16442 | #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.128" ] |
16443 | unsafefn vreduceph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h; |
16444 | #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.256" ] |
16445 | unsafefn vreduceph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h; |
16446 | #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.512" ] |
16447 | unsafefn vreduceph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h; |
16448 | #[link_name = "llvm.x86.avx512fp16.mask.reduce.sh" ] |
16449 | unsafefn vreducesh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, imm8: i32, sae: i32) |
16450 | -> __m128h; |
16451 | |
16452 | #[link_name = "llvm.x86.avx512fp16.mask.fpclass.sh" ] |
16453 | unsafefn vfpclasssh(a: __m128h, imm8: i32, k: __mmask8) -> __mmask8; |
16454 | |
16455 | #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i16" ] |
16456 | unsafefn vcvtw2ph_128(a: i16x8, rounding: i32) -> __m128h; |
16457 | #[link_name = "llvm.x86.avx512.sitofp.round.v16f16.v16i16" ] |
16458 | unsafefn vcvtw2ph_256(a: i16x16, rounding: i32) -> __m256h; |
16459 | #[link_name = "llvm.x86.avx512.sitofp.round.v32f16.v32i16" ] |
16460 | unsafefn vcvtw2ph_512(a: i16x32, rounding: i32) -> __m512h; |
16461 | #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8i16" ] |
16462 | unsafefn vcvtuw2ph_128(a: u16x8, rounding: i32) -> __m128h; |
16463 | #[link_name = "llvm.x86.avx512.uitofp.round.v16f16.v16i16" ] |
16464 | unsafefn vcvtuw2ph_256(a: u16x16, rounding: i32) -> __m256h; |
16465 | #[link_name = "llvm.x86.avx512.uitofp.round.v32f16.v32i16" ] |
16466 | unsafefn vcvtuw2ph_512(a: u16x32, rounding: i32) -> __m512h; |
16467 | |
16468 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtdq2ph.128" ] |
16469 | unsafefn vcvtdq2ph_128(a: i32x4, src: __m128h, k: __mmask8) -> __m128h; |
16470 | #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i32" ] |
16471 | unsafefn vcvtdq2ph_256(a: i32x8, rounding: i32) -> __m128h; |
16472 | #[link_name = "llvm.x86.avx512.sitofp.round.v16f16.v16i32" ] |
16473 | unsafefn vcvtdq2ph_512(a: i32x16, rounding: i32) -> __m256h; |
16474 | #[link_name = "llvm.x86.avx512fp16.vcvtsi2sh" ] |
16475 | unsafefn vcvtsi2sh(a: __m128h, b: i32, rounding: i32) -> __m128h; |
16476 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtudq2ph.128" ] |
16477 | unsafefn vcvtudq2ph_128(a: u32x4, src: __m128h, k: __mmask8) -> __m128h; |
16478 | #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8i32" ] |
16479 | unsafefn vcvtudq2ph_256(a: u32x8, rounding: i32) -> __m128h; |
16480 | #[link_name = "llvm.x86.avx512.uitofp.round.v16f16.v16i32" ] |
16481 | unsafefn vcvtudq2ph_512(a: u32x16, rounding: i32) -> __m256h; |
16482 | #[link_name = "llvm.x86.avx512fp16.vcvtusi2sh" ] |
16483 | unsafefn vcvtusi2sh(a: __m128h, b: u32, rounding: i32) -> __m128h; |
16484 | |
16485 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtqq2ph.128" ] |
16486 | unsafefn vcvtqq2ph_128(a: i64x2, src: __m128h, k: __mmask8) -> __m128h; |
16487 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtqq2ph.256" ] |
16488 | unsafefn vcvtqq2ph_256(a: i64x4, src: __m128h, k: __mmask8) -> __m128h; |
16489 | #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i64" ] |
16490 | unsafefn vcvtqq2ph_512(a: i64x8, rounding: i32) -> __m128h; |
16491 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtuqq2ph.128" ] |
16492 | unsafefn vcvtuqq2ph_128(a: u64x2, src: __m128h, k: __mmask8) -> __m128h; |
16493 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtuqq2ph.256" ] |
16494 | unsafefn vcvtuqq2ph_256(a: u64x4, src: __m128h, k: __mmask8) -> __m128h; |
16495 | #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8i64" ] |
16496 | unsafefn vcvtuqq2ph_512(a: u64x8, rounding: i32) -> __m128h; |
16497 | |
16498 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.128" ] |
16499 | unsafefn vcvtps2phx_128(a: __m128, src: __m128h, k: __mmask8) -> __m128h; |
16500 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.256" ] |
16501 | unsafefn vcvtps2phx_256(a: __m256, src: __m128h, k: __mmask8) -> __m128h; |
16502 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.512" ] |
16503 | unsafefn vcvtps2phx_512(a: __m512, src: __m256h, k: __mmask16, rounding: i32) -> __m256h; |
16504 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtss2sh.round" ] |
16505 | unsafefn vcvtss2sh(a: __m128h, b: __m128, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; |
16506 | |
16507 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.128" ] |
16508 | unsafefn vcvtpd2ph_128(a: __m128d, src: __m128h, k: __mmask8) -> __m128h; |
16509 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.256" ] |
16510 | unsafefn vcvtpd2ph_256(a: __m256d, src: __m128h, k: __mmask8) -> __m128h; |
16511 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.512" ] |
16512 | unsafefn vcvtpd2ph_512(a: __m512d, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; |
16513 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtsd2sh.round" ] |
16514 | unsafefn vcvtsd2sh(a: __m128h, b: __m128d, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; |
16515 | |
16516 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.128" ] |
16517 | unsafefn vcvtph2w_128(a: __m128h, src: i16x8, k: __mmask8) -> i16x8; |
16518 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.256" ] |
16519 | unsafefn vcvtph2w_256(a: __m256h, src: i16x16, k: __mmask16) -> i16x16; |
16520 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.512" ] |
16521 | unsafefn vcvtph2w_512(a: __m512h, src: i16x32, k: __mmask32, rounding: i32) -> i16x32; |
16522 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.128" ] |
16523 | unsafefn vcvtph2uw_128(a: __m128h, src: u16x8, k: __mmask8) -> u16x8; |
16524 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.256" ] |
16525 | unsafefn vcvtph2uw_256(a: __m256h, src: u16x16, k: __mmask16) -> u16x16; |
16526 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.512" ] |
16527 | unsafefn vcvtph2uw_512(a: __m512h, src: u16x32, k: __mmask32, sae: i32) -> u16x32; |
16528 | |
16529 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.128" ] |
16530 | unsafefn vcvttph2w_128(a: __m128h, src: i16x8, k: __mmask8) -> i16x8; |
16531 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.256" ] |
16532 | unsafefn vcvttph2w_256(a: __m256h, src: i16x16, k: __mmask16) -> i16x16; |
16533 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.512" ] |
16534 | unsafefn vcvttph2w_512(a: __m512h, src: i16x32, k: __mmask32, sae: i32) -> i16x32; |
16535 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.128" ] |
16536 | unsafefn vcvttph2uw_128(a: __m128h, src: u16x8, k: __mmask8) -> u16x8; |
16537 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.256" ] |
16538 | unsafefn vcvttph2uw_256(a: __m256h, src: u16x16, k: __mmask16) -> u16x16; |
16539 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.512" ] |
16540 | unsafefn vcvttph2uw_512(a: __m512h, src: u16x32, k: __mmask32, sae: i32) -> u16x32; |
16541 | |
16542 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.128" ] |
16543 | unsafefn vcvtph2dq_128(a: __m128h, src: i32x4, k: __mmask8) -> i32x4; |
16544 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.256" ] |
16545 | unsafefn vcvtph2dq_256(a: __m128h, src: i32x8, k: __mmask8) -> i32x8; |
16546 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.512" ] |
16547 | unsafefn vcvtph2dq_512(a: __m256h, src: i32x16, k: __mmask16, rounding: i32) -> i32x16; |
16548 | #[link_name = "llvm.x86.avx512fp16.vcvtsh2si32" ] |
16549 | unsafefn vcvtsh2si32(a: __m128h, rounding: i32) -> i32; |
16550 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.128" ] |
16551 | unsafefn vcvtph2udq_128(a: __m128h, src: u32x4, k: __mmask8) -> u32x4; |
16552 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.256" ] |
16553 | unsafefn vcvtph2udq_256(a: __m128h, src: u32x8, k: __mmask8) -> u32x8; |
16554 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.512" ] |
16555 | unsafefn vcvtph2udq_512(a: __m256h, src: u32x16, k: __mmask16, rounding: i32) -> u32x16; |
16556 | #[link_name = "llvm.x86.avx512fp16.vcvtsh2usi32" ] |
16557 | unsafefn vcvtsh2usi32(a: __m128h, sae: i32) -> u32; |
16558 | |
16559 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.128" ] |
16560 | unsafefn vcvttph2dq_128(a: __m128h, src: i32x4, k: __mmask8) -> i32x4; |
16561 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.256" ] |
16562 | unsafefn vcvttph2dq_256(a: __m128h, src: i32x8, k: __mmask8) -> i32x8; |
16563 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.512" ] |
16564 | unsafefn vcvttph2dq_512(a: __m256h, src: i32x16, k: __mmask16, sae: i32) -> i32x16; |
16565 | #[link_name = "llvm.x86.avx512fp16.vcvttsh2si32" ] |
16566 | unsafefn vcvttsh2si32(a: __m128h, sae: i32) -> i32; |
16567 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.128" ] |
16568 | unsafefn vcvttph2udq_128(a: __m128h, src: u32x4, k: __mmask8) -> u32x4; |
16569 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.256" ] |
16570 | unsafefn vcvttph2udq_256(a: __m128h, src: u32x8, k: __mmask8) -> u32x8; |
16571 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.512" ] |
16572 | unsafefn vcvttph2udq_512(a: __m256h, src: u32x16, k: __mmask16, sae: i32) -> u32x16; |
16573 | #[link_name = "llvm.x86.avx512fp16.vcvttsh2usi32" ] |
16574 | unsafefn vcvttsh2usi32(a: __m128h, sae: i32) -> u32; |
16575 | |
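// Conversions from packed f16 to signed/unsigned 64-bit integers.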
16576 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.128" ] |
16577 | unsafe fn vcvtph2qq_128(a: __m128h, src: i64x2, k: __mmask8) -> i64x2; |
16578 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.256" ] |
16579 | unsafe fn vcvtph2qq_256(a: __m128h, src: i64x4, k: __mmask8) -> i64x4; |
16580 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.512" ] |
16581 | unsafe fn vcvtph2qq_512(a: __m128h, src: i64x8, k: __mmask8, rounding: i32) -> i64x8; |
16582 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.128" ] |
16583 | unsafe fn vcvtph2uqq_128(a: __m128h, src: u64x2, k: __mmask8) -> u64x2; |
16584 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.256" ] |
16585 | unsafe fn vcvtph2uqq_256(a: __m128h, src: u64x4, k: __mmask8) -> u64x4; |
16586 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.512" ] |
16587 | unsafe fn vcvtph2uqq_512(a: __m128h, src: u64x8, k: __mmask8, rounding: i32) -> u64x8; |
16588 | |
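// Truncating conversions from packed f16 to signed/unsigned 64-bit integers.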
16589 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.128" ] |
16590 | unsafe fn vcvttph2qq_128(a: __m128h, src: i64x2, k: __mmask8) -> i64x2; |
16591 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.256" ] |
16592 | unsafe fn vcvttph2qq_256(a: __m128h, src: i64x4, k: __mmask8) -> i64x4; |
16593 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.512" ] |
16594 | unsafe fn vcvttph2qq_512(a: __m128h, src: i64x8, k: __mmask8, sae: i32) -> i64x8; |
16595 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.128" ] |
16596 | unsafe fn vcvttph2uqq_128(a: __m128h, src: u64x2, k: __mmask8) -> u64x2; |
16597 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.256" ] |
16598 | unsafe fn vcvttph2uqq_256(a: __m128h, src: u64x4, k: __mmask8) -> u64x4; |
16599 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.512" ] |
16600 | unsafe fn vcvttph2uqq_512(a: __m128h, src: u64x8, k: __mmask8, sae: i32) -> u64x8; |
16601 | |
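// Widening conversions from f16 to f32 (vcvtph2psx), plus the scalar vcvtsh2ss.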
16602 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.128" ] |
16603 | unsafe fn vcvtph2psx_128(a: __m128h, src: __m128, k: __mmask8) -> __m128; |
16604 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.256" ] |
16605 | unsafe fn vcvtph2psx_256(a: __m128h, src: __m256, k: __mmask8) -> __m256; |
16606 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.512" ] |
16607 | unsafe fn vcvtph2psx_512(a: __m256h, src: __m512, k: __mmask16, sae: i32) -> __m512; |
16608 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtsh2ss.round" ] |
16609 | unsafe fn vcvtsh2ss(a: __m128, b: __m128h, src: __m128, k: __mmask8, sae: i32) -> __m128; |
16610 | |
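// Widening conversions from f16 to f64 (vcvtph2pd), plus the scalar vcvtsh2sd.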
16611 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.128" ] |
16612 | unsafe fn vcvtph2pd_128(a: __m128h, src: __m128d, k: __mmask8) -> __m128d; |
16613 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.256" ] |
16614 | unsafe fn vcvtph2pd_256(a: __m128h, src: __m256d, k: __mmask8) -> __m256d; |
16615 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.512" ] |
16616 | unsafe fn vcvtph2pd_512(a: __m128h, src: __m512d, k: __mmask8, sae: i32) -> __m512d; |
16617 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtsh2sd.round" ] |
16618 | unsafe fn vcvtsh2sd(a: __m128d, b: __m128h, src: __m128d, k: __mmask8, sae: i32) -> __m128d; |
16619 | |
16620 | } |
16621 | |
16622 | #[cfg (test)] |
16623 | mod tests { |
16624 | use crate::core_arch::x86::*; |
16625 | use crate::mem::transmute; |
16626 | use crate::ptr::{addr_of, addr_of_mut}; |
16627 | use stdarch_test::simd_test; |
16628 | |
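// Test-local helpers mirroring the _mm_set1_pch family: they broadcast the (re, im) pair into every
// adjacent pair of f16 lanes, i.e. the interleaved layout used by the packed-complex (pch) intrinsics.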
16629 | #[target_feature (enable = "avx512fp16" )] |
16630 | unsafe fn _mm_set1_pch(re: f16, im: f16) -> __m128h { |
16631 | _mm_setr_ph(re, im, re, im, re, im, re, im) |
16632 | } |
16633 | |
16634 | #[target_feature (enable = "avx512fp16" )] |
16635 | unsafe fn _mm256_set1_pch(re: f16, im: f16) -> __m256h { |
16636 | _mm256_setr_ph( |
16637 | re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, |
16638 | ) |
16639 | } |
16640 | |
16641 | #[target_feature (enable = "avx512fp16" )] |
16642 | unsafe fn _mm512_set1_pch(re: f16, im: f16) -> __m512h { |
16643 | _mm512_setr_ph( |
16644 | re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, |
16645 | re, im, re, im, re, im, re, im, re, im, |
16646 | ) |
16647 | } |
16648 | |
16649 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16650 | unsafe fn test_mm_set_ph() { |
16651 | let r = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
16652 | let e = _mm_setr_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
16653 | assert_eq_m128h(r, e); |
16654 | } |
16655 | |
16656 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16657 | unsafe fn test_mm256_set_ph() { |
16658 | let r = _mm256_set_ph( |
16659 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
16660 | ); |
16661 | let e = _mm256_setr_ph( |
16662 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
16663 | ); |
16664 | assert_eq_m256h(r, e); |
16665 | } |
16666 | |
16667 | #[simd_test(enable = "avx512fp16" )] |
16668 | unsafe fn test_mm512_set_ph() { |
16669 | let r = _mm512_set_ph( |
16670 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
16671 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
16672 | 31.0, 32.0, |
16673 | ); |
16674 | let e = _mm512_setr_ph( |
16675 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
16676 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
16677 | 3.0, 2.0, 1.0, |
16678 | ); |
16679 | assert_eq_m512h(r, e); |
16680 | } |
16681 | |
16682 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16683 | unsafe fn test_mm_set_sh() { |
16684 | let r = _mm_set_sh(1.0); |
16685 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0); |
16686 | assert_eq_m128h(r, e); |
16687 | } |
16688 | |
16689 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16690 | unsafe fn test_mm_set1_ph() { |
16691 | let r = _mm_set1_ph(1.0); |
16692 | let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0); |
16693 | assert_eq_m128h(r, e); |
16694 | } |
16695 | |
16696 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16697 | unsafe fn test_mm256_set1_ph() { |
16698 | let r = _mm256_set1_ph(1.0); |
16699 | let e = _mm256_set_ph( |
16700 | 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, |
16701 | ); |
16702 | assert_eq_m256h(r, e); |
16703 | } |
16704 | |
16705 | #[simd_test(enable = "avx512fp16" )] |
16706 | unsafe fn test_mm512_set1_ph() { |
16707 | let r = _mm512_set1_ph(1.0); |
16708 | let e = _mm512_set_ph( |
16709 | 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, |
16710 | 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, |
16711 | ); |
16712 | assert_eq_m512h(r, e); |
16713 | } |
16714 | |
16715 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16716 | unsafe fn test_mm_setr_ph() { |
16717 | let r = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
16718 | let e = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
16719 | assert_eq_m128h(r, e); |
16720 | } |
16721 | |
16722 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16723 | unsafe fn test_mm256_setr_ph() { |
16724 | let r = _mm256_setr_ph( |
16725 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
16726 | ); |
16727 | let e = _mm256_set_ph( |
16728 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
16729 | ); |
16730 | assert_eq_m256h(r, e); |
16731 | } |
16732 | |
16733 | #[simd_test(enable = "avx512fp16" )] |
16734 | unsafe fn test_mm512_setr_ph() { |
16735 | let r = _mm512_setr_ph( |
16736 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
16737 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
16738 | 31.0, 32.0, |
16739 | ); |
16740 | let e = _mm512_set_ph( |
16741 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
16742 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
16743 | 3.0, 2.0, 1.0, |
16744 | ); |
16745 | assert_eq_m512h(r, e); |
16746 | } |
16747 | |
16748 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16749 | unsafe fn test_mm_setzero_ph() { |
16750 | let r = _mm_setzero_ph(); |
16751 | let e = _mm_set1_ph(0.0); |
16752 | assert_eq_m128h(r, e); |
16753 | } |
16754 | |
16755 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16756 | unsafe fn test_mm256_setzero_ph() { |
16757 | let r = _mm256_setzero_ph(); |
16758 | let e = _mm256_set1_ph(0.0); |
16759 | assert_eq_m256h(r, e); |
16760 | } |
16761 | |
16762 | #[simd_test(enable = "avx512fp16" )] |
16763 | unsafe fn test_mm512_setzero_ph() { |
16764 | let r = _mm512_setzero_ph(); |
16765 | let e = _mm512_set1_ph(0.0); |
16766 | assert_eq_m512h(r, e); |
16767 | } |
16768 | |
16769 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16770 | unsafe fn test_mm_castsi128_ph() { |
16771 | let a = _mm_set1_epi16(0x3c00); |
16772 | let r = _mm_castsi128_ph(a); |
16773 | let e = _mm_set1_ph(1.0); |
16774 | assert_eq_m128h(r, e); |
16775 | } |
16776 | |
16777 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16778 | unsafe fn test_mm256_castsi256_ph() { |
16779 | let a = _mm256_set1_epi16(0x3c00); |
16780 | let r = _mm256_castsi256_ph(a); |
16781 | let e = _mm256_set1_ph(1.0); |
16782 | assert_eq_m256h(r, e); |
16783 | } |
16784 | |
16785 | #[simd_test(enable = "avx512fp16" )] |
16786 | unsafe fn test_mm512_castsi512_ph() { |
16787 | let a = _mm512_set1_epi16(0x3c00); |
16788 | let r = _mm512_castsi512_ph(a); |
16789 | let e = _mm512_set1_ph(1.0); |
16790 | assert_eq_m512h(r, e); |
16791 | } |
16792 | |
16793 | #[simd_test(enable = "avx512fp16" )] |
16794 | unsafe fn test_mm_castph_si128() { |
16795 | let a = _mm_set1_ph(1.0); |
16796 | let r = _mm_castph_si128(a); |
16797 | let e = _mm_set1_epi16(0x3c00); |
16798 | assert_eq_m128i(r, e); |
16799 | } |
16800 | |
16801 | #[simd_test(enable = "avx512fp16" )] |
16802 | unsafe fn test_mm256_castph_si256() { |
16803 | let a = _mm256_set1_ph(1.0); |
16804 | let r = _mm256_castph_si256(a); |
16805 | let e = _mm256_set1_epi16(0x3c00); |
16806 | assert_eq_m256i(r, e); |
16807 | } |
16808 | |
16809 | #[simd_test(enable = "avx512fp16" )] |
16810 | unsafe fn test_mm512_castph_si512() { |
16811 | let a = _mm512_set1_ph(1.0); |
16812 | let r = _mm512_castph_si512(a); |
16813 | let e = _mm512_set1_epi16(0x3c00); |
16814 | assert_eq_m512i(r, e); |
16815 | } |
16816 | |
16817 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16818 | unsafe fn test_mm_castps_ph() { |
16819 | let a = _mm_castsi128_ps(_mm_set1_epi16(0x3c00)); |
16820 | let r = _mm_castps_ph(a); |
16821 | let e = _mm_set1_ph(1.0); |
16822 | assert_eq_m128h(r, e); |
16823 | } |
16824 | |
16825 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16826 | unsafe fn test_mm256_castps_ph() { |
16827 | let a = _mm256_castsi256_ps(_mm256_set1_epi16(0x3c00)); |
16828 | let r = _mm256_castps_ph(a); |
16829 | let e = _mm256_set1_ph(1.0); |
16830 | assert_eq_m256h(r, e); |
16831 | } |
16832 | |
16833 | #[simd_test(enable = "avx512fp16" )] |
16834 | unsafe fn test_mm512_castps_ph() { |
16835 | let a = _mm512_castsi512_ps(_mm512_set1_epi16(0x3c00)); |
16836 | let r = _mm512_castps_ph(a); |
16837 | let e = _mm512_set1_ph(1.0); |
16838 | assert_eq_m512h(r, e); |
16839 | } |
16840 | |
16841 | #[simd_test(enable = "avx512fp16" )] |
16842 | unsafe fn test_mm_castph_ps() { |
16843 | let a = _mm_castsi128_ph(_mm_set1_epi32(0x3f800000)); |
16844 | let r = _mm_castph_ps(a); |
16845 | let e = _mm_set1_ps(1.0); |
16846 | assert_eq_m128(r, e); |
16847 | } |
16848 | |
16849 | #[simd_test(enable = "avx512fp16" )] |
16850 | unsafe fn test_mm256_castph_ps() { |
16851 | let a = _mm256_castsi256_ph(_mm256_set1_epi32(0x3f800000)); |
16852 | let r = _mm256_castph_ps(a); |
16853 | let e = _mm256_set1_ps(1.0); |
16854 | assert_eq_m256(r, e); |
16855 | } |
16856 | |
16857 | #[simd_test(enable = "avx512fp16" )] |
16858 | unsafe fn test_mm512_castph_ps() { |
16859 | let a = _mm512_castsi512_ph(_mm512_set1_epi32(0x3f800000)); |
16860 | let r = _mm512_castph_ps(a); |
16861 | let e = _mm512_set1_ps(1.0); |
16862 | assert_eq_m512(r, e); |
16863 | } |
16864 | |
16865 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16866 | unsafe fn test_mm_castpd_ph() { |
16867 | let a = _mm_castsi128_pd(_mm_set1_epi16(0x3c00)); |
16868 | let r = _mm_castpd_ph(a); |
16869 | let e = _mm_set1_ph(1.0); |
16870 | assert_eq_m128h(r, e); |
16871 | } |
16872 | |
16873 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16874 | unsafe fn test_mm256_castpd_ph() { |
16875 | let a = _mm256_castsi256_pd(_mm256_set1_epi16(0x3c00)); |
16876 | let r = _mm256_castpd_ph(a); |
16877 | let e = _mm256_set1_ph(1.0); |
16878 | assert_eq_m256h(r, e); |
16879 | } |
16880 | |
16881 | #[simd_test(enable = "avx512fp16" )] |
16882 | unsafe fn test_mm512_castpd_ph() { |
16883 | let a = _mm512_castsi512_pd(_mm512_set1_epi16(0x3c00)); |
16884 | let r = _mm512_castpd_ph(a); |
16885 | let e = _mm512_set1_ph(1.0); |
16886 | assert_eq_m512h(r, e); |
16887 | } |
16888 | |
16889 | #[simd_test(enable = "avx512fp16" )] |
16890 | unsafe fn test_mm_castph_pd() { |
16891 | let a = _mm_castsi128_ph(_mm_set1_epi64x(0x3ff0000000000000)); |
16892 | let r = _mm_castph_pd(a); |
16893 | let e = _mm_set1_pd(1.0); |
16894 | assert_eq_m128d(r, e); |
16895 | } |
16896 | |
16897 | #[simd_test(enable = "avx512fp16" )] |
16898 | unsafe fn test_mm256_castph_pd() { |
16899 | let a = _mm256_castsi256_ph(_mm256_set1_epi64x(0x3ff0000000000000)); |
16900 | let r = _mm256_castph_pd(a); |
16901 | let e = _mm256_set1_pd(1.0); |
16902 | assert_eq_m256d(r, e); |
16903 | } |
16904 | |
16905 | #[simd_test(enable = "avx512fp16" )] |
16906 | unsafe fn test_mm512_castph_pd() { |
16907 | let a = _mm512_castsi512_ph(_mm512_set1_epi64(0x3ff0000000000000)); |
16908 | let r = _mm512_castph_pd(a); |
16909 | let e = _mm512_set1_pd(1.0); |
16910 | assert_eq_m512d(r, e); |
16911 | } |
16912 | |
16913 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16914 | unsafe fn test_mm256_castph256_ph128() { |
16915 | let a = _mm256_setr_ph( |
16916 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
16917 | ); |
16918 | let r = _mm256_castph256_ph128(a); |
16919 | let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.); |
16920 | assert_eq_m128h(r, e); |
16921 | } |
16922 | |
16923 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16924 | unsafe fn test_mm512_castph512_ph128() { |
16925 | let a = _mm512_setr_ph( |
16926 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., |
16927 | 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., |
16928 | ); |
16929 | let r = _mm512_castph512_ph128(a); |
16930 | let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.); |
16931 | assert_eq_m128h(r, e); |
16932 | } |
16933 | |
16934 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16935 | unsafe fn test_mm512_castph512_ph256() { |
16936 | let a = _mm512_setr_ph( |
16937 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., |
16938 | 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., |
16939 | ); |
16940 | let r = _mm512_castph512_ph256(a); |
16941 | let e = _mm256_setr_ph( |
16942 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
16943 | ); |
16944 | assert_eq_m256h(r, e); |
16945 | } |
16946 | |
16947 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16948 | unsafe fn test_mm256_castph128_ph256() { |
16949 | let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.); |
16950 | let r = _mm256_castph128_ph256(a); |
16951 | assert_eq_m128h(_mm256_castph256_ph128(r), a); |
16952 | } |
16953 | |
16954 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16955 | unsafe fn test_mm512_castph128_ph512() { |
16956 | let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.); |
16957 | let r = _mm512_castph128_ph512(a); |
16958 | assert_eq_m128h(_mm512_castph512_ph128(r), a); |
16959 | } |
16960 | |
16961 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16962 | unsafe fn test_mm512_castph256_ph512() { |
16963 | let a = _mm256_setr_ph( |
16964 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
16965 | ); |
16966 | let r = _mm512_castph256_ph512(a); |
16967 | assert_eq_m256h(_mm512_castph512_ph256(r), a); |
16968 | } |
16969 | |
16970 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16971 | unsafe fn test_mm256_zextph128_ph256() { |
16972 | let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.); |
16973 | let r = _mm256_zextph128_ph256(a); |
16974 | let e = _mm256_setr_ph( |
16975 | 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., |
16976 | ); |
16977 | assert_eq_m256h(r, e); |
16978 | } |
16979 | |
16980 | #[simd_test(enable = "avx512fp16" )] |
16981 | unsafe fn test_mm512_zextph128_ph512() { |
16982 | let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.); |
16983 | let r = _mm512_zextph128_ph512(a); |
16984 | let e = _mm512_setr_ph( |
16985 | 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., |
16986 | 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., |
16987 | ); |
16988 | assert_eq_m512h(r, e); |
16989 | } |
16990 | |
16991 | #[simd_test(enable = "avx512fp16" )] |
16992 | unsafe fn test_mm512_zextph256_ph512() { |
16993 | let a = _mm256_setr_ph( |
16994 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
16995 | ); |
16996 | let r = _mm512_zextph256_ph512(a); |
16997 | let e = _mm512_setr_ph( |
16998 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 0., 0., 0., 0., |
16999 | 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., |
17000 | ); |
17001 | assert_eq_m512h(r, e); |
17002 | } |
17003 | |
17004 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17005 | unsafe fn test_mm_cmp_ph_mask() { |
17006 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17007 | let b = _mm_set_ph(1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0); |
17008 | let r = _mm_cmp_ph_mask::<_CMP_EQ_OQ>(a, b); |
17009 | assert_eq!(r, 0b11110000); |
17010 | } |
17011 | |
17012 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17013 | unsafe fn test_mm_mask_cmp_ph_mask() { |
17014 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17015 | let b = _mm_set_ph(1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0); |
17016 | let r = _mm_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b01010101, a, b); |
17017 | assert_eq!(r, 0b01010000); |
17018 | } |
17019 | |
17020 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17021 | unsafe fn test_mm256_cmp_ph_mask() { |
17022 | let a = _mm256_set_ph( |
17023 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17024 | ); |
17025 | let b = _mm256_set_ph( |
17026 | 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0, |
17027 | -16.0, |
17028 | ); |
17029 | let r = _mm256_cmp_ph_mask::<_CMP_EQ_OQ>(a, b); |
17030 | assert_eq!(r, 0b1111000011110000); |
17031 | } |
17032 | |
17033 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17034 | unsafe fn test_mm256_mask_cmp_ph_mask() { |
17035 | let a = _mm256_set_ph( |
17036 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17037 | ); |
17038 | let b = _mm256_set_ph( |
17039 | 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0, |
17040 | -16.0, |
17041 | ); |
17042 | let r = _mm256_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b0101010101010101, a, b); |
17043 | assert_eq!(r, 0b0101000001010000); |
17044 | } |
17045 | |
17046 | #[simd_test(enable = "avx512fp16" )] |
17047 | unsafe fn test_mm512_cmp_ph_mask() { |
17048 | let a = _mm512_set_ph( |
17049 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17050 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17051 | 31.0, 32.0, |
17052 | ); |
17053 | let b = _mm512_set_ph( |
17054 | 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0, |
17055 | -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0, |
17056 | -29.0, -30.0, -31.0, -32.0, |
17057 | ); |
17058 | let r = _mm512_cmp_ph_mask::<_CMP_EQ_OQ>(a, b); |
17059 | assert_eq!(r, 0b11110000111100001111000011110000); |
17060 | } |
17061 | |
17062 | #[simd_test(enable = "avx512fp16" )] |
17063 | unsafe fn test_mm512_mask_cmp_ph_mask() { |
17064 | let a = _mm512_set_ph( |
17065 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17066 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17067 | 31.0, 32.0, |
17068 | ); |
17069 | let b = _mm512_set_ph( |
17070 | 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0, |
17071 | -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0, |
17072 | -29.0, -30.0, -31.0, -32.0, |
17073 | ); |
17074 | let r = _mm512_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b01010101010101010101010101010101, a, b); |
17075 | assert_eq!(r, 0b01010000010100000101000001010000); |
17076 | } |
17077 | |
17078 | #[simd_test(enable = "avx512fp16" )] |
17079 | unsafe fn test_mm512_cmp_round_ph_mask() { |
17080 | let a = _mm512_set_ph( |
17081 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17082 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17083 | 31.0, 32.0, |
17084 | ); |
17085 | let b = _mm512_set_ph( |
17086 | 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0, |
17087 | -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0, |
17088 | -29.0, -30.0, -31.0, -32.0, |
17089 | ); |
17090 | let r = _mm512_cmp_round_ph_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b); |
17091 | assert_eq!(r, 0b11110000111100001111000011110000); |
17092 | } |
17093 | |
17094 | #[simd_test(enable = "avx512fp16" )] |
17095 | unsafe fn test_mm512_mask_cmp_round_ph_mask() { |
17096 | let a = _mm512_set_ph( |
17097 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17098 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17099 | 31.0, 32.0, |
17100 | ); |
17101 | let b = _mm512_set_ph( |
17102 | 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0, |
17103 | -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0, |
17104 | -29.0, -30.0, -31.0, -32.0, |
17105 | ); |
17106 | let r = _mm512_mask_cmp_round_ph_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>( |
17107 | 0b01010101010101010101010101010101, |
17108 | a, |
17109 | b, |
17110 | ); |
17111 | assert_eq!(r, 0b01010000010100000101000001010000); |
17112 | } |
17113 | |
17114 | #[simd_test(enable = "avx512fp16" )] |
17115 | unsafe fn test_mm_cmp_round_sh_mask() { |
17116 | let a = _mm_set_sh(1.0); |
17117 | let b = _mm_set_sh(1.0); |
17118 | let r = _mm_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b); |
17119 | assert_eq!(r, 1); |
17120 | } |
17121 | |
17122 | #[simd_test(enable = "avx512fp16" )] |
17123 | unsafe fn test_mm_mask_cmp_round_sh_mask() { |
17124 | let a = _mm_set_sh(1.0); |
17125 | let b = _mm_set_sh(1.0); |
17126 | let r = _mm_mask_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(0, a, b); |
17127 | assert_eq!(r, 0); |
17128 | } |
17129 | |
17130 | #[simd_test(enable = "avx512fp16" )] |
17131 | unsafe fn test_mm_cmp_sh_mask() { |
17132 | let a = _mm_set_sh(1.0); |
17133 | let b = _mm_set_sh(1.0); |
17134 | let r = _mm_cmp_sh_mask::<_CMP_EQ_OQ>(a, b); |
17135 | assert_eq!(r, 1); |
17136 | } |
17137 | |
17138 | #[simd_test(enable = "avx512fp16" )] |
17139 | unsafe fn test_mm_mask_cmp_sh_mask() { |
17140 | let a = _mm_set_sh(1.0); |
17141 | let b = _mm_set_sh(1.0); |
17142 | let r = _mm_mask_cmp_sh_mask::<_CMP_EQ_OQ>(0, a, b); |
17143 | assert_eq!(r, 0); |
17144 | } |
17145 | |
17146 | #[simd_test(enable = "avx512fp16" )] |
17147 | unsafe fn test_mm_comi_round_sh() { |
17148 | let a = _mm_set_sh(1.0); |
17149 | let b = _mm_set_sh(1.0); |
17150 | let r = _mm_comi_round_sh::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b); |
17151 | assert_eq!(r, 1); |
17152 | } |
17153 | |
17154 | #[simd_test(enable = "avx512fp16" )] |
17155 | unsafe fn test_mm_comi_sh() { |
17156 | let a = _mm_set_sh(1.0); |
17157 | let b = _mm_set_sh(1.0); |
17158 | let r = _mm_comi_sh::<_CMP_EQ_OQ>(a, b); |
17159 | assert_eq!(r, 1); |
17160 | } |
17161 | |
17162 | #[simd_test(enable = "avx512fp16" )] |
17163 | unsafe fn test_mm_comieq_sh() { |
17164 | let a = _mm_set_sh(1.0); |
17165 | let b = _mm_set_sh(1.0); |
17166 | let r = _mm_comieq_sh(a, b); |
17167 | assert_eq!(r, 1); |
17168 | } |
17169 | |
17170 | #[simd_test(enable = "avx512fp16" )] |
17171 | unsafe fn test_mm_comige_sh() { |
17172 | let a = _mm_set_sh(2.0); |
17173 | let b = _mm_set_sh(1.0); |
17174 | let r = _mm_comige_sh(a, b); |
17175 | assert_eq!(r, 1); |
17176 | } |
17177 | |
17178 | #[simd_test(enable = "avx512fp16" )] |
17179 | unsafe fn test_mm_comigt_sh() { |
17180 | let a = _mm_set_sh(2.0); |
17181 | let b = _mm_set_sh(1.0); |
17182 | let r = _mm_comigt_sh(a, b); |
17183 | assert_eq!(r, 1); |
17184 | } |
17185 | |
17186 | #[simd_test(enable = "avx512fp16" )] |
17187 | unsafe fn test_mm_comile_sh() { |
17188 | let a = _mm_set_sh(1.0); |
17189 | let b = _mm_set_sh(2.0); |
17190 | let r = _mm_comile_sh(a, b); |
17191 | assert_eq!(r, 1); |
17192 | } |
17193 | |
17194 | #[simd_test(enable = "avx512fp16" )] |
17195 | unsafe fn test_mm_comilt_sh() { |
17196 | let a = _mm_set_sh(1.0); |
17197 | let b = _mm_set_sh(2.0); |
17198 | let r = _mm_comilt_sh(a, b); |
17199 | assert_eq!(r, 1); |
17200 | } |
17201 | |
17202 | #[simd_test(enable = "avx512fp16" )] |
17203 | unsafe fn test_mm_comineq_sh() { |
17204 | let a = _mm_set_sh(1.0); |
17205 | let b = _mm_set_sh(2.0); |
17206 | let r = _mm_comineq_sh(a, b); |
17207 | assert_eq!(r, 1); |
17208 | } |
17209 | |
17210 | #[simd_test(enable = "avx512fp16" )] |
17211 | unsafe fn test_mm_ucomieq_sh() { |
17212 | let a = _mm_set_sh(1.0); |
17213 | let b = _mm_set_sh(1.0); |
17214 | let r = _mm_ucomieq_sh(a, b); |
17215 | assert_eq!(r, 1); |
17216 | } |
17217 | |
17218 | #[simd_test(enable = "avx512fp16" )] |
17219 | unsafe fn test_mm_ucomige_sh() { |
17220 | let a = _mm_set_sh(2.0); |
17221 | let b = _mm_set_sh(1.0); |
17222 | let r = _mm_ucomige_sh(a, b); |
17223 | assert_eq!(r, 1); |
17224 | } |
17225 | |
17226 | #[simd_test(enable = "avx512fp16" )] |
17227 | unsafe fn test_mm_ucomigt_sh() { |
17228 | let a = _mm_set_sh(2.0); |
17229 | let b = _mm_set_sh(1.0); |
17230 | let r = _mm_ucomigt_sh(a, b); |
17231 | assert_eq!(r, 1); |
17232 | } |
17233 | |
17234 | #[simd_test(enable = "avx512fp16" )] |
17235 | unsafe fn test_mm_ucomile_sh() { |
17236 | let a = _mm_set_sh(1.0); |
17237 | let b = _mm_set_sh(2.0); |
17238 | let r = _mm_ucomile_sh(a, b); |
17239 | assert_eq!(r, 1); |
17240 | } |
17241 | |
17242 | #[simd_test(enable = "avx512fp16" )] |
17243 | unsafe fn test_mm_ucomilt_sh() { |
17244 | let a = _mm_set_sh(1.0); |
17245 | let b = _mm_set_sh(2.0); |
17246 | let r = _mm_ucomilt_sh(a, b); |
17247 | assert_eq!(r, 1); |
17248 | } |
17249 | |
17250 | #[simd_test(enable = "avx512fp16" )] |
17251 | unsafe fn test_mm_ucomineq_sh() { |
17252 | let a = _mm_set_sh(1.0); |
17253 | let b = _mm_set_sh(2.0); |
17254 | let r = _mm_ucomineq_sh(a, b); |
17255 | assert_eq!(r, 1); |
17256 | } |
17257 | |
17258 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17259 | unsafe fn test_mm_load_ph() { |
17260 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17261 | let b = _mm_load_ph(addr_of!(a).cast()); |
17262 | assert_eq_m128h(a, b); |
17263 | } |
17264 | |
17265 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17266 | unsafe fn test_mm256_load_ph() { |
17267 | let a = _mm256_set_ph( |
17268 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17269 | ); |
17270 | let b = _mm256_load_ph(addr_of!(a).cast()); |
17271 | assert_eq_m256h(a, b); |
17272 | } |
17273 | |
17274 | #[simd_test(enable = "avx512fp16" )] |
17275 | unsafe fn test_mm512_load_ph() { |
17276 | let a = _mm512_set_ph( |
17277 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17278 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17279 | 31.0, 32.0, |
17280 | ); |
17281 | let b = _mm512_load_ph(addr_of!(a).cast()); |
17282 | assert_eq_m512h(a, b); |
17283 | } |
17284 | |
17285 | #[simd_test(enable = "avx512fp16" )] |
17286 | unsafe fn test_mm_load_sh() { |
17287 | let a = _mm_set_sh(1.0); |
17288 | let b = _mm_load_sh(addr_of!(a).cast()); |
17289 | assert_eq_m128h(a, b); |
17290 | } |
17291 | |
17292 | #[simd_test(enable = "avx512fp16" )] |
17293 | unsafe fn test_mm_mask_load_sh() { |
17294 | let a = _mm_set_sh(1.0); |
17295 | let src = _mm_set_sh(2.); |
17296 | let b = _mm_mask_load_sh(src, 1, addr_of!(a).cast()); |
17297 | assert_eq_m128h(a, b); |
17298 | let b = _mm_mask_load_sh(src, 0, addr_of!(a).cast()); |
17299 | assert_eq_m128h(src, b); |
17300 | } |
17301 | |
17302 | #[simd_test(enable = "avx512fp16" )] |
17303 | unsafe fn test_mm_maskz_load_sh() { |
17304 | let a = _mm_set_sh(1.0); |
17305 | let b = _mm_maskz_load_sh(1, addr_of!(a).cast()); |
17306 | assert_eq_m128h(a, b); |
17307 | let b = _mm_maskz_load_sh(0, addr_of!(a).cast()); |
17308 | assert_eq_m128h(_mm_setzero_ph(), b); |
17309 | } |
17310 | |
17311 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17312 | unsafe fn test_mm_loadu_ph() { |
17313 | let array = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]; |
17314 | let r = _mm_loadu_ph(array.as_ptr()); |
17315 | let e = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17316 | assert_eq_m128h(r, e); |
17317 | } |
17318 | |
17319 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17320 | unsafe fn test_mm256_loadu_ph() { |
17321 | let array = [ |
17322 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17323 | ]; |
17324 | let r = _mm256_loadu_ph(array.as_ptr()); |
17325 | let e = _mm256_setr_ph( |
17326 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17327 | ); |
17328 | assert_eq_m256h(r, e); |
17329 | } |
17330 | |
17331 | #[simd_test(enable = "avx512fp16" )] |
17332 | unsafe fn test_mm512_loadu_ph() { |
17333 | let array = [ |
17334 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17335 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17336 | 31.0, 32.0, |
17337 | ]; |
17338 | let r = _mm512_loadu_ph(array.as_ptr()); |
17339 | let e = _mm512_setr_ph( |
17340 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17341 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17342 | 31.0, 32.0, |
17343 | ); |
17344 | assert_eq_m512h(r, e); |
17345 | } |
17346 | |
17347 | #[simd_test(enable = "avx512fp16" )] |
17348 | unsafe fn test_mm_move_sh() { |
17349 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17350 | let b = _mm_set_sh(9.0); |
17351 | let r = _mm_move_sh(a, b); |
17352 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0); |
17353 | assert_eq_m128h(r, e); |
17354 | } |
17355 | |
17356 | #[simd_test(enable = "avx512fp16" )] |
17357 | unsafe fn test_mm_mask_move_sh() { |
17358 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17359 | let b = _mm_set_sh(9.0); |
17360 | let src = _mm_set_sh(10.0); |
17361 | let r = _mm_mask_move_sh(src, 0, a, b); |
17362 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 10.0); |
17363 | assert_eq_m128h(r, e); |
17364 | } |
17365 | |
17366 | #[simd_test(enable = "avx512fp16" )] |
17367 | unsafe fn test_mm_maskz_move_sh() { |
17368 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17369 | let b = _mm_set_sh(9.0); |
17370 | let r = _mm_maskz_move_sh(0, a, b); |
17371 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 0.0); |
17372 | assert_eq_m128h(r, e); |
17373 | } |
17374 | |
17375 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17376 | unsafe fn test_mm_store_ph() { |
17377 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17378 | let mut b = _mm_setzero_ph(); |
17379 | _mm_store_ph(addr_of_mut!(b).cast(), a); |
17380 | assert_eq_m128h(a, b); |
17381 | } |
17382 | |
17383 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17384 | unsafe fn test_mm256_store_ph() { |
17385 | let a = _mm256_set_ph( |
17386 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17387 | ); |
17388 | let mut b = _mm256_setzero_ph(); |
17389 | _mm256_store_ph(addr_of_mut!(b).cast(), a); |
17390 | assert_eq_m256h(a, b); |
17391 | } |
17392 | |
17393 | #[simd_test(enable = "avx512fp16" )] |
17394 | unsafe fn test_mm512_store_ph() { |
17395 | let a = _mm512_set_ph( |
17396 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17397 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17398 | 31.0, 32.0, |
17399 | ); |
17400 | let mut b = _mm512_setzero_ph(); |
17401 | _mm512_store_ph(addr_of_mut!(b).cast(), a); |
17402 | assert_eq_m512h(a, b); |
17403 | } |
17404 | |
17405 | #[simd_test(enable = "avx512fp16" )] |
17406 | unsafe fn test_mm_store_sh() { |
17407 | let a = _mm_set_sh(1.0); |
17408 | let mut b = _mm_setzero_ph(); |
17409 | _mm_store_sh(addr_of_mut!(b).cast(), a); |
17410 | assert_eq_m128h(a, b); |
17411 | } |
17412 | |
17413 | #[simd_test(enable = "avx512fp16" )] |
17414 | unsafe fn test_mm_mask_store_sh() { |
17415 | let a = _mm_set_sh(1.0); |
17416 | let mut b = _mm_setzero_ph(); |
17417 | _mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a); |
17418 | assert_eq_m128h(_mm_setzero_ph(), b); |
17419 | _mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a); |
17420 | assert_eq_m128h(a, b); |
17421 | } |
17422 | |
17423 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17424 | unsafe fn test_mm_storeu_ph() { |
17425 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17426 | let mut array = [0.0; 8]; |
17427 | _mm_storeu_ph(array.as_mut_ptr(), a); |
17428 | assert_eq_m128h(a, _mm_loadu_ph(array.as_ptr())); |
17429 | } |
17430 | |
17431 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17432 | unsafe fn test_mm256_storeu_ph() { |
17433 | let a = _mm256_set_ph( |
17434 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17435 | ); |
17436 | let mut array = [0.0; 16]; |
17437 | _mm256_storeu_ph(array.as_mut_ptr(), a); |
17438 | assert_eq_m256h(a, _mm256_loadu_ph(array.as_ptr())); |
17439 | } |
17440 | |
17441 | #[simd_test(enable = "avx512fp16" )] |
17442 | unsafe fn test_mm512_storeu_ph() { |
17443 | let a = _mm512_set_ph( |
17444 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17445 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17446 | 31.0, 32.0, |
17447 | ); |
17448 | let mut array = [0.0; 32]; |
17449 | _mm512_storeu_ph(array.as_mut_ptr(), a); |
17450 | assert_eq_m512h(a, _mm512_loadu_ph(array.as_ptr())); |
17451 | } |
17452 | |
17453 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17454 | unsafe fn test_mm_add_ph() { |
17455 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17456 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
17457 | let r = _mm_add_ph(a, b); |
17458 | let e = _mm_set1_ph(9.0); |
17459 | assert_eq_m128h(r, e); |
17460 | } |
17461 | |
17462 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17463 | unsafe fn test_mm_mask_add_ph() { |
17464 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17465 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
17466 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
17467 | let r = _mm_mask_add_ph(src, 0b01010101, a, b); |
17468 | let e = _mm_set_ph(10., 9., 12., 9., 14., 9., 16., 9.); |
17469 | assert_eq_m128h(r, e); |
17470 | } |
17471 | |
17472 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17473 | unsafe fn test_mm_maskz_add_ph() { |
17474 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17475 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
17476 | let r = _mm_maskz_add_ph(0b01010101, a, b); |
17477 | let e = _mm_set_ph(0., 9., 0., 9., 0., 9., 0., 9.); |
17478 | assert_eq_m128h(r, e); |
17479 | } |
17480 | |
17481 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17482 | unsafe fn test_mm256_add_ph() { |
17483 | let a = _mm256_set_ph( |
17484 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17485 | ); |
17486 | let b = _mm256_set_ph( |
17487 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
17488 | ); |
17489 | let r = _mm256_add_ph(a, b); |
17490 | let e = _mm256_set1_ph(17.0); |
17491 | assert_eq_m256h(r, e); |
17492 | } |
17493 | |
17494 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17495 | unsafe fn test_mm256_mask_add_ph() { |
17496 | let a = _mm256_set_ph( |
17497 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17498 | ); |
17499 | let b = _mm256_set_ph( |
17500 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
17501 | ); |
17502 | let src = _mm256_set_ph( |
17503 | 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33., |
17504 | ); |
17505 | let r = _mm256_mask_add_ph(src, 0b0101010101010101, a, b); |
17506 | let e = _mm256_set_ph( |
17507 | 18., 17., 20., 17., 22., 17., 24., 17., 26., 17., 28., 17., 30., 17., 32., 17., |
17508 | ); |
17509 | assert_eq_m256h(r, e); |
17510 | } |
17511 | |
17512 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17513 | unsafe fn test_mm256_maskz_add_ph() { |
17514 | let a = _mm256_set_ph( |
17515 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17516 | ); |
17517 | let b = _mm256_set_ph( |
17518 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
17519 | ); |
17520 | let r = _mm256_maskz_add_ph(0b0101010101010101, a, b); |
17521 | let e = _mm256_set_ph( |
17522 | 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., |
17523 | ); |
17524 | assert_eq_m256h(r, e); |
17525 | } |
17526 | |
17527 | #[simd_test(enable = "avx512fp16" )] |
17528 | unsafe fn test_mm512_add_ph() { |
17529 | let a = _mm512_set_ph( |
17530 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17531 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17532 | 31.0, 32.0, |
17533 | ); |
17534 | let b = _mm512_set_ph( |
17535 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17536 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17537 | 3.0, 2.0, 1.0, |
17538 | ); |
17539 | let r = _mm512_add_ph(a, b); |
17540 | let e = _mm512_set1_ph(33.0); |
17541 | assert_eq_m512h(r, e); |
17542 | } |
17543 | |
17544 | #[simd_test(enable = "avx512fp16" )] |
17545 | unsafe fn test_mm512_mask_add_ph() { |
17546 | let a = _mm512_set_ph( |
17547 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17548 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17549 | 31.0, 32.0, |
17550 | ); |
17551 | let b = _mm512_set_ph( |
17552 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17553 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17554 | 3.0, 2.0, 1.0, |
17555 | ); |
17556 | let src = _mm512_set_ph( |
17557 | 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., |
17558 | 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., |
17559 | ); |
17560 | let r = _mm512_mask_add_ph(src, 0b01010101010101010101010101010101, a, b); |
17561 | let e = _mm512_set_ph( |
17562 | 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50., |
17563 | 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33., |
17564 | ); |
17565 | assert_eq_m512h(r, e); |
17566 | } |
17567 | |
17568 | #[simd_test(enable = "avx512fp16" )] |
17569 | unsafe fn test_mm512_maskz_add_ph() { |
17570 | let a = _mm512_set_ph( |
17571 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17572 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17573 | 31.0, 32.0, |
17574 | ); |
17575 | let b = _mm512_set_ph( |
17576 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17577 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17578 | 3.0, 2.0, 1.0, |
17579 | ); |
17580 | let r = _mm512_maskz_add_ph(0b01010101010101010101010101010101, a, b); |
17581 | let e = _mm512_set_ph( |
17582 | 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., |
17583 | 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., |
17584 | ); |
17585 | assert_eq_m512h(r, e); |
17586 | } |
17587 | |
17588 | #[simd_test(enable = "avx512fp16" )] |
17589 | unsafe fn test_mm512_add_round_ph() { |
17590 | let a = _mm512_set_ph( |
17591 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17592 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17593 | 31.0, 32.0, |
17594 | ); |
17595 | let b = _mm512_set_ph( |
17596 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17597 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17598 | 3.0, 2.0, 1.0, |
17599 | ); |
17600 | let r = _mm512_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
17601 | let e = _mm512_set1_ph(33.0); |
17602 | assert_eq_m512h(r, e); |
17603 | } |
17604 | |
17605 | #[simd_test(enable = "avx512fp16" )] |
17606 | unsafe fn test_mm512_mask_add_round_ph() { |
17607 | let a = _mm512_set_ph( |
17608 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17609 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17610 | 31.0, 32.0, |
17611 | ); |
17612 | let b = _mm512_set_ph( |
17613 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17614 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17615 | 3.0, 2.0, 1.0, |
17616 | ); |
17617 | let src = _mm512_set_ph( |
17618 | 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., |
17619 | 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., |
17620 | ); |
17621 | let r = _mm512_mask_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
17622 | src, |
17623 | 0b01010101010101010101010101010101, |
17624 | a, |
17625 | b, |
17626 | ); |
17627 | let e = _mm512_set_ph( |
17628 | 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50., |
17629 | 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33., |
17630 | ); |
17631 | assert_eq_m512h(r, e); |
17632 | } |
17633 | |
17634 | #[simd_test(enable = "avx512fp16" )] |
17635 | unsafe fn test_mm512_maskz_add_round_ph() { |
17636 | let a = _mm512_set_ph( |
17637 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17638 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17639 | 31.0, 32.0, |
17640 | ); |
17641 | let b = _mm512_set_ph( |
17642 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17643 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17644 | 3.0, 2.0, 1.0, |
17645 | ); |
17646 | let r = _mm512_maskz_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
17647 | 0b01010101010101010101010101010101, |
17648 | a, |
17649 | b, |
17650 | ); |
17651 | let e = _mm512_set_ph( |
17652 | 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., |
17653 | 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., |
17654 | ); |
17655 | assert_eq_m512h(r, e); |
17656 | } |
17657 | |
17658 | #[simd_test(enable = "avx512fp16" )] |
17659 | unsafe fn test_mm_add_round_sh() { |
17660 | let a = _mm_set_sh(1.0); |
17661 | let b = _mm_set_sh(2.0); |
17662 | let r = _mm_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
17663 | let e = _mm_set_sh(3.0); |
17664 | assert_eq_m128h(r, e); |
17665 | } |
17666 | |
17667 | #[simd_test(enable = "avx512fp16" )] |
17668 | unsafe fn test_mm_mask_add_round_sh() { |
17669 | let a = _mm_set_sh(1.0); |
17670 | let b = _mm_set_sh(2.0); |
17671 | let src = _mm_set_sh(4.0); |
17672 | let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
17673 | src, 0, a, b, |
17674 | ); |
17675 | let e = _mm_set_sh(4.0); |
17676 | assert_eq_m128h(r, e); |
17677 | let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
17678 | src, 1, a, b, |
17679 | ); |
17680 | let e = _mm_set_sh(3.0); |
17681 | assert_eq_m128h(r, e); |
17682 | } |
17683 | |
17684 | #[simd_test(enable = "avx512fp16" )] |
17685 | unsafe fn test_mm_maskz_add_round_sh() { |
17686 | let a = _mm_set_sh(1.0); |
17687 | let b = _mm_set_sh(2.0); |
17688 | let r = |
17689 | _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
17690 | let e = _mm_set_sh(0.0); |
17691 | assert_eq_m128h(r, e); |
17692 | let r = |
17693 | _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
17694 | let e = _mm_set_sh(3.0); |
17695 | assert_eq_m128h(r, e); |
17696 | } |
17697 | |
17698 | #[simd_test(enable = "avx512fp16" )] |
17699 | unsafe fn test_mm_add_sh() { |
17700 | let a = _mm_set_sh(1.0); |
17701 | let b = _mm_set_sh(2.0); |
17702 | let r = _mm_add_sh(a, b); |
17703 | let e = _mm_set_sh(3.0); |
17704 | assert_eq_m128h(r, e); |
17705 | } |
17706 | |
17707 | #[simd_test(enable = "avx512fp16" )] |
17708 | unsafe fn test_mm_mask_add_sh() { |
17709 | let a = _mm_set_sh(1.0); |
17710 | let b = _mm_set_sh(2.0); |
17711 | let src = _mm_set_sh(4.0); |
17712 | let r = _mm_mask_add_sh(src, 0, a, b); |
17713 | let e = _mm_set_sh(4.0); |
17714 | assert_eq_m128h(r, e); |
17715 | let r = _mm_mask_add_sh(src, 1, a, b); |
17716 | let e = _mm_set_sh(3.0); |
17717 | assert_eq_m128h(r, e); |
17718 | } |
17719 | |
17720 | #[simd_test(enable = "avx512fp16" )] |
17721 | unsafe fn test_mm_maskz_add_sh() { |
17722 | let a = _mm_set_sh(1.0); |
17723 | let b = _mm_set_sh(2.0); |
17724 | let r = _mm_maskz_add_sh(0, a, b); |
17725 | let e = _mm_set_sh(0.0); |
17726 | assert_eq_m128h(r, e); |
17727 | let r = _mm_maskz_add_sh(1, a, b); |
17728 | let e = _mm_set_sh(3.0); |
17729 | assert_eq_m128h(r, e); |
17730 | } |
17731 | |
17732 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17733 | unsafe fn test_mm_sub_ph() { |
17734 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17735 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
17736 | let r = _mm_sub_ph(a, b); |
17737 | let e = _mm_set_ph(-7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0); |
17738 | assert_eq_m128h(r, e); |
17739 | } |
17740 | |
17741 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17742 | unsafe fn test_mm_mask_sub_ph() { |
17743 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17744 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
17745 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
17746 | let r = _mm_mask_sub_ph(src, 0b01010101, a, b); |
17747 | let e = _mm_set_ph(10., -5., 12., -1., 14., 3., 16., 7.); |
17748 | assert_eq_m128h(r, e); |
17749 | } |
17750 | |
17751 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17752 | unsafe fn test_mm_maskz_sub_ph() { |
17753 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17754 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
17755 | let r = _mm_maskz_sub_ph(0b01010101, a, b); |
17756 | let e = _mm_set_ph(0., -5., 0., -1., 0., 3., 0., 7.); |
17757 | assert_eq_m128h(r, e); |
17758 | } |
17759 | |
17760 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17761 | unsafe fn test_mm256_sub_ph() { |
17762 | let a = _mm256_set_ph( |
17763 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17764 | ); |
17765 | let b = _mm256_set_ph( |
17766 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
17767 | ); |
17768 | let r = _mm256_sub_ph(a, b); |
17769 | let e = _mm256_set_ph( |
17770 | -15.0, -13.0, -11.0, -9.0, -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, |
17771 | 15.0, |
17772 | ); |
17773 | assert_eq_m256h(r, e); |
17774 | } |
17775 | |
17776 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17777 | unsafe fn test_mm256_mask_sub_ph() { |
17778 | let a = _mm256_set_ph( |
17779 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17780 | ); |
17781 | let b = _mm256_set_ph( |
17782 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
17783 | ); |
17784 | let src = _mm256_set_ph( |
17785 | 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33., |
17786 | ); |
17787 | let r = _mm256_mask_sub_ph(src, 0b0101010101010101, a, b); |
17788 | let e = _mm256_set_ph( |
17789 | 18., -13., 20., -9., 22., -5., 24., -1., 26., 3., 28., 7., 30., 11., 32., 15., |
17790 | ); |
17791 | assert_eq_m256h(r, e); |
17792 | } |
17793 | |
17794 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17795 | unsafe fn test_mm256_maskz_sub_ph() { |
17796 | let a = _mm256_set_ph( |
17797 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17798 | ); |
17799 | let b = _mm256_set_ph( |
17800 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
17801 | ); |
17802 | let r = _mm256_maskz_sub_ph(0b0101010101010101, a, b); |
17803 | let e = _mm256_set_ph( |
17804 | 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., 0., 7., 0., 11., 0., 15., |
17805 | ); |
17806 | assert_eq_m256h(r, e); |
17807 | } |
17808 | |
17809 | #[simd_test(enable = "avx512fp16" )] |
17810 | unsafe fn test_mm512_sub_ph() { |
17811 | let a = _mm512_set_ph( |
17812 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17813 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17814 | 31.0, 32.0, |
17815 | ); |
17816 | let b = _mm512_set_ph( |
17817 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17818 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17819 | 3.0, 2.0, 1.0, |
17820 | ); |
17821 | let r = _mm512_sub_ph(a, b); |
17822 | let e = _mm512_set_ph( |
17823 | -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0, |
17824 | -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, |
17825 | 23.0, 25.0, 27.0, 29.0, 31.0, |
17826 | ); |
17827 | assert_eq_m512h(r, e); |
17828 | } |
17829 | |
17830 | #[simd_test(enable = "avx512fp16" )] |
17831 | unsafe fn test_mm512_mask_sub_ph() { |
17832 | let a = _mm512_set_ph( |
17833 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17834 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17835 | 31.0, 32.0, |
17836 | ); |
17837 | let b = _mm512_set_ph( |
17838 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17839 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17840 | 3.0, 2.0, 1.0, |
17841 | ); |
17842 | let src = _mm512_set_ph( |
17843 | 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., |
17844 | 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., |
17845 | ); |
17846 | let r = _mm512_mask_sub_ph(src, 0b01010101010101010101010101010101, a, b); |
17847 | let e = _mm512_set_ph( |
17848 | 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1., |
17849 | 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31., |
17850 | ); |
17851 | assert_eq_m512h(r, e); |
17852 | } |
17853 | |
17854 | #[simd_test(enable = "avx512fp16" )] |
17855 | unsafe fn test_mm512_maskz_sub_ph() { |
17856 | let a = _mm512_set_ph( |
17857 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17858 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17859 | 31.0, 32.0, |
17860 | ); |
17861 | let b = _mm512_set_ph( |
17862 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17863 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17864 | 3.0, 2.0, 1.0, |
17865 | ); |
17866 | let r = _mm512_maskz_sub_ph(0b01010101010101010101010101010101, a, b); |
17867 | let e = _mm512_set_ph( |
17868 | 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., |
17869 | 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31., |
17870 | ); |
17871 | assert_eq_m512h(r, e); |
17872 | } |
17873 | |
17874 | #[simd_test(enable = "avx512fp16" )] |
17875 | unsafe fn test_mm512_sub_round_ph() { |
17876 | let a = _mm512_set_ph( |
17877 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17878 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17879 | 31.0, 32.0, |
17880 | ); |
17881 | let b = _mm512_set_ph( |
17882 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17883 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17884 | 3.0, 2.0, 1.0, |
17885 | ); |
17886 | let r = _mm512_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
17887 | let e = _mm512_set_ph( |
17888 | -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0, |
17889 | -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, |
17890 | 23.0, 25.0, 27.0, 29.0, 31.0, |
17891 | ); |
17892 | assert_eq_m512h(r, e); |
17893 | } |
17894 | |
17895 | #[simd_test(enable = "avx512fp16" )] |
17896 | unsafe fn test_mm512_mask_sub_round_ph() { |
17897 | let a = _mm512_set_ph( |
17898 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17899 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17900 | 31.0, 32.0, |
17901 | ); |
17902 | let b = _mm512_set_ph( |
17903 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17904 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17905 | 3.0, 2.0, 1.0, |
17906 | ); |
17907 | let src = _mm512_set_ph( |
17908 | 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., |
17909 | 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., |
17910 | ); |
17911 | let r = _mm512_mask_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
17912 | src, |
17913 | 0b01010101010101010101010101010101, |
17914 | a, |
17915 | b, |
17916 | ); |
17917 | let e = _mm512_set_ph( |
17918 | 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1., |
17919 | 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31., |
17920 | ); |
17921 | assert_eq_m512h(r, e); |
17922 | } |
17923 | |
17924 | #[simd_test(enable = "avx512fp16" )] |
17925 | unsafe fn test_mm512_maskz_sub_round_ph() { |
17926 | let a = _mm512_set_ph( |
17927 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17928 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17929 | 31.0, 32.0, |
17930 | ); |
17931 | let b = _mm512_set_ph( |
17932 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17933 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17934 | 3.0, 2.0, 1.0, |
17935 | ); |
17936 | let r = _mm512_maskz_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
17937 | 0b01010101010101010101010101010101, |
17938 | a, |
17939 | b, |
17940 | ); |
17941 | let e = _mm512_set_ph( |
17942 | 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., |
17943 | 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31., |
17944 | ); |
17945 | assert_eq_m512h(r, e); |
17946 | } |
17947 | |
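// Scalar (`_sh`) tests operate on element 0 only; `_mm_set_sh` zeroes the upper seven
// lanes, so each expected vector is fully determined by its low element.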
17948 | #[simd_test(enable = "avx512fp16" )] |
17949 | unsafe fn test_mm_sub_round_sh() { |
17950 | let a = _mm_set_sh(1.0); |
17951 | let b = _mm_set_sh(2.0); |
17952 | let r = _mm_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
17953 | let e = _mm_set_sh(-1.0); |
17954 | assert_eq_m128h(r, e); |
17955 | } |
17956 | |
17957 | #[simd_test(enable = "avx512fp16" )] |
17958 | unsafe fn test_mm_mask_sub_round_sh() { |
17959 | let a = _mm_set_sh(1.0); |
17960 | let b = _mm_set_sh(2.0); |
17961 | let src = _mm_set_sh(4.0); |
17962 | let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
17963 | src, 0, a, b, |
17964 | ); |
17965 | let e = _mm_set_sh(4.0); |
17966 | assert_eq_m128h(r, e); |
17967 | let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
17968 | src, 1, a, b, |
17969 | ); |
17970 | let e = _mm_set_sh(-1.0); |
17971 | assert_eq_m128h(r, e); |
17972 | } |
17973 | |
17974 | #[simd_test(enable = "avx512fp16" )] |
17975 | unsafe fn test_mm_maskz_sub_round_sh() { |
17976 | let a = _mm_set_sh(1.0); |
17977 | let b = _mm_set_sh(2.0); |
17978 | let r = |
17979 | _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
17980 | let e = _mm_set_sh(0.0); |
17981 | assert_eq_m128h(r, e); |
17982 | let r = |
17983 | _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
17984 | let e = _mm_set_sh(-1.0); |
17985 | assert_eq_m128h(r, e); |
17986 | } |
17987 | |
17988 | #[simd_test(enable = "avx512fp16" )] |
17989 | unsafe fn test_mm_sub_sh() { |
17990 | let a = _mm_set_sh(1.0); |
17991 | let b = _mm_set_sh(2.0); |
17992 | let r = _mm_sub_sh(a, b); |
17993 | let e = _mm_set_sh(-1.0); |
17994 | assert_eq_m128h(r, e); |
17995 | } |
17996 | |
17997 | #[simd_test(enable = "avx512fp16" )] |
17998 | unsafe fn test_mm_mask_sub_sh() { |
17999 | let a = _mm_set_sh(1.0); |
18000 | let b = _mm_set_sh(2.0); |
18001 | let src = _mm_set_sh(4.0); |
18002 | let r = _mm_mask_sub_sh(src, 0, a, b); |
18003 | let e = _mm_set_sh(4.0); |
18004 | assert_eq_m128h(r, e); |
18005 | let r = _mm_mask_sub_sh(src, 1, a, b); |
18006 | let e = _mm_set_sh(-1.0); |
18007 | assert_eq_m128h(r, e); |
18008 | } |
18009 | |
18010 | #[simd_test(enable = "avx512fp16" )] |
18011 | unsafe fn test_mm_maskz_sub_sh() { |
18012 | let a = _mm_set_sh(1.0); |
18013 | let b = _mm_set_sh(2.0); |
18014 | let r = _mm_maskz_sub_sh(0, a, b); |
18015 | let e = _mm_set_sh(0.0); |
18016 | assert_eq_m128h(r, e); |
18017 | let r = _mm_maskz_sub_sh(1, a, b); |
18018 | let e = _mm_set_sh(-1.0); |
18019 | assert_eq_m128h(r, e); |
18020 | } |
18021 | |
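// The multiplication tests reuse the addition/subtraction layout: `a` counts up and
// `b` counts down, so element i of the full product is (i + 1) * (width - i), where
// width is the number of f16 lanes in the vector.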
18022 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18023 | unsafe fn test_mm_mul_ph() { |
18024 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
18025 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
18026 | let r = _mm_mul_ph(a, b); |
18027 | let e = _mm_set_ph(8.0, 14.0, 18.0, 20.0, 20.0, 18.0, 14.0, 8.0); |
18028 | assert_eq_m128h(r, e); |
18029 | } |
18030 | |
18031 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18032 | unsafe fn test_mm_mask_mul_ph() { |
18033 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
18034 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
18035 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
18036 | let r = _mm_mask_mul_ph(src, 0b01010101, a, b); |
18037 | let e = _mm_set_ph(10., 14., 12., 20., 14., 18., 16., 8.); |
18038 | assert_eq_m128h(r, e); |
18039 | } |
18040 | |
18041 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18042 | unsafe fn test_mm_maskz_mul_ph() { |
18043 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
18044 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
18045 | let r = _mm_maskz_mul_ph(0b01010101, a, b); |
18046 | let e = _mm_set_ph(0., 14., 0., 20., 0., 18., 0., 8.); |
18047 | assert_eq_m128h(r, e); |
18048 | } |
18049 | |
18050 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18051 | unsafe fn test_mm256_mul_ph() { |
18052 | let a = _mm256_set_ph( |
18053 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18054 | ); |
18055 | let b = _mm256_set_ph( |
18056 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
18057 | ); |
18058 | let r = _mm256_mul_ph(a, b); |
18059 | let e = _mm256_set_ph( |
18060 | 16.0, 30.0, 42.0, 52.0, 60.0, 66.0, 70.0, 72.0, 72.0, 70.0, 66.0, 60.0, 52.0, 42.0, |
18061 | 30.0, 16.0, |
18062 | ); |
18063 | assert_eq_m256h(r, e); |
18064 | } |
18065 | |
18066 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18067 | unsafe fn test_mm256_mask_mul_ph() { |
18068 | let a = _mm256_set_ph( |
18069 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18070 | ); |
18071 | let b = _mm256_set_ph( |
18072 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
18073 | ); |
18074 | let src = _mm256_set_ph( |
18075 | 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33., |
18076 | ); |
18077 | let r = _mm256_mask_mul_ph(src, 0b0101010101010101, a, b); |
18078 | let e = _mm256_set_ph( |
18079 | 18., 30., 20., 52., 22., 66., 24., 72., 26., 70., 28., 60., 30., 42., 32., 16., |
18080 | ); |
18081 | assert_eq_m256h(r, e); |
18082 | } |
18083 | |
18084 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18085 | unsafe fn test_mm256_maskz_mul_ph() { |
18086 | let a = _mm256_set_ph( |
18087 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18088 | ); |
18089 | let b = _mm256_set_ph( |
18090 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
18091 | ); |
18092 | let r = _mm256_maskz_mul_ph(0b0101010101010101, a, b); |
18093 | let e = _mm256_set_ph( |
18094 | 0., 30., 0., 52., 0., 66., 0., 72., 0., 70., 0., 60., 0., 42., 0., 16., |
18095 | ); |
18096 | assert_eq_m256h(r, e); |
18097 | } |
18098 | |
18099 | #[simd_test(enable = "avx512fp16" )] |
18100 | unsafe fn test_mm512_mul_ph() { |
18101 | let a = _mm512_set_ph( |
18102 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18103 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
18104 | 31.0, 32.0, |
18105 | ); |
18106 | let b = _mm512_set_ph( |
18107 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
18108 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
18109 | 3.0, 2.0, 1.0, |
18110 | ); |
18111 | let r = _mm512_mul_ph(a, b); |
18112 | let e = _mm512_set_ph( |
18113 | 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0, |
18114 | 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0, |
18115 | 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0, |
18116 | ); |
18117 | assert_eq_m512h(r, e); |
18118 | } |
18119 | |
18120 | #[simd_test(enable = "avx512fp16" )] |
18121 | unsafe fn test_mm512_mask_mul_ph() { |
18122 | let a = _mm512_set_ph( |
18123 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18124 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
18125 | 31.0, 32.0, |
18126 | ); |
18127 | let b = _mm512_set_ph( |
18128 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
18129 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
18130 | 3.0, 2.0, 1.0, |
18131 | ); |
18132 | let src = _mm512_set_ph( |
18133 | 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., |
18134 | 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., |
18135 | ); |
18136 | let r = _mm512_mask_mul_ph(src, 0b01010101010101010101010101010101, a, b); |
18137 | let e = _mm512_set_ph( |
18138 | 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272., |
18139 | 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32., |
18140 | ); |
18141 | assert_eq_m512h(r, e); |
18142 | } |
18143 | |
18144 | #[simd_test(enable = "avx512fp16" )] |
18145 | unsafe fn test_mm512_maskz_mul_ph() { |
18146 | let a = _mm512_set_ph( |
18147 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18148 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
18149 | 31.0, 32.0, |
18150 | ); |
18151 | let b = _mm512_set_ph( |
18152 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
18153 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
18154 | 3.0, 2.0, 1.0, |
18155 | ); |
18156 | let r = _mm512_maskz_mul_ph(0b01010101010101010101010101010101, a, b); |
18157 | let e = _mm512_set_ph( |
18158 | 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0., |
18159 | 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32., |
18160 | ); |
18161 | assert_eq_m512h(r, e); |
18162 | } |
18163 | |
18164 | #[simd_test(enable = "avx512fp16" )] |
18165 | unsafe fn test_mm512_mul_round_ph() { |
18166 | let a = _mm512_set_ph( |
18167 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18168 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
18169 | 31.0, 32.0, |
18170 | ); |
18171 | let b = _mm512_set_ph( |
18172 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
18173 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
18174 | 3.0, 2.0, 1.0, |
18175 | ); |
18176 | let r = _mm512_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
18177 | let e = _mm512_set_ph( |
18178 | 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0, |
18179 | 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0, |
18180 | 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0, |
18181 | ); |
18182 | assert_eq_m512h(r, e); |
18183 | } |
18184 | |
18185 | #[simd_test(enable = "avx512fp16" )] |
18186 | unsafe fn test_mm512_mask_mul_round_ph() { |
18187 | let a = _mm512_set_ph( |
18188 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18189 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
18190 | 31.0, 32.0, |
18191 | ); |
18192 | let b = _mm512_set_ph( |
18193 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
18194 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
18195 | 3.0, 2.0, 1.0, |
18196 | ); |
18197 | let src = _mm512_set_ph( |
18198 | 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., |
18199 | 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., |
18200 | ); |
18201 | let r = _mm512_mask_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18202 | src, |
18203 | 0b01010101010101010101010101010101, |
18204 | a, |
18205 | b, |
18206 | ); |
18207 | let e = _mm512_set_ph( |
18208 | 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272., |
18209 | 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32., |
18210 | ); |
18211 | assert_eq_m512h(r, e); |
18212 | } |
18213 | |
18214 | #[simd_test(enable = "avx512fp16" )] |
18215 | unsafe fn test_mm512_maskz_mul_round_ph() { |
18216 | let a = _mm512_set_ph( |
18217 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18218 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
18219 | 31.0, 32.0, |
18220 | ); |
18221 | let b = _mm512_set_ph( |
18222 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
18223 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
18224 | 3.0, 2.0, 1.0, |
18225 | ); |
18226 | let r = _mm512_maskz_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18227 | 0b01010101010101010101010101010101, |
18228 | a, |
18229 | b, |
18230 | ); |
18231 | let e = _mm512_set_ph( |
18232 | 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0., |
18233 | 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32., |
18234 | ); |
18235 | assert_eq_m512h(r, e); |
18236 | } |
18237 | |
18238 | #[simd_test(enable = "avx512fp16" )] |
18239 | unsafe fn test_mm_mul_round_sh() { |
18240 | let a = _mm_set_sh(1.0); |
18241 | let b = _mm_set_sh(2.0); |
18242 | let r = _mm_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
18243 | let e = _mm_set_sh(2.0); |
18244 | assert_eq_m128h(r, e); |
18245 | } |
18246 | |
18247 | #[simd_test(enable = "avx512fp16" )] |
18248 | unsafe fn test_mm_mask_mul_round_sh() { |
18249 | let a = _mm_set_sh(1.0); |
18250 | let b = _mm_set_sh(2.0); |
18251 | let src = _mm_set_sh(4.0); |
18252 | let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18253 | src, 0, a, b, |
18254 | ); |
18255 | let e = _mm_set_sh(4.0); |
18256 | assert_eq_m128h(r, e); |
18257 | let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18258 | src, 1, a, b, |
18259 | ); |
18260 | let e = _mm_set_sh(2.0); |
18261 | assert_eq_m128h(r, e); |
18262 | } |
18263 | |
18264 | #[simd_test(enable = "avx512fp16" )] |
18265 | unsafe fn test_mm_maskz_mul_round_sh() { |
18266 | let a = _mm_set_sh(1.0); |
18267 | let b = _mm_set_sh(2.0); |
18268 | let r = |
18269 | _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
18270 | let e = _mm_set_sh(0.0); |
18271 | assert_eq_m128h(r, e); |
18272 | let r = |
18273 | _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
18274 | let e = _mm_set_sh(2.0); |
18275 | assert_eq_m128h(r, e); |
18276 | } |
18277 | |
18278 | #[simd_test(enable = "avx512fp16" )] |
18279 | unsafe fn test_mm_mul_sh() { |
18280 | let a = _mm_set_sh(1.0); |
18281 | let b = _mm_set_sh(2.0); |
18282 | let r = _mm_mul_sh(a, b); |
18283 | let e = _mm_set_sh(2.0); |
18284 | assert_eq_m128h(r, e); |
18285 | } |
18286 | |
18287 | #[simd_test(enable = "avx512fp16" )] |
18288 | unsafe fn test_mm_mask_mul_sh() { |
18289 | let a = _mm_set_sh(1.0); |
18290 | let b = _mm_set_sh(2.0); |
18291 | let src = _mm_set_sh(4.0); |
18292 | let r = _mm_mask_mul_sh(src, 0, a, b); |
18293 | let e = _mm_set_sh(4.0); |
18294 | assert_eq_m128h(r, e); |
18295 | let r = _mm_mask_mul_sh(src, 1, a, b); |
18296 | let e = _mm_set_sh(2.0); |
18297 | assert_eq_m128h(r, e); |
18298 | } |
18299 | |
18300 | #[simd_test(enable = "avx512fp16" )] |
18301 | unsafe fn test_mm_maskz_mul_sh() { |
18302 | let a = _mm_set_sh(1.0); |
18303 | let b = _mm_set_sh(2.0); |
18304 | let r = _mm_maskz_mul_sh(0, a, b); |
18305 | let e = _mm_set_sh(0.0); |
18306 | assert_eq_m128h(r, e); |
18307 | let r = _mm_maskz_mul_sh(1, a, b); |
18308 | let e = _mm_set_sh(2.0); |
18309 | assert_eq_m128h(r, e); |
18310 | } |
18311 | |
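// The division tests broadcast 1.0 and 2.0, so every computed lane is exactly 0.5;
// as above, unselected lanes fall back to `src` (mask_) or zero (maskz_).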
18312 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18313 | unsafe fn test_mm_div_ph() { |
18314 | let a = _mm_set1_ph(1.0); |
18315 | let b = _mm_set1_ph(2.0); |
18316 | let r = _mm_div_ph(a, b); |
18317 | let e = _mm_set1_ph(0.5); |
18318 | assert_eq_m128h(r, e); |
18319 | } |
18320 | |
18321 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18322 | unsafe fn test_mm_mask_div_ph() { |
18323 | let a = _mm_set1_ph(1.0); |
18324 | let b = _mm_set1_ph(2.0); |
18325 | let src = _mm_set_ph(4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0); |
18326 | let r = _mm_mask_div_ph(src, 0b01010101, a, b); |
18327 | let e = _mm_set_ph(4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5); |
18328 | assert_eq_m128h(r, e); |
18329 | } |
18330 | |
18331 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18332 | unsafe fn test_mm_maskz_div_ph() { |
18333 | let a = _mm_set1_ph(1.0); |
18334 | let b = _mm_set1_ph(2.0); |
18335 | let r = _mm_maskz_div_ph(0b01010101, a, b); |
18336 | let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5); |
18337 | assert_eq_m128h(r, e); |
18338 | } |
18339 | |
18340 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18341 | unsafe fn test_mm256_div_ph() { |
18342 | let a = _mm256_set1_ph(1.0); |
18343 | let b = _mm256_set1_ph(2.0); |
18344 | let r = _mm256_div_ph(a, b); |
18345 | let e = _mm256_set1_ph(0.5); |
18346 | assert_eq_m256h(r, e); |
18347 | } |
18348 | |
18349 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18350 | unsafe fn test_mm256_mask_div_ph() { |
18351 | let a = _mm256_set1_ph(1.0); |
18352 | let b = _mm256_set1_ph(2.0); |
18353 | let src = _mm256_set_ph( |
18354 | 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, |
18355 | 19.0, |
18356 | ); |
18357 | let r = _mm256_mask_div_ph(src, 0b0101010101010101, a, b); |
18358 | let e = _mm256_set_ph( |
18359 | 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5, |
18360 | ); |
18361 | assert_eq_m256h(r, e); |
18362 | } |
18363 | |
18364 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18365 | unsafe fn test_mm256_maskz_div_ph() { |
18366 | let a = _mm256_set1_ph(1.0); |
18367 | let b = _mm256_set1_ph(2.0); |
18368 | let r = _mm256_maskz_div_ph(0b0101010101010101, a, b); |
18369 | let e = _mm256_set_ph( |
18370 | 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, |
18371 | ); |
18372 | assert_eq_m256h(r, e); |
18373 | } |
18374 | |
18375 | #[simd_test(enable = "avx512fp16" )] |
18376 | unsafe fn test_mm512_div_ph() { |
18377 | let a = _mm512_set1_ph(1.0); |
18378 | let b = _mm512_set1_ph(2.0); |
18379 | let r = _mm512_div_ph(a, b); |
18380 | let e = _mm512_set1_ph(0.5); |
18381 | assert_eq_m512h(r, e); |
18382 | } |
18383 | |
18384 | #[simd_test(enable = "avx512fp16" )] |
18385 | unsafe fn test_mm512_mask_div_ph() { |
18386 | let a = _mm512_set1_ph(1.0); |
18387 | let b = _mm512_set1_ph(2.0); |
18388 | let src = _mm512_set_ph( |
18389 | 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, |
18390 | 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, |
18391 | 33.0, 34.0, 35.0, |
18392 | ); |
18393 | let r = _mm512_mask_div_ph(src, 0b01010101010101010101010101010101, a, b); |
18394 | let e = _mm512_set_ph( |
18395 | 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5, |
18396 | 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5, |
18397 | ); |
18398 | assert_eq_m512h(r, e); |
18399 | } |
18400 | |
18401 | #[simd_test(enable = "avx512fp16" )] |
18402 | unsafe fn test_mm512_maskz_div_ph() { |
18403 | let a = _mm512_set1_ph(1.0); |
18404 | let b = _mm512_set1_ph(2.0); |
18405 | let r = _mm512_maskz_div_ph(0b01010101010101010101010101010101, a, b); |
18406 | let e = _mm512_set_ph( |
18407 | 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, |
18408 | 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, |
18409 | ); |
18410 | assert_eq_m512h(r, e); |
18411 | } |
18412 | |
18413 | #[simd_test(enable = "avx512fp16" )] |
18414 | unsafe fn test_mm512_div_round_ph() { |
18415 | let a = _mm512_set1_ph(1.0); |
18416 | let b = _mm512_set1_ph(2.0); |
18417 | let r = _mm512_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
18418 | let e = _mm512_set1_ph(0.5); |
18419 | assert_eq_m512h(r, e); |
18420 | } |
18421 | |
18422 | #[simd_test(enable = "avx512fp16" )] |
18423 | unsafe fn test_mm512_mask_div_round_ph() { |
18424 | let a = _mm512_set1_ph(1.0); |
18425 | let b = _mm512_set1_ph(2.0); |
18426 | let src = _mm512_set_ph( |
18427 | 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, |
18428 | 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, |
18429 | 33.0, 34.0, 35.0, |
18430 | ); |
18431 | let r = _mm512_mask_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18432 | src, |
18433 | 0b01010101010101010101010101010101, |
18434 | a, |
18435 | b, |
18436 | ); |
18437 | let e = _mm512_set_ph( |
18438 | 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5, |
18439 | 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5, |
18440 | ); |
18441 | assert_eq_m512h(r, e); |
18442 | } |
18443 | |
18444 | #[simd_test(enable = "avx512fp16" )] |
18445 | unsafe fn test_mm512_maskz_div_round_ph() { |
18446 | let a = _mm512_set1_ph(1.0); |
18447 | let b = _mm512_set1_ph(2.0); |
18448 | let r = _mm512_maskz_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18449 | 0b01010101010101010101010101010101, |
18450 | a, |
18451 | b, |
18452 | ); |
18453 | let e = _mm512_set_ph( |
18454 | 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, |
18455 | 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, |
18456 | ); |
18457 | assert_eq_m512h(r, e); |
18458 | } |
18459 | |
18460 | #[simd_test(enable = "avx512fp16" )] |
18461 | unsafe fn test_mm_div_round_sh() { |
18462 | let a = _mm_set_sh(1.0); |
18463 | let b = _mm_set_sh(2.0); |
18464 | let r = _mm_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
18465 | let e = _mm_set_sh(0.5); |
18466 | assert_eq_m128h(r, e); |
18467 | } |
18468 | |
18469 | #[simd_test(enable = "avx512fp16" )] |
18470 | unsafe fn test_mm_mask_div_round_sh() { |
18471 | let a = _mm_set_sh(1.0); |
18472 | let b = _mm_set_sh(2.0); |
18473 | let src = _mm_set_sh(4.0); |
18474 | let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18475 | src, 0, a, b, |
18476 | ); |
18477 | let e = _mm_set_sh(4.0); |
18478 | assert_eq_m128h(r, e); |
18479 | let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18480 | src, 1, a, b, |
18481 | ); |
18482 | let e = _mm_set_sh(0.5); |
18483 | assert_eq_m128h(r, e); |
18484 | } |
18485 | |
18486 | #[simd_test(enable = "avx512fp16" )] |
18487 | unsafe fn test_mm_maskz_div_round_sh() { |
18488 | let a = _mm_set_sh(1.0); |
18489 | let b = _mm_set_sh(2.0); |
18490 | let r = |
18491 | _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
18492 | let e = _mm_set_sh(0.0); |
18493 | assert_eq_m128h(r, e); |
18494 | let r = |
18495 | _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
18496 | let e = _mm_set_sh(0.5); |
18497 | assert_eq_m128h(r, e); |
18498 | } |
18499 | |
18500 | #[simd_test(enable = "avx512fp16" )] |
18501 | unsafe fn test_mm_div_sh() { |
18502 | let a = _mm_set_sh(1.0); |
18503 | let b = _mm_set_sh(2.0); |
18504 | let r = _mm_div_sh(a, b); |
18505 | let e = _mm_set_sh(0.5); |
18506 | assert_eq_m128h(r, e); |
18507 | } |
18508 | |
18509 | #[simd_test(enable = "avx512fp16" )] |
18510 | unsafe fn test_mm_mask_div_sh() { |
18511 | let a = _mm_set_sh(1.0); |
18512 | let b = _mm_set_sh(2.0); |
18513 | let src = _mm_set_sh(4.0); |
18514 | let r = _mm_mask_div_sh(src, 0, a, b); |
18515 | let e = _mm_set_sh(4.0); |
18516 | assert_eq_m128h(r, e); |
18517 | let r = _mm_mask_div_sh(src, 1, a, b); |
18518 | let e = _mm_set_sh(0.5); |
18519 | assert_eq_m128h(r, e); |
18520 | } |
18521 | |
18522 | #[simd_test(enable = "avx512fp16" )] |
18523 | unsafe fn test_mm_maskz_div_sh() { |
18524 | let a = _mm_set_sh(1.0); |
18525 | let b = _mm_set_sh(2.0); |
18526 | let r = _mm_maskz_div_sh(0, a, b); |
18527 | let e = _mm_set_sh(0.0); |
18528 | assert_eq_m128h(r, e); |
18529 | let r = _mm_maskz_div_sh(1, a, b); |
18530 | let e = _mm_set_sh(0.5); |
18531 | assert_eq_m128h(r, e); |
18532 | } |
18533 | |
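// Packed complex tests (`*_pch`): each pair of consecutive f16 lanes is treated as one
// complex number (real part first, imaginary part second), and each mask bit covers one
// such pair. `_mm_set1_pch(0.0, 1.0)` broadcasts 0 + 1i, and (0 + 1i) * (0 + 1i) =
// -1 + 0i, hence the expected (-1.0, 0.0) pairs.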
18534 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18535 | unsafe fn test_mm_mul_pch() { |
18536 | let a = _mm_set1_pch(0.0, 1.0); |
18537 | let b = _mm_set1_pch(0.0, 1.0); |
18538 | let r = _mm_mul_pch(a, b); |
18539 | let e = _mm_set1_pch(-1.0, 0.0); |
18540 | assert_eq_m128h(r, e); |
18541 | } |
18542 | |
18543 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18544 | unsafe fn test_mm_mask_mul_pch() { |
18545 | let a = _mm_set1_pch(0.0, 1.0); |
18546 | let b = _mm_set1_pch(0.0, 1.0); |
18547 | let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0); |
18548 | let r = _mm_mask_mul_pch(src, 0b0101, a, b); |
18549 | let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0); |
18550 | assert_eq_m128h(r, e); |
18551 | } |
18552 | |
18553 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18554 | unsafe fn test_mm_maskz_mul_pch() { |
18555 | let a = _mm_set1_pch(0.0, 1.0); |
18556 | let b = _mm_set1_pch(0.0, 1.0); |
18557 | let r = _mm_maskz_mul_pch(0b0101, a, b); |
18558 | let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0); |
18559 | assert_eq_m128h(r, e); |
18560 | } |
18561 | |
18562 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18563 | unsafe fn test_mm256_mul_pch() { |
18564 | let a = _mm256_set1_pch(0.0, 1.0); |
18565 | let b = _mm256_set1_pch(0.0, 1.0); |
18566 | let r = _mm256_mul_pch(a, b); |
18567 | let e = _mm256_set1_pch(-1.0, 0.0); |
18568 | assert_eq_m256h(r, e); |
18569 | } |
18570 | |
18571 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18572 | unsafe fn test_mm256_mask_mul_pch() { |
18573 | let a = _mm256_set1_pch(0.0, 1.0); |
18574 | let b = _mm256_set1_pch(0.0, 1.0); |
18575 | let src = _mm256_setr_ph( |
18576 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
18577 | ); |
18578 | let r = _mm256_mask_mul_pch(src, 0b01010101, a, b); |
18579 | let e = _mm256_setr_ph( |
18580 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
18581 | ); |
18582 | assert_eq_m256h(r, e); |
18583 | } |
18584 | |
18585 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18586 | unsafe fn test_mm256_maskz_mul_pch() { |
18587 | let a = _mm256_set1_pch(0.0, 1.0); |
18588 | let b = _mm256_set1_pch(0.0, 1.0); |
18589 | let r = _mm256_maskz_mul_pch(0b01010101, a, b); |
18590 | let e = _mm256_setr_ph( |
18591 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18592 | ); |
18593 | assert_eq_m256h(r, e); |
18594 | } |
18595 | |
18596 | #[simd_test(enable = "avx512fp16" )] |
18597 | unsafe fn test_mm512_mul_pch() { |
18598 | let a = _mm512_set1_pch(0.0, 1.0); |
18599 | let b = _mm512_set1_pch(0.0, 1.0); |
18600 | let r = _mm512_mul_pch(a, b); |
18601 | let e = _mm512_set1_pch(-1.0, 0.0); |
18602 | assert_eq_m512h(r, e); |
18603 | } |
18604 | |
18605 | #[simd_test(enable = "avx512fp16" )] |
18606 | unsafe fn test_mm512_mask_mul_pch() { |
18607 | let a = _mm512_set1_pch(0.0, 1.0); |
18608 | let b = _mm512_set1_pch(0.0, 1.0); |
18609 | let src = _mm512_setr_ph( |
18610 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
18611 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
18612 | 32.0, 33.0, |
18613 | ); |
18614 | let r = _mm512_mask_mul_pch(src, 0b0101010101010101, a, b); |
18615 | let e = _mm512_setr_ph( |
18616 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
18617 | -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, |
18618 | 33.0, |
18619 | ); |
18620 | assert_eq_m512h(r, e); |
18621 | } |
18622 | |
18623 | #[simd_test(enable = "avx512fp16" )] |
18624 | unsafe fn test_mm512_maskz_mul_pch() { |
18625 | let a = _mm512_set1_pch(0.0, 1.0); |
18626 | let b = _mm512_set1_pch(0.0, 1.0); |
18627 | let r = _mm512_maskz_mul_pch(0b0101010101010101, a, b); |
18628 | let e = _mm512_setr_ph( |
18629 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18630 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18631 | ); |
18632 | assert_eq_m512h(r, e); |
18633 | } |
18634 | |
18635 | #[simd_test(enable = "avx512fp16" )] |
18636 | unsafe fn test_mm512_mul_round_pch() { |
18637 | let a = _mm512_set1_pch(0.0, 1.0); |
18638 | let b = _mm512_set1_pch(0.0, 1.0); |
18639 | let r = _mm512_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
18640 | let e = _mm512_set1_pch(-1.0, 0.0); |
18641 | assert_eq_m512h(r, e); |
18642 | } |
18643 | |
18644 | #[simd_test(enable = "avx512fp16" )] |
18645 | unsafe fn test_mm512_mask_mul_round_pch() { |
18646 | let a = _mm512_set1_pch(0.0, 1.0); |
18647 | let b = _mm512_set1_pch(0.0, 1.0); |
18648 | let src = _mm512_setr_ph( |
18649 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
18650 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
18651 | 32.0, 33.0, |
18652 | ); |
18653 | let r = _mm512_mask_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18654 | src, |
18655 | 0b0101010101010101, |
18656 | a, |
18657 | b, |
18658 | ); |
18659 | let e = _mm512_setr_ph( |
18660 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
18661 | -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, |
18662 | 33.0, |
18663 | ); |
18664 | assert_eq_m512h(r, e); |
18665 | } |
18666 | |
18667 | #[simd_test(enable = "avx512fp16" )] |
18668 | unsafe fn test_mm512_maskz_mul_round_pch() { |
18669 | let a = _mm512_set1_pch(0.0, 1.0); |
18670 | let b = _mm512_set1_pch(0.0, 1.0); |
18671 | let r = _mm512_maskz_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18672 | 0b0101010101010101, |
18673 | a, |
18674 | b, |
18675 | ); |
18676 | let e = _mm512_setr_ph( |
18677 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18678 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18679 | ); |
18680 | assert_eq_m512h(r, e); |
18681 | } |
18682 | |
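// Scalar complex tests (`*_sch`): only the lowest complex pair (elements 0 and 1) is
// multiplied; the remaining elements of the result are copied from `a`. With a zero
// mask the low pair comes from `src` (mask_) or is zeroed (maskz_).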
18683 | #[simd_test(enable = "avx512fp16" )] |
18684 | unsafe fn test_mm_mul_round_sch() { |
18685 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18686 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18687 | let r = _mm_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
18688 | let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18689 | assert_eq_m128h(r, e); |
18690 | } |
18691 | |
18692 | #[simd_test(enable = "avx512fp16" )] |
18693 | unsafe fn test_mm_mask_mul_round_sch() { |
18694 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18695 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18696 | let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); |
18697 | let r = _mm_mask_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18698 | src, 0, a, b, |
18699 | ); |
18700 | let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18701 | assert_eq_m128h(r, e); |
18702 | } |
18703 | |
18704 | #[simd_test(enable = "avx512fp16" )] |
18705 | unsafe fn test_mm_maskz_mul_round_sch() { |
18706 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18707 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18708 | let r = |
18709 | _mm_maskz_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
18710 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18711 | assert_eq_m128h(r, e); |
18712 | } |
18713 | |
18714 | #[simd_test(enable = "avx512fp16" )] |
18715 | unsafe fn test_mm_mul_sch() { |
18716 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18717 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18718 | let r = _mm_mul_sch(a, b); |
18719 | let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18720 | assert_eq_m128h(r, e); |
18721 | } |
18722 | |
18723 | #[simd_test(enable = "avx512fp16" )] |
18724 | unsafe fn test_mm_mask_mul_sch() { |
18725 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18726 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18727 | let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); |
18728 | let r = _mm_mask_mul_sch(src, 0, a, b); |
18729 | let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18730 | assert_eq_m128h(r, e); |
18731 | } |
18732 | |
18733 | #[simd_test(enable = "avx512fp16" )] |
18734 | unsafe fn test_mm_maskz_mul_sch() { |
18735 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18736 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18737 | let r = _mm_maskz_mul_sch(0, a, b); |
18738 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18739 | assert_eq_m128h(r, e); |
18740 | } |
18741 | |
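// Per Intel's documentation, `*_fmul_pch` / `*_fmul_sch` perform the same complex
// multiplication as the `mul_pch` / `mul_sch` intrinsics above, so the test vectors
// are identical.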
18742 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18743 | unsafe fn test_mm_fmul_pch() { |
18744 | let a = _mm_set1_pch(0.0, 1.0); |
18745 | let b = _mm_set1_pch(0.0, 1.0); |
18746 | let r = _mm_fmul_pch(a, b); |
18747 | let e = _mm_set1_pch(-1.0, 0.0); |
18748 | assert_eq_m128h(r, e); |
18749 | } |
18750 | |
18751 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18752 | unsafe fn test_mm_mask_fmul_pch() { |
18753 | let a = _mm_set1_pch(0.0, 1.0); |
18754 | let b = _mm_set1_pch(0.0, 1.0); |
18755 | let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0); |
18756 | let r = _mm_mask_fmul_pch(src, 0b0101, a, b); |
18757 | let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0); |
18758 | assert_eq_m128h(r, e); |
18759 | } |
18760 | |
18761 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18762 | unsafe fn test_mm_maskz_fmul_pch() { |
18763 | let a = _mm_set1_pch(0.0, 1.0); |
18764 | let b = _mm_set1_pch(0.0, 1.0); |
18765 | let r = _mm_maskz_fmul_pch(0b0101, a, b); |
18766 | let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0); |
18767 | assert_eq_m128h(r, e); |
18768 | } |
18769 | |
18770 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18771 | unsafe fn test_mm256_fmul_pch() { |
18772 | let a = _mm256_set1_pch(0.0, 1.0); |
18773 | let b = _mm256_set1_pch(0.0, 1.0); |
18774 | let r = _mm256_fmul_pch(a, b); |
18775 | let e = _mm256_set1_pch(-1.0, 0.0); |
18776 | assert_eq_m256h(r, e); |
18777 | } |
18778 | |
18779 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18780 | unsafe fn test_mm256_mask_fmul_pch() { |
18781 | let a = _mm256_set1_pch(0.0, 1.0); |
18782 | let b = _mm256_set1_pch(0.0, 1.0); |
18783 | let src = _mm256_setr_ph( |
18784 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
18785 | ); |
18786 | let r = _mm256_mask_fmul_pch(src, 0b01010101, a, b); |
18787 | let e = _mm256_setr_ph( |
18788 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
18789 | ); |
18790 | assert_eq_m256h(r, e); |
18791 | } |
18792 | |
18793 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18794 | unsafe fn test_mm256_maskz_fmul_pch() { |
18795 | let a = _mm256_set1_pch(0.0, 1.0); |
18796 | let b = _mm256_set1_pch(0.0, 1.0); |
18797 | let r = _mm256_maskz_fmul_pch(0b01010101, a, b); |
18798 | let e = _mm256_setr_ph( |
18799 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18800 | ); |
18801 | assert_eq_m256h(r, e); |
18802 | } |
18803 | |
18804 | #[simd_test(enable = "avx512fp16" )] |
18805 | unsafe fn test_mm512_fmul_pch() { |
18806 | let a = _mm512_set1_pch(0.0, 1.0); |
18807 | let b = _mm512_set1_pch(0.0, 1.0); |
18808 | let r = _mm512_fmul_pch(a, b); |
18809 | let e = _mm512_set1_pch(-1.0, 0.0); |
18810 | assert_eq_m512h(r, e); |
18811 | } |
18812 | |
18813 | #[simd_test(enable = "avx512fp16" )] |
18814 | unsafe fn test_mm512_mask_fmul_pch() { |
18815 | let a = _mm512_set1_pch(0.0, 1.0); |
18816 | let b = _mm512_set1_pch(0.0, 1.0); |
18817 | let src = _mm512_setr_ph( |
18818 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
18819 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
18820 | 32.0, 33.0, |
18821 | ); |
18822 | let r = _mm512_mask_fmul_pch(src, 0b0101010101010101, a, b); |
18823 | let e = _mm512_setr_ph( |
18824 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
18825 | -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, |
18826 | 33.0, |
18827 | ); |
18828 | assert_eq_m512h(r, e); |
18829 | } |
18830 | |
18831 | #[simd_test(enable = "avx512fp16" )] |
18832 | unsafe fn test_mm512_maskz_fmul_pch() { |
18833 | let a = _mm512_set1_pch(0.0, 1.0); |
18834 | let b = _mm512_set1_pch(0.0, 1.0); |
18835 | let r = _mm512_maskz_fmul_pch(0b0101010101010101, a, b); |
18836 | let e = _mm512_setr_ph( |
18837 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18838 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18839 | ); |
18840 | assert_eq_m512h(r, e); |
18841 | } |
18842 | |
18843 | #[simd_test(enable = "avx512fp16" )] |
18844 | unsafe fn test_mm512_fmul_round_pch() { |
18845 | let a = _mm512_set1_pch(0.0, 1.0); |
18846 | let b = _mm512_set1_pch(0.0, 1.0); |
18847 | let r = _mm512_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
18848 | let e = _mm512_set1_pch(-1.0, 0.0); |
18849 | assert_eq_m512h(r, e); |
18850 | } |
18851 | |
18852 | #[simd_test(enable = "avx512fp16" )] |
18853 | unsafe fn test_mm512_mask_fmul_round_pch() { |
18854 | let a = _mm512_set1_pch(0.0, 1.0); |
18855 | let b = _mm512_set1_pch(0.0, 1.0); |
18856 | let src = _mm512_setr_ph( |
18857 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
18858 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
18859 | 32.0, 33.0, |
18860 | ); |
18861 | let r = _mm512_mask_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18862 | src, |
18863 | 0b0101010101010101, |
18864 | a, |
18865 | b, |
18866 | ); |
18867 | let e = _mm512_setr_ph( |
18868 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
18869 | -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, |
18870 | 33.0, |
18871 | ); |
18872 | assert_eq_m512h(r, e); |
18873 | } |
18874 | |
18875 | #[simd_test(enable = "avx512fp16" )] |
18876 | unsafe fn test_mm512_maskz_fmul_round_pch() { |
18877 | let a = _mm512_set1_pch(0.0, 1.0); |
18878 | let b = _mm512_set1_pch(0.0, 1.0); |
18879 | let r = _mm512_maskz_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18880 | 0b0101010101010101, |
18881 | a, |
18882 | b, |
18883 | ); |
18884 | let e = _mm512_setr_ph( |
18885 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18886 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18887 | ); |
18888 | assert_eq_m512h(r, e); |
18889 | } |
18890 | |
18891 | #[simd_test(enable = "avx512fp16" )] |
18892 | unsafe fn test_mm_fmul_round_sch() { |
18893 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18894 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18895 | let r = _mm_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
18896 | let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18897 | assert_eq_m128h(r, e); |
18898 | } |
18899 | |
18900 | #[simd_test(enable = "avx512fp16" )] |
18901 | unsafe fn test_mm_mask_fmul_round_sch() { |
18902 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18903 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18904 | let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); |
18905 | let r = _mm_mask_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18906 | src, 0, a, b, |
18907 | ); |
18908 | let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18909 | assert_eq_m128h(r, e); |
18910 | } |
18911 | |
18912 | #[simd_test(enable = "avx512fp16" )] |
18913 | unsafe fn test_mm_maskz_fmul_round_sch() { |
18914 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18915 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18916 | let r = |
18917 | _mm_maskz_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
18918 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18919 | assert_eq_m128h(r, e); |
18920 | } |
18921 | |
18922 | #[simd_test(enable = "avx512fp16" )] |
18923 | unsafe fn test_mm_fmul_sch() { |
18924 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18925 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18926 | let r = _mm_fmul_sch(a, b); |
18927 | let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18928 | assert_eq_m128h(r, e); |
18929 | } |
18930 | |
18931 | #[simd_test(enable = "avx512fp16" )] |
18932 | unsafe fn test_mm_mask_fmul_sch() { |
18933 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18934 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18935 | let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); |
18936 | let r = _mm_mask_fmul_sch(src, 0, a, b); |
18937 | let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18938 | assert_eq_m128h(r, e); |
18939 | } |
18940 | |
18941 | #[simd_test(enable = "avx512fp16" )] |
18942 | unsafe fn test_mm_maskz_fmul_sch() { |
18943 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18944 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18945 | let r = _mm_maskz_fmul_sch(0, a, b); |
18946 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18947 | assert_eq_m128h(r, e); |
18948 | } |
18949 | |
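// Conjugate complex tests (`*_cmul_pch`): `a` is multiplied by the complex conjugate
// of `b`. Here a = 0 + 1i and b = 0 - 1i, so a * conj(b) = (0 + 1i) * (0 + 1i) =
// -1 + 0i, matching the (-1.0, 0.0) expectations.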
18950 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18951 | unsafe fn test_mm_cmul_pch() { |
18952 | let a = _mm_set1_pch(0.0, 1.0); |
18953 | let b = _mm_set1_pch(0.0, -1.0); |
18954 | let r = _mm_cmul_pch(a, b); |
18955 | let e = _mm_set1_pch(-1.0, 0.0); |
18956 | assert_eq_m128h(r, e); |
18957 | } |
18958 | |
18959 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18960 | unsafe fn test_mm_mask_cmul_pch() { |
18961 | let a = _mm_set1_pch(0.0, 1.0); |
18962 | let b = _mm_set1_pch(0.0, -1.0); |
18963 | let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0); |
18964 | let r = _mm_mask_cmul_pch(src, 0b0101, a, b); |
18965 | let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0); |
18966 | assert_eq_m128h(r, e); |
18967 | } |
18968 | |
18969 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18970 | unsafe fn test_mm_maskz_cmul_pch() { |
18971 | let a = _mm_set1_pch(0.0, 1.0); |
18972 | let b = _mm_set1_pch(0.0, -1.0); |
18973 | let r = _mm_maskz_cmul_pch(0b0101, a, b); |
18974 | let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0); |
18975 | assert_eq_m128h(r, e); |
18976 | } |
18977 | |
18978 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18979 | unsafe fn test_mm256_cmul_pch() { |
18980 | let a = _mm256_set1_pch(0.0, 1.0); |
18981 | let b = _mm256_set1_pch(0.0, -1.0); |
18982 | let r = _mm256_cmul_pch(a, b); |
18983 | let e = _mm256_set1_pch(-1.0, 0.0); |
18984 | assert_eq_m256h(r, e); |
18985 | } |
18986 | |
18987 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18988 | unsafe fn test_mm256_mask_cmul_pch() { |
18989 | let a = _mm256_set1_pch(0.0, 1.0); |
18990 | let b = _mm256_set1_pch(0.0, -1.0); |
18991 | let src = _mm256_setr_ph( |
18992 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
18993 | ); |
18994 | let r = _mm256_mask_cmul_pch(src, 0b01010101, a, b); |
18995 | let e = _mm256_setr_ph( |
18996 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
18997 | ); |
18998 | assert_eq_m256h(r, e); |
18999 | } |
19000 | |
19001 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19002 | unsafe fn test_mm256_maskz_cmul_pch() { |
19003 | let a = _mm256_set1_pch(0.0, 1.0); |
19004 | let b = _mm256_set1_pch(0.0, -1.0); |
19005 | let r = _mm256_maskz_cmul_pch(0b01010101, a, b); |
19006 | let e = _mm256_setr_ph( |
19007 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19008 | ); |
19009 | assert_eq_m256h(r, e); |
19010 | } |
19011 | |
19012 | #[simd_test(enable = "avx512fp16" )] |
19013 | unsafe fn test_mm512_cmul_pch() { |
19014 | let a = _mm512_set1_pch(0.0, 1.0); |
19015 | let b = _mm512_set1_pch(0.0, -1.0); |
19016 | let r = _mm512_cmul_pch(a, b); |
19017 | let e = _mm512_set1_pch(-1.0, 0.0); |
19018 | assert_eq_m512h(r, e); |
19019 | } |
19020 | |
19021 | #[simd_test(enable = "avx512fp16" )] |
19022 | unsafe fn test_mm512_mask_cmul_pch() { |
19023 | let a = _mm512_set1_pch(0.0, 1.0); |
19024 | let b = _mm512_set1_pch(0.0, -1.0); |
19025 | let src = _mm512_setr_ph( |
19026 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
19027 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
19028 | 32.0, 33.0, |
19029 | ); |
19030 | let r = _mm512_mask_cmul_pch(src, 0b0101010101010101, a, b); |
19031 | let e = _mm512_setr_ph( |
19032 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
19033 | -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, |
19034 | 33.0, |
19035 | ); |
19036 | assert_eq_m512h(r, e); |
19037 | } |
19038 | |
19039 | #[simd_test(enable = "avx512fp16" )] |
19040 | unsafe fn test_mm512_maskz_cmul_pch() { |
19041 | let a = _mm512_set1_pch(0.0, 1.0); |
19042 | let b = _mm512_set1_pch(0.0, -1.0); |
19043 | let r = _mm512_maskz_cmul_pch(0b0101010101010101, a, b); |
19044 | let e = _mm512_setr_ph( |
19045 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19046 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19047 | ); |
19048 | assert_eq_m512h(r, e); |
19049 | } |
19050 | |
19051 | #[simd_test(enable = "avx512fp16" )] |
19052 | unsafe fn test_mm512_cmul_round_pch() { |
19053 | let a = _mm512_set1_pch(0.0, 1.0); |
19054 | let b = _mm512_set1_pch(0.0, -1.0); |
19055 | let r = _mm512_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
19056 | let e = _mm512_set1_pch(-1.0, 0.0); |
19057 | assert_eq_m512h(r, e); |
19058 | } |
19059 | |
19060 | #[simd_test(enable = "avx512fp16" )] |
19061 | unsafe fn test_mm512_mask_cmul_round_pch() { |
19062 | let a = _mm512_set1_pch(0.0, 1.0); |
19063 | let b = _mm512_set1_pch(0.0, -1.0); |
19064 | let src = _mm512_setr_ph( |
19065 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
19066 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
19067 | 32.0, 33.0, |
19068 | ); |
19069 | let r = _mm512_mask_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19070 | src, |
19071 | 0b0101010101010101, |
19072 | a, |
19073 | b, |
19074 | ); |
19075 | let e = _mm512_setr_ph( |
19076 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
19077 | -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, |
19078 | 33.0, |
19079 | ); |
19080 | assert_eq_m512h(r, e); |
19081 | } |
19082 | |
19083 | #[simd_test(enable = "avx512fp16" )] |
19084 | unsafe fn test_mm512_maskz_cmul_round_pch() { |
19085 | let a = _mm512_set1_pch(0.0, 1.0); |
19086 | let b = _mm512_set1_pch(0.0, -1.0); |
19087 | let r = _mm512_maskz_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19088 | 0b0101010101010101, |
19089 | a, |
19090 | b, |
19091 | ); |
19092 | let e = _mm512_setr_ph( |
19093 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19094 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19095 | ); |
19096 | assert_eq_m512h(r, e); |
19097 | } |
19098 | |
19099 | #[simd_test(enable = "avx512fp16" )] |
19100 | unsafe fn test_mm_cmul_sch() { |
19101 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19102 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19103 | let r = _mm_cmul_sch(a, b); |
19104 | let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19105 | assert_eq_m128h(r, e); |
19106 | } |
19107 | |
19108 | #[simd_test(enable = "avx512fp16" )] |
19109 | unsafe fn test_mm_mask_cmul_sch() { |
19110 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19111 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19112 | let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); |
19113 | let r = _mm_mask_cmul_sch(src, 0, a, b); |
19114 | let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19115 | assert_eq_m128h(r, e); |
19116 | } |
19117 | |
19118 | #[simd_test(enable = "avx512fp16" )] |
19119 | unsafe fn test_mm_maskz_cmul_sch() { |
19120 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19121 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19122 | let r = _mm_maskz_cmul_sch(0, a, b); |
19123 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19124 | assert_eq_m128h(r, e); |
19125 | } |
19126 | |
19127 | #[simd_test(enable = "avx512fp16" )] |
19128 | unsafe fn test_mm_cmul_round_sch() { |
19129 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19130 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19131 | let r = _mm_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
19132 | let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19133 | assert_eq_m128h(r, e); |
19134 | } |
19135 | |
19136 | #[simd_test(enable = "avx512fp16" )] |
19137 | unsafe fn test_mm_mask_cmul_round_sch() { |
19138 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19139 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19140 | let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); |
19141 | let r = _mm_mask_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19142 | src, 0, a, b, |
19143 | ); |
19144 | let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19145 | assert_eq_m128h(r, e); |
19146 | } |
19147 | |
19148 | #[simd_test(enable = "avx512fp16" )] |
19149 | unsafe fn test_mm_maskz_cmul_round_sch() { |
19150 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19151 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19152 | let r = |
19153 | _mm_maskz_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
19154 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19155 | assert_eq_m128h(r, e); |
19156 | } |
19157 | |
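// `*_fcmul_pch` / `*_fcmul_sch` are the conjugate-multiply intrinsics under their
// alternative name; they compute the same a * conj(b) as the cmul tests above.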
19158 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19159 | unsafe fn test_mm_fcmul_pch() { |
19160 | let a = _mm_set1_pch(0.0, 1.0); |
19161 | let b = _mm_set1_pch(0.0, -1.0); |
19162 | let r = _mm_fcmul_pch(a, b); |
19163 | let e = _mm_set1_pch(-1.0, 0.0); |
19164 | assert_eq_m128h(r, e); |
19165 | } |
19166 | |
19167 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19168 | unsafe fn test_mm_mask_fcmul_pch() { |
19169 | let a = _mm_set1_pch(0.0, 1.0); |
19170 | let b = _mm_set1_pch(0.0, -1.0); |
19171 | let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0); |
19172 | let r = _mm_mask_fcmul_pch(src, 0b0101, a, b); |
19173 | let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0); |
19174 | assert_eq_m128h(r, e); |
19175 | } |
19176 | |
19177 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19178 | unsafe fn test_mm_maskz_fcmul_pch() { |
19179 | let a = _mm_set1_pch(0.0, 1.0); |
19180 | let b = _mm_set1_pch(0.0, -1.0); |
19181 | let r = _mm_maskz_fcmul_pch(0b0101, a, b); |
19182 | let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0); |
19183 | assert_eq_m128h(r, e); |
19184 | } |
19185 | |
19186 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19187 | unsafe fn test_mm256_fcmul_pch() { |
19188 | let a = _mm256_set1_pch(0.0, 1.0); |
19189 | let b = _mm256_set1_pch(0.0, -1.0); |
19190 | let r = _mm256_fcmul_pch(a, b); |
19191 | let e = _mm256_set1_pch(-1.0, 0.0); |
19192 | assert_eq_m256h(r, e); |
19193 | } |
19194 | |
19195 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19196 | unsafe fn test_mm256_mask_fcmul_pch() { |
19197 | let a = _mm256_set1_pch(0.0, 1.0); |
19198 | let b = _mm256_set1_pch(0.0, -1.0); |
19199 | let src = _mm256_setr_ph( |
19200 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
19201 | ); |
19202 | let r = _mm256_mask_fcmul_pch(src, 0b01010101, a, b); |
19203 | let e = _mm256_setr_ph( |
19204 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
19205 | ); |
19206 | assert_eq_m256h(r, e); |
19207 | } |
19208 | |
19209 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19210 | unsafe fn test_mm256_maskz_fcmul_pch() { |
19211 | let a = _mm256_set1_pch(0.0, 1.0); |
19212 | let b = _mm256_set1_pch(0.0, -1.0); |
19213 | let r = _mm256_maskz_fcmul_pch(0b01010101, a, b); |
19214 | let e = _mm256_setr_ph( |
19215 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19216 | ); |
19217 | assert_eq_m256h(r, e); |
19218 | } |
19219 | |
19220 | #[simd_test(enable = "avx512fp16" )] |
19221 | unsafe fn test_mm512_fcmul_pch() { |
19222 | let a = _mm512_set1_pch(0.0, 1.0); |
19223 | let b = _mm512_set1_pch(0.0, -1.0); |
19224 | let r = _mm512_fcmul_pch(a, b); |
19225 | let e = _mm512_set1_pch(-1.0, 0.0); |
19226 | assert_eq_m512h(r, e); |
19227 | } |
19228 | |
19229 | #[simd_test(enable = "avx512fp16" )] |
19230 | unsafe fn test_mm512_mask_fcmul_pch() { |
19231 | let a = _mm512_set1_pch(0.0, 1.0); |
19232 | let b = _mm512_set1_pch(0.0, -1.0); |
19233 | let src = _mm512_setr_ph( |
19234 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
19235 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
19236 | 32.0, 33.0, |
19237 | ); |
19238 | let r = _mm512_mask_fcmul_pch(src, 0b0101010101010101, a, b); |
19239 | let e = _mm512_setr_ph( |
19240 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
19241 | -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, |
19242 | 33.0, |
19243 | ); |
19244 | assert_eq_m512h(r, e); |
19245 | } |
19246 | |
19247 | #[simd_test(enable = "avx512fp16" )] |
19248 | unsafe fn test_mm512_maskz_fcmul_pch() { |
19249 | let a = _mm512_set1_pch(0.0, 1.0); |
19250 | let b = _mm512_set1_pch(0.0, -1.0); |
19251 | let r = _mm512_maskz_fcmul_pch(0b0101010101010101, a, b); |
19252 | let e = _mm512_setr_ph( |
19253 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19254 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19255 | ); |
19256 | assert_eq_m512h(r, e); |
19257 | } |
19258 | |
19259 | #[simd_test(enable = "avx512fp16" )] |
19260 | unsafe fn test_mm512_fcmul_round_pch() { |
19261 | let a = _mm512_set1_pch(0.0, 1.0); |
19262 | let b = _mm512_set1_pch(0.0, -1.0); |
19263 | let r = _mm512_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
19264 | let e = _mm512_set1_pch(-1.0, 0.0); |
19265 | assert_eq_m512h(r, e); |
19266 | } |
19267 | |
19268 | #[simd_test(enable = "avx512fp16" )] |
19269 | unsafe fn test_mm512_mask_fcmul_round_pch() { |
19270 | let a = _mm512_set1_pch(0.0, 1.0); |
19271 | let b = _mm512_set1_pch(0.0, -1.0); |
19272 | let src = _mm512_setr_ph( |
19273 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
19274 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
19275 | 32.0, 33.0, |
19276 | ); |
19277 | let r = _mm512_mask_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19278 | src, |
19279 | 0b0101010101010101, |
19280 | a, |
19281 | b, |
19282 | ); |
19283 | let e = _mm512_setr_ph( |
19284 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
19285 | -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, |
19286 | 33.0, |
19287 | ); |
19288 | assert_eq_m512h(r, e); |
19289 | } |
19290 | |
19291 | #[simd_test(enable = "avx512fp16" )] |
19292 | unsafe fn test_mm512_maskz_fcmul_round_pch() { |
19293 | let a = _mm512_set1_pch(0.0, 1.0); |
19294 | let b = _mm512_set1_pch(0.0, -1.0); |
19295 | let r = _mm512_maskz_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19296 | 0b0101010101010101, |
19297 | a, |
19298 | b, |
19299 | ); |
19300 | let e = _mm512_setr_ph( |
19301 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19302 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19303 | ); |
19304 | assert_eq_m512h(r, e); |
19305 | } |
19306 | |
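// The _sch variants operate only on the lowest complex pair (f16 lanes 0 and 1); the remaining
// six lanes of the result are copied from `a`. With a zero mask the lowest pair is taken from
// `src` (masked form) or zeroed (maskz form).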
19307 | #[simd_test(enable = "avx512fp16" )] |
19308 | unsafe fn test_mm_fcmul_sch() { |
19309 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19310 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19311 | let r = _mm_fcmul_sch(a, b); |
19312 | let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19313 | assert_eq_m128h(r, e); |
19314 | } |
19315 | |
19316 | #[simd_test(enable = "avx512fp16" )] |
19317 | unsafe fn test_mm_mask_fcmul_sch() { |
19318 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19319 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19320 | let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); |
19321 | let r = _mm_mask_fcmul_sch(src, 0, a, b); |
19322 | let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19323 | assert_eq_m128h(r, e); |
19324 | } |
19325 | |
19326 | #[simd_test(enable = "avx512fp16" )] |
19327 | unsafe fn test_mm_maskz_fcmul_sch() { |
19328 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19329 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19330 | let r = _mm_maskz_fcmul_sch(0, a, b); |
19331 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19332 | assert_eq_m128h(r, e); |
19333 | } |
19334 | |
19335 | #[simd_test(enable = "avx512fp16" )] |
19336 | unsafe fn test_mm_fcmul_round_sch() { |
19337 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19338 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19339 | let r = _mm_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
19340 | let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19341 | assert_eq_m128h(r, e); |
19342 | } |
19343 | |
19344 | #[simd_test(enable = "avx512fp16" )] |
19345 | unsafe fn test_mm_mask_fcmul_round_sch() { |
19346 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19347 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19348 | let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); |
19349 | let r = _mm_mask_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19350 | src, 0, a, b, |
19351 | ); |
19352 | let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19353 | assert_eq_m128h(r, e); |
19354 | } |
19355 | |
19356 | #[simd_test(enable = "avx512fp16" )] |
19357 | unsafe fn test_mm_maskz_fcmul_round_sch() { |
19358 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19359 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19360 | let r = |
19361 | _mm_maskz_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
19362 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19363 | assert_eq_m128h(r, e); |
19364 | } |
19365 | |
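// abs_ph clears the sign bit of every f16 lane, so each negative input maps to its positive value.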
19366 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19367 | unsafe fn test_mm_abs_ph() { |
19368 | let a = _mm_set_ph(-1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0); |
19369 | let r = _mm_abs_ph(a); |
19370 | let e = _mm_set_ph(1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0); |
19371 | assert_eq_m128h(r, e); |
19372 | } |
19373 | |
19374 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19375 | unsafe fn test_mm256_abs_ph() { |
19376 | let a = _mm256_set_ph( |
19377 | -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, |
19378 | -14.0, |
19379 | ); |
19380 | let r = _mm256_abs_ph(a); |
19381 | let e = _mm256_set_ph( |
19382 | 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, |
19383 | ); |
19384 | assert_eq_m256h(r, e); |
19385 | } |
19386 | |
19387 | #[simd_test(enable = "avx512fp16" )] |
19388 | unsafe fn test_mm512_abs_ph() { |
19389 | let a = _mm512_set_ph( |
19390 | -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, |
19391 | -14.0, 15.0, -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0, |
19392 | 27.0, -28.0, 29.0, -30.0, |
19393 | ); |
19394 | let r = _mm512_abs_ph(a); |
19395 | let e = _mm512_set_ph( |
19396 | 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, |
19397 | 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, |
19398 | 29.0, 30.0, |
19399 | ); |
19400 | assert_eq_m512h(r, e); |
19401 | } |
19402 | |
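// conj_pch negates the imaginary part (the odd f16 lane) of each complex pair. In the masked
// forms each mask bit covers one complex pair, i.e. two f16 lanes.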
19403 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19404 | unsafe fn test_mm_conj_pch() { |
19405 | let a = _mm_set1_pch(0.0, 1.0); |
19406 | let r = _mm_conj_pch(a); |
19407 | let e = _mm_set1_pch(0.0, -1.0); |
19408 | assert_eq_m128h(r, e); |
19409 | } |
19410 | |
19411 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19412 | unsafe fn test_mm_mask_conj_pch() { |
19413 | let a = _mm_set1_pch(0.0, 1.0); |
19414 | let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0); |
19415 | let r = _mm_mask_conj_pch(src, 0b0101, a); |
19416 | let e = _mm_setr_ph(0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0); |
19417 | assert_eq_m128h(r, e); |
19418 | } |
19419 | |
19420 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19421 | unsafe fn test_mm_maskz_conj_pch() { |
19422 | let a = _mm_set1_pch(0.0, 1.0); |
19423 | let r = _mm_maskz_conj_pch(0b0101, a); |
19424 | let e = _mm_setr_ph(0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0); |
19425 | assert_eq_m128h(r, e); |
19426 | } |
19427 | |
19428 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19429 | unsafe fn test_mm256_conj_pch() { |
19430 | let a = _mm256_set1_pch(0.0, 1.0); |
19431 | let r = _mm256_conj_pch(a); |
19432 | let e = _mm256_set1_pch(0.0, -1.0); |
19433 | assert_eq_m256h(r, e); |
19434 | } |
19435 | |
19436 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19437 | unsafe fn test_mm256_mask_conj_pch() { |
19438 | let a = _mm256_set1_pch(0.0, 1.0); |
19439 | let src = _mm256_setr_ph( |
19440 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
19441 | ); |
19442 | let r = _mm256_mask_conj_pch(src, 0b01010101, a); |
19443 | let e = _mm256_setr_ph( |
19444 | 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0, |
19445 | ); |
19446 | assert_eq_m256h(r, e); |
19447 | } |
19448 | |
19449 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19450 | unsafe fn test_mm256_maskz_conj_pch() { |
19451 | let a = _mm256_set1_pch(0.0, 1.0); |
19452 | let r = _mm256_maskz_conj_pch(0b01010101, a); |
19453 | let e = _mm256_setr_ph( |
19454 | 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, |
19455 | ); |
19456 | assert_eq_m256h(r, e); |
19457 | } |
19458 | |
19459 | #[simd_test(enable = "avx512fp16" )] |
19460 | unsafe fn test_mm512_conj_pch() { |
19461 | let a = _mm512_set1_pch(0.0, 1.0); |
19462 | let r = _mm512_conj_pch(a); |
19463 | let e = _mm512_set1_pch(0.0, -1.0); |
19464 | assert_eq_m512h(r, e); |
19465 | } |
19466 | |
19467 | #[simd_test(enable = "avx512fp16" )] |
19468 | unsafe fn test_mm512_mask_conj_pch() { |
19469 | let a = _mm512_set1_pch(0.0, 1.0); |
19470 | let src = _mm512_setr_ph( |
19471 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
19472 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
19473 | 32.0, 33.0, |
19474 | ); |
19475 | let r = _mm512_mask_conj_pch(src, 0b0101010101010101, a); |
19476 | let e = _mm512_setr_ph( |
19477 | 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0, |
19478 | 0.0, -1.0, 20.0, 21.0, 0.0, -1.0, 24.0, 25.0, 0.0, -1.0, 28.0, 29.0, 0.0, -1.0, 32.0, |
19479 | 33.0, |
19480 | ); |
19481 | assert_eq_m512h(r, e); |
19482 | } |
19483 | |
19484 | #[simd_test(enable = "avx512fp16" )] |
19485 | unsafe fn test_mm512_maskz_conj_pch() { |
19486 | let a = _mm512_set1_pch(0.0, 1.0); |
19487 | let r = _mm512_maskz_conj_pch(0b0101010101010101, a); |
19488 | let e = _mm512_setr_ph( |
19489 | 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, |
19490 | 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, |
19491 | ); |
19492 | assert_eq_m512h(r, e); |
19493 | } |
19494 | |
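// fmadd_pch is a complex multiply-add: with a = 0+1i, b = 0+2i, c = 0+3i the product
// a*b = -2+0i, so a*b + c = -2 + 3i.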
19495 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19496 | unsafe fn test_mm_fmadd_pch() { |
19497 | let a = _mm_set1_pch(0.0, 1.0); |
19498 | let b = _mm_set1_pch(0.0, 2.0); |
19499 | let c = _mm_set1_pch(0.0, 3.0); |
19500 | let r = _mm_fmadd_pch(a, b, c); |
19501 | let e = _mm_set1_pch(-2.0, 3.0); |
19502 | assert_eq_m128h(r, e); |
19503 | } |
19504 | |
19505 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19506 | unsafe fn test_mm_mask_fmadd_pch() { |
19507 | let a = _mm_set1_pch(0.0, 1.0); |
19508 | let b = _mm_set1_pch(0.0, 2.0); |
19509 | let c = _mm_set1_pch(0.0, 3.0); |
19510 | let r = _mm_mask_fmadd_pch(a, 0b0101, b, c); |
19511 | let e = _mm_setr_ph(-2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0); |
19512 | assert_eq_m128h(r, e); |
19513 | } |
19514 | |
19515 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19516 | unsafe fn test_mm_mask3_fmadd_pch() { |
19517 | let a = _mm_set1_pch(0.0, 1.0); |
19518 | let b = _mm_set1_pch(0.0, 2.0); |
19519 | let c = _mm_set1_pch(0.0, 3.0); |
19520 | let r = _mm_mask3_fmadd_pch(a, b, c, 0b0101); |
19521 | let e = _mm_setr_ph(-2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0); |
19522 | assert_eq_m128h(r, e); |
19523 | } |
19524 | |
19525 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19526 | unsafe fn test_mm_maskz_fmadd_pch() { |
19527 | let a = _mm_set1_pch(0.0, 1.0); |
19528 | let b = _mm_set1_pch(0.0, 2.0); |
19529 | let c = _mm_set1_pch(0.0, 3.0); |
19530 | let r = _mm_maskz_fmadd_pch(0b0101, a, b, c); |
19531 | let e = _mm_setr_ph(-2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0); |
19532 | assert_eq_m128h(r, e); |
19533 | } |
19534 | |
19535 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19536 | unsafe fn test_mm256_fmadd_pch() { |
19537 | let a = _mm256_set1_pch(0.0, 1.0); |
19538 | let b = _mm256_set1_pch(0.0, 2.0); |
19539 | let c = _mm256_set1_pch(0.0, 3.0); |
19540 | let r = _mm256_fmadd_pch(a, b, c); |
19541 | let e = _mm256_set1_pch(-2.0, 3.0); |
19542 | assert_eq_m256h(r, e); |
19543 | } |
19544 | |
19545 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19546 | unsafe fn test_mm256_mask_fmadd_pch() { |
19547 | let a = _mm256_set1_pch(0.0, 1.0); |
19548 | let b = _mm256_set1_pch(0.0, 2.0); |
19549 | let c = _mm256_set1_pch(0.0, 3.0); |
19550 | let r = _mm256_mask_fmadd_pch(a, 0b01010101, b, c); |
19551 | let e = _mm256_setr_ph( |
19552 | -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, |
19553 | ); |
19554 | assert_eq_m256h(r, e); |
19555 | } |
19556 | |
19557 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19558 | unsafe fn test_mm256_mask3_fmadd_pch() { |
19559 | let a = _mm256_set1_pch(0.0, 1.0); |
19560 | let b = _mm256_set1_pch(0.0, 2.0); |
19561 | let c = _mm256_set1_pch(0.0, 3.0); |
19562 | let r = _mm256_mask3_fmadd_pch(a, b, c, 0b01010101); |
19563 | let e = _mm256_setr_ph( |
19564 | -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, |
19565 | ); |
19566 | assert_eq_m256h(r, e); |
19567 | } |
19568 | |
19569 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19570 | unsafe fn test_mm256_maskz_fmadd_pch() { |
19571 | let a = _mm256_set1_pch(0.0, 1.0); |
19572 | let b = _mm256_set1_pch(0.0, 2.0); |
19573 | let c = _mm256_set1_pch(0.0, 3.0); |
19574 | let r = _mm256_maskz_fmadd_pch(0b01010101, a, b, c); |
19575 | let e = _mm256_setr_ph( |
19576 | -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, |
19577 | ); |
19578 | assert_eq_m256h(r, e); |
19579 | } |
19580 | |
19581 | #[simd_test(enable = "avx512fp16" )] |
19582 | unsafe fn test_mm512_fmadd_pch() { |
19583 | let a = _mm512_set1_pch(0.0, 1.0); |
19584 | let b = _mm512_set1_pch(0.0, 2.0); |
19585 | let c = _mm512_set1_pch(0.0, 3.0); |
19586 | let r = _mm512_fmadd_pch(a, b, c); |
19587 | let e = _mm512_set1_pch(-2.0, 3.0); |
19588 | assert_eq_m512h(r, e); |
19589 | } |
19590 | |
19591 | #[simd_test(enable = "avx512fp16" )] |
19592 | unsafe fn test_mm512_mask_fmadd_pch() { |
19593 | let a = _mm512_set1_pch(0.0, 1.0); |
19594 | let b = _mm512_set1_pch(0.0, 2.0); |
19595 | let c = _mm512_set1_pch(0.0, 3.0); |
19596 | let r = _mm512_mask_fmadd_pch(a, 0b0101010101010101, b, c); |
19597 | let e = _mm512_setr_ph( |
19598 | -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, |
19599 | -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, |
19600 | ); |
19601 | assert_eq_m512h(r, e); |
19602 | } |
19603 | |
19604 | #[simd_test(enable = "avx512fp16" )] |
19605 | unsafe fn test_mm512_mask3_fmadd_pch() { |
19606 | let a = _mm512_set1_pch(0.0, 1.0); |
19607 | let b = _mm512_set1_pch(0.0, 2.0); |
19608 | let c = _mm512_set1_pch(0.0, 3.0); |
19609 | let r = _mm512_mask3_fmadd_pch(a, b, c, 0b0101010101010101); |
19610 | let e = _mm512_setr_ph( |
19611 | -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, |
19612 | -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, |
19613 | ); |
19614 | assert_eq_m512h(r, e); |
19615 | } |
19616 | |
19617 | #[simd_test(enable = "avx512fp16" )] |
19618 | unsafe fn test_mm512_maskz_fmadd_pch() { |
19619 | let a = _mm512_set1_pch(0.0, 1.0); |
19620 | let b = _mm512_set1_pch(0.0, 2.0); |
19621 | let c = _mm512_set1_pch(0.0, 3.0); |
19622 | let r = _mm512_maskz_fmadd_pch(0b0101010101010101, a, b, c); |
19623 | let e = _mm512_setr_ph( |
19624 | -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, |
19625 | -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, |
19626 | ); |
19627 | assert_eq_m512h(r, e); |
19628 | } |
19629 | |
19630 | #[simd_test(enable = "avx512fp16" )] |
19631 | unsafe fn test_mm512_fmadd_round_pch() { |
19632 | let a = _mm512_set1_pch(0.0, 1.0); |
19633 | let b = _mm512_set1_pch(0.0, 2.0); |
19634 | let c = _mm512_set1_pch(0.0, 3.0); |
19635 | let r = |
19636 | _mm512_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
19637 | let e = _mm512_set1_pch(-2.0, 3.0); |
19638 | assert_eq_m512h(r, e); |
19639 | } |
19640 | |
19641 | #[simd_test(enable = "avx512fp16" )] |
19642 | unsafe fn test_mm512_mask_fmadd_round_pch() { |
19643 | let a = _mm512_set1_pch(0.0, 1.0); |
19644 | let b = _mm512_set1_pch(0.0, 2.0); |
19645 | let c = _mm512_set1_pch(0.0, 3.0); |
19646 | let r = _mm512_mask_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19647 | a, |
19648 | 0b0101010101010101, |
19649 | b, |
19650 | c, |
19651 | ); |
19652 | let e = _mm512_setr_ph( |
19653 | -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, |
19654 | -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, |
19655 | ); |
19656 | assert_eq_m512h(r, e); |
19657 | } |
19658 | |
19659 | #[simd_test(enable = "avx512fp16" )] |
19660 | unsafe fn test_mm512_mask3_fmadd_round_pch() { |
19661 | let a = _mm512_set1_pch(0.0, 1.0); |
19662 | let b = _mm512_set1_pch(0.0, 2.0); |
19663 | let c = _mm512_set1_pch(0.0, 3.0); |
19664 | let r = _mm512_mask3_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19665 | a, |
19666 | b, |
19667 | c, |
19668 | 0b0101010101010101, |
19669 | ); |
19670 | let e = _mm512_setr_ph( |
19671 | -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, |
19672 | -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, |
19673 | ); |
19674 | assert_eq_m512h(r, e); |
19675 | } |
19676 | |
19677 | #[simd_test(enable = "avx512fp16" )] |
19678 | unsafe fn test_mm512_maskz_fmadd_round_pch() { |
19679 | let a = _mm512_set1_pch(0.0, 1.0); |
19680 | let b = _mm512_set1_pch(0.0, 2.0); |
19681 | let c = _mm512_set1_pch(0.0, 3.0); |
19682 | let r = _mm512_maskz_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19683 | 0b0101010101010101, |
19684 | a, |
19685 | b, |
19686 | c, |
19687 | ); |
19688 | let e = _mm512_setr_ph( |
19689 | -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, |
19690 | -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, |
19691 | ); |
19692 | assert_eq_m512h(r, e); |
19693 | } |
19694 | |
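// Scalar complex fmadd: only the lowest pair is computed ((0+1i)*(0+2i) + (0+3i) = -2+3i);
// the upper lanes come from `a`, except in the mask3 forms where they come from `c`.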
19695 | #[simd_test(enable = "avx512fp16" )] |
19696 | unsafe fn test_mm_fmadd_sch() { |
19697 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19698 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
19699 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19700 | let r = _mm_fmadd_sch(a, b, c); |
19701 | let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19702 | assert_eq_m128h(r, e); |
19703 | } |
19704 | |
19705 | #[simd_test(enable = "avx512fp16" )] |
19706 | unsafe fn test_mm_mask_fmadd_sch() { |
19707 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19708 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
19709 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19710 | let r = _mm_mask_fmadd_sch(a, 0, b, c); |
19711 | let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19712 | assert_eq_m128h(r, e); |
19713 | let r = _mm_mask_fmadd_sch(a, 1, b, c); |
19714 | let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19715 | assert_eq_m128h(r, e); |
19716 | } |
19717 | |
19718 | #[simd_test(enable = "avx512fp16" )] |
19719 | unsafe fn test_mm_mask3_fmadd_sch() { |
19720 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19721 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
19722 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19723 | let r = _mm_mask3_fmadd_sch(a, b, c, 0); |
19724 | let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19725 | assert_eq_m128h(r, e); |
19726 | let r = _mm_mask3_fmadd_sch(a, b, c, 1); |
19727 | let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19728 | assert_eq_m128h(r, e); |
19729 | } |
19730 | |
19731 | #[simd_test(enable = "avx512fp16" )] |
19732 | unsafe fn test_mm_maskz_fmadd_sch() { |
19733 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19734 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
19735 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19736 | let r = _mm_maskz_fmadd_sch(0, a, b, c); |
19737 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19738 | assert_eq_m128h(r, e); |
19739 | let r = _mm_maskz_fmadd_sch(1, a, b, c); |
19740 | let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19741 | assert_eq_m128h(r, e); |
19742 | } |
19743 | |
19744 | #[simd_test(enable = "avx512fp16" )] |
19745 | unsafe fn test_mm_fmadd_round_sch() { |
19746 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19747 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
19748 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19749 | let r = _mm_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
19750 | let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19751 | assert_eq_m128h(r, e); |
19752 | } |
19753 | |
19754 | #[simd_test(enable = "avx512fp16" )] |
19755 | unsafe fn test_mm_mask_fmadd_round_sch() { |
19756 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19757 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
19758 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19759 | let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19760 | a, 0, b, c, |
19761 | ); |
19762 | let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19763 | assert_eq_m128h(r, e); |
19764 | let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19765 | a, 1, b, c, |
19766 | ); |
19767 | let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19768 | assert_eq_m128h(r, e); |
19769 | } |
19770 | |
19771 | #[simd_test(enable = "avx512fp16" )] |
19772 | unsafe fn test_mm_mask3_fmadd_round_sch() { |
19773 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19774 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
19775 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19776 | let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19777 | a, b, c, 0, |
19778 | ); |
19779 | let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19780 | assert_eq_m128h(r, e); |
19781 | let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19782 | a, b, c, 1, |
19783 | ); |
19784 | let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19785 | assert_eq_m128h(r, e); |
19786 | } |
19787 | |
19788 | #[simd_test(enable = "avx512fp16" )] |
19789 | unsafe fn test_mm_maskz_fmadd_round_sch() { |
19790 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19791 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
19792 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19793 | let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19794 | 0, a, b, c, |
19795 | ); |
19796 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19797 | assert_eq_m128h(r, e); |
19798 | let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19799 | 1, a, b, c, |
19800 | ); |
19801 | let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19802 | assert_eq_m128h(r, e); |
19803 | } |
19804 | |
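// fcmadd_pch conjugates `b` before the multiply-add:
// (0+1i) * conj(0+2i) + (0+3i) = (0+1i)*(0-2i) + (0+3i) = 2 + 3i.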
19805 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19806 | unsafe fn test_mm_fcmadd_pch() { |
19807 | let a = _mm_set1_pch(0.0, 1.0); |
19808 | let b = _mm_set1_pch(0.0, 2.0); |
19809 | let c = _mm_set1_pch(0.0, 3.0); |
19810 | let r = _mm_fcmadd_pch(a, b, c); |
19811 | let e = _mm_set1_pch(2.0, 3.0); |
19812 | assert_eq_m128h(r, e); |
19813 | } |
19814 | |
19815 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19816 | unsafe fn test_mm_mask_fcmadd_pch() { |
19817 | let a = _mm_set1_pch(0.0, 1.0); |
19818 | let b = _mm_set1_pch(0.0, 2.0); |
19819 | let c = _mm_set1_pch(0.0, 3.0); |
19820 | let r = _mm_mask_fcmadd_pch(a, 0b0101, b, c); |
19821 | let e = _mm_setr_ph(2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0); |
19822 | assert_eq_m128h(r, e); |
19823 | } |
19824 | |
19825 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19826 | unsafe fn test_mm_mask3_fcmadd_pch() { |
19827 | let a = _mm_set1_pch(0.0, 1.0); |
19828 | let b = _mm_set1_pch(0.0, 2.0); |
19829 | let c = _mm_set1_pch(0.0, 3.0); |
19830 | let r = _mm_mask3_fcmadd_pch(a, b, c, 0b0101); |
19831 | let e = _mm_setr_ph(2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0); |
19832 | assert_eq_m128h(r, e); |
19833 | } |
19834 | |
19835 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19836 | unsafe fn test_mm_maskz_fcmadd_pch() { |
19837 | let a = _mm_set1_pch(0.0, 1.0); |
19838 | let b = _mm_set1_pch(0.0, 2.0); |
19839 | let c = _mm_set1_pch(0.0, 3.0); |
19840 | let r = _mm_maskz_fcmadd_pch(0b0101, a, b, c); |
19841 | let e = _mm_setr_ph(2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0); |
19842 | assert_eq_m128h(r, e); |
19843 | } |
19844 | |
19845 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19846 | unsafe fn test_mm256_fcmadd_pch() { |
19847 | let a = _mm256_set1_pch(0.0, 1.0); |
19848 | let b = _mm256_set1_pch(0.0, 2.0); |
19849 | let c = _mm256_set1_pch(0.0, 3.0); |
19850 | let r = _mm256_fcmadd_pch(a, b, c); |
19851 | let e = _mm256_set1_pch(2.0, 3.0); |
19852 | assert_eq_m256h(r, e); |
19853 | } |
19854 | |
19855 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19856 | unsafe fn test_mm256_mask_fcmadd_pch() { |
19857 | let a = _mm256_set1_pch(0.0, 1.0); |
19858 | let b = _mm256_set1_pch(0.0, 2.0); |
19859 | let c = _mm256_set1_pch(0.0, 3.0); |
19860 | let r = _mm256_mask_fcmadd_pch(a, 0b01010101, b, c); |
19861 | let e = _mm256_setr_ph( |
19862 | 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, |
19863 | ); |
19864 | assert_eq_m256h(r, e); |
19865 | } |
19866 | |
19867 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19868 | unsafe fn test_mm256_mask3_fcmadd_pch() { |
19869 | let a = _mm256_set1_pch(0.0, 1.0); |
19870 | let b = _mm256_set1_pch(0.0, 2.0); |
19871 | let c = _mm256_set1_pch(0.0, 3.0); |
19872 | let r = _mm256_mask3_fcmadd_pch(a, b, c, 0b01010101); |
19873 | let e = _mm256_setr_ph( |
19874 | 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, |
19875 | ); |
19876 | assert_eq_m256h(r, e); |
19877 | } |
19878 | |
19879 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19880 | unsafe fn test_mm256_maskz_fcmadd_pch() { |
19881 | let a = _mm256_set1_pch(0.0, 1.0); |
19882 | let b = _mm256_set1_pch(0.0, 2.0); |
19883 | let c = _mm256_set1_pch(0.0, 3.0); |
19884 | let r = _mm256_maskz_fcmadd_pch(0b01010101, a, b, c); |
19885 | let e = _mm256_setr_ph( |
19886 | 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, |
19887 | ); |
19888 | assert_eq_m256h(r, e); |
19889 | } |
19890 | |
19891 | #[simd_test(enable = "avx512fp16" )] |
19892 | unsafe fn test_mm512_fcmadd_pch() { |
19893 | let a = _mm512_set1_pch(0.0, 1.0); |
19894 | let b = _mm512_set1_pch(0.0, 2.0); |
19895 | let c = _mm512_set1_pch(0.0, 3.0); |
19896 | let r = _mm512_fcmadd_pch(a, b, c); |
19897 | let e = _mm512_set1_pch(2.0, 3.0); |
19898 | assert_eq_m512h(r, e); |
19899 | } |
19900 | |
19901 | #[simd_test(enable = "avx512fp16" )] |
19902 | unsafe fn test_mm512_mask_fcmadd_pch() { |
19903 | let a = _mm512_set1_pch(0.0, 1.0); |
19904 | let b = _mm512_set1_pch(0.0, 2.0); |
19905 | let c = _mm512_set1_pch(0.0, 3.0); |
19906 | let r = _mm512_mask_fcmadd_pch(a, 0b0101010101010101, b, c); |
19907 | let e = _mm512_setr_ph( |
19908 | 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, |
19909 | 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, |
19910 | ); |
19911 | assert_eq_m512h(r, e); |
19912 | } |
19913 | |
19914 | #[simd_test(enable = "avx512fp16" )] |
19915 | unsafe fn test_mm512_mask3_fcmadd_pch() { |
19916 | let a = _mm512_set1_pch(0.0, 1.0); |
19917 | let b = _mm512_set1_pch(0.0, 2.0); |
19918 | let c = _mm512_set1_pch(0.0, 3.0); |
19919 | let r = _mm512_mask3_fcmadd_pch(a, b, c, 0b0101010101010101); |
19920 | let e = _mm512_setr_ph( |
19921 | 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, |
19922 | 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, |
19923 | ); |
19924 | assert_eq_m512h(r, e); |
19925 | } |
19926 | |
19927 | #[simd_test(enable = "avx512fp16" )] |
19928 | unsafe fn test_mm512_maskz_fcmadd_pch() { |
19929 | let a = _mm512_set1_pch(0.0, 1.0); |
19930 | let b = _mm512_set1_pch(0.0, 2.0); |
19931 | let c = _mm512_set1_pch(0.0, 3.0); |
19932 | let r = _mm512_maskz_fcmadd_pch(0b0101010101010101, a, b, c); |
19933 | let e = _mm512_setr_ph( |
19934 | 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, |
19935 | 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, |
19936 | ); |
19937 | assert_eq_m512h(r, e); |
19938 | } |
19939 | |
19940 | #[simd_test(enable = "avx512fp16" )] |
19941 | unsafe fn test_mm512_fcmadd_round_pch() { |
19942 | let a = _mm512_set1_pch(0.0, 1.0); |
19943 | let b = _mm512_set1_pch(0.0, 2.0); |
19944 | let c = _mm512_set1_pch(0.0, 3.0); |
19945 | let r = |
19946 | _mm512_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
19947 | let e = _mm512_set1_pch(2.0, 3.0); |
19948 | assert_eq_m512h(r, e); |
19949 | } |
19950 | |
19951 | #[simd_test(enable = "avx512fp16" )] |
19952 | unsafe fn test_mm512_mask_fcmadd_round_pch() { |
19953 | let a = _mm512_set1_pch(0.0, 1.0); |
19954 | let b = _mm512_set1_pch(0.0, 2.0); |
19955 | let c = _mm512_set1_pch(0.0, 3.0); |
19956 | let r = _mm512_mask_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19957 | a, |
19958 | 0b0101010101010101, |
19959 | b, |
19960 | c, |
19961 | ); |
19962 | let e = _mm512_setr_ph( |
19963 | 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, |
19964 | 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, |
19965 | ); |
19966 | assert_eq_m512h(r, e); |
19967 | } |
19968 | |
19969 | #[simd_test(enable = "avx512fp16" )] |
19970 | unsafe fn test_mm512_mask3_fcmadd_round_pch() { |
19971 | let a = _mm512_set1_pch(0.0, 1.0); |
19972 | let b = _mm512_set1_pch(0.0, 2.0); |
19973 | let c = _mm512_set1_pch(0.0, 3.0); |
19974 | let r = _mm512_mask3_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19975 | a, |
19976 | b, |
19977 | c, |
19978 | 0b0101010101010101, |
19979 | ); |
19980 | let e = _mm512_setr_ph( |
19981 | 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, |
19982 | 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, |
19983 | ); |
19984 | assert_eq_m512h(r, e); |
19985 | } |
19986 | |
19987 | #[simd_test(enable = "avx512fp16" )] |
19988 | unsafe fn test_mm512_maskz_fcmadd_round_pch() { |
19989 | let a = _mm512_set1_pch(0.0, 1.0); |
19990 | let b = _mm512_set1_pch(0.0, 2.0); |
19991 | let c = _mm512_set1_pch(0.0, 3.0); |
19992 | let r = _mm512_maskz_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19993 | 0b0101010101010101, |
19994 | a, |
19995 | b, |
19996 | c, |
19997 | ); |
19998 | let e = _mm512_setr_ph( |
19999 | 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, |
20000 | 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, |
20001 | ); |
20002 | assert_eq_m512h(r, e); |
20003 | } |
20004 | |
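// Scalar form of fcmadd: the lowest pair is a*conj(b) + c = 2+3i; the upper lanes are copied
// from `a`, or from `c` in the mask3 forms.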
20005 | #[simd_test(enable = "avx512fp16" )] |
20006 | unsafe fn test_mm_fcmadd_sch() { |
20007 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20008 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
20009 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20010 | let r = _mm_fcmadd_sch(a, b, c); |
20011 | let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20012 | assert_eq_m128h(r, e); |
20013 | } |
20014 | |
20015 | #[simd_test(enable = "avx512fp16" )] |
20016 | unsafe fn test_mm_mask_fcmadd_sch() { |
20017 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20018 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
20019 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20020 | let r = _mm_mask_fcmadd_sch(a, 0, b, c); |
20021 | let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20022 | assert_eq_m128h(r, e); |
20023 | let r = _mm_mask_fcmadd_sch(a, 1, b, c); |
20024 | let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20025 | assert_eq_m128h(r, e); |
20026 | } |
20027 | |
20028 | #[simd_test(enable = "avx512fp16" )] |
20029 | unsafe fn test_mm_mask3_fcmadd_sch() { |
20030 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20031 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
20032 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20033 | let r = _mm_mask3_fcmadd_sch(a, b, c, 0); |
20034 | let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20035 | assert_eq_m128h(r, e); |
20036 | let r = _mm_mask3_fcmadd_sch(a, b, c, 1); |
20037 | let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20038 | assert_eq_m128h(r, e); |
20039 | } |
20040 | |
20041 | #[simd_test(enable = "avx512fp16" )] |
20042 | unsafe fn test_mm_maskz_fcmadd_sch() { |
20043 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20044 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
20045 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20046 | let r = _mm_maskz_fcmadd_sch(0, a, b, c); |
20047 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20048 | assert_eq_m128h(r, e); |
20049 | let r = _mm_maskz_fcmadd_sch(1, a, b, c); |
20050 | let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20051 | assert_eq_m128h(r, e); |
20052 | } |
20053 | |
20054 | #[simd_test(enable = "avx512fp16" )] |
20055 | unsafe fn test_mm_fcmadd_round_sch() { |
20056 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20057 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
20058 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20059 | let r = _mm_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
20060 | let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20061 | assert_eq_m128h(r, e); |
20062 | } |
20063 | |
20064 | #[simd_test(enable = "avx512fp16" )] |
20065 | unsafe fn test_mm_mask_fcmadd_round_sch() { |
20066 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20067 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
20068 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20069 | let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20070 | a, 0, b, c, |
20071 | ); |
20072 | let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20073 | assert_eq_m128h(r, e); |
20074 | let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20075 | a, 1, b, c, |
20076 | ); |
20077 | let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20078 | assert_eq_m128h(r, e); |
20079 | } |
20080 | |
20081 | #[simd_test(enable = "avx512fp16" )] |
20082 | unsafe fn test_mm_mask3_fcmadd_round_sch() { |
20083 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20084 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
20085 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20086 | let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20087 | a, b, c, 0, |
20088 | ); |
20089 | let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20090 | assert_eq_m128h(r, e); |
20091 | let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20092 | a, b, c, 1, |
20093 | ); |
20094 | let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20095 | assert_eq_m128h(r, e); |
20096 | } |
20097 | |
20098 | #[simd_test(enable = "avx512fp16" )] |
20099 | unsafe fn test_mm_maskz_fcmadd_round_sch() { |
20100 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20101 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
20102 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20103 | let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20104 | 0, a, b, c, |
20105 | ); |
20106 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20107 | assert_eq_m128h(r, e); |
20108 | let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20109 | 1, a, b, c, |
20110 | ); |
20111 | let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20112 | assert_eq_m128h(r, e); |
20113 | } |
20114 | |
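// The real-valued *_ph FMA tests follow. Every f16 lane is independent here, so the masks are
// 8, 16 or 32 bits wide (one bit per lane) rather than one bit per complex pair;
// 1.0 * 2.0 + 3.0 = 5.0 in each selected lane.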
20115 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20116 | unsafe fn test_mm_fmadd_ph() { |
20117 | let a = _mm_set1_ph(1.0); |
20118 | let b = _mm_set1_ph(2.0); |
20119 | let c = _mm_set1_ph(3.0); |
20120 | let r = _mm_fmadd_ph(a, b, c); |
20121 | let e = _mm_set1_ph(5.0); |
20122 | assert_eq_m128h(r, e); |
20123 | } |
20124 | |
20125 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20126 | unsafe fn test_mm_mask_fmadd_ph() { |
20127 | let a = _mm_set1_ph(1.0); |
20128 | let b = _mm_set1_ph(2.0); |
20129 | let c = _mm_set1_ph(3.0); |
20130 | let r = _mm_mask_fmadd_ph(a, 0b01010101, b, c); |
20131 | let e = _mm_set_ph(1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0); |
20132 | assert_eq_m128h(r, e); |
20133 | } |
20134 | |
20135 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20136 | unsafe fn test_mm_mask3_fmadd_ph() { |
20137 | let a = _mm_set1_ph(1.0); |
20138 | let b = _mm_set1_ph(2.0); |
20139 | let c = _mm_set1_ph(3.0); |
20140 | let r = _mm_mask3_fmadd_ph(a, b, c, 0b01010101); |
20141 | let e = _mm_set_ph(3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0); |
20142 | assert_eq_m128h(r, e); |
20143 | } |
20144 | |
20145 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20146 | unsafe fn test_mm_maskz_fmadd_ph() { |
20147 | let a = _mm_set1_ph(1.0); |
20148 | let b = _mm_set1_ph(2.0); |
20149 | let c = _mm_set1_ph(3.0); |
20150 | let r = _mm_maskz_fmadd_ph(0b01010101, a, b, c); |
20151 | let e = _mm_set_ph(0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0); |
20152 | assert_eq_m128h(r, e); |
20153 | } |
20154 | |
20155 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20156 | unsafe fn test_mm256_fmadd_ph() { |
20157 | let a = _mm256_set1_ph(1.0); |
20158 | let b = _mm256_set1_ph(2.0); |
20159 | let c = _mm256_set1_ph(3.0); |
20160 | let r = _mm256_fmadd_ph(a, b, c); |
20161 | let e = _mm256_set1_ph(5.0); |
20162 | assert_eq_m256h(r, e); |
20163 | } |
20164 | |
20165 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20166 | unsafe fn test_mm256_mask_fmadd_ph() { |
20167 | let a = _mm256_set1_ph(1.0); |
20168 | let b = _mm256_set1_ph(2.0); |
20169 | let c = _mm256_set1_ph(3.0); |
20170 | let r = _mm256_mask_fmadd_ph(a, 0b0101010101010101, b, c); |
20171 | let e = _mm256_set_ph( |
20172 | 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, |
20173 | ); |
20174 | assert_eq_m256h(r, e); |
20175 | } |
20176 | |
20177 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20178 | unsafe fn test_mm256_mask3_fmadd_ph() { |
20179 | let a = _mm256_set1_ph(1.0); |
20180 | let b = _mm256_set1_ph(2.0); |
20181 | let c = _mm256_set1_ph(3.0); |
20182 | let r = _mm256_mask3_fmadd_ph(a, b, c, 0b0101010101010101); |
20183 | let e = _mm256_set_ph( |
20184 | 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, |
20185 | ); |
20186 | assert_eq_m256h(r, e); |
20187 | } |
20188 | |
20189 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20190 | unsafe fn test_mm256_maskz_fmadd_ph() { |
20191 | let a = _mm256_set1_ph(1.0); |
20192 | let b = _mm256_set1_ph(2.0); |
20193 | let c = _mm256_set1_ph(3.0); |
20194 | let r = _mm256_maskz_fmadd_ph(0b0101010101010101, a, b, c); |
20195 | let e = _mm256_set_ph( |
20196 | 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, |
20197 | ); |
20198 | assert_eq_m256h(r, e); |
20199 | } |
20200 | |
20201 | #[simd_test(enable = "avx512fp16" )] |
20202 | unsafe fn test_mm512_fmadd_ph() { |
20203 | let a = _mm512_set1_ph(1.0); |
20204 | let b = _mm512_set1_ph(2.0); |
20205 | let c = _mm512_set1_ph(3.0); |
20206 | let r = _mm512_fmadd_ph(a, b, c); |
20207 | let e = _mm512_set1_ph(5.0); |
20208 | assert_eq_m512h(r, e); |
20209 | } |
20210 | |
20211 | #[simd_test(enable = "avx512fp16" )] |
20212 | unsafe fn test_mm512_mask_fmadd_ph() { |
20213 | let a = _mm512_set1_ph(1.0); |
20214 | let b = _mm512_set1_ph(2.0); |
20215 | let c = _mm512_set1_ph(3.0); |
20216 | let r = _mm512_mask_fmadd_ph(a, 0b01010101010101010101010101010101, b, c); |
20217 | let e = _mm512_set_ph( |
20218 | 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, |
20219 | 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, |
20220 | ); |
20221 | assert_eq_m512h(r, e); |
20222 | } |
20223 | |
20224 | #[simd_test(enable = "avx512fp16" )] |
20225 | unsafe fn test_mm512_mask3_fmadd_ph() { |
20226 | let a = _mm512_set1_ph(1.0); |
20227 | let b = _mm512_set1_ph(2.0); |
20228 | let c = _mm512_set1_ph(3.0); |
20229 | let r = _mm512_mask3_fmadd_ph(a, b, c, 0b01010101010101010101010101010101); |
20230 | let e = _mm512_set_ph( |
20231 | 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, |
20232 | 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, |
20233 | ); |
20234 | assert_eq_m512h(r, e); |
20235 | } |
20236 | |
20237 | #[simd_test(enable = "avx512fp16" )] |
20238 | unsafe fn test_mm512_maskz_fmadd_ph() { |
20239 | let a = _mm512_set1_ph(1.0); |
20240 | let b = _mm512_set1_ph(2.0); |
20241 | let c = _mm512_set1_ph(3.0); |
20242 | let r = _mm512_maskz_fmadd_ph(0b01010101010101010101010101010101, a, b, c); |
20243 | let e = _mm512_set_ph( |
20244 | 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, |
20245 | 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, |
20246 | ); |
20247 | assert_eq_m512h(r, e); |
20248 | } |
20249 | |
20250 | #[simd_test(enable = "avx512fp16" )] |
20251 | unsafe fn test_mm512_fmadd_round_ph() { |
20252 | let a = _mm512_set1_ph(1.0); |
20253 | let b = _mm512_set1_ph(2.0); |
20254 | let c = _mm512_set1_ph(3.0); |
20255 | let r = _mm512_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
20256 | let e = _mm512_set1_ph(5.0); |
20257 | assert_eq_m512h(r, e); |
20258 | } |
20259 | |
20260 | #[simd_test(enable = "avx512fp16" )] |
20261 | unsafe fn test_mm512_mask_fmadd_round_ph() { |
20262 | let a = _mm512_set1_ph(1.0); |
20263 | let b = _mm512_set1_ph(2.0); |
20264 | let c = _mm512_set1_ph(3.0); |
20265 | let r = _mm512_mask_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20266 | a, |
20267 | 0b01010101010101010101010101010101, |
20268 | b, |
20269 | c, |
20270 | ); |
20271 | let e = _mm512_set_ph( |
20272 | 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, |
20273 | 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, |
20274 | ); |
20275 | assert_eq_m512h(r, e); |
20276 | } |
20277 | |
20278 | #[simd_test(enable = "avx512fp16" )] |
20279 | unsafe fn test_mm512_mask3_fmadd_round_ph() { |
20280 | let a = _mm512_set1_ph(1.0); |
20281 | let b = _mm512_set1_ph(2.0); |
20282 | let c = _mm512_set1_ph(3.0); |
20283 | let r = _mm512_mask3_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20284 | a, |
20285 | b, |
20286 | c, |
20287 | 0b01010101010101010101010101010101, |
20288 | ); |
20289 | let e = _mm512_set_ph( |
20290 | 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, |
20291 | 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, |
20292 | ); |
20293 | assert_eq_m512h(r, e); |
20294 | } |
20295 | |
20296 | #[simd_test(enable = "avx512fp16" )] |
20297 | unsafe fn test_mm512_maskz_fmadd_round_ph() { |
20298 | let a = _mm512_set1_ph(1.0); |
20299 | let b = _mm512_set1_ph(2.0); |
20300 | let c = _mm512_set1_ph(3.0); |
20301 | let r = _mm512_maskz_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20302 | 0b01010101010101010101010101010101, |
20303 | a, |
20304 | b, |
20305 | c, |
20306 | ); |
20307 | let e = _mm512_set_ph( |
20308 | 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, |
20309 | 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, |
20310 | ); |
20311 | assert_eq_m512h(r, e); |
20312 | } |
20313 | |
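// Scalar real FMA: only element 0 is computed (1.0 * 2.0 + 3.0 = 5.0); the other seven lanes
// are copied from `a`, or from `c` in the mask3 forms.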
20314 | #[simd_test(enable = "avx512fp16" )] |
20315 | unsafe fn test_mm_fmadd_sh() { |
20316 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20317 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20318 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20319 | let r = _mm_fmadd_sh(a, b, c); |
20320 | let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.); |
20321 | assert_eq_m128h(r, e); |
20322 | } |
20323 | |
20324 | #[simd_test(enable = "avx512fp16" )] |
20325 | unsafe fn test_mm_mask_fmadd_sh() { |
20326 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20327 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20328 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20329 | let r = _mm_mask_fmadd_sh(a, 0, b, c); |
20330 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20331 | assert_eq_m128h(r, e); |
20332 | let r = _mm_mask_fmadd_sh(a, 1, b, c); |
20333 | let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.); |
20334 | assert_eq_m128h(r, e); |
20335 | } |
20336 | |
20337 | #[simd_test(enable = "avx512fp16" )] |
20338 | unsafe fn test_mm_mask3_fmadd_sh() { |
20339 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20340 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20341 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20342 | let r = _mm_mask3_fmadd_sh(a, b, c, 0); |
20343 | let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20344 | assert_eq_m128h(r, e); |
20345 | let r = _mm_mask3_fmadd_sh(a, b, c, 1); |
20346 | let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.); |
20347 | assert_eq_m128h(r, e); |
20348 | } |
20349 | |
20350 | #[simd_test(enable = "avx512fp16" )] |
20351 | unsafe fn test_mm_maskz_fmadd_sh() { |
20352 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20353 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20354 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20355 | let r = _mm_maskz_fmadd_sh(0, a, b, c); |
20356 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
20357 | assert_eq_m128h(r, e); |
20358 | let r = _mm_maskz_fmadd_sh(1, a, b, c); |
20359 | let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.); |
20360 | assert_eq_m128h(r, e); |
20361 | } |
20362 | |
20363 | #[simd_test(enable = "avx512fp16" )] |
20364 | unsafe fn test_mm_fmadd_round_sh() { |
20365 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20366 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20367 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20368 | let r = _mm_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
20369 | let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.); |
20370 | assert_eq_m128h(r, e); |
20371 | } |
20372 | |
20373 | #[simd_test(enable = "avx512fp16" )] |
20374 | unsafe fn test_mm_mask_fmadd_round_sh() { |
20375 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20376 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20377 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20378 | let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20379 | a, 0, b, c, |
20380 | ); |
20381 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20382 | assert_eq_m128h(r, e); |
20383 | let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20384 | a, 1, b, c, |
20385 | ); |
20386 | let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.); |
20387 | assert_eq_m128h(r, e); |
20388 | } |
20389 | |
20390 | #[simd_test(enable = "avx512fp16" )] |
20391 | unsafe fn test_mm_mask3_fmadd_round_sh() { |
20392 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20393 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20394 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20395 | let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20396 | a, b, c, 0, |
20397 | ); |
20398 | let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20399 | assert_eq_m128h(r, e); |
20400 | let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20401 | a, b, c, 1, |
20402 | ); |
20403 | let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.); |
20404 | assert_eq_m128h(r, e); |
20405 | } |
20406 | |
20407 | #[simd_test(enable = "avx512fp16" )] |
20408 | unsafe fn test_mm_maskz_fmadd_round_sh() { |
20409 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20410 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20411 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20412 | let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20413 | 0, a, b, c, |
20414 | ); |
20415 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
20416 | assert_eq_m128h(r, e); |
20417 | let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20418 | 1, a, b, c, |
20419 | ); |
20420 | let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.); |
20421 | assert_eq_m128h(r, e); |
20422 | } |
20423 | |
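// fmsub computes a * b - c, so 1.0 * 2.0 - 3.0 = -1.0 in every selected lane.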
20424 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20425 | unsafe fn test_mm_fmsub_ph() { |
20426 | let a = _mm_set1_ph(1.0); |
20427 | let b = _mm_set1_ph(2.0); |
20428 | let c = _mm_set1_ph(3.0); |
20429 | let r = _mm_fmsub_ph(a, b, c); |
20430 | let e = _mm_set1_ph(-1.0); |
20431 | assert_eq_m128h(r, e); |
20432 | } |
20433 | |
20434 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20435 | unsafe fn test_mm_mask_fmsub_ph() { |
20436 | let a = _mm_set1_ph(1.0); |
20437 | let b = _mm_set1_ph(2.0); |
20438 | let c = _mm_set1_ph(3.0); |
20439 | let r = _mm_mask_fmsub_ph(a, 0b01010101, b, c); |
20440 | let e = _mm_set_ph(1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0); |
20441 | assert_eq_m128h(r, e); |
20442 | } |
20443 | |
20444 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20445 | unsafe fn test_mm_mask3_fmsub_ph() { |
20446 | let a = _mm_set1_ph(1.0); |
20447 | let b = _mm_set1_ph(2.0); |
20448 | let c = _mm_set1_ph(3.0); |
20449 | let r = _mm_mask3_fmsub_ph(a, b, c, 0b01010101); |
20450 | let e = _mm_set_ph(3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0); |
20451 | assert_eq_m128h(r, e); |
20452 | } |
20453 | |
20454 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20455 | unsafe fn test_mm_maskz_fmsub_ph() { |
20456 | let a = _mm_set1_ph(1.0); |
20457 | let b = _mm_set1_ph(2.0); |
20458 | let c = _mm_set1_ph(3.0); |
20459 | let r = _mm_maskz_fmsub_ph(0b01010101, a, b, c); |
20460 | let e = _mm_set_ph(0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0); |
20461 | assert_eq_m128h(r, e); |
20462 | } |
20463 | |
20464 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20465 | unsafe fn test_mm256_fmsub_ph() { |
20466 | let a = _mm256_set1_ph(1.0); |
20467 | let b = _mm256_set1_ph(2.0); |
20468 | let c = _mm256_set1_ph(3.0); |
20469 | let r = _mm256_fmsub_ph(a, b, c); |
20470 | let e = _mm256_set1_ph(-1.0); |
20471 | assert_eq_m256h(r, e); |
20472 | } |
20473 | |
20474 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20475 | unsafe fn test_mm256_mask_fmsub_ph() { |
20476 | let a = _mm256_set1_ph(1.0); |
20477 | let b = _mm256_set1_ph(2.0); |
20478 | let c = _mm256_set1_ph(3.0); |
20479 | let r = _mm256_mask_fmsub_ph(a, 0b0101010101010101, b, c); |
20480 | let e = _mm256_set_ph( |
20481 | 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, |
20482 | ); |
20483 | assert_eq_m256h(r, e); |
20484 | } |
20485 | |
20486 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20487 | unsafe fn test_mm256_mask3_fmsub_ph() { |
20488 | let a = _mm256_set1_ph(1.0); |
20489 | let b = _mm256_set1_ph(2.0); |
20490 | let c = _mm256_set1_ph(3.0); |
20491 | let r = _mm256_mask3_fmsub_ph(a, b, c, 0b0101010101010101); |
20492 | let e = _mm256_set_ph( |
20493 | 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, |
20494 | ); |
20495 | assert_eq_m256h(r, e); |
20496 | } |
20497 | |
20498 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20499 | unsafe fn test_mm256_maskz_fmsub_ph() { |
20500 | let a = _mm256_set1_ph(1.0); |
20501 | let b = _mm256_set1_ph(2.0); |
20502 | let c = _mm256_set1_ph(3.0); |
20503 | let r = _mm256_maskz_fmsub_ph(0b0101010101010101, a, b, c); |
20504 | let e = _mm256_set_ph( |
20505 | 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, |
20506 | ); |
20507 | assert_eq_m256h(r, e); |
20508 | } |
20509 | |
20510 | #[simd_test(enable = "avx512fp16" )] |
20511 | unsafe fn test_mm512_fmsub_ph() { |
20512 | let a = _mm512_set1_ph(1.0); |
20513 | let b = _mm512_set1_ph(2.0); |
20514 | let c = _mm512_set1_ph(3.0); |
20515 | let r = _mm512_fmsub_ph(a, b, c); |
20516 | let e = _mm512_set1_ph(-1.0); |
20517 | assert_eq_m512h(r, e); |
20518 | } |
20519 | |
20520 | #[simd_test(enable = "avx512fp16" )] |
20521 | unsafe fn test_mm512_mask_fmsub_ph() { |
20522 | let a = _mm512_set1_ph(1.0); |
20523 | let b = _mm512_set1_ph(2.0); |
20524 | let c = _mm512_set1_ph(3.0); |
20525 | let r = _mm512_mask_fmsub_ph(a, 0b01010101010101010101010101010101, b, c); |
20526 | let e = _mm512_set_ph( |
20527 | 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, |
20528 | 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, |
20529 | ); |
20530 | assert_eq_m512h(r, e); |
20531 | } |
20532 | |
20533 | #[simd_test(enable = "avx512fp16" )] |
20534 | unsafe fn test_mm512_mask3_fmsub_ph() { |
20535 | let a = _mm512_set1_ph(1.0); |
20536 | let b = _mm512_set1_ph(2.0); |
20537 | let c = _mm512_set1_ph(3.0); |
20538 | let r = _mm512_mask3_fmsub_ph(a, b, c, 0b01010101010101010101010101010101); |
20539 | let e = _mm512_set_ph( |
20540 | 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, |
20541 | 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, |
20542 | ); |
20543 | assert_eq_m512h(r, e); |
20544 | } |
20545 | |
20546 | #[simd_test(enable = "avx512fp16" )] |
20547 | unsafe fn test_mm512_maskz_fmsub_ph() { |
20548 | let a = _mm512_set1_ph(1.0); |
20549 | let b = _mm512_set1_ph(2.0); |
20550 | let c = _mm512_set1_ph(3.0); |
20551 | let r = _mm512_maskz_fmsub_ph(0b01010101010101010101010101010101, a, b, c); |
20552 | let e = _mm512_set_ph( |
20553 | 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, |
20554 | 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, |
20555 | ); |
20556 | assert_eq_m512h(r, e); |
20557 | } |
20558 | |
20559 | #[simd_test(enable = "avx512fp16" )] |
20560 | unsafe fn test_mm512_fmsub_round_ph() { |
20561 | let a = _mm512_set1_ph(1.0); |
20562 | let b = _mm512_set1_ph(2.0); |
20563 | let c = _mm512_set1_ph(3.0); |
20564 | let r = _mm512_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
20565 | let e = _mm512_set1_ph(-1.0); |
20566 | assert_eq_m512h(r, e); |
20567 | } |
20568 | |
20569 | #[simd_test(enable = "avx512fp16" )] |
20570 | unsafe fn test_mm512_mask_fmsub_round_ph() { |
20571 | let a = _mm512_set1_ph(1.0); |
20572 | let b = _mm512_set1_ph(2.0); |
20573 | let c = _mm512_set1_ph(3.0); |
20574 | let r = _mm512_mask_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20575 | a, |
20576 | 0b01010101010101010101010101010101, |
20577 | b, |
20578 | c, |
20579 | ); |
20580 | let e = _mm512_set_ph( |
20581 | 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, |
20582 | 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, |
20583 | ); |
20584 | assert_eq_m512h(r, e); |
20585 | } |
20586 | |
20587 | #[simd_test(enable = "avx512fp16" )] |
20588 | unsafe fn test_mm512_mask3_fmsub_round_ph() { |
20589 | let a = _mm512_set1_ph(1.0); |
20590 | let b = _mm512_set1_ph(2.0); |
20591 | let c = _mm512_set1_ph(3.0); |
20592 | let r = _mm512_mask3_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20593 | a, |
20594 | b, |
20595 | c, |
20596 | 0b01010101010101010101010101010101, |
20597 | ); |
20598 | let e = _mm512_set_ph( |
20599 | 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, |
20600 | 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, |
20601 | ); |
20602 | assert_eq_m512h(r, e); |
20603 | } |
20604 | |
20605 | #[simd_test(enable = "avx512fp16" )] |
20606 | unsafe fn test_mm512_maskz_fmsub_round_ph() { |
20607 | let a = _mm512_set1_ph(1.0); |
20608 | let b = _mm512_set1_ph(2.0); |
20609 | let c = _mm512_set1_ph(3.0); |
20610 | let r = _mm512_maskz_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20611 | 0b01010101010101010101010101010101, |
20612 | a, |
20613 | b, |
20614 | c, |
20615 | ); |
20616 | let e = _mm512_set_ph( |
20617 | 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, |
20618 | 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, |
20619 | ); |
20620 | assert_eq_m512h(r, e); |
20621 | } |
20622 | |
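// Scalar `_sh` variants compute only lane 0 and copy the upper seven lanes from `a`
// (from `c` for the `mask3` forms). When the mask bit is clear, lane 0 falls back to
// `a`, `c`, or zero, following the same mask/mask3/maskz conventions as above.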
20623 | #[simd_test(enable = "avx512fp16" )] |
20624 | unsafe fn test_mm_fmsub_sh() { |
20625 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20626 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20627 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20628 | let r = _mm_fmsub_sh(a, b, c); |
20629 | let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.); |
20630 | assert_eq_m128h(r, e); |
20631 | } |
20632 | |
20633 | #[simd_test(enable = "avx512fp16" )] |
20634 | unsafe fn test_mm_mask_fmsub_sh() { |
20635 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20636 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20637 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20638 | let r = _mm_mask_fmsub_sh(a, 0, b, c); |
20639 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20640 | assert_eq_m128h(r, e); |
20641 | let r = _mm_mask_fmsub_sh(a, 1, b, c); |
20642 | let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.); |
20643 | assert_eq_m128h(r, e); |
20644 | } |
20645 | |
20646 | #[simd_test(enable = "avx512fp16" )] |
20647 | unsafe fn test_mm_mask3_fmsub_sh() { |
20648 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20649 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20650 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20651 | let r = _mm_mask3_fmsub_sh(a, b, c, 0); |
20652 | let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20653 | assert_eq_m128h(r, e); |
20654 | let r = _mm_mask3_fmsub_sh(a, b, c, 1); |
20655 | let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.); |
20656 | assert_eq_m128h(r, e); |
20657 | } |
20658 | |
20659 | #[simd_test(enable = "avx512fp16" )] |
20660 | unsafe fn test_mm_maskz_fmsub_sh() { |
20661 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20662 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20663 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20664 | let r = _mm_maskz_fmsub_sh(0, a, b, c); |
20665 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
20666 | assert_eq_m128h(r, e); |
20667 | let r = _mm_maskz_fmsub_sh(1, a, b, c); |
20668 | let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.); |
20669 | assert_eq_m128h(r, e); |
20670 | } |
20671 | |
20672 | #[simd_test(enable = "avx512fp16" )] |
20673 | unsafe fn test_mm_fmsub_round_sh() { |
20674 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20675 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20676 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20677 | let r = _mm_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
20678 | let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.); |
20679 | assert_eq_m128h(r, e); |
20680 | } |
20681 | |
20682 | #[simd_test(enable = "avx512fp16" )] |
20683 | unsafe fn test_mm_mask_fmsub_round_sh() { |
20684 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20685 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20686 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20687 | let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20688 | a, 0, b, c, |
20689 | ); |
20690 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20691 | assert_eq_m128h(r, e); |
20692 | let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20693 | a, 1, b, c, |
20694 | ); |
20695 | let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.); |
20696 | assert_eq_m128h(r, e); |
20697 | } |
20698 | |
20699 | #[simd_test(enable = "avx512fp16" )] |
20700 | unsafe fn test_mm_mask3_fmsub_round_sh() { |
20701 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20702 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20703 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20704 | let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20705 | a, b, c, 0, |
20706 | ); |
20707 | let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20708 | assert_eq_m128h(r, e); |
20709 | let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20710 | a, b, c, 1, |
20711 | ); |
20712 | let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.); |
20713 | assert_eq_m128h(r, e); |
20714 | } |
20715 | |
20716 | #[simd_test(enable = "avx512fp16" )] |
20717 | unsafe fn test_mm_maskz_fmsub_round_sh() { |
20718 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20719 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20720 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20721 | let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20722 | 0, a, b, c, |
20723 | ); |
20724 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
20725 | assert_eq_m128h(r, e); |
20726 | let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20727 | 1, a, b, c, |
20728 | ); |
20729 | let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.); |
20730 | assert_eq_m128h(r, e); |
20731 | } |
20732 | |
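// fnmadd computes -(a * b) + c, so with a = 1.0, b = 2.0, c = 3.0 every selected lane is
// -(2.0) + 3.0 = 1.0.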
20733 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20734 | unsafe fn test_mm_fnmadd_ph() { |
20735 | let a = _mm_set1_ph(1.0); |
20736 | let b = _mm_set1_ph(2.0); |
20737 | let c = _mm_set1_ph(3.0); |
20738 | let r = _mm_fnmadd_ph(a, b, c); |
20739 | let e = _mm_set1_ph(1.0); |
20740 | assert_eq_m128h(r, e); |
20741 | } |
20742 | |
20743 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20744 | unsafe fn test_mm_mask_fnmadd_ph() { |
20745 | let a = _mm_set1_ph(1.0); |
20746 | let b = _mm_set1_ph(2.0); |
20747 | let c = _mm_set1_ph(3.0); |
20748 | let r = _mm_mask_fnmadd_ph(a, 0b01010101, b, c); |
20749 | let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0); |
20750 | assert_eq_m128h(r, e); |
20751 | } |
20752 | |
20753 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20754 | unsafe fn test_mm_mask3_fnmadd_ph() { |
20755 | let a = _mm_set1_ph(1.0); |
20756 | let b = _mm_set1_ph(2.0); |
20757 | let c = _mm_set1_ph(3.0); |
20758 | let r = _mm_mask3_fnmadd_ph(a, b, c, 0b01010101); |
20759 | let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0); |
20760 | assert_eq_m128h(r, e); |
20761 | } |
20762 | |
20763 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20764 | unsafe fn test_mm_maskz_fnmadd_ph() { |
20765 | let a = _mm_set1_ph(1.0); |
20766 | let b = _mm_set1_ph(2.0); |
20767 | let c = _mm_set1_ph(3.0); |
20768 | let r = _mm_maskz_fnmadd_ph(0b01010101, a, b, c); |
20769 | let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0); |
20770 | assert_eq_m128h(r, e); |
20771 | } |
20772 | |
20773 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20774 | unsafe fn test_mm256_fnmadd_ph() { |
20775 | let a = _mm256_set1_ph(1.0); |
20776 | let b = _mm256_set1_ph(2.0); |
20777 | let c = _mm256_set1_ph(3.0); |
20778 | let r = _mm256_fnmadd_ph(a, b, c); |
20779 | let e = _mm256_set1_ph(1.0); |
20780 | assert_eq_m256h(r, e); |
20781 | } |
20782 | |
20783 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20784 | unsafe fn test_mm256_mask_fnmadd_ph() { |
20785 | let a = _mm256_set1_ph(1.0); |
20786 | let b = _mm256_set1_ph(2.0); |
20787 | let c = _mm256_set1_ph(3.0); |
20788 | let r = _mm256_mask_fnmadd_ph(a, 0b0101010101010101, b, c); |
20789 | let e = _mm256_set_ph( |
20790 | 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, |
20791 | ); |
20792 | assert_eq_m256h(r, e); |
20793 | } |
20794 | |
20795 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20796 | unsafe fn test_mm256_mask3_fnmadd_ph() { |
20797 | let a = _mm256_set1_ph(1.0); |
20798 | let b = _mm256_set1_ph(2.0); |
20799 | let c = _mm256_set1_ph(3.0); |
20800 | let r = _mm256_mask3_fnmadd_ph(a, b, c, 0b0101010101010101); |
20801 | let e = _mm256_set_ph( |
20802 | 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, |
20803 | ); |
20804 | assert_eq_m256h(r, e); |
20805 | } |
20806 | |
20807 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20808 | unsafe fn test_mm256_maskz_fnmadd_ph() { |
20809 | let a = _mm256_set1_ph(1.0); |
20810 | let b = _mm256_set1_ph(2.0); |
20811 | let c = _mm256_set1_ph(3.0); |
20812 | let r = _mm256_maskz_fnmadd_ph(0b0101010101010101, a, b, c); |
20813 | let e = _mm256_set_ph( |
20814 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
20815 | ); |
20816 | assert_eq_m256h(r, e); |
20817 | } |
20818 | |
20819 | #[simd_test(enable = "avx512fp16" )] |
20820 | unsafe fn test_mm512_fnmadd_ph() { |
20821 | let a = _mm512_set1_ph(1.0); |
20822 | let b = _mm512_set1_ph(2.0); |
20823 | let c = _mm512_set1_ph(3.0); |
20824 | let r = _mm512_fnmadd_ph(a, b, c); |
20825 | let e = _mm512_set1_ph(1.0); |
20826 | assert_eq_m512h(r, e); |
20827 | } |
20828 | |
20829 | #[simd_test(enable = "avx512fp16" )] |
20830 | unsafe fn test_mm512_mask_fnmadd_ph() { |
20831 | let a = _mm512_set1_ph(1.0); |
20832 | let b = _mm512_set1_ph(2.0); |
20833 | let c = _mm512_set1_ph(3.0); |
20834 | let r = _mm512_mask_fnmadd_ph(a, 0b01010101010101010101010101010101, b, c); |
20835 | let e = _mm512_set_ph( |
20836 | 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, |
20837 | 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, |
20838 | ); |
20839 | assert_eq_m512h(r, e); |
20840 | } |
20841 | |
20842 | #[simd_test(enable = "avx512fp16" )] |
20843 | unsafe fn test_mm512_mask3_fnmadd_ph() { |
20844 | let a = _mm512_set1_ph(1.0); |
20845 | let b = _mm512_set1_ph(2.0); |
20846 | let c = _mm512_set1_ph(3.0); |
20847 | let r = _mm512_mask3_fnmadd_ph(a, b, c, 0b01010101010101010101010101010101); |
20848 | let e = _mm512_set_ph( |
20849 | 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, |
20850 | 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, |
20851 | ); |
20852 | assert_eq_m512h(r, e); |
20853 | } |
20854 | |
20855 | #[simd_test(enable = "avx512fp16" )] |
20856 | unsafe fn test_mm512_maskz_fnmadd_ph() { |
20857 | let a = _mm512_set1_ph(1.0); |
20858 | let b = _mm512_set1_ph(2.0); |
20859 | let c = _mm512_set1_ph(3.0); |
20860 | let r = _mm512_maskz_fnmadd_ph(0b01010101010101010101010101010101, a, b, c); |
20861 | let e = _mm512_set_ph( |
20862 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, |
20863 | 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
20864 | ); |
20865 | assert_eq_m512h(r, e); |
20866 | } |
20867 | |
20868 | #[simd_test(enable = "avx512fp16" )] |
20869 | unsafe fn test_mm512_fnmadd_round_ph() { |
20870 | let a = _mm512_set1_ph(1.0); |
20871 | let b = _mm512_set1_ph(2.0); |
20872 | let c = _mm512_set1_ph(3.0); |
20873 | let r = |
20874 | _mm512_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
20875 | let e = _mm512_set1_ph(1.0); |
20876 | assert_eq_m512h(r, e); |
20877 | } |
20878 | |
20879 | #[simd_test(enable = "avx512fp16" )] |
20880 | unsafe fn test_mm512_mask_fnmadd_round_ph() { |
20881 | let a = _mm512_set1_ph(1.0); |
20882 | let b = _mm512_set1_ph(2.0); |
20883 | let c = _mm512_set1_ph(3.0); |
20884 | let r = _mm512_mask_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20885 | a, |
20886 | 0b01010101010101010101010101010101, |
20887 | b, |
20888 | c, |
20889 | ); |
20890 | let e = _mm512_set_ph( |
20891 | 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, |
20892 | 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, |
20893 | ); |
20894 | assert_eq_m512h(r, e); |
20895 | } |
20896 | |
20897 | #[simd_test(enable = "avx512fp16" )] |
20898 | unsafe fn test_mm512_mask3_fnmadd_round_ph() { |
20899 | let a = _mm512_set1_ph(1.0); |
20900 | let b = _mm512_set1_ph(2.0); |
20901 | let c = _mm512_set1_ph(3.0); |
20902 | let r = _mm512_mask3_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20903 | a, |
20904 | b, |
20905 | c, |
20906 | 0b01010101010101010101010101010101, |
20907 | ); |
20908 | let e = _mm512_set_ph( |
20909 | 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, |
20910 | 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, |
20911 | ); |
20912 | assert_eq_m512h(r, e); |
20913 | } |
20914 | |
20915 | #[simd_test(enable = "avx512fp16" )] |
20916 | unsafe fn test_mm512_maskz_fnmadd_round_ph() { |
20917 | let a = _mm512_set1_ph(1.0); |
20918 | let b = _mm512_set1_ph(2.0); |
20919 | let c = _mm512_set1_ph(3.0); |
20920 | let r = _mm512_maskz_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20921 | 0b01010101010101010101010101010101, |
20922 | a, |
20923 | b, |
20924 | c, |
20925 | ); |
20926 | let e = _mm512_set_ph( |
20927 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, |
20928 | 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
20929 | ); |
20930 | assert_eq_m512h(r, e); |
20931 | } |
20932 | |
20933 | #[simd_test(enable = "avx512fp16" )] |
20934 | unsafe fn test_mm_fnmadd_sh() { |
20935 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20936 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20937 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20938 | let r = _mm_fnmadd_sh(a, b, c); |
20939 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20940 | assert_eq_m128h(r, e); |
20941 | } |
20942 | |
20943 | #[simd_test(enable = "avx512fp16" )] |
20944 | unsafe fn test_mm_mask_fnmadd_sh() { |
20945 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20946 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20947 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20948 | let r = _mm_mask_fnmadd_sh(a, 0, b, c); |
20949 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20950 | assert_eq_m128h(r, e); |
20951 | let r = _mm_mask_fnmadd_sh(a, 1, b, c); |
20952 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20953 | assert_eq_m128h(r, e); |
20954 | } |
20955 | |
20956 | #[simd_test(enable = "avx512fp16" )] |
20957 | unsafe fn test_mm_mask3_fnmadd_sh() { |
20958 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20959 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20960 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20961 | let r = _mm_mask3_fnmadd_sh(a, b, c, 0); |
20962 | let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20963 | assert_eq_m128h(r, e); |
20964 | let r = _mm_mask3_fnmadd_sh(a, b, c, 1); |
20965 | let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.); |
20966 | assert_eq_m128h(r, e); |
20967 | } |
20968 | |
20969 | #[simd_test(enable = "avx512fp16" )] |
20970 | unsafe fn test_mm_maskz_fnmadd_sh() { |
20971 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20972 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20973 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20974 | let r = _mm_maskz_fnmadd_sh(0, a, b, c); |
20975 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
20976 | assert_eq_m128h(r, e); |
20977 | let r = _mm_maskz_fnmadd_sh(1, a, b, c); |
20978 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20979 | assert_eq_m128h(r, e); |
20980 | } |
20981 | |
20982 | #[simd_test(enable = "avx512fp16" )] |
20983 | unsafe fn test_mm_fnmadd_round_sh() { |
20984 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20985 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20986 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20987 | let r = _mm_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
20988 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20989 | assert_eq_m128h(r, e); |
20990 | } |
20991 | |
20992 | #[simd_test(enable = "avx512fp16" )] |
20993 | unsafe fn test_mm_mask_fnmadd_round_sh() { |
20994 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20995 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20996 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20997 | let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20998 | a, 0, b, c, |
20999 | ); |
21000 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21001 | assert_eq_m128h(r, e); |
21002 | let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21003 | a, 1, b, c, |
21004 | ); |
21005 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21006 | assert_eq_m128h(r, e); |
21007 | } |
21008 | |
21009 | #[simd_test(enable = "avx512fp16" )] |
21010 | unsafe fn test_mm_mask3_fnmadd_round_sh() { |
21011 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21012 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21013 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21014 | let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21015 | a, b, c, 0, |
21016 | ); |
21017 | let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21018 | assert_eq_m128h(r, e); |
21019 | let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21020 | a, b, c, 1, |
21021 | ); |
21022 | let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.); |
21023 | assert_eq_m128h(r, e); |
21024 | } |
21025 | |
21026 | #[simd_test(enable = "avx512fp16" )] |
21027 | unsafe fn test_mm_maskz_fnmadd_round_sh() { |
21028 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21029 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21030 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21031 | let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21032 | 0, a, b, c, |
21033 | ); |
21034 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
21035 | assert_eq_m128h(r, e); |
21036 | let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21037 | 1, a, b, c, |
21038 | ); |
21039 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21040 | assert_eq_m128h(r, e); |
21041 | } |
21042 | |
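// fnmsub computes -(a * b) - c, so with a = 1.0, b = 2.0, c = 3.0 every selected lane is
// -(2.0) - 3.0 = -5.0.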
21043 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21044 | unsafe fn test_mm_fnmsub_ph() { |
21045 | let a = _mm_set1_ph(1.0); |
21046 | let b = _mm_set1_ph(2.0); |
21047 | let c = _mm_set1_ph(3.0); |
21048 | let r = _mm_fnmsub_ph(a, b, c); |
21049 | let e = _mm_set1_ph(-5.0); |
21050 | assert_eq_m128h(r, e); |
21051 | } |
21052 | |
21053 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21054 | unsafe fn test_mm_mask_fnmsub_ph() { |
21055 | let a = _mm_set1_ph(1.0); |
21056 | let b = _mm_set1_ph(2.0); |
21057 | let c = _mm_set1_ph(3.0); |
21058 | let r = _mm_mask_fnmsub_ph(a, 0b01010101, b, c); |
21059 | let e = _mm_set_ph(1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0); |
21060 | assert_eq_m128h(r, e); |
21061 | } |
21062 | |
21063 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21064 | unsafe fn test_mm_mask3_fnmsub_ph() { |
21065 | let a = _mm_set1_ph(1.0); |
21066 | let b = _mm_set1_ph(2.0); |
21067 | let c = _mm_set1_ph(3.0); |
21068 | let r = _mm_mask3_fnmsub_ph(a, b, c, 0b01010101); |
21069 | let e = _mm_set_ph(3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0); |
21070 | assert_eq_m128h(r, e); |
21071 | } |
21072 | |
21073 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21074 | unsafe fn test_mm_maskz_fnmsub_ph() { |
21075 | let a = _mm_set1_ph(1.0); |
21076 | let b = _mm_set1_ph(2.0); |
21077 | let c = _mm_set1_ph(3.0); |
21078 | let r = _mm_maskz_fnmsub_ph(0b01010101, a, b, c); |
21079 | let e = _mm_set_ph(0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0); |
21080 | assert_eq_m128h(r, e); |
21081 | } |
21082 | |
21083 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21084 | unsafe fn test_mm256_fnmsub_ph() { |
21085 | let a = _mm256_set1_ph(1.0); |
21086 | let b = _mm256_set1_ph(2.0); |
21087 | let c = _mm256_set1_ph(3.0); |
21088 | let r = _mm256_fnmsub_ph(a, b, c); |
21089 | let e = _mm256_set1_ph(-5.0); |
21090 | assert_eq_m256h(r, e); |
21091 | } |
21092 | |
21093 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21094 | unsafe fn test_mm256_mask_fnmsub_ph() { |
21095 | let a = _mm256_set1_ph(1.0); |
21096 | let b = _mm256_set1_ph(2.0); |
21097 | let c = _mm256_set1_ph(3.0); |
21098 | let r = _mm256_mask_fnmsub_ph(a, 0b0101010101010101, b, c); |
21099 | let e = _mm256_set_ph( |
21100 | 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, |
21101 | ); |
21102 | assert_eq_m256h(r, e); |
21103 | } |
21104 | |
21105 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21106 | unsafe fn test_mm256_mask3_fnmsub_ph() { |
21107 | let a = _mm256_set1_ph(1.0); |
21108 | let b = _mm256_set1_ph(2.0); |
21109 | let c = _mm256_set1_ph(3.0); |
21110 | let r = _mm256_mask3_fnmsub_ph(a, b, c, 0b0101010101010101); |
21111 | let e = _mm256_set_ph( |
21112 | 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, |
21113 | ); |
21114 | assert_eq_m256h(r, e); |
21115 | } |
21116 | |
21117 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21118 | unsafe fn test_mm256_maskz_fnmsub_ph() { |
21119 | let a = _mm256_set1_ph(1.0); |
21120 | let b = _mm256_set1_ph(2.0); |
21121 | let c = _mm256_set1_ph(3.0); |
21122 | let r = _mm256_maskz_fnmsub_ph(0b0101010101010101, a, b, c); |
21123 | let e = _mm256_set_ph( |
21124 | 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, |
21125 | ); |
21126 | assert_eq_m256h(r, e); |
21127 | } |
21128 | |
21129 | #[simd_test(enable = "avx512fp16" )] |
21130 | unsafe fn test_mm512_fnmsub_ph() { |
21131 | let a = _mm512_set1_ph(1.0); |
21132 | let b = _mm512_set1_ph(2.0); |
21133 | let c = _mm512_set1_ph(3.0); |
21134 | let r = _mm512_fnmsub_ph(a, b, c); |
21135 | let e = _mm512_set1_ph(-5.0); |
21136 | assert_eq_m512h(r, e); |
21137 | } |
21138 | |
21139 | #[simd_test(enable = "avx512fp16" )] |
21140 | unsafe fn test_mm512_mask_fnmsub_ph() { |
21141 | let a = _mm512_set1_ph(1.0); |
21142 | let b = _mm512_set1_ph(2.0); |
21143 | let c = _mm512_set1_ph(3.0); |
21144 | let r = _mm512_mask_fnmsub_ph(a, 0b01010101010101010101010101010101, b, c); |
21145 | let e = _mm512_set_ph( |
21146 | 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, |
21147 | 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, |
21148 | ); |
21149 | assert_eq_m512h(r, e); |
21150 | } |
21151 | |
21152 | #[simd_test(enable = "avx512fp16" )] |
21153 | unsafe fn test_mm512_mask3_fnmsub_ph() { |
21154 | let a = _mm512_set1_ph(1.0); |
21155 | let b = _mm512_set1_ph(2.0); |
21156 | let c = _mm512_set1_ph(3.0); |
21157 | let r = _mm512_mask3_fnmsub_ph(a, b, c, 0b01010101010101010101010101010101); |
21158 | let e = _mm512_set_ph( |
21159 | 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, |
21160 | 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, |
21161 | ); |
21162 | assert_eq_m512h(r, e); |
21163 | } |
21164 | |
21165 | #[simd_test(enable = "avx512fp16" )] |
21166 | unsafe fn test_mm512_maskz_fnmsub_ph() { |
21167 | let a = _mm512_set1_ph(1.0); |
21168 | let b = _mm512_set1_ph(2.0); |
21169 | let c = _mm512_set1_ph(3.0); |
21170 | let r = _mm512_maskz_fnmsub_ph(0b01010101010101010101010101010101, a, b, c); |
21171 | let e = _mm512_set_ph( |
21172 | 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, |
21173 | 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, |
21174 | ); |
21175 | assert_eq_m512h(r, e); |
21176 | } |
21177 | |
21178 | #[simd_test(enable = "avx512fp16" )] |
21179 | unsafe fn test_mm512_fnmsub_round_ph() { |
21180 | let a = _mm512_set1_ph(1.0); |
21181 | let b = _mm512_set1_ph(2.0); |
21182 | let c = _mm512_set1_ph(3.0); |
21183 | let r = |
21184 | _mm512_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
21185 | let e = _mm512_set1_ph(-5.0); |
21186 | assert_eq_m512h(r, e); |
21187 | } |
21188 | |
21189 | #[simd_test(enable = "avx512fp16" )] |
21190 | unsafe fn test_mm512_mask_fnmsub_round_ph() { |
21191 | let a = _mm512_set1_ph(1.0); |
21192 | let b = _mm512_set1_ph(2.0); |
21193 | let c = _mm512_set1_ph(3.0); |
21194 | let r = _mm512_mask_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21195 | a, |
21196 | 0b01010101010101010101010101010101, |
21197 | b, |
21198 | c, |
21199 | ); |
21200 | let e = _mm512_set_ph( |
21201 | 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, |
21202 | 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, |
21203 | ); |
21204 | assert_eq_m512h(r, e); |
21205 | } |
21206 | |
21207 | #[simd_test(enable = "avx512fp16" )] |
21208 | unsafe fn test_mm512_mask3_fnmsub_round_ph() { |
21209 | let a = _mm512_set1_ph(1.0); |
21210 | let b = _mm512_set1_ph(2.0); |
21211 | let c = _mm512_set1_ph(3.0); |
21212 | let r = _mm512_mask3_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21213 | a, |
21214 | b, |
21215 | c, |
21216 | 0b01010101010101010101010101010101, |
21217 | ); |
21218 | let e = _mm512_set_ph( |
21219 | 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, |
21220 | 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, |
21221 | ); |
21222 | assert_eq_m512h(r, e); |
21223 | } |
21224 | |
21225 | #[simd_test(enable = "avx512fp16" )] |
21226 | unsafe fn test_mm512_maskz_fnmsub_round_ph() { |
21227 | let a = _mm512_set1_ph(1.0); |
21228 | let b = _mm512_set1_ph(2.0); |
21229 | let c = _mm512_set1_ph(3.0); |
21230 | let r = _mm512_maskz_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21231 | 0b01010101010101010101010101010101, |
21232 | a, |
21233 | b, |
21234 | c, |
21235 | ); |
21236 | let e = _mm512_set_ph( |
21237 | 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, |
21238 | 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, |
21239 | ); |
21240 | assert_eq_m512h(r, e); |
21241 | } |
21242 | |
21243 | #[simd_test(enable = "avx512fp16" )] |
21244 | unsafe fn test_mm_fnmsub_sh() { |
21245 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21246 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21247 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21248 | let r = _mm_fnmsub_sh(a, b, c); |
21249 | let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.); |
21250 | assert_eq_m128h(r, e); |
21251 | } |
21252 | |
21253 | #[simd_test(enable = "avx512fp16" )] |
21254 | unsafe fn test_mm_mask_fnmsub_sh() { |
21255 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21256 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21257 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21258 | let r = _mm_mask_fnmsub_sh(a, 0, b, c); |
21259 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21260 | assert_eq_m128h(r, e); |
21261 | let r = _mm_mask_fnmsub_sh(a, 1, b, c); |
21262 | let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.); |
21263 | assert_eq_m128h(r, e); |
21264 | } |
21265 | |
21266 | #[simd_test(enable = "avx512fp16" )] |
21267 | unsafe fn test_mm_mask3_fnmsub_sh() { |
21268 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21269 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21270 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21271 | let r = _mm_mask3_fnmsub_sh(a, b, c, 0); |
21272 | let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21273 | assert_eq_m128h(r, e); |
21274 | let r = _mm_mask3_fnmsub_sh(a, b, c, 1); |
21275 | let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.); |
21276 | assert_eq_m128h(r, e); |
21277 | } |
21278 | |
21279 | #[simd_test(enable = "avx512fp16" )] |
21280 | unsafe fn test_mm_maskz_fnmsub_sh() { |
21281 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21282 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21283 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21284 | let r = _mm_maskz_fnmsub_sh(0, a, b, c); |
21285 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
21286 | assert_eq_m128h(r, e); |
21287 | let r = _mm_maskz_fnmsub_sh(1, a, b, c); |
21288 | let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.); |
21289 | assert_eq_m128h(r, e); |
21290 | } |
21291 | |
21292 | #[simd_test(enable = "avx512fp16" )] |
21293 | unsafe fn test_mm_fnmsub_round_sh() { |
21294 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21295 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21296 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21297 | let r = _mm_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
21298 | let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.); |
21299 | assert_eq_m128h(r, e); |
21300 | } |
21301 | |
21302 | #[simd_test(enable = "avx512fp16" )] |
21303 | unsafe fn test_mm_mask_fnmsub_round_sh() { |
21304 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21305 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21306 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21307 | let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21308 | a, 0, b, c, |
21309 | ); |
21310 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21311 | assert_eq_m128h(r, e); |
21312 | let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21313 | a, 1, b, c, |
21314 | ); |
21315 | let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.); |
21316 | assert_eq_m128h(r, e); |
21317 | } |
21318 | |
21319 | #[simd_test(enable = "avx512fp16" )] |
21320 | unsafe fn test_mm_mask3_fnmsub_round_sh() { |
21321 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21322 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21323 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21324 | let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21325 | a, b, c, 0, |
21326 | ); |
21327 | let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21328 | assert_eq_m128h(r, e); |
21329 | let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21330 | a, b, c, 1, |
21331 | ); |
21332 | let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.); |
21333 | assert_eq_m128h(r, e); |
21334 | } |
21335 | |
21336 | #[simd_test(enable = "avx512fp16" )] |
21337 | unsafe fn test_mm_maskz_fnmsub_round_sh() { |
21338 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21339 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21340 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21341 | let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21342 | 0, a, b, c, |
21343 | ); |
21344 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
21345 | assert_eq_m128h(r, e); |
21346 | let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21347 | 1, a, b, c, |
21348 | ); |
21349 | let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.); |
21350 | assert_eq_m128h(r, e); |
21351 | } |
21352 | |
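// fmaddsub subtracts c in even-indexed lanes and adds c in odd-indexed lanes, giving
// -1.0 in lane 0 and 5.0 in lane 1. Note that `_mm_set_ph` lists the highest lane first,
// so the expected vectors below read 5.0, -1.0, ... from left to right.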
21353 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21354 | unsafe fn test_mm_fmaddsub_ph() { |
21355 | let a = _mm_set1_ph(1.0); |
21356 | let b = _mm_set1_ph(2.0); |
21357 | let c = _mm_set1_ph(3.0); |
21358 | let r = _mm_fmaddsub_ph(a, b, c); |
21359 | let e = _mm_set_ph(5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0); |
21360 | assert_eq_m128h(r, e); |
21361 | } |
21362 | |
21363 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21364 | unsafe fn test_mm_mask_fmaddsub_ph() { |
21365 | let a = _mm_set1_ph(1.0); |
21366 | let b = _mm_set1_ph(2.0); |
21367 | let c = _mm_set1_ph(3.0); |
21368 | let r = _mm_mask_fmaddsub_ph(a, 0b00110011, b, c); |
21369 | let e = _mm_set_ph(1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0); |
21370 | assert_eq_m128h(r, e); |
21371 | } |
21372 | |
21373 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21374 | unsafe fn test_mm_mask3_fmaddsub_ph() { |
21375 | let a = _mm_set1_ph(1.0); |
21376 | let b = _mm_set1_ph(2.0); |
21377 | let c = _mm_set1_ph(3.0); |
21378 | let r = _mm_mask3_fmaddsub_ph(a, b, c, 0b00110011); |
21379 | let e = _mm_set_ph(3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0); |
21380 | assert_eq_m128h(r, e); |
21381 | } |
21382 | |
21383 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21384 | unsafe fn test_mm_maskz_fmaddsub_ph() { |
21385 | let a = _mm_set1_ph(1.0); |
21386 | let b = _mm_set1_ph(2.0); |
21387 | let c = _mm_set1_ph(3.0); |
21388 | let r = _mm_maskz_fmaddsub_ph(0b00110011, a, b, c); |
21389 | let e = _mm_set_ph(0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0); |
21390 | assert_eq_m128h(r, e); |
21391 | } |
21392 | |
21393 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21394 | unsafe fn test_mm256_fmaddsub_ph() { |
21395 | let a = _mm256_set1_ph(1.0); |
21396 | let b = _mm256_set1_ph(2.0); |
21397 | let c = _mm256_set1_ph(3.0); |
21398 | let r = _mm256_fmaddsub_ph(a, b, c); |
21399 | let e = _mm256_set_ph( |
21400 | 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, |
21401 | ); |
21402 | assert_eq_m256h(r, e); |
21403 | } |
21404 | |
21405 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21406 | unsafe fn test_mm256_mask_fmaddsub_ph() { |
21407 | let a = _mm256_set1_ph(1.0); |
21408 | let b = _mm256_set1_ph(2.0); |
21409 | let c = _mm256_set1_ph(3.0); |
21410 | let r = _mm256_mask_fmaddsub_ph(a, 0b0011001100110011, b, c); |
21411 | let e = _mm256_set_ph( |
21412 | 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, |
21413 | ); |
21414 | assert_eq_m256h(r, e); |
21415 | } |
21416 | |
21417 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21418 | unsafe fn test_mm256_mask3_fmaddsub_ph() { |
21419 | let a = _mm256_set1_ph(1.0); |
21420 | let b = _mm256_set1_ph(2.0); |
21421 | let c = _mm256_set1_ph(3.0); |
21422 | let r = _mm256_mask3_fmaddsub_ph(a, b, c, 0b0011001100110011); |
21423 | let e = _mm256_set_ph( |
21424 | 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, |
21425 | ); |
21426 | assert_eq_m256h(r, e); |
21427 | } |
21428 | |
21429 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21430 | unsafe fn test_mm256_maskz_fmaddsub_ph() { |
21431 | let a = _mm256_set1_ph(1.0); |
21432 | let b = _mm256_set1_ph(2.0); |
21433 | let c = _mm256_set1_ph(3.0); |
21434 | let r = _mm256_maskz_fmaddsub_ph(0b0011001100110011, a, b, c); |
21435 | let e = _mm256_set_ph( |
21436 | 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, |
21437 | ); |
21438 | assert_eq_m256h(r, e); |
21439 | } |
21440 | |
21441 | #[simd_test(enable = "avx512fp16" )] |
21442 | unsafe fn test_mm512_fmaddsub_ph() { |
21443 | let a = _mm512_set1_ph(1.0); |
21444 | let b = _mm512_set1_ph(2.0); |
21445 | let c = _mm512_set1_ph(3.0); |
21446 | let r = _mm512_fmaddsub_ph(a, b, c); |
21447 | let e = _mm512_set_ph( |
21448 | 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, |
21449 | 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, |
21450 | ); |
21451 | assert_eq_m512h(r, e); |
21452 | } |
21453 | |
21454 | #[simd_test(enable = "avx512fp16" )] |
21455 | unsafe fn test_mm512_mask_fmaddsub_ph() { |
21456 | let a = _mm512_set1_ph(1.0); |
21457 | let b = _mm512_set1_ph(2.0); |
21458 | let c = _mm512_set1_ph(3.0); |
21459 | let r = _mm512_mask_fmaddsub_ph(a, 0b00110011001100110011001100110011, b, c); |
21460 | let e = _mm512_set_ph( |
21461 | 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, |
21462 | 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, |
21463 | ); |
21464 | assert_eq_m512h(r, e); |
21465 | } |
21466 | |
21467 | #[simd_test(enable = "avx512fp16" )] |
21468 | unsafe fn test_mm512_mask3_fmaddsub_ph() { |
21469 | let a = _mm512_set1_ph(1.0); |
21470 | let b = _mm512_set1_ph(2.0); |
21471 | let c = _mm512_set1_ph(3.0); |
21472 | let r = _mm512_mask3_fmaddsub_ph(a, b, c, 0b00110011001100110011001100110011); |
21473 | let e = _mm512_set_ph( |
21474 | 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, |
21475 | 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, |
21476 | ); |
21477 | assert_eq_m512h(r, e); |
21478 | } |
21479 | |
21480 | #[simd_test(enable = "avx512fp16" )] |
21481 | unsafe fn test_mm512_maskz_fmaddsub_ph() { |
21482 | let a = _mm512_set1_ph(1.0); |
21483 | let b = _mm512_set1_ph(2.0); |
21484 | let c = _mm512_set1_ph(3.0); |
21485 | let r = _mm512_maskz_fmaddsub_ph(0b00110011001100110011001100110011, a, b, c); |
21486 | let e = _mm512_set_ph( |
21487 | 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, |
21488 | 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, |
21489 | ); |
21490 | assert_eq_m512h(r, e); |
21491 | } |
21492 | |
21493 | #[simd_test(enable = "avx512fp16" )] |
21494 | unsafe fn test_mm512_fmaddsub_round_ph() { |
21495 | let a = _mm512_set1_ph(1.0); |
21496 | let b = _mm512_set1_ph(2.0); |
21497 | let c = _mm512_set1_ph(3.0); |
21498 | let r = |
21499 | _mm512_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
21500 | let e = _mm512_set_ph( |
21501 | 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, |
21502 | 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, |
21503 | ); |
21504 | assert_eq_m512h(r, e); |
21505 | } |
21506 | |
21507 | #[simd_test(enable = "avx512fp16" )] |
21508 | unsafe fn test_mm512_mask_fmaddsub_round_ph() { |
21509 | let a = _mm512_set1_ph(1.0); |
21510 | let b = _mm512_set1_ph(2.0); |
21511 | let c = _mm512_set1_ph(3.0); |
21512 | let r = _mm512_mask_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21513 | a, |
21514 | 0b00110011001100110011001100110011, |
21515 | b, |
21516 | c, |
21517 | ); |
21518 | let e = _mm512_set_ph( |
21519 | 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, |
21520 | 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, |
21521 | ); |
21522 | assert_eq_m512h(r, e); |
21523 | } |
21524 | |
21525 | #[simd_test(enable = "avx512fp16" )] |
21526 | unsafe fn test_mm512_mask3_fmaddsub_round_ph() { |
21527 | let a = _mm512_set1_ph(1.0); |
21528 | let b = _mm512_set1_ph(2.0); |
21529 | let c = _mm512_set1_ph(3.0); |
21530 | let r = _mm512_mask3_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21531 | a, |
21532 | b, |
21533 | c, |
21534 | 0b00110011001100110011001100110011, |
21535 | ); |
21536 | let e = _mm512_set_ph( |
21537 | 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, |
21538 | 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, |
21539 | ); |
21540 | assert_eq_m512h(r, e); |
21541 | } |
21542 | |
21543 | #[simd_test(enable = "avx512fp16" )] |
21544 | unsafe fn test_mm512_maskz_fmaddsub_round_ph() { |
21545 | let a = _mm512_set1_ph(1.0); |
21546 | let b = _mm512_set1_ph(2.0); |
21547 | let c = _mm512_set1_ph(3.0); |
21548 | let r = _mm512_maskz_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21549 | 0b00110011001100110011001100110011, |
21550 | a, |
21551 | b, |
21552 | c, |
21553 | ); |
21554 | let e = _mm512_set_ph( |
21555 | 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, |
21556 | 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, |
21557 | ); |
21558 | assert_eq_m512h(r, e); |
21559 | } |
21560 | |
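// fmsubadd is the opposite interleaving: it adds c in even-indexed lanes and subtracts c
// in odd-indexed lanes, so lane 0 is 5.0 and lane 1 is -1.0.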
21561 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21562 | unsafe fn test_mm_fmsubadd_ph() { |
21563 | let a = _mm_set1_ph(1.0); |
21564 | let b = _mm_set1_ph(2.0); |
21565 | let c = _mm_set1_ph(3.0); |
21566 | let r = _mm_fmsubadd_ph(a, b, c); |
21567 | let e = _mm_set_ph(-1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0); |
21568 | assert_eq_m128h(r, e); |
21569 | } |
21570 | |
21571 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21572 | unsafe fn test_mm_mask_fmsubadd_ph() { |
21573 | let a = _mm_set1_ph(1.0); |
21574 | let b = _mm_set1_ph(2.0); |
21575 | let c = _mm_set1_ph(3.0); |
21576 | let r = _mm_mask_fmsubadd_ph(a, 0b00110011, b, c); |
21577 | let e = _mm_set_ph(1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0); |
21578 | assert_eq_m128h(r, e); |
21579 | } |
21580 | |
21581 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21582 | unsafe fn test_mm_mask3_fmsubadd_ph() { |
21583 | let a = _mm_set1_ph(1.0); |
21584 | let b = _mm_set1_ph(2.0); |
21585 | let c = _mm_set1_ph(3.0); |
21586 | let r = _mm_mask3_fmsubadd_ph(a, b, c, 0b00110011); |
21587 | let e = _mm_set_ph(3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0); |
21588 | assert_eq_m128h(r, e); |
21589 | } |
21590 | |
21591 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21592 | unsafe fn test_mm_maskz_fmsubadd_ph() { |
21593 | let a = _mm_set1_ph(1.0); |
21594 | let b = _mm_set1_ph(2.0); |
21595 | let c = _mm_set1_ph(3.0); |
21596 | let r = _mm_maskz_fmsubadd_ph(0b00110011, a, b, c); |
21597 | let e = _mm_set_ph(0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0); |
21598 | assert_eq_m128h(r, e); |
21599 | } |
21600 | |
21601 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21602 | unsafe fn test_mm256_fmsubadd_ph() { |
21603 | let a = _mm256_set1_ph(1.0); |
21604 | let b = _mm256_set1_ph(2.0); |
21605 | let c = _mm256_set1_ph(3.0); |
21606 | let r = _mm256_fmsubadd_ph(a, b, c); |
21607 | let e = _mm256_set_ph( |
21608 | -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, |
21609 | ); |
21610 | assert_eq_m256h(r, e); |
21611 | } |
21612 | |
21613 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21614 | unsafe fn test_mm256_mask_fmsubadd_ph() { |
21615 | let a = _mm256_set1_ph(1.0); |
21616 | let b = _mm256_set1_ph(2.0); |
21617 | let c = _mm256_set1_ph(3.0); |
21618 | let r = _mm256_mask_fmsubadd_ph(a, 0b0011001100110011, b, c); |
21619 | let e = _mm256_set_ph( |
21620 | 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, |
21621 | ); |
21622 | assert_eq_m256h(r, e); |
21623 | } |
21624 | |
21625 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21626 | unsafe fn test_mm256_mask3_fmsubadd_ph() { |
21627 | let a = _mm256_set1_ph(1.0); |
21628 | let b = _mm256_set1_ph(2.0); |
21629 | let c = _mm256_set1_ph(3.0); |
21630 | let r = _mm256_mask3_fmsubadd_ph(a, b, c, 0b0011001100110011); |
21631 | let e = _mm256_set_ph( |
21632 | 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, |
21633 | ); |
21634 | assert_eq_m256h(r, e); |
21635 | } |
21636 | |
21637 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21638 | unsafe fn test_mm256_maskz_fmsubadd_ph() { |
21639 | let a = _mm256_set1_ph(1.0); |
21640 | let b = _mm256_set1_ph(2.0); |
21641 | let c = _mm256_set1_ph(3.0); |
21642 | let r = _mm256_maskz_fmsubadd_ph(0b0011001100110011, a, b, c); |
21643 | let e = _mm256_set_ph( |
21644 | 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, |
21645 | ); |
21646 | assert_eq_m256h(r, e); |
21647 | } |
21648 | |
21649 | #[simd_test(enable = "avx512fp16" )] |
21650 | unsafe fn test_mm512_fmsubadd_ph() { |
21651 | let a = _mm512_set1_ph(1.0); |
21652 | let b = _mm512_set1_ph(2.0); |
21653 | let c = _mm512_set1_ph(3.0); |
21654 | let r = _mm512_fmsubadd_ph(a, b, c); |
21655 | let e = _mm512_set_ph( |
21656 | -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, |
21657 | -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, |
21658 | ); |
21659 | assert_eq_m512h(r, e); |
21660 | } |
21661 | |
21662 | #[simd_test(enable = "avx512fp16" )] |
21663 | unsafe fn test_mm512_mask_fmsubadd_ph() { |
21664 | let a = _mm512_set1_ph(1.0); |
21665 | let b = _mm512_set1_ph(2.0); |
21666 | let c = _mm512_set1_ph(3.0); |
21667 | let r = _mm512_mask_fmsubadd_ph(a, 0b00110011001100110011001100110011, b, c); |
21668 | let e = _mm512_set_ph( |
21669 | 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, |
21670 | 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, |
21671 | ); |
21672 | assert_eq_m512h(r, e); |
21673 | } |
21674 | |
21675 | #[simd_test(enable = "avx512fp16" )] |
21676 | unsafe fn test_mm512_mask3_fmsubadd_ph() { |
21677 | let a = _mm512_set1_ph(1.0); |
21678 | let b = _mm512_set1_ph(2.0); |
21679 | let c = _mm512_set1_ph(3.0); |
21680 | let r = _mm512_mask3_fmsubadd_ph(a, b, c, 0b00110011001100110011001100110011); |
21681 | let e = _mm512_set_ph( |
21682 | 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, |
21683 | 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, |
21684 | ); |
21685 | assert_eq_m512h(r, e); |
21686 | } |
21687 | |
21688 | #[simd_test(enable = "avx512fp16" )] |
21689 | unsafe fn test_mm512_maskz_fmsubadd_ph() { |
21690 | let a = _mm512_set1_ph(1.0); |
21691 | let b = _mm512_set1_ph(2.0); |
21692 | let c = _mm512_set1_ph(3.0); |
21693 | let r = _mm512_maskz_fmsubadd_ph(0b00110011001100110011001100110011, a, b, c); |
21694 | let e = _mm512_set_ph( |
21695 | 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, |
21696 | 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, |
21697 | ); |
21698 | assert_eq_m512h(r, e); |
21699 | } |
21700 | |
21701 | #[simd_test(enable = "avx512fp16" )] |
21702 | unsafe fn test_mm512_fmsubadd_round_ph() { |
21703 | let a = _mm512_set1_ph(1.0); |
21704 | let b = _mm512_set1_ph(2.0); |
21705 | let c = _mm512_set1_ph(3.0); |
21706 | let r = |
21707 | _mm512_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
21708 | let e = _mm512_set_ph( |
21709 | -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, |
21710 | -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, |
21711 | ); |
21712 | assert_eq_m512h(r, e); |
21713 | } |
21714 | |
21715 | #[simd_test(enable = "avx512fp16" )] |
21716 | unsafe fn test_mm512_mask_fmsubadd_round_ph() { |
21717 | let a = _mm512_set1_ph(1.0); |
21718 | let b = _mm512_set1_ph(2.0); |
21719 | let c = _mm512_set1_ph(3.0); |
21720 | let r = _mm512_mask_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21721 | a, |
21722 | 0b00110011001100110011001100110011, |
21723 | b, |
21724 | c, |
21725 | ); |
21726 | let e = _mm512_set_ph( |
21727 | 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, |
21728 | 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, |
21729 | ); |
21730 | assert_eq_m512h(r, e); |
21731 | } |
21732 | |
21733 | #[simd_test(enable = "avx512fp16" )] |
21734 | unsafe fn test_mm512_mask3_fmsubadd_round_ph() { |
21735 | let a = _mm512_set1_ph(1.0); |
21736 | let b = _mm512_set1_ph(2.0); |
21737 | let c = _mm512_set1_ph(3.0); |
21738 | let r = _mm512_mask3_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21739 | a, |
21740 | b, |
21741 | c, |
21742 | 0b00110011001100110011001100110011, |
21743 | ); |
21744 | let e = _mm512_set_ph( |
21745 | 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, |
21746 | 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, |
21747 | ); |
21748 | assert_eq_m512h(r, e); |
21749 | } |
21750 | |
21751 | #[simd_test(enable = "avx512fp16" )] |
21752 | unsafe fn test_mm512_maskz_fmsubadd_round_ph() { |
21753 | let a = _mm512_set1_ph(1.0); |
21754 | let b = _mm512_set1_ph(2.0); |
21755 | let c = _mm512_set1_ph(3.0); |
21756 | let r = _mm512_maskz_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21757 | 0b00110011001100110011001100110011, |
21758 | a, |
21759 | b, |
21760 | c, |
21761 | ); |
21762 | let e = _mm512_set_ph( |
21763 | 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, |
21764 | 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, |
21765 | ); |
21766 | assert_eq_m512h(r, e); |
21767 | } |
21768 | |
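// rcp computes an approximate reciprocal. The input 2.0 is a power of two, so the
// approximation is expected to be exactly 0.5, which is why exact equality is asserted.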
21769 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21770 | unsafe fn test_mm_rcp_ph() { |
21771 | let a = _mm_set1_ph(2.0); |
21772 | let r = _mm_rcp_ph(a); |
21773 | let e = _mm_set1_ph(0.5); |
21774 | assert_eq_m128h(r, e); |
21775 | } |
21776 | |
21777 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21778 | unsafe fn test_mm_mask_rcp_ph() { |
21779 | let a = _mm_set1_ph(2.0); |
21780 | let src = _mm_set1_ph(1.0); |
21781 | let r = _mm_mask_rcp_ph(src, 0b01010101, a); |
21782 | let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5); |
21783 | assert_eq_m128h(r, e); |
21784 | } |
21785 | |
21786 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21787 | unsafe fn test_mm_maskz_rcp_ph() { |
21788 | let a = _mm_set1_ph(2.0); |
21789 | let r = _mm_maskz_rcp_ph(0b01010101, a); |
21790 | let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5); |
21791 | assert_eq_m128h(r, e); |
21792 | } |
21793 | |
21794 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21795 | unsafe fn test_mm256_rcp_ph() { |
21796 | let a = _mm256_set1_ph(2.0); |
21797 | let r = _mm256_rcp_ph(a); |
21798 | let e = _mm256_set1_ph(0.5); |
21799 | assert_eq_m256h(r, e); |
21800 | } |
21801 | |
21802 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21803 | unsafe fn test_mm256_mask_rcp_ph() { |
21804 | let a = _mm256_set1_ph(2.0); |
21805 | let src = _mm256_set1_ph(1.0); |
21806 | let r = _mm256_mask_rcp_ph(src, 0b0101010101010101, a); |
21807 | let e = _mm256_set_ph( |
21808 | 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, |
21809 | ); |
21810 | assert_eq_m256h(r, e); |
21811 | } |
21812 | |
21813 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21814 | unsafe fn test_mm256_maskz_rcp_ph() { |
21815 | let a = _mm256_set1_ph(2.0); |
21816 | let r = _mm256_maskz_rcp_ph(0b0101010101010101, a); |
21817 | let e = _mm256_set_ph( |
21818 | 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, |
21819 | ); |
21820 | assert_eq_m256h(r, e); |
21821 | } |
21822 | |
21823 | #[simd_test(enable = "avx512fp16" )] |
21824 | unsafe fn test_mm512_rcp_ph() { |
21825 | let a = _mm512_set1_ph(2.0); |
21826 | let r = _mm512_rcp_ph(a); |
21827 | let e = _mm512_set1_ph(0.5); |
21828 | assert_eq_m512h(r, e); |
21829 | } |
21830 | |
21831 | #[simd_test(enable = "avx512fp16" )] |
21832 | unsafe fn test_mm512_mask_rcp_ph() { |
21833 | let a = _mm512_set1_ph(2.0); |
21834 | let src = _mm512_set1_ph(1.0); |
21835 | let r = _mm512_mask_rcp_ph(src, 0b01010101010101010101010101010101, a); |
21836 | let e = _mm512_set_ph( |
21837 | 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, |
21838 | 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, |
21839 | ); |
21840 | assert_eq_m512h(r, e); |
21841 | } |
21842 | |
21843 | #[simd_test(enable = "avx512fp16" )] |
21844 | unsafe fn test_mm512_maskz_rcp_ph() { |
21845 | let a = _mm512_set1_ph(2.0); |
21846 | let r = _mm512_maskz_rcp_ph(0b01010101010101010101010101010101, a); |
21847 | let e = _mm512_set_ph( |
21848 | 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, |
21849 | 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, |
21850 | ); |
21851 | assert_eq_m512h(r, e); |
21852 | } |
21853 | |
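// Scalar rcp_sh takes two vectors: it approximates the reciprocal of lane 0 of `b` and
// copies the upper seven lanes from `a`.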
21854 | #[simd_test(enable = "avx512fp16" )] |
21855 | unsafe fn test_mm_rcp_sh() { |
21856 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21857 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
21858 | let r = _mm_rcp_sh(a, b); |
21859 | let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21860 | assert_eq_m128h(r, e); |
21861 | } |
21862 | |
21863 | #[simd_test(enable = "avx512fp16" )] |
21864 | unsafe fn test_mm_mask_rcp_sh() { |
21865 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21866 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
21867 | let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); |
21868 | let r = _mm_mask_rcp_sh(src, 0, a, b); |
21869 | let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21870 | assert_eq_m128h(r, e); |
21871 | let r = _mm_mask_rcp_sh(src, 1, a, b); |
21872 | let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21873 | assert_eq_m128h(r, e); |
21874 | } |
21875 | |
21876 | #[simd_test(enable = "avx512fp16" )] |
21877 | unsafe fn test_mm_maskz_rcp_sh() { |
21878 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21879 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
21880 | let r = _mm_maskz_rcp_sh(0, a, b); |
21881 | let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21882 | assert_eq_m128h(r, e); |
21883 | let r = _mm_maskz_rcp_sh(1, a, b); |
21884 | let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21885 | assert_eq_m128h(r, e); |
21886 | } |
21887 | |
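// rsqrt computes an approximate reciprocal square root; 1/sqrt(4.0) = 0.5, and the
// power-of-two input is expected to keep the approximation exact, so exact equality is
// asserted.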
21888 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21889 | unsafe fn test_mm_rsqrt_ph() { |
21890 | let a = _mm_set1_ph(4.0); |
21891 | let r = _mm_rsqrt_ph(a); |
21892 | let e = _mm_set1_ph(0.5); |
21893 | assert_eq_m128h(r, e); |
21894 | } |
21895 | |
21896 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21897 | unsafe fn test_mm_mask_rsqrt_ph() { |
21898 | let a = _mm_set1_ph(4.0); |
21899 | let src = _mm_set1_ph(1.0); |
21900 | let r = _mm_mask_rsqrt_ph(src, 0b01010101, a); |
21901 | let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5); |
21902 | assert_eq_m128h(r, e); |
21903 | } |
21904 | |
21905 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21906 | unsafe fn test_mm_maskz_rsqrt_ph() { |
21907 | let a = _mm_set1_ph(4.0); |
21908 | let r = _mm_maskz_rsqrt_ph(0b01010101, a); |
21909 | let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5); |
21910 | assert_eq_m128h(r, e); |
21911 | } |
21912 | |
21913 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21914 | unsafe fn test_mm256_rsqrt_ph() { |
21915 | let a = _mm256_set1_ph(4.0); |
21916 | let r = _mm256_rsqrt_ph(a); |
21917 | let e = _mm256_set1_ph(0.5); |
21918 | assert_eq_m256h(r, e); |
21919 | } |
21920 | |
21921 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21922 | unsafe fn test_mm256_mask_rsqrt_ph() { |
21923 | let a = _mm256_set1_ph(4.0); |
21924 | let src = _mm256_set1_ph(1.0); |
21925 | let r = _mm256_mask_rsqrt_ph(src, 0b0101010101010101, a); |
21926 | let e = _mm256_set_ph( |
21927 | 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, |
21928 | ); |
21929 | assert_eq_m256h(r, e); |
21930 | } |
21931 | |
21932 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21933 | unsafe fn test_mm256_maskz_rsqrt_ph() { |
21934 | let a = _mm256_set1_ph(4.0); |
21935 | let r = _mm256_maskz_rsqrt_ph(0b0101010101010101, a); |
21936 | let e = _mm256_set_ph( |
21937 | 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, |
21938 | ); |
21939 | assert_eq_m256h(r, e); |
21940 | } |
21941 | |
21942 | #[simd_test(enable = "avx512fp16" )] |
21943 | unsafe fn test_mm512_rsqrt_ph() { |
21944 | let a = _mm512_set1_ph(4.0); |
21945 | let r = _mm512_rsqrt_ph(a); |
21946 | let e = _mm512_set1_ph(0.5); |
21947 | assert_eq_m512h(r, e); |
21948 | } |
21949 | |
21950 | #[simd_test(enable = "avx512fp16" )] |
21951 | unsafe fn test_mm512_mask_rsqrt_ph() { |
21952 | let a = _mm512_set1_ph(4.0); |
21953 | let src = _mm512_set1_ph(1.0); |
21954 | let r = _mm512_mask_rsqrt_ph(src, 0b01010101010101010101010101010101, a); |
21955 | let e = _mm512_set_ph( |
21956 | 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, |
21957 | 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, |
21958 | ); |
21959 | assert_eq_m512h(r, e); |
21960 | } |
21961 | |
21962 | #[simd_test(enable = "avx512fp16" )] |
21963 | unsafe fn test_mm512_maskz_rsqrt_ph() { |
21964 | let a = _mm512_set1_ph(4.0); |
21965 | let r = _mm512_maskz_rsqrt_ph(0b01010101010101010101010101010101, a); |
21966 | let e = _mm512_set_ph( |
21967 | 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, |
21968 | 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, |
21969 | ); |
21970 | assert_eq_m512h(r, e); |
21971 | } |
21972 | |
21973 | #[simd_test(enable = "avx512fp16" )] |
21974 | unsafe fn test_mm_rsqrt_sh() { |
21975 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21976 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
21977 | let r = _mm_rsqrt_sh(a, b); |
21978 | let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21979 | assert_eq_m128h(r, e); |
21980 | } |
21981 | |
21982 | #[simd_test(enable = "avx512fp16" )] |
21983 | unsafe fn test_mm_mask_rsqrt_sh() { |
21984 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21985 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
21986 | let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); |
21987 | let r = _mm_mask_rsqrt_sh(src, 0, a, b); |
21988 | let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21989 | assert_eq_m128h(r, e); |
21990 | let r = _mm_mask_rsqrt_sh(src, 1, a, b); |
21991 | let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21992 | assert_eq_m128h(r, e); |
21993 | } |
21994 | |
21995 | #[simd_test(enable = "avx512fp16" )] |
21996 | unsafe fn test_mm_maskz_rsqrt_sh() { |
21997 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21998 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
21999 | let r = _mm_maskz_rsqrt_sh(0, a, b); |
22000 | let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22001 | assert_eq_m128h(r, e); |
22002 | let r = _mm_maskz_rsqrt_sh(1, a, b); |
22003 | let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22004 | assert_eq_m128h(r, e); |
22005 | } |
22006 | |
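// The sqrt_* intrinsics compute the per-lane square root: sqrt(4.0) == 2.0. Masked
// variants keep unselected lanes from `src`; maskz variants zero them.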
22007 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22008 | unsafe fn test_mm_sqrt_ph() { |
22009 | let a = _mm_set1_ph(4.0); |
22010 | let r = _mm_sqrt_ph(a); |
22011 | let e = _mm_set1_ph(2.0); |
22012 | assert_eq_m128h(r, e); |
22013 | } |
22014 | |
22015 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22016 | unsafe fn test_mm_mask_sqrt_ph() { |
22017 | let a = _mm_set1_ph(4.0); |
22018 | let src = _mm_set1_ph(1.0); |
22019 | let r = _mm_mask_sqrt_ph(src, 0b01010101, a); |
22020 | let e = _mm_set_ph(1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0); |
22021 | assert_eq_m128h(r, e); |
22022 | } |
22023 | |
22024 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22025 | unsafe fn test_mm_maskz_sqrt_ph() { |
22026 | let a = _mm_set1_ph(4.0); |
22027 | let r = _mm_maskz_sqrt_ph(0b01010101, a); |
22028 | let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0); |
22029 | assert_eq_m128h(r, e); |
22030 | } |
22031 | |
22032 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22033 | unsafe fn test_mm256_sqrt_ph() { |
22034 | let a = _mm256_set1_ph(4.0); |
22035 | let r = _mm256_sqrt_ph(a); |
22036 | let e = _mm256_set1_ph(2.0); |
22037 | assert_eq_m256h(r, e); |
22038 | } |
22039 | |
22040 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22041 | unsafe fn test_mm256_mask_sqrt_ph() { |
22042 | let a = _mm256_set1_ph(4.0); |
22043 | let src = _mm256_set1_ph(1.0); |
22044 | let r = _mm256_mask_sqrt_ph(src, 0b0101010101010101, a); |
22045 | let e = _mm256_set_ph( |
22046 | 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, |
22047 | ); |
22048 | assert_eq_m256h(r, e); |
22049 | } |
22050 | |
22051 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22052 | unsafe fn test_mm256_maskz_sqrt_ph() { |
22053 | let a = _mm256_set1_ph(4.0); |
22054 | let r = _mm256_maskz_sqrt_ph(0b0101010101010101, a); |
22055 | let e = _mm256_set_ph( |
22056 | 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, |
22057 | ); |
22058 | assert_eq_m256h(r, e); |
22059 | } |
22060 | |
22061 | #[simd_test(enable = "avx512fp16" )] |
22062 | unsafe fn test_mm512_sqrt_ph() { |
22063 | let a = _mm512_set1_ph(4.0); |
22064 | let r = _mm512_sqrt_ph(a); |
22065 | let e = _mm512_set1_ph(2.0); |
22066 | assert_eq_m512h(r, e); |
22067 | } |
22068 | |
22069 | #[simd_test(enable = "avx512fp16" )] |
22070 | unsafe fn test_mm512_mask_sqrt_ph() { |
22071 | let a = _mm512_set1_ph(4.0); |
22072 | let src = _mm512_set1_ph(1.0); |
22073 | let r = _mm512_mask_sqrt_ph(src, 0b01010101010101010101010101010101, a); |
22074 | let e = _mm512_set_ph( |
22075 | 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, |
22076 | 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, |
22077 | ); |
22078 | assert_eq_m512h(r, e); |
22079 | } |
22080 | |
22081 | #[simd_test(enable = "avx512fp16" )] |
22082 | unsafe fn test_mm512_maskz_sqrt_ph() { |
22083 | let a = _mm512_set1_ph(4.0); |
22084 | let r = _mm512_maskz_sqrt_ph(0b01010101010101010101010101010101, a); |
22085 | let e = _mm512_set_ph( |
22086 | 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, |
22087 | 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, |
22088 | ); |
22089 | assert_eq_m512h(r, e); |
22090 | } |
22091 | |
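// The `_round` variants take the rounding/SAE control as a const generic;
// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC selects round-to-nearest-even and
// suppresses floating-point exceptions.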
22092 | #[simd_test(enable = "avx512fp16" )] |
22093 | unsafe fn test_mm512_sqrt_round_ph() { |
22094 | let a = _mm512_set1_ph(4.0); |
22095 | let r = _mm512_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
22096 | let e = _mm512_set1_ph(2.0); |
22097 | assert_eq_m512h(r, e); |
22098 | } |
22099 | |
22100 | #[simd_test(enable = "avx512fp16" )] |
22101 | unsafe fn test_mm512_mask_sqrt_round_ph() { |
22102 | let a = _mm512_set1_ph(4.0); |
22103 | let src = _mm512_set1_ph(1.0); |
22104 | let r = _mm512_mask_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22105 | src, |
22106 | 0b01010101010101010101010101010101, |
22107 | a, |
22108 | ); |
22109 | let e = _mm512_set_ph( |
22110 | 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, |
22111 | 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, |
22112 | ); |
22113 | assert_eq_m512h(r, e); |
22114 | } |
22115 | |
22116 | #[simd_test(enable = "avx512fp16" )] |
22117 | unsafe fn test_mm512_maskz_sqrt_round_ph() { |
22118 | let a = _mm512_set1_ph(4.0); |
22119 | let r = _mm512_maskz_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22120 | 0b01010101010101010101010101010101, |
22121 | a, |
22122 | ); |
22123 | let e = _mm512_set_ph( |
22124 | 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, |
22125 | 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, |
22126 | ); |
22127 | assert_eq_m512h(r, e); |
22128 | } |
22129 | |
22130 | #[simd_test(enable = "avx512fp16" )] |
22131 | unsafe fn test_mm_sqrt_sh() { |
22132 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22133 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
22134 | let r = _mm_sqrt_sh(a, b); |
22135 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22136 | assert_eq_m128h(r, e); |
22137 | } |
22138 | |
22139 | #[simd_test(enable = "avx512fp16" )] |
22140 | unsafe fn test_mm_mask_sqrt_sh() { |
22141 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22142 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
22143 | let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); |
22144 | let r = _mm_mask_sqrt_sh(src, 0, a, b); |
22145 | let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22146 | assert_eq_m128h(r, e); |
22147 | let r = _mm_mask_sqrt_sh(src, 1, a, b); |
22148 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22149 | assert_eq_m128h(r, e); |
22150 | } |
22151 | |
22152 | #[simd_test(enable = "avx512fp16" )] |
22153 | unsafe fn test_mm_maskz_sqrt_sh() { |
22154 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22155 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
22156 | let r = _mm_maskz_sqrt_sh(0, a, b); |
22157 | let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22158 | assert_eq_m128h(r, e); |
22159 | let r = _mm_maskz_sqrt_sh(1, a, b); |
22160 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22161 | assert_eq_m128h(r, e); |
22162 | } |
22163 | |
22164 | #[simd_test(enable = "avx512fp16" )] |
22165 | unsafe fn test_mm_sqrt_round_sh() { |
22166 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22167 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
22168 | let r = _mm_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
22169 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22170 | assert_eq_m128h(r, e); |
22171 | } |
22172 | |
22173 | #[simd_test(enable = "avx512fp16" )] |
22174 | unsafe fn test_mm_mask_sqrt_round_sh() { |
22175 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22176 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
22177 | let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); |
22178 | let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22179 | src, 0, a, b, |
22180 | ); |
22181 | let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22182 | assert_eq_m128h(r, e); |
22183 | let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22184 | src, 1, a, b, |
22185 | ); |
22186 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22187 | assert_eq_m128h(r, e); |
22188 | } |
22189 | |
22190 | #[simd_test(enable = "avx512fp16" )] |
22191 | unsafe fn test_mm_maskz_sqrt_round_sh() { |
22192 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22193 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
22194 | let r = |
22195 | _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
22196 | let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22197 | assert_eq_m128h(r, e); |
22198 | let r = |
22199 | _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
22200 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22201 | assert_eq_m128h(r, e); |
22202 | } |
22203 | |
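// The max_* intrinsics return the per-lane maximum, so max(2.0, 1.0) == 2.0 in every lane.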
22204 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22205 | unsafe fn test_mm_max_ph() { |
22206 | let a = _mm_set1_ph(2.0); |
22207 | let b = _mm_set1_ph(1.0); |
22208 | let r = _mm_max_ph(a, b); |
22209 | let e = _mm_set1_ph(2.0); |
22210 | assert_eq_m128h(r, e); |
22211 | } |
22212 | |
22213 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22214 | unsafe fn test_mm_mask_max_ph() { |
22215 | let a = _mm_set1_ph(2.0); |
22216 | let b = _mm_set1_ph(1.0); |
22217 | let src = _mm_set1_ph(3.0); |
22218 | let r = _mm_mask_max_ph(src, 0b01010101, a, b); |
22219 | let e = _mm_set_ph(3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0); |
22220 | assert_eq_m128h(r, e); |
22221 | } |
22222 | |
22223 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22224 | unsafe fn test_mm_maskz_max_ph() { |
22225 | let a = _mm_set1_ph(2.0); |
22226 | let b = _mm_set1_ph(1.0); |
22227 | let r = _mm_maskz_max_ph(0b01010101, a, b); |
22228 | let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0); |
22229 | assert_eq_m128h(r, e); |
22230 | } |
22231 | |
22232 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22233 | unsafe fn test_mm256_max_ph() { |
22234 | let a = _mm256_set1_ph(2.0); |
22235 | let b = _mm256_set1_ph(1.0); |
22236 | let r = _mm256_max_ph(a, b); |
22237 | let e = _mm256_set1_ph(2.0); |
22238 | assert_eq_m256h(r, e); |
22239 | } |
22240 | |
22241 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22242 | unsafe fn test_mm256_mask_max_ph() { |
22243 | let a = _mm256_set1_ph(2.0); |
22244 | let b = _mm256_set1_ph(1.0); |
22245 | let src = _mm256_set1_ph(3.0); |
22246 | let r = _mm256_mask_max_ph(src, 0b0101010101010101, a, b); |
22247 | let e = _mm256_set_ph( |
22248 | 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, |
22249 | ); |
22250 | assert_eq_m256h(r, e); |
22251 | } |
22252 | |
22253 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22254 | unsafe fn test_mm256_maskz_max_ph() { |
22255 | let a = _mm256_set1_ph(2.0); |
22256 | let b = _mm256_set1_ph(1.0); |
22257 | let r = _mm256_maskz_max_ph(0b0101010101010101, a, b); |
22258 | let e = _mm256_set_ph( |
22259 | 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, |
22260 | ); |
22261 | assert_eq_m256h(r, e); |
22262 | } |
22263 | |
22264 | #[simd_test(enable = "avx512fp16" )] |
22265 | unsafe fn test_mm512_max_ph() { |
22266 | let a = _mm512_set1_ph(2.0); |
22267 | let b = _mm512_set1_ph(1.0); |
22268 | let r = _mm512_max_ph(a, b); |
22269 | let e = _mm512_set1_ph(2.0); |
22270 | assert_eq_m512h(r, e); |
22271 | } |
22272 | |
22273 | #[simd_test(enable = "avx512fp16" )] |
22274 | unsafe fn test_mm512_mask_max_ph() { |
22275 | let a = _mm512_set1_ph(2.0); |
22276 | let b = _mm512_set1_ph(1.0); |
22277 | let src = _mm512_set1_ph(3.0); |
22278 | let r = _mm512_mask_max_ph(src, 0b01010101010101010101010101010101, a, b); |
22279 | let e = _mm512_set_ph( |
22280 | 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, |
22281 | 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, |
22282 | ); |
22283 | assert_eq_m512h(r, e); |
22284 | } |
22285 | |
22286 | #[simd_test(enable = "avx512fp16" )] |
22287 | unsafe fn test_mm512_maskz_max_ph() { |
22288 | let a = _mm512_set1_ph(2.0); |
22289 | let b = _mm512_set1_ph(1.0); |
22290 | let r = _mm512_maskz_max_ph(0b01010101010101010101010101010101, a, b); |
22291 | let e = _mm512_set_ph( |
22292 | 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, |
22293 | 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, |
22294 | ); |
22295 | assert_eq_m512h(r, e); |
22296 | } |
22297 | |
22298 | #[simd_test(enable = "avx512fp16" )] |
22299 | unsafe fn test_mm512_max_round_ph() { |
22300 | let a = _mm512_set1_ph(2.0); |
22301 | let b = _mm512_set1_ph(1.0); |
22302 | let r = _mm512_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
22303 | let e = _mm512_set1_ph(2.0); |
22304 | assert_eq_m512h(r, e); |
22305 | } |
22306 | |
22307 | #[simd_test(enable = "avx512fp16" )] |
22308 | unsafe fn test_mm512_mask_max_round_ph() { |
22309 | let a = _mm512_set1_ph(2.0); |
22310 | let b = _mm512_set1_ph(1.0); |
22311 | let src = _mm512_set1_ph(3.0); |
22312 | let r = _mm512_mask_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22313 | src, |
22314 | 0b01010101010101010101010101010101, |
22315 | a, |
22316 | b, |
22317 | ); |
22318 | let e = _mm512_set_ph( |
22319 | 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, |
22320 | 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, |
22321 | ); |
22322 | assert_eq_m512h(r, e); |
22323 | } |
22324 | |
22325 | #[simd_test(enable = "avx512fp16" )] |
22326 | unsafe fn test_mm512_maskz_max_round_ph() { |
22327 | let a = _mm512_set1_ph(2.0); |
22328 | let b = _mm512_set1_ph(1.0); |
22329 | let r = _mm512_maskz_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22330 | 0b01010101010101010101010101010101, |
22331 | a, |
22332 | b, |
22333 | ); |
22334 | let e = _mm512_set_ph( |
22335 | 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, |
22336 | 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, |
22337 | ); |
22338 | assert_eq_m512h(r, e); |
22339 | } |
22340 | |
22341 | #[simd_test(enable = "avx512fp16" )] |
22342 | unsafe fn test_mm_max_sh() { |
22343 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22344 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22345 | let r = _mm_max_sh(a, b); |
22346 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22347 | assert_eq_m128h(r, e); |
22348 | } |
22349 | |
22350 | #[simd_test(enable = "avx512fp16" )] |
22351 | unsafe fn test_mm_mask_max_sh() { |
22352 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22353 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22354 | let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); |
22355 | let r = _mm_mask_max_sh(src, 0, a, b); |
22356 | let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22357 | assert_eq_m128h(r, e); |
22358 | let r = _mm_mask_max_sh(src, 1, a, b); |
22359 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22360 | assert_eq_m128h(r, e); |
22361 | } |
22362 | |
22363 | #[simd_test(enable = "avx512fp16" )] |
22364 | unsafe fn test_mm_maskz_max_sh() { |
22365 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22366 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22367 | let r = _mm_maskz_max_sh(0, a, b); |
22368 | let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22369 | assert_eq_m128h(r, e); |
22370 | let r = _mm_maskz_max_sh(1, a, b); |
22371 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22372 | assert_eq_m128h(r, e); |
22373 | } |
22374 | |
22375 | #[simd_test(enable = "avx512fp16" )] |
22376 | unsafe fn test_mm_max_round_sh() { |
22377 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22378 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22379 | let r = _mm_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
22380 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22381 | assert_eq_m128h(r, e); |
22382 | } |
22383 | |
22384 | #[simd_test(enable = "avx512fp16" )] |
22385 | unsafe fn test_mm_mask_max_round_sh() { |
22386 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22387 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22388 | let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); |
22389 | let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22390 | src, 0, a, b, |
22391 | ); |
22392 | let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22393 | assert_eq_m128h(r, e); |
22394 | let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22395 | src, 1, a, b, |
22396 | ); |
22397 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22398 | assert_eq_m128h(r, e); |
22399 | } |
22400 | |
22401 | #[simd_test(enable = "avx512fp16" )] |
22402 | unsafe fn test_mm_maskz_max_round_sh() { |
22403 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22404 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22405 | let r = |
22406 | _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
22407 | let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22408 | assert_eq_m128h(r, e); |
22409 | let r = |
22410 | _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
22411 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22412 | assert_eq_m128h(r, e); |
22413 | } |
22414 | |
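// The min_* intrinsics return the per-lane minimum, so min(2.0, 1.0) == 1.0 in every lane.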
22415 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22416 | unsafe fn test_mm_min_ph() { |
22417 | let a = _mm_set1_ph(2.0); |
22418 | let b = _mm_set1_ph(1.0); |
22419 | let r = _mm_min_ph(a, b); |
22420 | let e = _mm_set1_ph(1.0); |
22421 | assert_eq_m128h(r, e); |
22422 | } |
22423 | |
22424 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22425 | unsafe fn test_mm_mask_min_ph() { |
22426 | let a = _mm_set1_ph(2.0); |
22427 | let b = _mm_set1_ph(1.0); |
22428 | let src = _mm_set1_ph(3.0); |
22429 | let r = _mm_mask_min_ph(src, 0b01010101, a, b); |
22430 | let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0); |
22431 | assert_eq_m128h(r, e); |
22432 | } |
22433 | |
22434 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22435 | unsafe fn test_mm_maskz_min_ph() { |
22436 | let a = _mm_set1_ph(2.0); |
22437 | let b = _mm_set1_ph(1.0); |
22438 | let r = _mm_maskz_min_ph(0b01010101, a, b); |
22439 | let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0); |
22440 | assert_eq_m128h(r, e); |
22441 | } |
22442 | |
22443 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22444 | unsafe fn test_mm256_min_ph() { |
22445 | let a = _mm256_set1_ph(2.0); |
22446 | let b = _mm256_set1_ph(1.0); |
22447 | let r = _mm256_min_ph(a, b); |
22448 | let e = _mm256_set1_ph(1.0); |
22449 | assert_eq_m256h(r, e); |
22450 | } |
22451 | |
22452 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22453 | unsafe fn test_mm256_mask_min_ph() { |
22454 | let a = _mm256_set1_ph(2.0); |
22455 | let b = _mm256_set1_ph(1.0); |
22456 | let src = _mm256_set1_ph(3.0); |
22457 | let r = _mm256_mask_min_ph(src, 0b0101010101010101, a, b); |
22458 | let e = _mm256_set_ph( |
22459 | 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, |
22460 | ); |
22461 | assert_eq_m256h(r, e); |
22462 | } |
22463 | |
22464 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22465 | unsafe fn test_mm256_maskz_min_ph() { |
22466 | let a = _mm256_set1_ph(2.0); |
22467 | let b = _mm256_set1_ph(1.0); |
22468 | let r = _mm256_maskz_min_ph(0b0101010101010101, a, b); |
22469 | let e = _mm256_set_ph( |
22470 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
22471 | ); |
22472 | assert_eq_m256h(r, e); |
22473 | } |
22474 | |
22475 | #[simd_test(enable = "avx512fp16" )] |
22476 | unsafe fn test_mm512_min_ph() { |
22477 | let a = _mm512_set1_ph(2.0); |
22478 | let b = _mm512_set1_ph(1.0); |
22479 | let r = _mm512_min_ph(a, b); |
22480 | let e = _mm512_set1_ph(1.0); |
22481 | assert_eq_m512h(r, e); |
22482 | } |
22483 | |
22484 | #[simd_test(enable = "avx512fp16" )] |
22485 | unsafe fn test_mm512_mask_min_ph() { |
22486 | let a = _mm512_set1_ph(2.0); |
22487 | let b = _mm512_set1_ph(1.0); |
22488 | let src = _mm512_set1_ph(3.0); |
22489 | let r = _mm512_mask_min_ph(src, 0b01010101010101010101010101010101, a, b); |
22490 | let e = _mm512_set_ph( |
22491 | 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, |
22492 | 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, |
22493 | ); |
22494 | assert_eq_m512h(r, e); |
22495 | } |
22496 | |
22497 | #[simd_test(enable = "avx512fp16" )] |
22498 | unsafe fn test_mm512_maskz_min_ph() { |
22499 | let a = _mm512_set1_ph(2.0); |
22500 | let b = _mm512_set1_ph(1.0); |
22501 | let r = _mm512_maskz_min_ph(0b01010101010101010101010101010101, a, b); |
22502 | let e = _mm512_set_ph( |
22503 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, |
22504 | 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
22505 | ); |
22506 | assert_eq_m512h(r, e); |
22507 | } |
22508 | |
22509 | #[simd_test(enable = "avx512fp16" )] |
22510 | unsafe fn test_mm512_min_round_ph() { |
22511 | let a = _mm512_set1_ph(2.0); |
22512 | let b = _mm512_set1_ph(1.0); |
22513 | let r = _mm512_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
22514 | let e = _mm512_set1_ph(1.0); |
22515 | assert_eq_m512h(r, e); |
22516 | } |
22517 | |
22518 | #[simd_test(enable = "avx512fp16" )] |
22519 | unsafe fn test_mm512_mask_min_round_ph() { |
22520 | let a = _mm512_set1_ph(2.0); |
22521 | let b = _mm512_set1_ph(1.0); |
22522 | let src = _mm512_set1_ph(3.0); |
22523 | let r = _mm512_mask_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22524 | src, |
22525 | 0b01010101010101010101010101010101, |
22526 | a, |
22527 | b, |
22528 | ); |
22529 | let e = _mm512_set_ph( |
22530 | 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, |
22531 | 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, |
22532 | ); |
22533 | assert_eq_m512h(r, e); |
22534 | } |
22535 | |
22536 | #[simd_test(enable = "avx512fp16" )] |
22537 | unsafe fn test_mm512_maskz_min_round_ph() { |
22538 | let a = _mm512_set1_ph(2.0); |
22539 | let b = _mm512_set1_ph(1.0); |
22540 | let r = _mm512_maskz_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22541 | 0b01010101010101010101010101010101, |
22542 | a, |
22543 | b, |
22544 | ); |
22545 | let e = _mm512_set_ph( |
22546 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, |
22547 | 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
22548 | ); |
22549 | assert_eq_m512h(r, e); |
22550 | } |
22551 | |
22552 | #[simd_test(enable = "avx512fp16" )] |
22553 | unsafe fn test_mm_min_sh() { |
22554 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22555 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22556 | let r = _mm_min_sh(a, b); |
22557 | let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22558 | assert_eq_m128h(r, e); |
22559 | } |
22560 | |
22561 | #[simd_test(enable = "avx512fp16" )] |
22562 | unsafe fn test_mm_mask_min_sh() { |
22563 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22564 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22565 | let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); |
22566 | let r = _mm_mask_min_sh(src, 0, a, b); |
22567 | let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22568 | assert_eq_m128h(r, e); |
22569 | let r = _mm_mask_min_sh(src, 1, a, b); |
22570 | let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22571 | assert_eq_m128h(r, e); |
22572 | } |
22573 | |
22574 | #[simd_test(enable = "avx512fp16" )] |
22575 | unsafe fn test_mm_maskz_min_sh() { |
22576 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22577 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22578 | let r = _mm_maskz_min_sh(0, a, b); |
22579 | let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22580 | assert_eq_m128h(r, e); |
22581 | let r = _mm_maskz_min_sh(1, a, b); |
22582 | let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22583 | assert_eq_m128h(r, e); |
22584 | } |
22585 | |
22586 | #[simd_test(enable = "avx512fp16" )] |
22587 | unsafe fn test_mm_min_round_sh() { |
22588 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22589 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22590 | let r = _mm_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
22591 | let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22592 | assert_eq_m128h(r, e); |
22593 | } |
22594 | |
22595 | #[simd_test(enable = "avx512fp16" )] |
22596 | unsafe fn test_mm_mask_min_round_sh() { |
22597 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22598 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22599 | let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); |
22600 | let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22601 | src, 0, a, b, |
22602 | ); |
22603 | let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22604 | assert_eq_m128h(r, e); |
22605 | let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22606 | src, 1, a, b, |
22607 | ); |
22608 | let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22609 | assert_eq_m128h(r, e); |
22610 | } |
22611 | |
22612 | #[simd_test(enable = "avx512fp16" )] |
22613 | unsafe fn test_mm_maskz_min_round_sh() { |
22614 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22615 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22616 | let r = |
22617 | _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
22618 | let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22619 | assert_eq_m128h(r, e); |
22620 | let r = |
22621 | _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
22622 | let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22623 | assert_eq_m128h(r, e); |
22624 | } |
22625 | |
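// The getexp_* intrinsics extract each lane's unbiased exponent as a float,
// i.e. floor(log2(|x|)); for 3.0 (= 1.5 * 2^1) the result is 1.0.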
22626 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22627 | unsafe fn test_mm_getexp_ph() { |
22628 | let a = _mm_set1_ph(3.0); |
22629 | let r = _mm_getexp_ph(a); |
22630 | let e = _mm_set1_ph(1.0); |
22631 | assert_eq_m128h(r, e); |
22632 | } |
22633 | |
22634 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22635 | unsafe fn test_mm_mask_getexp_ph() { |
22636 | let a = _mm_set1_ph(3.0); |
22637 | let src = _mm_set1_ph(4.0); |
22638 | let r = _mm_mask_getexp_ph(src, 0b01010101, a); |
22639 | let e = _mm_set_ph(4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0); |
22640 | assert_eq_m128h(r, e); |
22641 | } |
22642 | |
22643 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22644 | unsafe fn test_mm_maskz_getexp_ph() { |
22645 | let a = _mm_set1_ph(3.0); |
22646 | let r = _mm_maskz_getexp_ph(0b01010101, a); |
22647 | let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0); |
22648 | assert_eq_m128h(r, e); |
22649 | } |
22650 | |
22651 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22652 | unsafe fn test_mm256_getexp_ph() { |
22653 | let a = _mm256_set1_ph(3.0); |
22654 | let r = _mm256_getexp_ph(a); |
22655 | let e = _mm256_set1_ph(1.0); |
22656 | assert_eq_m256h(r, e); |
22657 | } |
22658 | |
22659 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22660 | unsafe fn test_mm256_mask_getexp_ph() { |
22661 | let a = _mm256_set1_ph(3.0); |
22662 | let src = _mm256_set1_ph(4.0); |
22663 | let r = _mm256_mask_getexp_ph(src, 0b0101010101010101, a); |
22664 | let e = _mm256_set_ph( |
22665 | 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, |
22666 | ); |
22667 | assert_eq_m256h(r, e); |
22668 | } |
22669 | |
22670 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22671 | unsafe fn test_mm256_maskz_getexp_ph() { |
22672 | let a = _mm256_set1_ph(3.0); |
22673 | let r = _mm256_maskz_getexp_ph(0b0101010101010101, a); |
22674 | let e = _mm256_set_ph( |
22675 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
22676 | ); |
22677 | assert_eq_m256h(r, e); |
22678 | } |
22679 | |
22680 | #[simd_test(enable = "avx512fp16" )] |
22681 | unsafe fn test_mm512_getexp_ph() { |
22682 | let a = _mm512_set1_ph(3.0); |
22683 | let r = _mm512_getexp_ph(a); |
22684 | let e = _mm512_set1_ph(1.0); |
22685 | assert_eq_m512h(r, e); |
22686 | } |
22687 | |
22688 | #[simd_test(enable = "avx512fp16" )] |
22689 | unsafe fn test_mm512_mask_getexp_ph() { |
22690 | let a = _mm512_set1_ph(3.0); |
22691 | let src = _mm512_set1_ph(4.0); |
22692 | let r = _mm512_mask_getexp_ph(src, 0b01010101010101010101010101010101, a); |
22693 | let e = _mm512_set_ph( |
22694 | 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, |
22695 | 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, |
22696 | ); |
22697 | assert_eq_m512h(r, e); |
22698 | } |
22699 | |
22700 | #[simd_test(enable = "avx512fp16" )] |
22701 | unsafe fn test_mm512_maskz_getexp_ph() { |
22702 | let a = _mm512_set1_ph(3.0); |
22703 | let r = _mm512_maskz_getexp_ph(0b01010101010101010101010101010101, a); |
22704 | let e = _mm512_set_ph( |
22705 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, |
22706 | 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
22707 | ); |
22708 | assert_eq_m512h(r, e); |
22709 | } |
22710 | |
22711 | #[simd_test(enable = "avx512fp16" )] |
22712 | unsafe fn test_mm512_getexp_round_ph() { |
22713 | let a = _mm512_set1_ph(3.0); |
22714 | let r = _mm512_getexp_round_ph::<_MM_FROUND_NO_EXC>(a); |
22715 | let e = _mm512_set1_ph(1.0); |
22716 | assert_eq_m512h(r, e); |
22717 | } |
22718 | |
22719 | #[simd_test(enable = "avx512fp16" )] |
22720 | unsafe fn test_mm512_mask_getexp_round_ph() { |
22721 | let a = _mm512_set1_ph(3.0); |
22722 | let src = _mm512_set1_ph(4.0); |
22723 | let r = _mm512_mask_getexp_round_ph::<_MM_FROUND_NO_EXC>( |
22724 | src, |
22725 | 0b01010101010101010101010101010101, |
22726 | a, |
22727 | ); |
22728 | let e = _mm512_set_ph( |
22729 | 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, |
22730 | 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, |
22731 | ); |
22732 | assert_eq_m512h(r, e); |
22733 | } |
22734 | |
22735 | #[simd_test(enable = "avx512fp16" )] |
22736 | unsafe fn test_mm512_maskz_getexp_round_ph() { |
22737 | let a = _mm512_set1_ph(3.0); |
22738 | let r = _mm512_maskz_getexp_round_ph::<_MM_FROUND_NO_EXC>( |
22739 | 0b01010101010101010101010101010101, |
22740 | a, |
22741 | ); |
22742 | let e = _mm512_set_ph( |
22743 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, |
22744 | 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
22745 | ); |
22746 | assert_eq_m512h(r, e); |
22747 | } |
22748 | |
22749 | #[simd_test(enable = "avx512fp16" )] |
22750 | unsafe fn test_mm_getexp_sh() { |
22751 | let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); |
22752 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
22753 | let r = _mm_getexp_sh(a, b); |
22754 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
22755 | assert_eq_m128h(r, e); |
22756 | } |
22757 | |
22758 | #[simd_test(enable = "avx512fp16" )] |
22759 | unsafe fn test_mm_mask_getexp_sh() { |
22760 | let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); |
22761 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
22762 | let src = _mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.); |
22763 | let r = _mm_mask_getexp_sh(src, 0, a, b); |
22764 | let e = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); |
22765 | assert_eq_m128h(r, e); |
22766 | let r = _mm_mask_getexp_sh(src, 1, a, b); |
22767 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
22768 | assert_eq_m128h(r, e); |
22769 | } |
22770 | |
22771 | #[simd_test(enable = "avx512fp16" )] |
22772 | unsafe fn test_mm_maskz_getexp_sh() { |
22773 | let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); |
22774 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
22775 | let r = _mm_maskz_getexp_sh(0, a, b); |
22776 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
22777 | assert_eq_m128h(r, e); |
22778 | let r = _mm_maskz_getexp_sh(1, a, b); |
22779 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
22780 | assert_eq_m128h(r, e); |
22781 | } |
22782 | |
22783 | #[simd_test(enable = "avx512fp16" )] |
22784 | unsafe fn test_mm_getexp_round_sh() { |
22785 | let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); |
22786 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
22787 | let r = _mm_getexp_round_sh::<_MM_FROUND_NO_EXC>(a, b); |
22788 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
22789 | assert_eq_m128h(r, e); |
22790 | } |
22791 | |
22792 | #[simd_test(enable = "avx512fp16" )] |
22793 | unsafe fn test_mm_mask_getexp_round_sh() { |
22794 | let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); |
22795 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
22796 | let src = _mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.); |
22797 | let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, 0, a, b); |
22798 | let e = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); |
22799 | assert_eq_m128h(r, e); |
22800 | let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, 1, a, b); |
22801 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
22802 | assert_eq_m128h(r, e); |
22803 | } |
22804 | |
22805 | #[simd_test(enable = "avx512fp16" )] |
22806 | unsafe fn test_mm_maskz_getexp_round_sh() { |
22807 | let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); |
22808 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
22809 | let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(0, a, b); |
22810 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
22811 | assert_eq_m128h(r, e); |
22812 | let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(1, a, b); |
22813 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
22814 | assert_eq_m128h(r, e); |
22815 | } |
22816 | |
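// The getmant_* intrinsics return the mantissa normalized to the interval selected by
// the first const generic; with _MM_MANT_NORM_P75_1P5, 10.0 (= 1.25 * 2^3) yields 1.25.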
22817 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22818 | unsafe fn test_mm_getmant_ph() { |
22819 | let a = _mm_set1_ph(10.0); |
22820 | let r = _mm_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a); |
22821 | let e = _mm_set1_ph(1.25); |
22822 | assert_eq_m128h(r, e); |
22823 | } |
22824 | |
22825 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22826 | unsafe fn test_mm_mask_getmant_ph() { |
22827 | let a = _mm_set1_ph(10.0); |
22828 | let src = _mm_set1_ph(20.0); |
22829 | let r = _mm_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 0b01010101, a); |
22830 | let e = _mm_set_ph(20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25); |
22831 | assert_eq_m128h(r, e); |
22832 | } |
22833 | |
22834 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22835 | unsafe fn test_mm_maskz_getmant_ph() { |
22836 | let a = _mm_set1_ph(10.0); |
22837 | let r = _mm_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0b01010101, a); |
22838 | let e = _mm_set_ph(0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25); |
22839 | assert_eq_m128h(r, e); |
22840 | } |
22841 | |
22842 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22843 | unsafe fn test_mm256_getmant_ph() { |
22844 | let a = _mm256_set1_ph(10.0); |
22845 | let r = _mm256_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a); |
22846 | let e = _mm256_set1_ph(1.25); |
22847 | assert_eq_m256h(r, e); |
22848 | } |
22849 | |
22850 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22851 | unsafe fn test_mm256_mask_getmant_ph() { |
22852 | let a = _mm256_set1_ph(10.0); |
22853 | let src = _mm256_set1_ph(20.0); |
22854 | let r = _mm256_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>( |
22855 | src, |
22856 | 0b0101010101010101, |
22857 | a, |
22858 | ); |
22859 | let e = _mm256_set_ph( |
22860 | 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, |
22861 | 20.0, 1.25, |
22862 | ); |
22863 | assert_eq_m256h(r, e); |
22864 | } |
22865 | |
22866 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22867 | unsafe fn test_mm256_maskz_getmant_ph() { |
22868 | let a = _mm256_set1_ph(10.0); |
22869 | let r = _mm256_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>( |
22870 | 0b0101010101010101, |
22871 | a, |
22872 | ); |
22873 | let e = _mm256_set_ph( |
22874 | 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, |
22875 | ); |
22876 | assert_eq_m256h(r, e); |
22877 | } |
22878 | |
22879 | #[simd_test(enable = "avx512fp16" )] |
22880 | unsafe fn test_mm512_getmant_ph() { |
22881 | let a = _mm512_set1_ph(10.0); |
22882 | let r = _mm512_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a); |
22883 | let e = _mm512_set1_ph(1.25); |
22884 | assert_eq_m512h(r, e); |
22885 | } |
22886 | |
22887 | #[simd_test(enable = "avx512fp16" )] |
22888 | unsafe fn test_mm512_mask_getmant_ph() { |
22889 | let a = _mm512_set1_ph(10.0); |
22890 | let src = _mm512_set1_ph(20.0); |
22891 | let r = _mm512_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>( |
22892 | src, |
22893 | 0b01010101010101010101010101010101, |
22894 | a, |
22895 | ); |
22896 | let e = _mm512_set_ph( |
22897 | 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, |
22898 | 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, |
22899 | 20.0, 1.25, 20.0, 1.25, |
22900 | ); |
22901 | assert_eq_m512h(r, e); |
22902 | } |
22903 | |
22904 | #[simd_test(enable = "avx512fp16" )] |
22905 | unsafe fn test_mm512_maskz_getmant_ph() { |
22906 | let a = _mm512_set1_ph(10.0); |
22907 | let r = _mm512_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>( |
22908 | 0b01010101010101010101010101010101, |
22909 | a, |
22910 | ); |
22911 | let e = _mm512_set_ph( |
22912 | 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, |
22913 | 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, |
22914 | ); |
22915 | assert_eq_m512h(r, e); |
22916 | } |
22917 | |
22918 | #[simd_test(enable = "avx512fp16" )] |
22919 | unsafe fn test_mm512_getmant_round_ph() { |
22920 | let a = _mm512_set1_ph(10.0); |
22921 | let r = |
22922 | _mm512_getmant_round_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>( |
22923 | a, |
22924 | ); |
22925 | let e = _mm512_set1_ph(1.25); |
22926 | assert_eq_m512h(r, e); |
22927 | } |
22928 | |
22929 | #[simd_test(enable = "avx512fp16" )] |
22930 | unsafe fn test_mm512_mask_getmant_round_ph() { |
22931 | let a = _mm512_set1_ph(10.0); |
22932 | let src = _mm512_set1_ph(20.0); |
22933 | let r = _mm512_mask_getmant_round_ph::< |
22934 | _MM_MANT_NORM_P75_1P5, |
22935 | _MM_MANT_SIGN_NAN, |
22936 | _MM_FROUND_NO_EXC, |
22937 | >(src, 0b01010101010101010101010101010101, a); |
22938 | let e = _mm512_set_ph( |
22939 | 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, |
22940 | 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, |
22941 | 20.0, 1.25, 20.0, 1.25, |
22942 | ); |
22943 | assert_eq_m512h(r, e); |
22944 | } |
22945 | |
22946 | #[simd_test(enable = "avx512fp16" )] |
22947 | unsafe fn test_mm512_maskz_getmant_round_ph() { |
22948 | let a = _mm512_set1_ph(10.0); |
22949 | let r = _mm512_maskz_getmant_round_ph::< |
22950 | _MM_MANT_NORM_P75_1P5, |
22951 | _MM_MANT_SIGN_NAN, |
22952 | _MM_FROUND_NO_EXC, |
22953 | >(0b01010101010101010101010101010101, a); |
22954 | let e = _mm512_set_ph( |
22955 | 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, |
22956 | 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, |
22957 | ); |
22958 | assert_eq_m512h(r, e); |
22959 | } |
22960 | |
22961 | #[simd_test(enable = "avx512fp16" )] |
22962 | unsafe fn test_mm_getmant_sh() { |
22963 | let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.); |
22964 | let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.); |
22965 | let r = _mm_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a, b); |
22966 | let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.); |
22967 | assert_eq_m128h(r, e); |
22968 | } |
22969 | |
22970 | #[simd_test(enable = "avx512fp16" )] |
22971 | unsafe fn test_mm_mask_getmant_sh() { |
22972 | let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.); |
22973 | let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.); |
22974 | let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.); |
22975 | let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 0, a, b); |
22976 | let e = _mm_setr_ph(20.0, 10., 11., 12., 13., 14., 15., 16.); |
22977 | assert_eq_m128h(r, e); |
22978 | let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 1, a, b); |
22979 | let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.); |
22980 | assert_eq_m128h(r, e); |
22981 | } |
22982 | |
22983 | #[simd_test(enable = "avx512fp16" )] |
22984 | unsafe fn test_mm_maskz_getmant_sh() { |
22985 | let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.); |
22986 | let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.); |
22987 | let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0, a, b); |
22988 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
22989 | assert_eq_m128h(r, e); |
22990 | let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(1, a, b); |
22991 | let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.); |
22992 | assert_eq_m128h(r, e); |
22993 | } |
22994 | |
22995 | #[simd_test(enable = "avx512fp16" )] |
22996 | unsafe fn test_mm_getmant_round_sh() { |
22997 | let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.); |
22998 | let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.); |
22999 | let r = _mm_getmant_round_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>( |
23000 | a, b, |
23001 | ); |
23002 | let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.); |
23003 | assert_eq_m128h(r, e); |
23004 | } |
23005 | |
23006 | #[simd_test(enable = "avx512fp16" )] |
23007 | unsafe fn test_mm_mask_getmant_round_sh() { |
23008 | let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.); |
23009 | let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.); |
23010 | let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.); |
23011 | let r = _mm_mask_getmant_round_sh::< |
23012 | _MM_MANT_NORM_P75_1P5, |
23013 | _MM_MANT_SIGN_NAN, |
23014 | _MM_FROUND_NO_EXC, |
23015 | >(src, 0, a, b); |
23016 | let e = _mm_setr_ph(20.0, 10., 11., 12., 13., 14., 15., 16.); |
23017 | assert_eq_m128h(r, e); |
23018 | let r = _mm_mask_getmant_round_sh::< |
23019 | _MM_MANT_NORM_P75_1P5, |
23020 | _MM_MANT_SIGN_NAN, |
23021 | _MM_FROUND_NO_EXC, |
23022 | >(src, 1, a, b); |
23023 | let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.); |
23024 | assert_eq_m128h(r, e); |
23025 | } |
23026 | |
23027 | #[simd_test(enable = "avx512fp16" )] |
23028 | unsafe fn test_mm_maskz_getmant_round_sh() { |
23029 | let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.); |
23030 | let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.); |
23031 | let r = _mm_maskz_getmant_round_sh::< |
23032 | _MM_MANT_NORM_P75_1P5, |
23033 | _MM_MANT_SIGN_NAN, |
23034 | _MM_FROUND_NO_EXC, |
23035 | >(0, a, b); |
23036 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
23037 | assert_eq_m128h(r, e); |
23038 | let r = _mm_maskz_getmant_round_sh::< |
23039 | _MM_MANT_NORM_P75_1P5, |
23040 | _MM_MANT_SIGN_NAN, |
23041 | _MM_FROUND_NO_EXC, |
23042 | >(1, a, b); |
23043 | let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.); |
23044 | assert_eq_m128h(r, e); |
23045 | } |
23046 | |
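// The roundscale_* intrinsics round each lane to the number of fraction bits given by
// the immediate; with an immediate of 0 this rounds to the nearest integer, so 1.1
// becomes 1.0.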
23047 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23048 | unsafe fn test_mm_roundscale_ph() { |
23049 | let a = _mm_set1_ph(1.1); |
23050 | let r = _mm_roundscale_ph::<0>(a); |
23051 | let e = _mm_set1_ph(1.0); |
23052 | assert_eq_m128h(r, e); |
23053 | } |
23054 | |
23055 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23056 | unsafe fn test_mm_mask_roundscale_ph() { |
23057 | let a = _mm_set1_ph(1.1); |
23058 | let src = _mm_set1_ph(2.0); |
23059 | let r = _mm_mask_roundscale_ph::<0>(src, 0b01010101, a); |
23060 | let e = _mm_set_ph(2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0); |
23061 | assert_eq_m128h(r, e); |
23062 | } |
23063 | |
23064 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23065 | unsafe fn test_mm_maskz_roundscale_ph() { |
23066 | let a = _mm_set1_ph(1.1); |
23067 | let r = _mm_maskz_roundscale_ph::<0>(0b01010101, a); |
23068 | let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0); |
23069 | assert_eq_m128h(r, e); |
23070 | } |
23071 | |
23072 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23073 | unsafe fn test_mm256_roundscale_ph() { |
23074 | let a = _mm256_set1_ph(1.1); |
23075 | let r = _mm256_roundscale_ph::<0>(a); |
23076 | let e = _mm256_set1_ph(1.0); |
23077 | assert_eq_m256h(r, e); |
23078 | } |
23079 | |
23080 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23081 | unsafe fn test_mm256_mask_roundscale_ph() { |
23082 | let a = _mm256_set1_ph(1.1); |
23083 | let src = _mm256_set1_ph(2.0); |
23084 | let r = _mm256_mask_roundscale_ph::<0>(src, 0b0101010101010101, a); |
23085 | let e = _mm256_set_ph( |
23086 | 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, |
23087 | ); |
23088 | assert_eq_m256h(r, e); |
23089 | } |
23090 | |
23091 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23092 | unsafe fn test_mm256_maskz_roundscale_ph() { |
23093 | let a = _mm256_set1_ph(1.1); |
23094 | let r = _mm256_maskz_roundscale_ph::<0>(0b0101010101010101, a); |
23095 | let e = _mm256_set_ph( |
23096 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
23097 | ); |
23098 | assert_eq_m256h(r, e); |
23099 | } |
23100 | |
23101 | #[simd_test(enable = "avx512fp16" )] |
23102 | unsafe fn test_mm512_roundscale_ph() { |
23103 | let a = _mm512_set1_ph(1.1); |
23104 | let r = _mm512_roundscale_ph::<0>(a); |
23105 | let e = _mm512_set1_ph(1.0); |
23106 | assert_eq_m512h(r, e); |
23107 | } |
23108 | |
23109 | #[simd_test(enable = "avx512fp16" )] |
23110 | unsafe fn test_mm512_mask_roundscale_ph() { |
23111 | let a = _mm512_set1_ph(1.1); |
23112 | let src = _mm512_set1_ph(2.0); |
23113 | let r = _mm512_mask_roundscale_ph::<0>(src, 0b01010101010101010101010101010101, a); |
23114 | let e = _mm512_set_ph( |
23115 | 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, |
23116 | 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, |
23117 | ); |
23118 | assert_eq_m512h(r, e); |
23119 | } |
23120 | |
23121 | #[simd_test(enable = "avx512fp16" )] |
23122 | unsafe fn test_mm512_maskz_roundscale_ph() { |
23123 | let a = _mm512_set1_ph(1.1); |
23124 | let r = _mm512_maskz_roundscale_ph::<0>(0b01010101010101010101010101010101, a); |
23125 | let e = _mm512_set_ph( |
23126 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, |
23127 | 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
23128 | ); |
23129 | assert_eq_m512h(r, e); |
23130 | } |
23131 | |
23132 | #[simd_test(enable = "avx512fp16" )] |
23133 | unsafe fn test_mm512_roundscale_round_ph() { |
23134 | let a = _mm512_set1_ph(1.1); |
23135 | let r = _mm512_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(a); |
23136 | let e = _mm512_set1_ph(1.0); |
23137 | assert_eq_m512h(r, e); |
23138 | } |
23139 | |
23140 | #[simd_test(enable = "avx512fp16" )] |
23141 | unsafe fn test_mm512_mask_roundscale_round_ph() { |
23142 | let a = _mm512_set1_ph(1.1); |
23143 | let src = _mm512_set1_ph(2.0); |
23144 | let r = _mm512_mask_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>( |
23145 | src, |
23146 | 0b01010101010101010101010101010101, |
23147 | a, |
23148 | ); |
23149 | let e = _mm512_set_ph( |
23150 | 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, |
23151 | 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, |
23152 | ); |
23153 | assert_eq_m512h(r, e); |
23154 | } |
23155 | |
23156 | #[simd_test(enable = "avx512fp16" )] |
23157 | unsafe fn test_mm512_maskz_roundscale_round_ph() { |
23158 | let a = _mm512_set1_ph(1.1); |
23159 | let r = _mm512_maskz_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>( |
23160 | 0b01010101010101010101010101010101, |
23161 | a, |
23162 | ); |
23163 | let e = _mm512_set_ph( |
23164 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, |
23165 | 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
23166 | ); |
23167 | assert_eq_m512h(r, e); |
23168 | } |
23169 | |
23170 | #[simd_test(enable = "avx512fp16" )] |
23171 | unsafe fn test_mm_roundscale_sh() { |
23172 | let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23173 | let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.); |
23174 | let r = _mm_roundscale_sh::<0>(a, b); |
23175 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23176 | assert_eq_m128h(r, e); |
23177 | } |
23178 | |
23179 | #[simd_test(enable = "avx512fp16" )] |
23180 | unsafe fn test_mm_mask_roundscale_sh() { |
23181 | let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23182 | let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.); |
23183 | let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
23184 | let r = _mm_mask_roundscale_sh::<0>(src, 0, a, b); |
23185 | let e = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); |
23186 | assert_eq_m128h(r, e); |
23187 | let r = _mm_mask_roundscale_sh::<0>(src, 1, a, b); |
23188 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23189 | assert_eq_m128h(r, e); |
23190 | } |
23191 | |
23192 | #[simd_test(enable = "avx512fp16" )] |
23193 | unsafe fn test_mm_maskz_roundscale_sh() { |
23194 | let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23195 | let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.); |
23196 | let r = _mm_maskz_roundscale_sh::<0>(0, a, b); |
23197 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
23198 | assert_eq_m128h(r, e); |
23199 | let r = _mm_maskz_roundscale_sh::<0>(1, a, b); |
23200 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23201 | assert_eq_m128h(r, e); |
23202 | } |
23203 | |
23204 | #[simd_test(enable = "avx512fp16" )] |
23205 | unsafe fn test_mm_roundscale_round_sh() { |
23206 | let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23207 | let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.); |
23208 | let r = _mm_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(a, b); |
23209 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23210 | assert_eq_m128h(r, e); |
23211 | } |
23212 | |
23213 | #[simd_test(enable = "avx512fp16" )] |
23214 | unsafe fn test_mm_mask_roundscale_round_sh() { |
23215 | let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23216 | let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.); |
23217 | let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
23218 | let r = _mm_mask_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(src, 0, a, b); |
23219 | let e = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); |
23220 | assert_eq_m128h(r, e); |
23221 | let r = _mm_mask_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(src, 1, a, b); |
23222 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23223 | assert_eq_m128h(r, e); |
23224 | } |
23225 | |
23226 | #[simd_test(enable = "avx512fp16" )] |
23227 | unsafe fn test_mm_maskz_roundscale_round_sh() { |
23228 | let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23229 | let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.); |
23230 | let r = _mm_maskz_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(0, a, b); |
23231 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
23232 | assert_eq_m128h(r, e); |
23233 | let r = _mm_maskz_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(1, a, b); |
23234 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23235 | assert_eq_m128h(r, e); |
23236 | } |
23237 | |
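// The scalef_* intrinsics compute a * 2^floor(b) per lane, so 1.0 * 2^3 == 8.0.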
23238 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23239 | unsafe fn test_mm_scalef_ph() { |
23240 | let a = _mm_set1_ph(1.); |
23241 | let b = _mm_set1_ph(3.); |
23242 | let r = _mm_scalef_ph(a, b); |
23243 | let e = _mm_set1_ph(8.0); |
23244 | assert_eq_m128h(r, e); |
23245 | } |
23246 | |
23247 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23248 | unsafe fn test_mm_mask_scalef_ph() { |
23249 | let a = _mm_set1_ph(1.); |
23250 | let b = _mm_set1_ph(3.); |
23251 | let src = _mm_set1_ph(2.); |
23252 | let r = _mm_mask_scalef_ph(src, 0b01010101, a, b); |
23253 | let e = _mm_set_ph(2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0); |
23254 | assert_eq_m128h(r, e); |
23255 | } |
23256 | |
23257 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23258 | unsafe fn test_mm_maskz_scalef_ph() { |
23259 | let a = _mm_set1_ph(1.); |
23260 | let b = _mm_set1_ph(3.); |
23261 | let r = _mm_maskz_scalef_ph(0b01010101, a, b); |
23262 | let e = _mm_set_ph(0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0); |
23263 | assert_eq_m128h(r, e); |
23264 | } |
23265 | |
23266 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23267 | unsafe fn test_mm256_scalef_ph() { |
23268 | let a = _mm256_set1_ph(1.); |
23269 | let b = _mm256_set1_ph(3.); |
23270 | let r = _mm256_scalef_ph(a, b); |
23271 | let e = _mm256_set1_ph(8.0); |
23272 | assert_eq_m256h(r, e); |
23273 | } |
23274 | |
23275 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23276 | unsafe fn test_mm256_mask_scalef_ph() { |
23277 | let a = _mm256_set1_ph(1.); |
23278 | let b = _mm256_set1_ph(3.); |
23279 | let src = _mm256_set1_ph(2.); |
23280 | let r = _mm256_mask_scalef_ph(src, 0b0101010101010101, a, b); |
23281 | let e = _mm256_set_ph( |
23282 | 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, |
23283 | ); |
23284 | assert_eq_m256h(r, e); |
23285 | } |
23286 | |
23287 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23288 | unsafe fn test_mm256_maskz_scalef_ph() { |
23289 | let a = _mm256_set1_ph(1.); |
23290 | let b = _mm256_set1_ph(3.); |
23291 | let r = _mm256_maskz_scalef_ph(0b0101010101010101, a, b); |
23292 | let e = _mm256_set_ph( |
23293 | 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, |
23294 | ); |
23295 | assert_eq_m256h(r, e); |
23296 | } |
23297 | |
23298 | #[simd_test(enable = "avx512fp16" )] |
23299 | unsafe fn test_mm512_scalef_ph() { |
23300 | let a = _mm512_set1_ph(1.); |
23301 | let b = _mm512_set1_ph(3.); |
23302 | let r = _mm512_scalef_ph(a, b); |
23303 | let e = _mm512_set1_ph(8.0); |
23304 | assert_eq_m512h(r, e); |
23305 | } |
23306 | |
23307 | #[simd_test(enable = "avx512fp16" )] |
23308 | unsafe fn test_mm512_mask_scalef_ph() { |
23309 | let a = _mm512_set1_ph(1.); |
23310 | let b = _mm512_set1_ph(3.); |
23311 | let src = _mm512_set1_ph(2.); |
23312 | let r = _mm512_mask_scalef_ph(src, 0b01010101010101010101010101010101, a, b); |
23313 | let e = _mm512_set_ph( |
23314 | 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, |
23315 | 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, |
23316 | ); |
23317 | assert_eq_m512h(r, e); |
23318 | } |
23319 | |
23320 | #[simd_test(enable = "avx512fp16" )] |
23321 | unsafe fn test_mm512_maskz_scalef_ph() { |
23322 | let a = _mm512_set1_ph(1.); |
23323 | let b = _mm512_set1_ph(3.); |
23324 | let r = _mm512_maskz_scalef_ph(0b01010101010101010101010101010101, a, b); |
23325 | let e = _mm512_set_ph( |
23326 | 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, |
23327 | 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, |
23328 | ); |
23329 | assert_eq_m512h(r, e); |
23330 | } |
23331 | |
23332 | #[simd_test(enable = "avx512fp16" )] |
23333 | unsafe fn test_mm512_scalef_round_ph() { |
23334 | let a = _mm512_set1_ph(1.); |
23335 | let b = _mm512_set1_ph(3.); |
23336 | let r = _mm512_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
23337 | let e = _mm512_set1_ph(8.0); |
23338 | assert_eq_m512h(r, e); |
23339 | } |
23340 | |
23341 | #[simd_test(enable = "avx512fp16" )] |
23342 | unsafe fn test_mm512_mask_scalef_round_ph() { |
23343 | let a = _mm512_set1_ph(1.); |
23344 | let b = _mm512_set1_ph(3.); |
23345 | let src = _mm512_set1_ph(2.); |
23346 | let r = _mm512_mask_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
23347 | src, |
23348 | 0b01010101010101010101010101010101, |
23349 | a, |
23350 | b, |
23351 | ); |
23352 | let e = _mm512_set_ph( |
23353 | 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, |
23354 | 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, |
23355 | ); |
23356 | assert_eq_m512h(r, e); |
23357 | } |
23358 | |
23359 | #[simd_test(enable = "avx512fp16" )] |
23360 | unsafe fn test_mm512_maskz_scalef_round_ph() { |
23361 | let a = _mm512_set1_ph(1.); |
23362 | let b = _mm512_set1_ph(3.); |
23363 | let r = _mm512_maskz_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
23364 | 0b01010101010101010101010101010101, |
23365 | a, |
23366 | b, |
23367 | ); |
23368 | let e = _mm512_set_ph( |
23369 | 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, |
23370 | 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, |
23371 | ); |
23372 | assert_eq_m512h(r, e); |
23373 | } |
23374 | |
23375 | #[simd_test(enable = "avx512fp16" )] |
23376 | unsafe fn test_mm_scalef_sh() { |
23377 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23378 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
23379 | let r = _mm_scalef_sh(a, b); |
23380 | let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.); |
23381 | assert_eq_m128h(r, e); |
23382 | } |
23383 | |
23384 | #[simd_test(enable = "avx512fp16" )] |
23385 | unsafe fn test_mm_mask_scalef_sh() { |
23386 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23387 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
23388 | let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.); |
23389 | let r = _mm_mask_scalef_sh(src, 0, a, b); |
23390 | let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23391 | assert_eq_m128h(r, e); |
23392 | let r = _mm_mask_scalef_sh(src, 1, a, b); |
23393 | let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.); |
23394 | assert_eq_m128h(r, e); |
23395 | } |
23396 | |
23397 | #[simd_test(enable = "avx512fp16" )] |
23398 | unsafe fn test_mm_maskz_scalef_sh() { |
23399 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23400 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
23401 | let r = _mm_maskz_scalef_sh(0, a, b); |
23402 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
23403 | assert_eq_m128h(r, e); |
23404 | let r = _mm_maskz_scalef_sh(1, a, b); |
23405 | let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.); |
23406 | assert_eq_m128h(r, e); |
23407 | } |
23408 | |
23409 | #[simd_test(enable = "avx512fp16" )] |
23410 | unsafe fn test_mm_scalef_round_sh() { |
23411 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23412 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
23413 | let r = _mm_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
23414 | let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.); |
23415 | assert_eq_m128h(r, e); |
23416 | } |
23417 | |
23418 | #[simd_test(enable = "avx512fp16" )] |
23419 | unsafe fn test_mm_mask_scalef_round_sh() { |
23420 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23421 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
23422 | let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.); |
23423 | let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
23424 | src, 0, a, b, |
23425 | ); |
23426 | let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23427 | assert_eq_m128h(r, e); |
23428 | let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
23429 | src, 1, a, b, |
23430 | ); |
23431 | let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.); |
23432 | assert_eq_m128h(r, e); |
23433 | } |
23434 | |
23435 | #[simd_test(enable = "avx512fp16" )] |
23436 | unsafe fn test_mm_maskz_scalef_round_sh() { |
23437 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23438 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
23439 | let r = |
23440 | _mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
23441 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
23442 | assert_eq_m128h(r, e); |
23443 | let r = |
23444 | _mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
23445 | let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.); |
23446 | assert_eq_m128h(r, e); |
23447 | } |
23448 | |
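// vreduceph computes a - round(a, IMM8): the upper four bits of IMM8 (here 16, i.e. one
// fraction bit kept) combined with _MM_FROUND_TO_ZERO truncate 1.25 to 1.0, leaving 0.25.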
23449 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23450 | unsafe fn test_mm_reduce_ph() { |
23451 | let a = _mm_set1_ph(1.25); |
23452 | let r = _mm_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
23453 | let e = _mm_set1_ph(0.25); |
23454 | assert_eq_m128h(r, e); |
23455 | } |
23456 | |
23457 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23458 | unsafe fn test_mm_mask_reduce_ph() { |
23459 | let a = _mm_set1_ph(1.25); |
23460 | let src = _mm_set1_ph(2.0); |
23461 | let r = _mm_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01010101, a); |
23462 | let e = _mm_set_ph(2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25); |
23463 | assert_eq_m128h(r, e); |
23464 | } |
23465 | |
23466 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23467 | unsafe fn test_mm_maskz_reduce_ph() { |
23468 | let a = _mm_set1_ph(1.25); |
23469 | let r = _mm_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01010101, a); |
23470 | let e = _mm_set_ph(0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25); |
23471 | assert_eq_m128h(r, e); |
23472 | } |
23473 | |
23474 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23475 | unsafe fn test_mm256_reduce_ph() { |
23476 | let a = _mm256_set1_ph(1.25); |
23477 | let r = _mm256_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
23478 | let e = _mm256_set1_ph(0.25); |
23479 | assert_eq_m256h(r, e); |
23480 | } |
23481 | |
23482 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23483 | unsafe fn test_mm256_mask_reduce_ph() { |
23484 | let a = _mm256_set1_ph(1.25); |
23485 | let src = _mm256_set1_ph(2.0); |
23486 | let r = _mm256_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0101010101010101, a); |
23487 | let e = _mm256_set_ph( |
23488 | 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, |
23489 | ); |
23490 | assert_eq_m256h(r, e); |
23491 | } |
23492 | |
23493 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23494 | unsafe fn test_mm256_maskz_reduce_ph() { |
23495 | let a = _mm256_set1_ph(1.25); |
23496 | let r = _mm256_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0101010101010101, a); |
23497 | let e = _mm256_set_ph( |
23498 | 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, |
23499 | ); |
23500 | assert_eq_m256h(r, e); |
23501 | } |
23502 | |
23503 | #[simd_test(enable = "avx512fp16" )] |
23504 | unsafe fn test_mm512_reduce_ph() { |
23505 | let a = _mm512_set1_ph(1.25); |
23506 | let r = _mm512_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
23507 | let e = _mm512_set1_ph(0.25); |
23508 | assert_eq_m512h(r, e); |
23509 | } |
23510 | |
23511 | #[simd_test(enable = "avx512fp16" )] |
23512 | unsafe fn test_mm512_mask_reduce_ph() { |
23513 | let a = _mm512_set1_ph(1.25); |
23514 | let src = _mm512_set1_ph(2.0); |
23515 | let r = _mm512_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>( |
23516 | src, |
23517 | 0b01010101010101010101010101010101, |
23518 | a, |
23519 | ); |
23520 | let e = _mm512_set_ph( |
23521 | 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, |
23522 | 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, |
23523 | ); |
23524 | assert_eq_m512h(r, e); |
23525 | } |
23526 | |
23527 | #[simd_test(enable = "avx512fp16" )] |
23528 | unsafe fn test_mm512_maskz_reduce_ph() { |
23529 | let a = _mm512_set1_ph(1.25); |
23530 | let r = _mm512_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>( |
23531 | 0b01010101010101010101010101010101, |
23532 | a, |
23533 | ); |
23534 | let e = _mm512_set_ph( |
23535 | 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, |
23536 | 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, |
23537 | ); |
23538 | assert_eq_m512h(r, e); |
23539 | } |
23540 | |
23541 | #[simd_test(enable = "avx512fp16" )] |
23542 | unsafe fn test_mm512_reduce_round_ph() { |
23543 | let a = _mm512_set1_ph(1.25); |
23544 | let r = _mm512_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a); |
23545 | let e = _mm512_set1_ph(0.25); |
23546 | assert_eq_m512h(r, e); |
23547 | } |
23548 | |
23549 | #[simd_test(enable = "avx512fp16" )] |
23550 | unsafe fn test_mm512_mask_reduce_round_ph() { |
23551 | let a = _mm512_set1_ph(1.25); |
23552 | let src = _mm512_set1_ph(2.0); |
23553 | let r = _mm512_mask_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
23554 | src, |
23555 | 0b01010101010101010101010101010101, |
23556 | a, |
23557 | ); |
23558 | let e = _mm512_set_ph( |
23559 | 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, |
23560 | 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, |
23561 | ); |
23562 | assert_eq_m512h(r, e); |
23563 | } |
23564 | |
23565 | #[simd_test(enable = "avx512fp16" )] |
23566 | unsafe fn test_mm512_maskz_reduce_round_ph() { |
23567 | let a = _mm512_set1_ph(1.25); |
23568 | let r = _mm512_maskz_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
23569 | 0b01010101010101010101010101010101, |
23570 | a, |
23571 | ); |
23572 | let e = _mm512_set_ph( |
23573 | 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, |
23574 | 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, |
23575 | ); |
23576 | assert_eq_m512h(r, e); |
23577 | } |
23578 | |
23579 | #[simd_test(enable = "avx512fp16" )] |
23580 | unsafe fn test_mm_reduce_sh() { |
23581 | let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); |
23582 | let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.); |
23583 | let r = _mm_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b); |
23584 | let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.); |
23585 | assert_eq_m128h(r, e); |
23586 | } |
23587 | |
23588 | #[simd_test(enable = "avx512fp16" )] |
23589 | unsafe fn test_mm_mask_reduce_sh() { |
23590 | let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); |
23591 | let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.); |
23592 | let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.); |
23593 | let r = _mm_mask_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0, a, b); |
23594 | let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23595 | assert_eq_m128h(r, e); |
23596 | let r = _mm_mask_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 1, a, b); |
23597 | let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.); |
23598 | assert_eq_m128h(r, e); |
23599 | } |
23600 | |
23601 | #[simd_test(enable = "avx512fp16" )] |
23602 | unsafe fn test_mm_maskz_reduce_sh() { |
23603 | let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); |
23604 | let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.); |
23605 | let r = _mm_maskz_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(0, a, b); |
23606 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
23607 | assert_eq_m128h(r, e); |
23608 | let r = _mm_maskz_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(1, a, b); |
23609 | let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.); |
23610 | assert_eq_m128h(r, e); |
23611 | } |
23612 | |
23613 | #[simd_test(enable = "avx512fp16" )] |
23614 | unsafe fn test_mm_reduce_round_sh() { |
23615 | let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); |
23616 | let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.); |
23617 | let r = _mm_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b); |
23618 | let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.); |
23619 | assert_eq_m128h(r, e); |
23620 | } |
23621 | |
23622 | #[simd_test(enable = "avx512fp16" )] |
23623 | unsafe fn test_mm_mask_reduce_round_sh() { |
23624 | let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); |
23625 | let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.); |
23626 | let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.); |
23627 | let r = _mm_mask_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
23628 | src, 0, a, b, |
23629 | ); |
23630 | let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23631 | assert_eq_m128h(r, e); |
23632 | let r = _mm_mask_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
23633 | src, 1, a, b, |
23634 | ); |
23635 | let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.); |
23636 | assert_eq_m128h(r, e); |
23637 | } |
23638 | |
23639 | #[simd_test(enable = "avx512fp16" )] |
23640 | unsafe fn test_mm_maskz_reduce_round_sh() { |
23641 | let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); |
23642 | let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.); |
23643 | let r = |
23644 | _mm_maskz_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0, a, b); |
23645 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
23646 | assert_eq_m128h(r, e); |
23647 | let r = |
23648 | _mm_maskz_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(1, a, b); |
23649 | let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.); |
23650 | assert_eq_m128h(r, e); |
23651 | } |
23652 | |
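// Unlike the vreduceph tests above, _mm*_reduce_{add,mul,min,max}_ph fold all lanes into a
// single f16 scalar.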
23653 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23654 | unsafe fn test_mm_reduce_add_ph() { |
23655 | let a = _mm_set1_ph(2.0); |
23656 | let r = _mm_reduce_add_ph(a); |
23657 | assert_eq!(r, 16.0); |
23658 | } |
23659 | |
23660 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23661 | unsafe fn test_mm256_reduce_add_ph() { |
23662 | let a = _mm256_set1_ph(2.0); |
23663 | let r = _mm256_reduce_add_ph(a); |
23664 | assert_eq!(r, 32.0); |
23665 | } |
23666 | |
23667 | #[simd_test(enable = "avx512fp16" )] |
23668 | unsafe fn test_mm512_reduce_add_ph() { |
23669 | let a = _mm512_set1_ph(2.0); |
23670 | let r = _mm512_reduce_add_ph(a); |
23671 | assert_eq!(r, 64.0); |
23672 | } |
23673 | |
23674 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23675 | unsafe fn test_mm_reduce_mul_ph() { |
23676 | let a = _mm_set1_ph(2.0); |
23677 | let r = _mm_reduce_mul_ph(a); |
23678 | assert_eq!(r, 256.0); |
23679 | } |
23680 | |
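// 2.0^16 and 2.0^32 exceed f16::MAX (65504), so in this test and the 512-bit one below both
// the computed product and the expected literal round to +infinity.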
23681 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23682 | unsafe fn test_mm256_reduce_mul_ph() { |
23683 | let a = _mm256_set1_ph(2.0); |
23684 | let r = _mm256_reduce_mul_ph(a); |
23685 | assert_eq!(r, 65536.0); |
23686 | } |
23687 | |
23688 | #[simd_test(enable = "avx512fp16" )] |
23689 | unsafe fn test_mm512_reduce_mul_ph() { |
23690 | let a = _mm512_set1_ph(2.0); |
23691 | let r = _mm512_reduce_mul_ph(a); |
23692 | assert_eq!(r, 16777216.0); |
23693 | } |
23694 | |
23695 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23696 | unsafe fn test_mm_reduce_max_ph() { |
23697 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
23698 | let r = _mm_reduce_max_ph(a); |
23699 | assert_eq!(r, 8.0); |
23700 | } |
23701 | |
23702 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23703 | unsafe fn test_mm256_reduce_max_ph() { |
23704 | let a = _mm256_set_ph( |
23705 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
23706 | ); |
23707 | let r = _mm256_reduce_max_ph(a); |
23708 | assert_eq!(r, 16.0); |
23709 | } |
23710 | |
23711 | #[simd_test(enable = "avx512fp16" )] |
23712 | unsafe fn test_mm512_reduce_max_ph() { |
23713 | let a = _mm512_set_ph( |
23714 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
23715 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
23716 | 31.0, 32.0, |
23717 | ); |
23718 | let r = _mm512_reduce_max_ph(a); |
23719 | assert_eq!(r, 32.0); |
23720 | } |
23721 | |
23722 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23723 | unsafe fn test_mm_reduce_min_ph() { |
23724 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
23725 | let r = _mm_reduce_min_ph(a); |
23726 | assert_eq!(r, 1.0); |
23727 | } |
23728 | |
23729 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23730 | unsafe fn test_mm256_reduce_min_ph() { |
23731 | let a = _mm256_set_ph( |
23732 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
23733 | ); |
23734 | let r = _mm256_reduce_min_ph(a); |
23735 | assert_eq!(r, 1.0); |
23736 | } |
23737 | |
23738 | #[simd_test(enable = "avx512fp16" )] |
23739 | unsafe fn test_mm512_reduce_min_ph() { |
23740 | let a = _mm512_set_ph( |
23741 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
23742 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
23743 | 31.0, 32.0, |
23744 | ); |
23745 | let r = _mm512_reduce_min_ph(a); |
23746 | assert_eq!(r, 1.0); |
23747 | } |
23748 | |
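// fpclass category mask 0x18 sets bits 3 and 4: positive and negative infinity.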
23749 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23750 | unsafe fn test_mm_fpclass_ph_mask() { |
23751 | let a = _mm_set_ph( |
23752 | 1., |
23753 | f16::INFINITY, |
23754 | f16::NEG_INFINITY, |
23755 | 0.0, |
23756 | -0.0, |
23757 | -2.0, |
23758 | f16::NAN, |
23759 | 5.9e-8, // Denormal |
23760 | ); |
23761 | let r = _mm_fpclass_ph_mask::<0x18>(a); // infinities |
23762 | assert_eq!(r, 0b01100000); |
23763 | } |
23764 | |
23765 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23766 | unsafe fn test_mm_mask_fpclass_ph_mask() { |
23767 | let a = _mm_set_ph( |
23768 | 1., |
23769 | f16::INFINITY, |
23770 | f16::NEG_INFINITY, |
23771 | 0.0, |
23772 | -0.0, |
23773 | -2.0, |
23774 | f16::NAN, |
23775 | 5.9e-8, // Denormal |
23776 | ); |
23777 | let r = _mm_mask_fpclass_ph_mask::<0x18>(0b01010101, a); |
23778 | assert_eq!(r, 0b01000000); |
23779 | } |
23780 | |
23781 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23782 | unsafe fn test_mm256_fpclass_ph_mask() { |
23783 | let a = _mm256_set_ph( |
23784 | 1., |
23785 | f16::INFINITY, |
23786 | f16::NEG_INFINITY, |
23787 | 0.0, |
23788 | -0.0, |
23789 | -2.0, |
23790 | f16::NAN, |
23791 | 5.9e-8, // Denormal |
23792 | 1., |
23793 | f16::INFINITY, |
23794 | f16::NEG_INFINITY, |
23795 | 0.0, |
23796 | -0.0, |
23797 | -2.0, |
23798 | f16::NAN, |
23799 | 5.9e-8, // Denormal |
23800 | ); |
23801 | let r = _mm256_fpclass_ph_mask::<0x18>(a); // infinities |
23802 | assert_eq!(r, 0b0110000001100000); |
23803 | } |
23804 | |
23805 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23806 | unsafe fn test_mm256_mask_fpclass_ph_mask() { |
23807 | let a = _mm256_set_ph( |
23808 | 1., |
23809 | f16::INFINITY, |
23810 | f16::NEG_INFINITY, |
23811 | 0.0, |
23812 | -0.0, |
23813 | -2.0, |
23814 | f16::NAN, |
23815 | 5.9e-8, // Denormal |
23816 | 1., |
23817 | f16::INFINITY, |
23818 | f16::NEG_INFINITY, |
23819 | 0.0, |
23820 | -0.0, |
23821 | -2.0, |
23822 | f16::NAN, |
23823 | 5.9e-8, // Denormal |
23824 | ); |
23825 | let r = _mm256_mask_fpclass_ph_mask::<0x18>(0b0101010101010101, a); |
23826 | assert_eq!(r, 0b0100000001000000); |
23827 | } |
23828 | |
23829 | #[simd_test(enable = "avx512fp16" )] |
23830 | unsafe fn test_mm512_fpclass_ph_mask() { |
23831 | let a = _mm512_set_ph( |
23832 | 1., |
23833 | f16::INFINITY, |
23834 | f16::NEG_INFINITY, |
23835 | 0.0, |
23836 | -0.0, |
23837 | -2.0, |
23838 | f16::NAN, |
23839 | 5.9e-8, // Denormal |
23840 | 1., |
23841 | f16::INFINITY, |
23842 | f16::NEG_INFINITY, |
23843 | 0.0, |
23844 | -0.0, |
23845 | -2.0, |
23846 | f16::NAN, |
23847 | 5.9e-8, // Denormal |
23848 | 1., |
23849 | f16::INFINITY, |
23850 | f16::NEG_INFINITY, |
23851 | 0.0, |
23852 | -0.0, |
23853 | -2.0, |
23854 | f16::NAN, |
23855 | 5.9e-8, // Denormal |
23856 | 1., |
23857 | f16::INFINITY, |
23858 | f16::NEG_INFINITY, |
23859 | 0.0, |
23860 | -0.0, |
23861 | -2.0, |
23862 | f16::NAN, |
23863 | 5.9e-8, // Denormal |
23864 | ); |
23865 | let r = _mm512_fpclass_ph_mask::<0x18>(a); // infinities |
23866 | assert_eq!(r, 0b01100000011000000110000001100000); |
23867 | } |
23868 | |
23869 | #[simd_test(enable = "avx512fp16" )] |
23870 | unsafe fn test_mm512_mask_fpclass_ph_mask() { |
23871 | let a = _mm512_set_ph( |
23872 | 1., |
23873 | f16::INFINITY, |
23874 | f16::NEG_INFINITY, |
23875 | 0.0, |
23876 | -0.0, |
23877 | -2.0, |
23878 | f16::NAN, |
23879 | 5.9e-8, // Denormal |
23880 | 1., |
23881 | f16::INFINITY, |
23882 | f16::NEG_INFINITY, |
23883 | 0.0, |
23884 | -0.0, |
23885 | -2.0, |
23886 | f16::NAN, |
23887 | 5.9e-8, // Denormal |
23888 | 1., |
23889 | f16::INFINITY, |
23890 | f16::NEG_INFINITY, |
23891 | 0.0, |
23892 | -0.0, |
23893 | -2.0, |
23894 | f16::NAN, |
23895 | 5.9e-8, // Denormal |
23896 | 1., |
23897 | f16::INFINITY, |
23898 | f16::NEG_INFINITY, |
23899 | 0.0, |
23900 | -0.0, |
23901 | -2.0, |
23902 | f16::NAN, |
23903 | 5.9e-8, // Denormal |
23904 | ); |
23905 | let r = _mm512_mask_fpclass_ph_mask::<0x18>(0b01010101010101010101010101010101, a); |
23906 | assert_eq!(r, 0b01000000010000000100000001000000); |
23907 | } |
23908 | |
23909 | #[simd_test(enable = "avx512fp16" )] |
23910 | unsafe fn test_mm_fpclass_sh_mask() { |
23911 | let a = _mm_set_sh(f16::INFINITY); |
23912 | let r = _mm_fpclass_sh_mask::<0x18>(a); |
23913 | assert_eq!(r, 1); |
23914 | } |
23915 | |
23916 | #[simd_test(enable = "avx512fp16" )] |
23917 | unsafe fn test_mm_mask_fpclass_sh_mask() { |
23918 | let a = _mm_set_sh(f16::INFINITY); |
23919 | let r = _mm_mask_fpclass_sh_mask::<0x18>(0, a); |
23920 | assert_eq!(r, 0); |
23921 | let r = _mm_mask_fpclass_sh_mask::<0x18>(1, a); |
23922 | assert_eq!(r, 1); |
23923 | } |
23924 | |
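// mask_blend takes the lane from b where the mask bit is set and from a where it is clear;
// mask bit 0 maps to lane 0, which is the last argument of _mm_set_ph.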
23925 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23926 | unsafe fn test_mm_mask_blend_ph() { |
23927 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
23928 | let b = _mm_set_ph(-1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0); |
23929 | let r = _mm_mask_blend_ph(0b01010101, a, b); |
23930 | let e = _mm_set_ph(1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0); |
23931 | assert_eq_m128h(r, e); |
23932 | } |
23933 | |
23934 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23935 | unsafe fn test_mm256_mask_blend_ph() { |
23936 | let a = _mm256_set_ph( |
23937 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
23938 | ); |
23939 | let b = _mm256_set_ph( |
23940 | -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0, |
23941 | -14.0, -15.0, -16.0, |
23942 | ); |
23943 | let r = _mm256_mask_blend_ph(0b0101010101010101, a, b); |
23944 | let e = _mm256_set_ph( |
23945 | 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, -14.0, 15.0, |
23946 | -16.0, |
23947 | ); |
23948 | assert_eq_m256h(r, e); |
23949 | } |
23950 | |
23951 | #[simd_test(enable = "avx512fp16" )] |
23952 | unsafe fn test_mm512_mask_blend_ph() { |
23953 | let a = _mm512_set_ph( |
23954 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
23955 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
23956 | 31.0, 32.0, |
23957 | ); |
23958 | let b = _mm512_set_ph( |
23959 | -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0, |
23960 | -14.0, -15.0, -16.0, -17.0, -18.0, -19.0, -20.0, -21.0, -22.0, -23.0, -24.0, -25.0, |
23961 | -26.0, -27.0, -28.0, -29.0, -30.0, -31.0, -32.0, |
23962 | ); |
23963 | let r = _mm512_mask_blend_ph(0b01010101010101010101010101010101, a, b); |
23964 | let e = _mm512_set_ph( |
23965 | 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, -14.0, 15.0, |
23966 | -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0, 27.0, -28.0, |
23967 | 29.0, -30.0, 31.0, -32.0, |
23968 | ); |
23969 | assert_eq_m512h(r, e); |
23970 | } |
23971 | |
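// permutex2var indexes into the concatenation of a and b: for the 128-bit form, indices 0-7
// select from a and 8-15 from b (the wider forms use proportionally larger index ranges).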
23972 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23973 | unsafe fn test_mm_permutex2var_ph() { |
23974 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
23975 | let b = _mm_setr_ph(9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
23976 | let idx = _mm_setr_epi16(0, 2, 4, 6, 8, 10, 12, 14); |
23977 | let r = _mm_permutex2var_ph(a, idx, b); |
23978 | let e = _mm_setr_ph(1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0); |
23979 | assert_eq_m128h(r, e); |
23980 | } |
23981 | |
23982 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23983 | unsafe fn test_mm256_permutex2var_ph() { |
23984 | let a = _mm256_setr_ph( |
23985 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
23986 | ); |
23987 | let b = _mm256_setr_ph( |
23988 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
23989 | 31.0, 32.0, |
23990 | ); |
23991 | let idx = _mm256_setr_epi16(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); |
23992 | let r = _mm256_permutex2var_ph(a, idx, b); |
23993 | let e = _mm256_setr_ph( |
23994 | 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0, |
23995 | 31.0, |
23996 | ); |
23997 | assert_eq_m256h(r, e); |
23998 | } |
23999 | |
24000 | #[simd_test(enable = "avx512fp16" )] |
24001 | unsafe fn test_mm512_permutex2var_ph() { |
24002 | let a = _mm512_setr_ph( |
24003 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24004 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
24005 | 31.0, 32.0, |
24006 | ); |
24007 | let b = _mm512_setr_ph( |
24008 | 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, |
24009 | 47.0, 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0, |
24010 | 61.0, 62.0, 63.0, 64.0, |
24011 | ); |
24012 | let idx = _mm512_set_epi16( |
24013 | 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, 30, 28, 26, 24, 22, 20, |
24014 | 18, 16, 14, 12, 10, 8, 6, 4, 2, 0, |
24015 | ); |
24016 | let r = _mm512_permutex2var_ph(a, idx, b); |
24017 | let e = _mm512_setr_ph( |
24018 | 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0, |
24019 | 31.0, 33.0, 35.0, 37.0, 39.0, 41.0, 43.0, 45.0, 47.0, 49.0, 51.0, 53.0, 55.0, 57.0, |
24020 | 59.0, 61.0, 63.0, |
24021 | ); |
24022 | assert_eq_m512h(r, e); |
24023 | } |
24024 | |
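// permutexvar gathers a[idx[i]] into lane i of the result.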
24025 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24026 | unsafe fn test_mm_permutexvar_ph() { |
24027 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24028 | let idx = _mm_set_epi16(0, 2, 4, 6, 1, 3, 5, 7); |
24029 | let r = _mm_permutexvar_ph(idx, a); |
24030 | let e = _mm_setr_ph(1.0, 3.0, 5.0, 7.0, 2.0, 4.0, 6.0, 8.0); |
24031 | assert_eq_m128h(r, e); |
24032 | } |
24033 | |
24034 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24035 | unsafe fn test_mm256_permutexvar_ph() { |
24036 | let a = _mm256_set_ph( |
24037 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24038 | ); |
24039 | let idx = _mm256_set_epi16(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15); |
24040 | let r = _mm256_permutexvar_ph(idx, a); |
24041 | let e = _mm256_setr_ph( |
24042 | 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, |
24043 | ); |
24044 | assert_eq_m256h(r, e); |
24045 | } |
24046 | |
24047 | #[simd_test(enable = "avx512fp16" )] |
24048 | unsafe fn test_mm512_permutexvar_ph() { |
24049 | let a = _mm512_set_ph( |
24050 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24051 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
24052 | 31.0, 32.0, |
24053 | ); |
24054 | let idx = _mm512_set_epi16( |
24055 | 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 1, 3, 5, 7, 9, 11, 13, 15, |
24056 | 17, 19, 21, 23, 25, 27, 29, 31, |
24057 | ); |
24058 | let r = _mm512_permutexvar_ph(idx, a); |
24059 | let e = _mm512_setr_ph( |
24060 | 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0, |
24061 | 31.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 22.0, 24.0, 26.0, 28.0, |
24062 | 30.0, 32.0, |
24063 | ); |
24064 | assert_eq_m512h(r, e); |
24065 | } |
24066 | |
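// The conversion tests below turn packed 16-bit integers into f16 lanes; the mask_ variants
// keep src lanes where the mask bit is clear, and the maskz_ variants zero them instead.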
24067 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24068 | unsafe fn test_mm_cvtepi16_ph() { |
24069 | let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
24070 | let r = _mm_cvtepi16_ph(a); |
24071 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24072 | assert_eq_m128h(r, e); |
24073 | } |
24074 | |
24075 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24076 | unsafe fn test_mm_mask_cvtepi16_ph() { |
24077 | let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
24078 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24079 | let r = _mm_mask_cvtepi16_ph(src, 0b01010101, a); |
24080 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24081 | assert_eq_m128h(r, e); |
24082 | } |
24083 | |
24084 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24085 | unsafe fn test_mm_maskz_cvtepi16_ph() { |
24086 | let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
24087 | let r = _mm_maskz_cvtepi16_ph(0b01010101, a); |
24088 | let e = _mm_set_ph(0., 2., 0., 4., 0., 6., 0., 8.); |
24089 | assert_eq_m128h(r, e); |
24090 | } |
24091 | |
24092 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24093 | unsafe fn test_mm256_cvtepi16_ph() { |
24094 | let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24095 | let r = _mm256_cvtepi16_ph(a); |
24096 | let e = _mm256_set_ph( |
24097 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24098 | ); |
24099 | assert_eq_m256h(r, e); |
24100 | } |
24101 | |
24102 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24103 | unsafe fn test_mm256_mask_cvtepi16_ph() { |
24104 | let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24105 | let src = _mm256_set_ph( |
24106 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., |
24107 | ); |
24108 | let r = _mm256_mask_cvtepi16_ph(src, 0b0101010101010101, a); |
24109 | let e = _mm256_set_ph( |
24110 | 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., |
24111 | ); |
24112 | assert_eq_m256h(r, e); |
24113 | } |
24114 | |
24115 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24116 | unsafe fn test_mm256_maskz_cvtepi16_ph() { |
24117 | let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24118 | let r = _mm256_maskz_cvtepi16_ph(0b0101010101010101, a); |
24119 | let e = _mm256_set_ph( |
24120 | 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., |
24121 | ); |
24122 | assert_eq_m256h(r, e); |
24123 | } |
24124 | |
24125 | #[simd_test(enable = "avx512fp16" )] |
24126 | unsafe fn test_mm512_cvtepi16_ph() { |
24127 | let a = _mm512_set_epi16( |
24128 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24129 | 25, 26, 27, 28, 29, 30, 31, 32, |
24130 | ); |
24131 | let r = _mm512_cvtepi16_ph(a); |
24132 | let e = _mm512_set_ph( |
24133 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24134 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
24135 | 31.0, 32.0, |
24136 | ); |
24137 | assert_eq_m512h(r, e); |
24138 | } |
24139 | |
24140 | #[simd_test(enable = "avx512fp16" )] |
24141 | unsafe fn test_mm512_mask_cvtepi16_ph() { |
24142 | let a = _mm512_set_epi16( |
24143 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24144 | 25, 26, 27, 28, 29, 30, 31, 32, |
24145 | ); |
24146 | let src = _mm512_set_ph( |
24147 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., |
24148 | 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41., |
24149 | ); |
24150 | let r = _mm512_mask_cvtepi16_ph(src, 0b01010101010101010101010101010101, a); |
24151 | let e = _mm512_set_ph( |
24152 | 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18., |
24153 | 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32., |
24154 | ); |
24155 | assert_eq_m512h(r, e); |
24156 | } |
24157 | |
24158 | #[simd_test(enable = "avx512fp16" )] |
24159 | unsafe fn test_mm512_maskz_cvtepi16_ph() { |
24160 | let a = _mm512_set_epi16( |
24161 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24162 | 25, 26, 27, 28, 29, 30, 31, 32, |
24163 | ); |
24164 | let r = _mm512_maskz_cvtepi16_ph(0b01010101010101010101010101010101, a); |
24165 | let e = _mm512_set_ph( |
24166 | 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20., |
24167 | 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32., |
24168 | ); |
24169 | assert_eq_m512h(r, e); |
24170 | } |
24171 | |
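// The cvt_round variants take the rounding mode as a const generic;
// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC selects round-to-nearest-even with
// exceptions suppressed.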
24172 | #[simd_test(enable = "avx512fp16" )] |
24173 | unsafe fn test_mm512_cvt_roundepi16_ph() { |
24174 | let a = _mm512_set_epi16( |
24175 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24176 | 25, 26, 27, 28, 29, 30, 31, 32, |
24177 | ); |
24178 | let r = _mm512_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
24179 | let e = _mm512_set_ph( |
24180 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24181 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
24182 | 31.0, 32.0, |
24183 | ); |
24184 | assert_eq_m512h(r, e); |
24185 | } |
24186 | |
24187 | #[simd_test(enable = "avx512fp16" )] |
24188 | unsafe fn test_mm512_mask_cvt_roundepi16_ph() { |
24189 | let a = _mm512_set_epi16( |
24190 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24191 | 25, 26, 27, 28, 29, 30, 31, 32, |
24192 | ); |
24193 | let src = _mm512_set_ph( |
24194 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., |
24195 | 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41., |
24196 | ); |
24197 | let r = _mm512_mask_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24198 | src, |
24199 | 0b01010101010101010101010101010101, |
24200 | a, |
24201 | ); |
24202 | let e = _mm512_set_ph( |
24203 | 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18., |
24204 | 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32., |
24205 | ); |
24206 | assert_eq_m512h(r, e); |
24207 | } |
24208 | |
24209 | #[simd_test(enable = "avx512fp16" )] |
24210 | unsafe fn test_mm512_maskz_cvt_roundepi16_ph() { |
24211 | let a = _mm512_set_epi16( |
24212 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24213 | 25, 26, 27, 28, 29, 30, 31, 32, |
24214 | ); |
24215 | let r = _mm512_maskz_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24216 | 0b01010101010101010101010101010101, |
24217 | a, |
24218 | ); |
24219 | let e = _mm512_set_ph( |
24220 | 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20., |
24221 | 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32., |
24222 | ); |
24223 | assert_eq_m512h(r, e); |
24224 | } |
24225 | |
24226 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24227 | unsafe fn test_mm_cvtepu16_ph() { |
24228 | let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
24229 | let r = _mm_cvtepu16_ph(a); |
24230 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24231 | assert_eq_m128h(r, e); |
24232 | } |
24233 | |
24234 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24235 | unsafe fn test_mm_mask_cvtepu16_ph() { |
24236 | let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
24237 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24238 | let r = _mm_mask_cvtepu16_ph(src, 0b01010101, a); |
24239 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24240 | assert_eq_m128h(r, e); |
24241 | } |
24242 | |
24243 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24244 | unsafe fn test_mm_maskz_cvtepu16_ph() { |
24245 | let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
24246 | let r = _mm_maskz_cvtepu16_ph(0b01010101, a); |
24247 | let e = _mm_set_ph(0., 2., 0., 4., 0., 6., 0., 8.); |
24248 | assert_eq_m128h(r, e); |
24249 | } |
24250 | |
24251 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24252 | unsafe fn test_mm256_cvtepu16_ph() { |
24253 | let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24254 | let r = _mm256_cvtepu16_ph(a); |
24255 | let e = _mm256_set_ph( |
24256 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24257 | ); |
24258 | assert_eq_m256h(r, e); |
24259 | } |
24260 | |
24261 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24262 | unsafe fn test_mm256_mask_cvtepu16_ph() { |
24263 | let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24264 | let src = _mm256_set_ph( |
24265 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., |
24266 | ); |
24267 | let r = _mm256_mask_cvtepu16_ph(src, 0b0101010101010101, a); |
24268 | let e = _mm256_set_ph( |
24269 | 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., |
24270 | ); |
24271 | assert_eq_m256h(r, e); |
24272 | } |
24273 | |
24274 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24275 | unsafe fn test_mm256_maskz_cvtepu16_ph() { |
24276 | let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24277 | let r = _mm256_maskz_cvtepu16_ph(0b0101010101010101, a); |
24278 | let e = _mm256_set_ph( |
24279 | 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., |
24280 | ); |
24281 | assert_eq_m256h(r, e); |
24282 | } |
24283 | |
24284 | #[simd_test(enable = "avx512fp16" )] |
24285 | unsafe fn test_mm512_cvtepu16_ph() { |
24286 | let a = _mm512_set_epi16( |
24287 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24288 | 25, 26, 27, 28, 29, 30, 31, 32, |
24289 | ); |
24290 | let r = _mm512_cvtepu16_ph(a); |
24291 | let e = _mm512_set_ph( |
24292 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24293 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
24294 | 31.0, 32.0, |
24295 | ); |
24296 | assert_eq_m512h(r, e); |
24297 | } |
24298 | |
24299 | #[simd_test(enable = "avx512fp16" )] |
24300 | unsafe fn test_mm512_mask_cvtepu16_ph() { |
24301 | let a = _mm512_set_epi16( |
24302 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24303 | 25, 26, 27, 28, 29, 30, 31, 32, |
24304 | ); |
24305 | let src = _mm512_set_ph( |
24306 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., |
24307 | 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41., |
24308 | ); |
24309 | let r = _mm512_mask_cvtepu16_ph(src, 0b01010101010101010101010101010101, a); |
24310 | let e = _mm512_set_ph( |
24311 | 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18., |
24312 | 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32., |
24313 | ); |
24314 | assert_eq_m512h(r, e); |
24315 | } |
24316 | |
24317 | #[simd_test(enable = "avx512fp16" )] |
24318 | unsafe fn test_mm512_maskz_cvtepu16_ph() { |
24319 | let a = _mm512_set_epi16( |
24320 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24321 | 25, 26, 27, 28, 29, 30, 31, 32, |
24322 | ); |
24323 | let r = _mm512_maskz_cvtepu16_ph(0b01010101010101010101010101010101, a); |
24324 | let e = _mm512_set_ph( |
24325 | 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20., |
24326 | 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32., |
24327 | ); |
24328 | assert_eq_m512h(r, e); |
24329 | } |
24330 | |
24331 | #[simd_test(enable = "avx512fp16" )] |
24332 | unsafe fn test_mm512_cvt_roundepu16_ph() { |
24333 | let a = _mm512_set_epi16( |
24334 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24335 | 25, 26, 27, 28, 29, 30, 31, 32, |
24336 | ); |
24337 | let r = _mm512_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
24338 | let e = _mm512_set_ph( |
24339 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24340 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
24341 | 31.0, 32.0, |
24342 | ); |
24343 | assert_eq_m512h(r, e); |
24344 | } |
24345 | |
24346 | #[simd_test(enable = "avx512fp16" )] |
24347 | unsafe fn test_mm512_mask_cvt_roundepu16_ph() { |
24348 | let a = _mm512_set_epi16( |
24349 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24350 | 25, 26, 27, 28, 29, 30, 31, 32, |
24351 | ); |
24352 | let src = _mm512_set_ph( |
24353 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., |
24354 | 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41., |
24355 | ); |
24356 | let r = _mm512_mask_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24357 | src, |
24358 | 0b01010101010101010101010101010101, |
24359 | a, |
24360 | ); |
24361 | let e = _mm512_set_ph( |
24362 | 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18., |
24363 | 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32., |
24364 | ); |
24365 | assert_eq_m512h(r, e); |
24366 | } |
24367 | |
24368 | #[simd_test(enable = "avx512fp16" )] |
24369 | unsafe fn test_mm512_maskz_cvt_roundepu16_ph() { |
24370 | let a = _mm512_set_epi16( |
24371 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24372 | 25, 26, 27, 28, 29, 30, 31, 32, |
24373 | ); |
24374 | let r = _mm512_maskz_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24375 | 0b01010101010101010101010101010101, |
24376 | a, |
24377 | ); |
24378 | let e = _mm512_set_ph( |
24379 | 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20., |
24380 | 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32., |
24381 | ); |
24382 | assert_eq_m512h(r, e); |
24383 | } |
24384 | |
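// Converting four 32-bit integers produces only four f16 results, so the upper half of the
// __m128h destination is zeroed.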
24385 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24386 | unsafe fn test_mm_cvtepi32_ph() { |
24387 | let a = _mm_set_epi32(1, 2, 3, 4); |
24388 | let r = _mm_cvtepi32_ph(a); |
24389 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
24390 | assert_eq_m128h(r, e); |
24391 | } |
24392 | |
24393 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24394 | unsafe fn test_mm_mask_cvtepi32_ph() { |
24395 | let a = _mm_set_epi32(1, 2, 3, 4); |
24396 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24397 | let r = _mm_mask_cvtepi32_ph(src, 0b0101, a); |
24398 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2., 16., 4.); |
24399 | assert_eq_m128h(r, e); |
24400 | } |
24401 | |
24402 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24403 | unsafe fn test_mm_maskz_cvtepi32_ph() { |
24404 | let a = _mm_set_epi32(1, 2, 3, 4); |
24405 | let r = _mm_maskz_cvtepi32_ph(0b0101, a); |
24406 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2., 0.0, 4.); |
24407 | assert_eq_m128h(r, e); |
24408 | } |
24409 | |
24410 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24411 | unsafe fn test_mm256_cvtepi32_ph() { |
24412 | let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
24413 | let r = _mm256_cvtepi32_ph(a); |
24414 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24415 | assert_eq_m128h(r, e); |
24416 | } |
24417 | |
24418 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24419 | unsafe fn test_mm256_mask_cvtepi32_ph() { |
24420 | let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
24421 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24422 | let r = _mm256_mask_cvtepi32_ph(src, 0b01010101, a); |
24423 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24424 | assert_eq_m128h(r, e); |
24425 | } |
24426 | |
24427 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24428 | unsafe fn test_mm256_maskz_cvtepi32_ph() { |
24429 | let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
24430 | let r = _mm256_maskz_cvtepi32_ph(0b01010101, a); |
24431 | let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0); |
24432 | assert_eq_m128h(r, e); |
24433 | } |
24434 | |
24435 | #[simd_test(enable = "avx512fp16" )] |
24436 | unsafe fn test_mm512_cvtepi32_ph() { |
24437 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24438 | let r = _mm512_cvtepi32_ph(a); |
24439 | let e = _mm256_set_ph( |
24440 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24441 | ); |
24442 | assert_eq_m256h(r, e); |
24443 | } |
24444 | |
24445 | #[simd_test(enable = "avx512fp16" )] |
24446 | unsafe fn test_mm512_mask_cvtepi32_ph() { |
24447 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24448 | let src = _mm256_set_ph( |
24449 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., |
24450 | ); |
24451 | let r = _mm512_mask_cvtepi32_ph(src, 0b0101010101010101, a); |
24452 | let e = _mm256_set_ph( |
24453 | 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., |
24454 | ); |
24455 | assert_eq_m256h(r, e); |
24456 | } |
24457 | |
24458 | #[simd_test(enable = "avx512fp16" )] |
24459 | unsafe fn test_mm512_maskz_cvtepi32_ph() { |
24460 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24461 | let r = _mm512_maskz_cvtepi32_ph(0b0101010101010101, a); |
24462 | let e = _mm256_set_ph( |
24463 | 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, |
24464 | ); |
24465 | assert_eq_m256h(r, e); |
24466 | } |
24467 | |
24468 | #[simd_test(enable = "avx512fp16" )] |
24469 | unsafe fn test_mm512_cvt_roundepi32_ph() { |
24470 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24471 | let r = _mm512_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
24472 | let e = _mm256_set_ph( |
24473 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24474 | ); |
24475 | assert_eq_m256h(r, e); |
24476 | } |
24477 | |
24478 | #[simd_test(enable = "avx512fp16" )] |
24479 | unsafe fn test_mm512_mask_cvt_roundepi32_ph() { |
24480 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24481 | let src = _mm256_set_ph( |
24482 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., |
24483 | ); |
24484 | let r = _mm512_mask_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24485 | src, |
24486 | 0b0101010101010101, |
24487 | a, |
24488 | ); |
24489 | let e = _mm256_set_ph( |
24490 | 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., |
24491 | ); |
24492 | assert_eq_m256h(r, e); |
24493 | } |
24494 | |
24495 | #[simd_test(enable = "avx512fp16" )] |
24496 | unsafe fn test_mm512_maskz_cvt_roundepi32_ph() { |
24497 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24498 | let r = _mm512_maskz_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24499 | 0b0101010101010101, |
24500 | a, |
24501 | ); |
24502 | let e = _mm256_set_ph( |
24503 | 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, |
24504 | ); |
24505 | assert_eq_m256h(r, e); |
24506 | } |
24507 | |
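// cvti32_sh converts the scalar integer to f16, writes it to lane 0, and copies lanes 1-7
// from a; cvtu32_sh further below does the same for an unsigned integer.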
24508 | #[simd_test(enable = "avx512fp16" )] |
24509 | unsafe fn test_mm_cvti32_sh() { |
24510 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24511 | let r = _mm_cvti32_sh(a, 10); |
24512 | let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24513 | assert_eq_m128h(r, e); |
24514 | } |
24515 | |
24516 | #[simd_test(enable = "avx512fp16" )] |
24517 | unsafe fn test_mm_cvt_roundi32_sh() { |
24518 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24519 | let r = _mm_cvt_roundi32_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10); |
24520 | let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24521 | assert_eq_m128h(r, e); |
24522 | } |
24523 | |
24524 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24525 | unsafe fn test_mm_cvtepu32_ph() { |
24526 | let a = _mm_set_epi32(1, 2, 3, 4); |
24527 | let r = _mm_cvtepu32_ph(a); |
24528 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
24529 | assert_eq_m128h(r, e); |
24530 | } |
24531 | |
24532 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24533 | unsafe fn test_mm_mask_cvtepu32_ph() { |
24534 | let a = _mm_set_epi32(1, 2, 3, 4); |
24535 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24536 | let r = _mm_mask_cvtepu32_ph(src, 0b0101, a); |
24537 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2., 16., 4.); |
24538 | assert_eq_m128h(r, e); |
24539 | } |
24540 | |
24541 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24542 | unsafe fn test_mm_maskz_cvtepu32_ph() { |
24543 | let a = _mm_set_epi32(1, 2, 3, 4); |
24544 | let r = _mm_maskz_cvtepu32_ph(0b0101, a); |
24545 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2., 0.0, 4.); |
24546 | assert_eq_m128h(r, e); |
24547 | } |
24548 | |
24549 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24550 | unsafe fn test_mm256_cvtepu32_ph() { |
24551 | let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
24552 | let r = _mm256_cvtepu32_ph(a); |
24553 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24554 | assert_eq_m128h(r, e); |
24555 | } |
24556 | |
24557 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24558 | unsafe fn test_mm256_mask_cvtepu32_ph() { |
24559 | let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
24560 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24561 | let r = _mm256_mask_cvtepu32_ph(src, 0b01010101, a); |
24562 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24563 | assert_eq_m128h(r, e); |
24564 | } |
24565 | |
24566 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24567 | unsafe fn test_mm256_maskz_cvtepu32_ph() { |
24568 | let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
24569 | let r = _mm256_maskz_cvtepu32_ph(0b01010101, a); |
24570 | let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0); |
24571 | assert_eq_m128h(r, e); |
24572 | } |
24573 | |
24574 | #[simd_test(enable = "avx512fp16" )] |
24575 | unsafe fn test_mm512_cvtepu32_ph() { |
24576 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24577 | let r = _mm512_cvtepu32_ph(a); |
24578 | let e = _mm256_set_ph( |
24579 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24580 | ); |
24581 | assert_eq_m256h(r, e); |
24582 | } |
24583 | |
24584 | #[simd_test(enable = "avx512fp16" )] |
24585 | unsafe fn test_mm512_mask_cvtepu32_ph() { |
24586 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24587 | let src = _mm256_set_ph( |
24588 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., |
24589 | ); |
24590 | let r = _mm512_mask_cvtepu32_ph(src, 0b0101010101010101, a); |
24591 | let e = _mm256_set_ph( |
24592 | 10., 2.0, 12., 4.0, 14., 6.0, 16., 8.0, 18., 10.0, 20., 12.0, 22., 14.0, 24., 16.0, |
24593 | ); |
24594 | assert_eq_m256h(r, e); |
24595 | } |
24596 | |
24597 | #[simd_test(enable = "avx512fp16" )] |
24598 | unsafe fn test_mm512_maskz_cvtepu32_ph() { |
24599 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24600 | let r = _mm512_maskz_cvtepu32_ph(0b0101010101010101, a); |
24601 | let e = _mm256_set_ph( |
24602 | 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, |
24603 | ); |
24604 | assert_eq_m256h(r, e); |
24605 | } |
24606 | |
24607 | #[simd_test(enable = "avx512fp16" )] |
24608 | unsafe fn test_mm512_cvt_roundepu32_ph() { |
24609 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24610 | let r = _mm512_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
24611 | let e = _mm256_set_ph( |
24612 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24613 | ); |
24614 | assert_eq_m256h(r, e); |
24615 | } |
24616 | |
24617 | #[simd_test(enable = "avx512fp16" )] |
24618 | unsafe fn test_mm512_mask_cvt_roundepu32_ph() { |
24619 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24620 | let src = _mm256_set_ph( |
24621 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., |
24622 | ); |
24623 | let r = _mm512_mask_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24624 | src, |
24625 | 0b0101010101010101, |
24626 | a, |
24627 | ); |
24628 | let e = _mm256_set_ph( |
24629 | 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0, |
24630 | 16.0, |
24631 | ); |
24632 | assert_eq_m256h(r, e); |
24633 | } |
24634 | |
24635 | #[simd_test(enable = "avx512fp16" )] |
24636 | unsafe fn test_mm512_maskz_cvt_roundepu32_ph() { |
24637 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24638 | let r = _mm512_maskz_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24639 | 0b0101010101010101, |
24640 | a, |
24641 | ); |
24642 | let e = _mm256_set_ph( |
24643 | 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, |
24644 | ); |
24645 | assert_eq_m256h(r, e); |
24646 | } |
24647 | |
24648 | #[simd_test(enable = "avx512fp16" )] |
24649 | unsafe fn test_mm_cvtu32_sh() { |
24650 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24651 | let r = _mm_cvtu32_sh(a, 10); |
24652 | let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24653 | assert_eq_m128h(r, e); |
24654 | } |
24655 | |
24656 | #[simd_test(enable = "avx512fp16" )] |
24657 | unsafe fn test_mm_cvt_roundu32_sh() { |
24658 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24659 | let r = _mm_cvt_roundu32_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10); |
24660 | let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24661 | assert_eq_m128h(r, e); |
24662 | } |
24663 | |
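// The 64-bit integer sources below yield only 2 (128-bit), 4 (256-bit) or 8 (512-bit)
// half-precision results, so every variant returns an `__m128h` and the unused upper lanes
// of the destination are zeroed.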
24664 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24665 | unsafe fn test_mm_cvtepi64_ph() { |
24666 | let a = _mm_set_epi64x(1, 2); |
24667 | let r = _mm_cvtepi64_ph(a); |
24668 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
24669 | assert_eq_m128h(r, e); |
24670 | } |
24671 | |
24672 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24673 | unsafe fn test_mm_mask_cvtepi64_ph() { |
24674 | let a = _mm_set_epi64x(1, 2); |
24675 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24676 | let r = _mm_mask_cvtepi64_ph(src, 0b01, a); |
24677 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.); |
24678 | assert_eq_m128h(r, e); |
24679 | } |
24680 | |
24681 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24682 | unsafe fn test_mm_maskz_cvtepi64_ph() { |
24683 | let a = _mm_set_epi64x(1, 2); |
24684 | let r = _mm_maskz_cvtepi64_ph(0b01, a); |
24685 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.); |
24686 | assert_eq_m128h(r, e); |
24687 | } |
24688 | |
24689 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24690 | unsafe fn test_mm256_cvtepi64_ph() { |
24691 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
24692 | let r = _mm256_cvtepi64_ph(a); |
24693 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
24694 | assert_eq_m128h(r, e); |
24695 | } |
24696 | |
24697 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24698 | unsafe fn test_mm256_mask_cvtepi64_ph() { |
24699 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
24700 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24701 | let r = _mm256_mask_cvtepi64_ph(src, 0b0101, a); |
24702 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0); |
24703 | assert_eq_m128h(r, e); |
24704 | } |
24705 | |
24706 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24707 | unsafe fn test_mm256_maskz_cvtepi64_ph() { |
24708 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
24709 | let r = _mm256_maskz_cvtepi64_ph(0b0101, a); |
24710 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0); |
24711 | assert_eq_m128h(r, e); |
24712 | } |
24713 | |
24714 | #[simd_test(enable = "avx512fp16" )] |
24715 | unsafe fn test_mm512_cvtepi64_ph() { |
24716 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24717 | let r = _mm512_cvtepi64_ph(a); |
24718 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24719 | assert_eq_m128h(r, e); |
24720 | } |
24721 | |
24722 | #[simd_test(enable = "avx512fp16" )] |
24723 | unsafe fn test_mm512_mask_cvtepi64_ph() { |
24724 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24725 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24726 | let r = _mm512_mask_cvtepi64_ph(src, 0b01010101, a); |
24727 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24728 | assert_eq_m128h(r, e); |
24729 | } |
24730 | |
24731 | #[simd_test(enable = "avx512fp16" )] |
24732 | unsafe fn test_mm512_maskz_cvtepi64_ph() { |
24733 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24734 | let r = _mm512_maskz_cvtepi64_ph(0b01010101, a); |
24735 | let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.); |
24736 | assert_eq_m128h(r, e); |
24737 | } |
24738 | |
24739 | #[simd_test(enable = "avx512fp16" )] |
24740 | unsafe fn test_mm512_cvt_roundepi64_ph() { |
24741 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24742 | let r = _mm512_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
24743 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24744 | assert_eq_m128h(r, e); |
24745 | } |
24746 | |
24747 | #[simd_test(enable = "avx512fp16" )] |
24748 | unsafe fn test_mm512_mask_cvt_roundepi64_ph() { |
24749 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24750 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24751 | let r = _mm512_mask_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24752 | src, 0b01010101, a, |
24753 | ); |
24754 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24755 | assert_eq_m128h(r, e); |
24756 | } |
24757 | |
24758 | #[simd_test(enable = "avx512fp16" )] |
24759 | unsafe fn test_mm512_maskz_cvt_roundepi64_ph() { |
24760 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24761 | let r = _mm512_maskz_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24762 | 0b01010101, a, |
24763 | ); |
24764 | let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.); |
24765 | assert_eq_m128h(r, e); |
24766 | } |
24767 | |
24768 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24769 | unsafe fn test_mm_cvtepu64_ph() { |
24770 | let a = _mm_set_epi64x(1, 2); |
24771 | let r = _mm_cvtepu64_ph(a); |
24772 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
24773 | assert_eq_m128h(r, e); |
24774 | } |
24775 | |
24776 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24777 | unsafe fn test_mm_mask_cvtepu64_ph() { |
24778 | let a = _mm_set_epi64x(1, 2); |
24779 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24780 | let r = _mm_mask_cvtepu64_ph(src, 0b01, a); |
24781 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.); |
24782 | assert_eq_m128h(r, e); |
24783 | } |
24784 | |
24785 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24786 | unsafe fn test_mm_maskz_cvtepu64_ph() { |
24787 | let a = _mm_set_epi64x(1, 2); |
24788 | let r = _mm_maskz_cvtepu64_ph(0b01, a); |
24789 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0); |
24790 | assert_eq_m128h(r, e); |
24791 | } |
24792 | |
24793 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24794 | unsafe fn test_mm256_cvtepu64_ph() { |
24795 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
24796 | let r = _mm256_cvtepu64_ph(a); |
24797 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
24798 | assert_eq_m128h(r, e); |
24799 | } |
24800 | |
24801 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24802 | unsafe fn test_mm256_mask_cvtepu64_ph() { |
24803 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
24804 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24805 | let r = _mm256_mask_cvtepu64_ph(src, 0b0101, a); |
24806 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0); |
24807 | assert_eq_m128h(r, e); |
24808 | } |
24809 | |
24810 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24811 | unsafe fn test_mm256_maskz_cvtepu64_ph() { |
24812 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
24813 | let r = _mm256_maskz_cvtepu64_ph(0b0101, a); |
24814 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0); |
24815 | assert_eq_m128h(r, e); |
24816 | } |
24817 | |
24818 | #[simd_test(enable = "avx512fp16" )] |
24819 | unsafe fn test_mm512_cvtepu64_ph() { |
24820 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24821 | let r = _mm512_cvtepu64_ph(a); |
24822 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24823 | assert_eq_m128h(r, e); |
24824 | } |
24825 | |
24826 | #[simd_test(enable = "avx512fp16" )] |
24827 | unsafe fn test_mm512_mask_cvtepu64_ph() { |
24828 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24829 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24830 | let r = _mm512_mask_cvtepu64_ph(src, 0b01010101, a); |
24831 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24832 | assert_eq_m128h(r, e); |
24833 | } |
24834 | |
24835 | #[simd_test(enable = "avx512fp16" )] |
24836 | unsafe fn test_mm512_maskz_cvtepu64_ph() { |
24837 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24838 | let r = _mm512_maskz_cvtepu64_ph(0b01010101, a); |
24839 | let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.); |
24840 | assert_eq_m128h(r, e); |
24841 | } |
24842 | |
24843 | #[simd_test(enable = "avx512fp16" )] |
24844 | unsafe fn test_mm512_cvt_roundepu64_ph() { |
24845 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24846 | let r = _mm512_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
24847 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24848 | assert_eq_m128h(r, e); |
24849 | } |
24850 | |
24851 | #[simd_test(enable = "avx512fp16" )] |
24852 | unsafe fn test_mm512_mask_cvt_roundepu64_ph() { |
24853 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24854 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24855 | let r = _mm512_mask_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24856 | src, 0b01010101, a, |
24857 | ); |
24858 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24859 | assert_eq_m128h(r, e); |
24860 | } |
24861 | |
24862 | #[simd_test(enable = "avx512fp16" )] |
24863 | unsafe fn test_mm512_maskz_cvt_roundepu64_ph() { |
24864 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24865 | let r = _mm512_maskz_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24866 | 0b01010101, a, |
24867 | ); |
24868 | let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.); |
24869 | assert_eq_m128h(r, e); |
24870 | } |
24871 | |
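// `cvtxps_ph` narrows packed f32 to f16: a 128-bit source fills only the lower 4 lanes of the
// `__m128h` result (upper lanes zeroed), a 256-bit source fills all 8 lanes, and a 512-bit
// source produces a full `__m256h`.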
24872 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24873 | unsafe fn test_mm_cvtxps_ph() { |
24874 | let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0); |
24875 | let r = _mm_cvtxps_ph(a); |
24876 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
24877 | assert_eq_m128h(r, e); |
24878 | } |
24879 | |
24880 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24881 | unsafe fn test_mm_mask_cvtxps_ph() { |
24882 | let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0); |
24883 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24884 | let r = _mm_mask_cvtxps_ph(src, 0b0101, a); |
24885 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16., 4.0); |
24886 | assert_eq_m128h(r, e); |
24887 | } |
24888 | |
24889 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24890 | unsafe fn test_mm_maskz_cvtxps_ph() { |
24891 | let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0); |
24892 | let r = _mm_maskz_cvtxps_ph(0b0101, a); |
24893 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0); |
24894 | assert_eq_m128h(r, e); |
24895 | } |
24896 | |
24897 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24898 | unsafe fn test_mm256_cvtxps_ph() { |
24899 | let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24900 | let r = _mm256_cvtxps_ph(a); |
24901 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24902 | assert_eq_m128h(r, e); |
24903 | } |
24904 | |
24905 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24906 | unsafe fn test_mm256_mask_cvtxps_ph() { |
24907 | let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24908 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24909 | let r = _mm256_mask_cvtxps_ph(src, 0b01010101, a); |
24910 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24911 | assert_eq_m128h(r, e); |
24912 | } |
24913 | |
24914 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24915 | unsafe fn test_mm256_maskz_cvtxps_ph() { |
24916 | let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24917 | let r = _mm256_maskz_cvtxps_ph(0b01010101, a); |
24918 | let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0); |
24919 | assert_eq_m128h(r, e); |
24920 | } |
24921 | |
24922 | #[simd_test(enable = "avx512fp16" )] |
24923 | unsafe fn test_mm512_cvtxps_ph() { |
24924 | let a = _mm512_set_ps( |
24925 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24926 | ); |
24927 | let r = _mm512_cvtxps_ph(a); |
24928 | let e = _mm256_set_ph( |
24929 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24930 | ); |
24931 | assert_eq_m256h(r, e); |
24932 | } |
24933 | |
24934 | #[simd_test(enable = "avx512fp16" )] |
24935 | unsafe fn test_mm512_mask_cvtxps_ph() { |
24936 | let a = _mm512_set_ps( |
24937 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24938 | ); |
24939 | let src = _mm256_set_ph( |
24940 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., |
24941 | ); |
24942 | let r = _mm512_mask_cvtxps_ph(src, 0b0101010101010101, a); |
24943 | let e = _mm256_set_ph( |
24944 | 10., 2.0, 12., 4.0, 14., 6.0, 16., 8.0, 18., 10.0, 20., 12.0, 22., 14.0, 24., 16.0, |
24945 | ); |
24946 | assert_eq_m256h(r, e); |
24947 | } |
24948 | |
24949 | #[simd_test(enable = "avx512fp16" )] |
24950 | unsafe fn test_mm512_maskz_cvtxps_ph() { |
24951 | let a = _mm512_set_ps( |
24952 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24953 | ); |
24954 | let r = _mm512_maskz_cvtxps_ph(0b0101010101010101, a); |
24955 | let e = _mm256_set_ph( |
24956 | 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, |
24957 | ); |
24958 | assert_eq_m256h(r, e); |
24959 | } |
24960 | |
24961 | #[simd_test(enable = "avx512fp16" )] |
24962 | unsafe fn test_mm512_cvtx_roundps_ph() { |
24963 | let a = _mm512_set_ps( |
24964 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24965 | ); |
24966 | let r = _mm512_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
24967 | let e = _mm256_set_ph( |
24968 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24969 | ); |
24970 | assert_eq_m256h(r, e); |
24971 | } |
24972 | |
24973 | #[simd_test(enable = "avx512fp16" )] |
24974 | unsafe fn test_mm512_mask_cvtx_roundps_ph() { |
24975 | let a = _mm512_set_ps( |
24976 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24977 | ); |
24978 | let src = _mm256_set_ph( |
24979 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., |
24980 | ); |
24981 | let r = _mm512_mask_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24982 | src, |
24983 | 0b0101010101010101, |
24984 | a, |
24985 | ); |
24986 | let e = _mm256_set_ph( |
24987 | 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0, |
24988 | 16.0, |
24989 | ); |
24990 | assert_eq_m256h(r, e); |
24991 | } |
24992 | |
24993 | #[simd_test(enable = "avx512fp16" )] |
24994 | unsafe fn test_mm512_maskz_cvtx_roundps_ph() { |
24995 | let a = _mm512_set_ps( |
24996 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24997 | ); |
24998 | let r = _mm512_maskz_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24999 | 0b0101010101010101, |
25000 | a, |
25001 | ); |
25002 | let e = _mm256_set_ph( |
25003 | 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, |
25004 | ); |
25005 | assert_eq_m256h(r, e); |
25006 | } |
25007 | |
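// Scalar conversions (`cvtss_sh`, `cvtsd_sh`) replace only element 0 of `a` with the converted
// lowest element of `b` and copy the remaining seven lanes of `a` through; the masked forms
// fall back to `src` element 0 (or to zero) when the mask bit is clear.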
25008 | #[simd_test(enable = "avx512fp16" )] |
25009 | unsafe fn test_mm_cvtss_sh() { |
25010 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25011 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
25012 | let r = _mm_cvtss_sh(a, b); |
25013 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25014 | assert_eq_m128h(r, e); |
25015 | } |
25016 | |
25017 | #[simd_test(enable = "avx512fp16" )] |
25018 | unsafe fn test_mm_mask_cvtss_sh() { |
25019 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25020 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
25021 | let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.); |
25022 | let r = _mm_mask_cvtss_sh(src, 0, a, b); |
25023 | let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.); |
25024 | assert_eq_m128h(r, e); |
25025 | let r = _mm_mask_cvtss_sh(src, 1, a, b); |
25026 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25027 | assert_eq_m128h(r, e); |
25028 | } |
25029 | |
25030 | #[simd_test(enable = "avx512fp16" )] |
25031 | unsafe fn test_mm_maskz_cvtss_sh() { |
25032 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25033 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
25034 | let r = _mm_maskz_cvtss_sh(0, a, b); |
25035 | let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.); |
25036 | assert_eq_m128h(r, e); |
25037 | let r = _mm_maskz_cvtss_sh(1, a, b); |
25038 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25039 | assert_eq_m128h(r, e); |
25040 | } |
25041 | |
25042 | #[simd_test(enable = "avx512fp16" )] |
25043 | unsafe fn test_mm_cvt_roundss_sh() { |
25044 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25045 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
25046 | let r = _mm_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
25047 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25048 | assert_eq_m128h(r, e); |
25049 | } |
25050 | |
25051 | #[simd_test(enable = "avx512fp16" )] |
25052 | unsafe fn test_mm_mask_cvt_roundss_sh() { |
25053 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25054 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
25055 | let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.); |
25056 | let r = _mm_mask_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25057 | src, 0, a, b, |
25058 | ); |
25059 | let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.); |
25060 | assert_eq_m128h(r, e); |
25061 | let r = _mm_mask_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25062 | src, 1, a, b, |
25063 | ); |
25064 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25065 | assert_eq_m128h(r, e); |
25066 | } |
25067 | |
25068 | #[simd_test(enable = "avx512fp16" )] |
25069 | unsafe fn test_mm_maskz_cvt_roundss_sh() { |
25070 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25071 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
25072 | let r = |
25073 | _mm_maskz_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
25074 | let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.); |
25075 | assert_eq_m128h(r, e); |
25076 | let r = |
25077 | _mm_maskz_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
25078 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25079 | assert_eq_m128h(r, e); |
25080 | } |
25081 | |
25082 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25083 | unsafe fn test_mm_cvtpd_ph() { |
25084 | let a = _mm_set_pd(1.0, 2.0); |
25085 | let r = _mm_cvtpd_ph(a); |
25086 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
25087 | assert_eq_m128h(r, e); |
25088 | } |
25089 | |
25090 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25091 | unsafe fn test_mm_mask_cvtpd_ph() { |
25092 | let a = _mm_set_pd(1.0, 2.0); |
25093 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25094 | let r = _mm_mask_cvtpd_ph(src, 0b01, a); |
25095 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.); |
25096 | assert_eq_m128h(r, e); |
25097 | } |
25098 | |
25099 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25100 | unsafe fn test_mm_maskz_cvtpd_ph() { |
25101 | let a = _mm_set_pd(1.0, 2.0); |
25102 | let r = _mm_maskz_cvtpd_ph(0b01, a); |
25103 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0); |
25104 | assert_eq_m128h(r, e); |
25105 | } |
25106 | |
25107 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25108 | unsafe fn test_mm256_cvtpd_ph() { |
25109 | let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0); |
25110 | let r = _mm256_cvtpd_ph(a); |
25111 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
25112 | assert_eq_m128h(r, e); |
25113 | } |
25114 | |
25115 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25116 | unsafe fn test_mm256_mask_cvtpd_ph() { |
25117 | let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0); |
25118 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25119 | let r = _mm256_mask_cvtpd_ph(src, 0b0101, a); |
25120 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0); |
25121 | assert_eq_m128h(r, e); |
25122 | } |
25123 | |
25124 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25125 | unsafe fn test_mm256_maskz_cvtpd_ph() { |
25126 | let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0); |
25127 | let r = _mm256_maskz_cvtpd_ph(0b0101, a); |
25128 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0); |
25129 | assert_eq_m128h(r, e); |
25130 | } |
25131 | |
25132 | #[simd_test(enable = "avx512fp16" )] |
25133 | unsafe fn test_mm512_cvtpd_ph() { |
25134 | let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25135 | let r = _mm512_cvtpd_ph(a); |
25136 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25137 | assert_eq_m128h(r, e); |
25138 | } |
25139 | |
25140 | #[simd_test(enable = "avx512fp16" )] |
25141 | unsafe fn test_mm512_mask_cvtpd_ph() { |
25142 | let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25143 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25144 | let r = _mm512_mask_cvtpd_ph(src, 0b01010101, a); |
25145 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
25146 | assert_eq_m128h(r, e); |
25147 | } |
25148 | |
25149 | #[simd_test(enable = "avx512fp16" )] |
25150 | unsafe fn test_mm512_maskz_cvtpd_ph() { |
25151 | let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25152 | let r = _mm512_maskz_cvtpd_ph(0b01010101, a); |
25153 | let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.); |
25154 | assert_eq_m128h(r, e); |
25155 | } |
25156 | |
25157 | #[simd_test(enable = "avx512fp16" )] |
25158 | unsafe fn test_mm512_cvt_roundpd_ph() { |
25159 | let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25160 | let r = _mm512_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
25161 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25162 | assert_eq_m128h(r, e); |
25163 | } |
25164 | |
25165 | #[simd_test(enable = "avx512fp16" )] |
25166 | unsafe fn test_mm512_mask_cvt_roundpd_ph() { |
25167 | let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25168 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25169 | let r = _mm512_mask_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25170 | src, 0b01010101, a, |
25171 | ); |
25172 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
25173 | assert_eq_m128h(r, e); |
25174 | } |
25175 | |
25176 | #[simd_test(enable = "avx512fp16" )] |
25177 | unsafe fn test_mm512_maskz_cvt_roundpd_ph() { |
25178 | let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25179 | let r = _mm512_maskz_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25180 | 0b01010101, a, |
25181 | ); |
25182 | let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.); |
25183 | assert_eq_m128h(r, e); |
25184 | } |
25185 | |
25186 | #[simd_test(enable = "avx512fp16" )] |
25187 | unsafe fn test_mm_cvtsd_sh() { |
25188 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25189 | let b = _mm_setr_pd(1.0, 2.0); |
25190 | let r = _mm_cvtsd_sh(a, b); |
25191 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25192 | assert_eq_m128h(r, e); |
25193 | } |
25194 | |
25195 | #[simd_test(enable = "avx512fp16" )] |
25196 | unsafe fn test_mm_mask_cvtsd_sh() { |
25197 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25198 | let b = _mm_setr_pd(1.0, 2.0); |
25199 | let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.); |
25200 | let r = _mm_mask_cvtsd_sh(src, 0, a, b); |
25201 | let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.); |
25202 | assert_eq_m128h(r, e); |
25203 | let r = _mm_mask_cvtsd_sh(src, 1, a, b); |
25204 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25205 | assert_eq_m128h(r, e); |
25206 | } |
25207 | |
25208 | #[simd_test(enable = "avx512fp16" )] |
25209 | unsafe fn test_mm_maskz_cvtsd_sh() { |
25210 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25211 | let b = _mm_setr_pd(1.0, 2.0); |
25212 | let r = _mm_maskz_cvtsd_sh(0, a, b); |
25213 | let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.); |
25214 | assert_eq_m128h(r, e); |
25215 | let r = _mm_maskz_cvtsd_sh(1, a, b); |
25216 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25217 | assert_eq_m128h(r, e); |
25218 | } |
25219 | |
25220 | #[simd_test(enable = "avx512fp16" )] |
25221 | unsafe fn test_mm_cvt_roundsd_sh() { |
25222 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25223 | let b = _mm_setr_pd(1.0, 2.0); |
25224 | let r = _mm_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
25225 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25226 | assert_eq_m128h(r, e); |
25227 | } |
25228 | |
25229 | #[simd_test(enable = "avx512fp16" )] |
25230 | unsafe fn test_mm_mask_cvt_roundsd_sh() { |
25231 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25232 | let b = _mm_setr_pd(1.0, 2.0); |
25233 | let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.); |
25234 | let r = _mm_mask_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25235 | src, 0, a, b, |
25236 | ); |
25237 | let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.); |
25238 | assert_eq_m128h(r, e); |
25239 | let r = _mm_mask_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25240 | src, 1, a, b, |
25241 | ); |
25242 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25243 | assert_eq_m128h(r, e); |
25244 | } |
25245 | |
25246 | #[simd_test(enable = "avx512fp16" )] |
25247 | unsafe fn test_mm_maskz_cvt_roundsd_sh() { |
25248 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25249 | let b = _mm_setr_pd(1.0, 2.0); |
25250 | let r = |
25251 | _mm_maskz_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
25252 | let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.); |
25253 | assert_eq_m128h(r, e); |
25254 | let r = |
25255 | _mm_maskz_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
25256 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25257 | assert_eq_m128h(r, e); |
25258 | } |
25259 | |
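// Half-precision to signed 16-bit integer conversions: the plain `cvtph_epi16` forms round
// using the current rounding mode (round-to-nearest-even by default), while the `cvttph`
// forms further below truncate toward zero. With the integer-valued inputs used here both
// produce identical results.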
25260 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25261 | unsafe fn test_mm_cvtph_epi16() { |
25262 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
let r = _mm_cvtph_epi16(a);
25264 | let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
25265 | assert_eq_m128i(r, e); |
25266 | } |
25267 | |
25268 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25269 | unsafe fn test_mm_mask_cvtph_epi16() { |
25270 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25271 | let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17); |
let r = _mm_mask_cvtph_epi16(src, 0b01010101, a);
25273 | let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8); |
25274 | assert_eq_m128i(r, e); |
25275 | } |
25276 | |
25277 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25278 | unsafe fn test_mm_maskz_cvtph_epi16() { |
25279 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
let r = _mm_maskz_cvtph_epi16(0b01010101, a);
25281 | let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8); |
25282 | assert_eq_m128i(r, e); |
25283 | } |
25284 | |
25285 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25286 | unsafe fn test_mm256_cvtph_epi16() { |
25287 | let a = _mm256_set_ph( |
25288 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25289 | ); |
let r = _mm256_cvtph_epi16(a);
25291 | let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
25292 | assert_eq_m256i(r, e); |
25293 | } |
25294 | |
25295 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25296 | unsafe fn test_mm256_mask_cvtph_epi16() { |
25297 | let a = _mm256_set_ph( |
25298 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25299 | ); |
25300 | let src = _mm256_set_epi16( |
25301 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
25302 | ); |
let r = _mm256_mask_cvtph_epi16(src, 0b0101010101010101, a);
25304 | let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
25305 | assert_eq_m256i(r, e); |
25306 | } |
25307 | |
25308 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25309 | unsafe fn test_mm256_maskz_cvtph_epi16() { |
25310 | let a = _mm256_set_ph( |
25311 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25312 | ); |
let r = _mm256_maskz_cvtph_epi16(0b0101010101010101, a);
25314 | let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
25315 | assert_eq_m256i(r, e); |
25316 | } |
25317 | |
25318 | #[simd_test(enable = "avx512fp16" )] |
25319 | unsafe fn test_mm512_cvtph_epi16() { |
25320 | let a = _mm512_set_ph( |
25321 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25322 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25323 | 31.0, 32.0, |
25324 | ); |
let r = _mm512_cvtph_epi16(a);
25326 | let e = _mm512_set_epi16( |
25327 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
25328 | 25, 26, 27, 28, 29, 30, 31, 32, |
25329 | ); |
25330 | assert_eq_m512i(r, e); |
25331 | } |
25332 | |
25333 | #[simd_test(enable = "avx512fp16" )] |
25334 | unsafe fn test_mm512_mask_cvtph_epi16() { |
25335 | let a = _mm512_set_ph( |
25336 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25337 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25338 | 31.0, 32.0, |
25339 | ); |
25340 | let src = _mm512_set_epi16( |
25341 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
25342 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, |
25343 | ); |
let r = _mm512_mask_cvtph_epi16(src, 0b01010101010101010101010101010101, a);
25345 | let e = _mm512_set_epi16( |
25346 | 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, |
25347 | 24, 34, 26, 36, 28, 38, 30, 40, 32, |
25348 | ); |
25349 | assert_eq_m512i(r, e); |
25350 | } |
25351 | |
25352 | #[simd_test(enable = "avx512fp16" )] |
25353 | unsafe fn test_mm512_maskz_cvtph_epi16() { |
25354 | let a = _mm512_set_ph( |
25355 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25356 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25357 | 31.0, 32.0, |
25358 | ); |
let r = _mm512_maskz_cvtph_epi16(0b01010101010101010101010101010101, a);
25360 | let e = _mm512_set_epi16( |
25361 | 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, |
25362 | 0, 28, 0, 30, 0, 32, |
25363 | ); |
25364 | assert_eq_m512i(r, e); |
25365 | } |
25366 | |
25367 | #[simd_test(enable = "avx512fp16" )] |
25368 | unsafe fn test_mm512_cvt_roundph_epi16() { |
25369 | let a = _mm512_set_ph( |
25370 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25371 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25372 | 31.0, 32.0, |
25373 | ); |
let r = _mm512_cvt_roundph_epi16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
25375 | let e = _mm512_set_epi16( |
25376 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
25377 | 25, 26, 27, 28, 29, 30, 31, 32, |
25378 | ); |
25379 | assert_eq_m512i(r, e); |
25380 | } |
25381 | |
25382 | #[simd_test(enable = "avx512fp16" )] |
25383 | unsafe fn test_mm512_mask_cvt_roundph_epi16() { |
25384 | let a = _mm512_set_ph( |
25385 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25386 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25387 | 31.0, 32.0, |
25388 | ); |
25389 | let src = _mm512_set_epi16( |
25390 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
25391 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, |
25392 | ); |
let r = _mm512_mask_cvt_roundph_epi16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25394 | src, |
25395 | 0b01010101010101010101010101010101, |
25396 | a, |
25397 | ); |
25398 | let e = _mm512_set_epi16( |
25399 | 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, |
25400 | 24, 34, 26, 36, 28, 38, 30, 40, 32, |
25401 | ); |
25402 | assert_eq_m512i(r, e); |
25403 | } |
25404 | |
25405 | #[simd_test(enable = "avx512fp16" )] |
25406 | unsafe fn test_mm512_maskz_cvt_roundph_epi16() { |
25407 | let a = _mm512_set_ph( |
25408 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25409 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25410 | 31.0, 32.0, |
25411 | ); |
let r = _mm512_maskz_cvt_roundph_epi16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25413 | 0b01010101010101010101010101010101, |
25414 | a, |
25415 | ); |
25416 | let e = _mm512_set_epi16( |
25417 | 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, |
25418 | 0, 28, 0, 30, 0, 32, |
25419 | ); |
25420 | assert_eq_m512i(r, e); |
25421 | } |
25422 | |
25423 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25424 | unsafe fn test_mm_cvtph_epu16() { |
25425 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
let r = _mm_cvtph_epu16(a);
25427 | let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
25428 | assert_eq_m128i(r, e); |
25429 | } |
25430 | |
25431 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25432 | unsafe fn test_mm_mask_cvtph_epu16() { |
25433 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25434 | let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17); |
let r = _mm_mask_cvtph_epu16(src, 0b01010101, a);
25436 | let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8); |
25437 | assert_eq_m128i(r, e); |
25438 | } |
25439 | |
25440 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25441 | unsafe fn test_mm_maskz_cvtph_epu16() { |
25442 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
let r = _mm_maskz_cvtph_epu16(0b01010101, a);
25444 | let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8); |
25445 | assert_eq_m128i(r, e); |
25446 | } |
25447 | |
25448 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25449 | unsafe fn test_mm256_cvtph_epu16() { |
25450 | let a = _mm256_set_ph( |
25451 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25452 | ); |
let r = _mm256_cvtph_epu16(a);
25454 | let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
25455 | assert_eq_m256i(r, e); |
25456 | } |
25457 | |
25458 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25459 | unsafe fn test_mm256_mask_cvtph_epu16() { |
25460 | let a = _mm256_set_ph( |
25461 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25462 | ); |
25463 | let src = _mm256_set_epi16( |
25464 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
25465 | ); |
let r = _mm256_mask_cvtph_epu16(src, 0b0101010101010101, a);
25467 | let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
25468 | assert_eq_m256i(r, e); |
25469 | } |
25470 | |
25471 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25472 | unsafe fn test_mm256_maskz_cvtph_epu16() { |
25473 | let a = _mm256_set_ph( |
25474 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25475 | ); |
let r = _mm256_maskz_cvtph_epu16(0b0101010101010101, a);
25477 | let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
25478 | assert_eq_m256i(r, e); |
25479 | } |
25480 | |
25481 | #[simd_test(enable = "avx512fp16" )] |
25482 | unsafe fn test_mm512_cvtph_epu16() { |
25483 | let a = _mm512_set_ph( |
25484 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25485 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25486 | 31.0, 32.0, |
25487 | ); |
let r = _mm512_cvtph_epu16(a);
25489 | let e = _mm512_set_epi16( |
25490 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
25491 | 25, 26, 27, 28, 29, 30, 31, 32, |
25492 | ); |
25493 | assert_eq_m512i(r, e); |
25494 | } |
25495 | |
25496 | #[simd_test(enable = "avx512fp16" )] |
25497 | unsafe fn test_mm512_mask_cvtph_epu16() { |
25498 | let a = _mm512_set_ph( |
25499 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25500 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25501 | 31.0, 32.0, |
25502 | ); |
25503 | let src = _mm512_set_epi16( |
25504 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
25505 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, |
25506 | ); |
let r = _mm512_mask_cvtph_epu16(src, 0b01010101010101010101010101010101, a);
25508 | let e = _mm512_set_epi16( |
25509 | 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, |
25510 | 24, 34, 26, 36, 28, 38, 30, 40, 32, |
25511 | ); |
25512 | assert_eq_m512i(r, e); |
25513 | } |
25514 | |
25515 | #[simd_test(enable = "avx512fp16" )] |
25516 | unsafe fn test_mm512_maskz_cvtph_epu16() { |
25517 | let a = _mm512_set_ph( |
25518 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25519 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25520 | 31.0, 32.0, |
25521 | ); |
let r = _mm512_maskz_cvtph_epu16(0b01010101010101010101010101010101, a);
25523 | let e = _mm512_set_epi16( |
25524 | 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, |
25525 | 0, 28, 0, 30, 0, 32, |
25526 | ); |
25527 | assert_eq_m512i(r, e); |
25528 | } |
25529 | |
25530 | #[simd_test(enable = "avx512fp16" )] |
25531 | unsafe fn test_mm512_cvt_roundph_epu16() { |
25532 | let a = _mm512_set_ph( |
25533 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25534 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25535 | 31.0, 32.0, |
25536 | ); |
25537 | let r = _mm512_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
25538 | let e = _mm512_set_epi16( |
25539 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
25540 | 25, 26, 27, 28, 29, 30, 31, 32, |
25541 | ); |
25542 | assert_eq_m512i(r, e); |
25543 | } |
25544 | |
25545 | #[simd_test(enable = "avx512fp16" )] |
25546 | unsafe fn test_mm512_mask_cvt_roundph_epu16() { |
25547 | let a = _mm512_set_ph( |
25548 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25549 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25550 | 31.0, 32.0, |
25551 | ); |
25552 | let src = _mm512_set_epi16( |
25553 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
25554 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, |
25555 | ); |
25556 | let r = _mm512_mask_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25557 | src, |
25558 | 0b01010101010101010101010101010101, |
25559 | a, |
25560 | ); |
25561 | let e = _mm512_set_epi16( |
25562 | 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, |
25563 | 24, 34, 26, 36, 28, 38, 30, 40, 32, |
25564 | ); |
25565 | assert_eq_m512i(r, e); |
25566 | } |
25567 | |
25568 | #[simd_test(enable = "avx512fp16" )] |
25569 | unsafe fn test_mm512_maskz_cvt_roundph_epu16() { |
25570 | let a = _mm512_set_ph( |
25571 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25572 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25573 | 31.0, 32.0, |
25574 | ); |
25575 | let r = _mm512_maskz_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25576 | 0b01010101010101010101010101010101, |
25577 | a, |
25578 | ); |
25579 | let e = _mm512_set_epi16( |
25580 | 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, |
25581 | 0, 28, 0, 30, 0, 32, |
25582 | ); |
25583 | assert_eq_m512i(r, e); |
25584 | } |
25585 | |
25586 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25587 | unsafe fn test_mm_cvttph_epi16() { |
25588 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25589 | let r = _mm_cvttph_epi16(a); |
25590 | let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
25591 | assert_eq_m128i(r, e); |
25592 | } |
25593 | |
25594 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25595 | unsafe fn test_mm_mask_cvttph_epi16() { |
25596 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25597 | let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17); |
25598 | let r = _mm_mask_cvttph_epi16(src, 0b01010101, a); |
25599 | let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8); |
25600 | assert_eq_m128i(r, e); |
25601 | } |
25602 | |
25603 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25604 | unsafe fn test_mm_maskz_cvttph_epi16() { |
25605 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25606 | let r = _mm_maskz_cvttph_epi16(0b01010101, a); |
25607 | let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8); |
25608 | assert_eq_m128i(r, e); |
25609 | } |
25610 | |
25611 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25612 | unsafe fn test_mm256_cvttph_epi16() { |
25613 | let a = _mm256_set_ph( |
25614 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25615 | ); |
25616 | let r = _mm256_cvttph_epi16(a); |
25617 | let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
25618 | assert_eq_m256i(r, e); |
25619 | } |
25620 | |
25621 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25622 | unsafe fn test_mm256_mask_cvttph_epi16() { |
25623 | let a = _mm256_set_ph( |
25624 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25625 | ); |
25626 | let src = _mm256_set_epi16( |
25627 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
25628 | ); |
25629 | let r = _mm256_mask_cvttph_epi16(src, 0b0101010101010101, a); |
25630 | let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
25631 | assert_eq_m256i(r, e); |
25632 | } |
25633 | |
25634 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25635 | unsafe fn test_mm256_maskz_cvttph_epi16() { |
25636 | let a = _mm256_set_ph( |
25637 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25638 | ); |
25639 | let r = _mm256_maskz_cvttph_epi16(0b0101010101010101, a); |
25640 | let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
25641 | assert_eq_m256i(r, e); |
25642 | } |
25643 | |
25644 | #[simd_test(enable = "avx512fp16" )] |
25645 | unsafe fn test_mm512_cvttph_epi16() { |
25646 | let a = _mm512_set_ph( |
25647 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25648 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25649 | 31.0, 32.0, |
25650 | ); |
25651 | let r = _mm512_cvttph_epi16(a); |
25652 | let e = _mm512_set_epi16( |
25653 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
25654 | 25, 26, 27, 28, 29, 30, 31, 32, |
25655 | ); |
25656 | assert_eq_m512i(r, e); |
25657 | } |
25658 | |
25659 | #[simd_test(enable = "avx512fp16" )] |
25660 | unsafe fn test_mm512_mask_cvttph_epi16() { |
25661 | let a = _mm512_set_ph( |
25662 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25663 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25664 | 31.0, 32.0, |
25665 | ); |
25666 | let src = _mm512_set_epi16( |
25667 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
25668 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, |
25669 | ); |
25670 | let r = _mm512_mask_cvttph_epi16(src, 0b01010101010101010101010101010101, a); |
25671 | let e = _mm512_set_epi16( |
25672 | 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, |
25673 | 24, 34, 26, 36, 28, 38, 30, 40, 32, |
25674 | ); |
25675 | assert_eq_m512i(r, e); |
25676 | } |
25677 | |
25678 | #[simd_test(enable = "avx512fp16" )] |
25679 | unsafe fn test_mm512_maskz_cvttph_epi16() { |
25680 | let a = _mm512_set_ph( |
25681 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25682 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25683 | 31.0, 32.0, |
25684 | ); |
25685 | let r = _mm512_maskz_cvttph_epi16(0b01010101010101010101010101010101, a); |
25686 | let e = _mm512_set_epi16( |
25687 | 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, |
25688 | 0, 28, 0, 30, 0, 32, |
25689 | ); |
25690 | assert_eq_m512i(r, e); |
25691 | } |
25692 | |
25693 | #[simd_test(enable = "avx512fp16" )] |
25694 | unsafe fn test_mm512_cvtt_roundph_epi16() { |
25695 | let a = _mm512_set_ph( |
25696 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25697 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25698 | 31.0, 32.0, |
25699 | ); |
25700 | let r = _mm512_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(a); |
25701 | let e = _mm512_set_epi16( |
25702 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
25703 | 25, 26, 27, 28, 29, 30, 31, 32, |
25704 | ); |
25705 | assert_eq_m512i(r, e); |
25706 | } |
25707 | |
25708 | #[simd_test(enable = "avx512fp16" )] |
25709 | unsafe fn test_mm512_mask_cvtt_roundph_epi16() { |
25710 | let a = _mm512_set_ph( |
25711 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25712 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25713 | 31.0, 32.0, |
25714 | ); |
25715 | let src = _mm512_set_epi16( |
25716 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
25717 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, |
25718 | ); |
25719 | let r = _mm512_mask_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>( |
25720 | src, |
25721 | 0b01010101010101010101010101010101, |
25722 | a, |
25723 | ); |
25724 | let e = _mm512_set_epi16( |
25725 | 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, |
25726 | 24, 34, 26, 36, 28, 38, 30, 40, 32, |
25727 | ); |
25728 | assert_eq_m512i(r, e); |
25729 | } |
25730 | |
25731 | #[simd_test(enable = "avx512fp16" )] |
25732 | unsafe fn test_mm512_maskz_cvtt_roundph_epi16() { |
25733 | let a = _mm512_set_ph( |
25734 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25735 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25736 | 31.0, 32.0, |
25737 | ); |
25738 | let r = _mm512_maskz_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>( |
25739 | 0b01010101010101010101010101010101, |
25740 | a, |
25741 | ); |
25742 | let e = _mm512_set_epi16( |
25743 | 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, |
25744 | 0, 28, 0, 30, 0, 32, |
25745 | ); |
25746 | assert_eq_m512i(r, e); |
25747 | } |
25748 | |
25749 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25750 | unsafe fn test_mm_cvttph_epu16() { |
25751 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25752 | let r = _mm_cvttph_epu16(a); |
25753 | let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
25754 | assert_eq_m128i(r, e); |
25755 | } |
25756 | |
25757 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25758 | unsafe fn test_mm_mask_cvttph_epu16() { |
25759 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25760 | let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17); |
25761 | let r = _mm_mask_cvttph_epu16(src, 0b01010101, a); |
25762 | let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8); |
25763 | assert_eq_m128i(r, e); |
25764 | } |
25765 | |
25766 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25767 | unsafe fn test_mm_maskz_cvttph_epu16() { |
25768 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25769 | let r = _mm_maskz_cvttph_epu16(0b01010101, a); |
25770 | let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8); |
25771 | assert_eq_m128i(r, e); |
25772 | } |
25773 | |
25774 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25775 | unsafe fn test_mm256_cvttph_epu16() { |
25776 | let a = _mm256_set_ph( |
25777 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25778 | ); |
25779 | let r = _mm256_cvttph_epu16(a); |
25780 | let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
25781 | assert_eq_m256i(r, e); |
25782 | } |
25783 | |
25784 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25785 | unsafe fn test_mm256_mask_cvttph_epu16() { |
25786 | let a = _mm256_set_ph( |
25787 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25788 | ); |
25789 | let src = _mm256_set_epi16( |
25790 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
25791 | ); |
25792 | let r = _mm256_mask_cvttph_epu16(src, 0b0101010101010101, a); |
25793 | let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
25794 | assert_eq_m256i(r, e); |
25795 | } |
25796 | |
25797 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25798 | unsafe fn test_mm256_maskz_cvttph_epu16() { |
25799 | let a = _mm256_set_ph( |
25800 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25801 | ); |
25802 | let r = _mm256_maskz_cvttph_epu16(0b0101010101010101, a); |
25803 | let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
25804 | assert_eq_m256i(r, e); |
25805 | } |
25806 | |
25807 | #[simd_test(enable = "avx512fp16" )] |
25808 | unsafe fn test_mm512_cvttph_epu16() { |
25809 | let a = _mm512_set_ph( |
25810 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25811 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25812 | 31.0, 32.0, |
25813 | ); |
25814 | let r = _mm512_cvttph_epu16(a); |
25815 | let e = _mm512_set_epi16( |
25816 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
25817 | 25, 26, 27, 28, 29, 30, 31, 32, |
25818 | ); |
25819 | assert_eq_m512i(r, e); |
25820 | } |
25821 | |
25822 | #[simd_test(enable = "avx512fp16" )] |
25823 | unsafe fn test_mm512_mask_cvttph_epu16() { |
25824 | let a = _mm512_set_ph( |
25825 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25826 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25827 | 31.0, 32.0, |
25828 | ); |
25829 | let src = _mm512_set_epi16( |
25830 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
25831 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, |
25832 | ); |
25833 | let r = _mm512_mask_cvttph_epu16(src, 0b01010101010101010101010101010101, a); |
25834 | let e = _mm512_set_epi16( |
25835 | 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, |
25836 | 24, 34, 26, 36, 28, 38, 30, 40, 32, |
25837 | ); |
25838 | assert_eq_m512i(r, e); |
25839 | } |
25840 | |
25841 | #[simd_test(enable = "avx512fp16" )] |
25842 | unsafe fn test_mm512_maskz_cvttph_epu16() { |
25843 | let a = _mm512_set_ph( |
25844 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25845 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25846 | 31.0, 32.0, |
25847 | ); |
25848 | let r = _mm512_maskz_cvttph_epu16(0b01010101010101010101010101010101, a); |
25849 | let e = _mm512_set_epi16( |
25850 | 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, |
25851 | 0, 28, 0, 30, 0, 32, |
25852 | ); |
25853 | assert_eq_m512i(r, e); |
25854 | } |
25855 | |
25856 | #[simd_test(enable = "avx512fp16" )] |
25857 | unsafe fn test_mm512_cvtt_roundph_epu16() { |
25858 | let a = _mm512_set_ph( |
25859 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25860 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25861 | 31.0, 32.0, |
25862 | ); |
25863 | let r = _mm512_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>(a); |
25864 | let e = _mm512_set_epi16( |
25865 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
25866 | 25, 26, 27, 28, 29, 30, 31, 32, |
25867 | ); |
25868 | assert_eq_m512i(r, e); |
25869 | } |
25870 | |
25871 | #[simd_test(enable = "avx512fp16" )] |
25872 | unsafe fn test_mm512_mask_cvtt_roundph_epu16() { |
25873 | let a = _mm512_set_ph( |
25874 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25875 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25876 | 31.0, 32.0, |
25877 | ); |
25878 | let src = _mm512_set_epi16( |
25879 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
25880 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, |
25881 | ); |
25882 | let r = _mm512_mask_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>( |
25883 | src, |
25884 | 0b01010101010101010101010101010101, |
25885 | a, |
25886 | ); |
25887 | let e = _mm512_set_epi16( |
25888 | 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, |
25889 | 24, 34, 26, 36, 28, 38, 30, 40, 32, |
25890 | ); |
25891 | assert_eq_m512i(r, e); |
25892 | } |
25893 | |
25894 | #[simd_test(enable = "avx512fp16" )] |
25895 | unsafe fn test_mm512_maskz_cvtt_roundph_epu16() { |
25896 | let a = _mm512_set_ph( |
25897 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25898 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25899 | 31.0, 32.0, |
25900 | ); |
25901 | let r = _mm512_maskz_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>( |
25902 | 0b01010101010101010101010101010101, |
25903 | a, |
25904 | ); |
25905 | let e = _mm512_set_epi16( |
25906 | 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, |
25907 | 0, 28, 0, 30, 0, 32, |
25908 | ); |
25909 | assert_eq_m512i(r, e); |
25910 | } |
25911 | |
25912 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25913 | unsafe fn test_mm_cvtph_epi32() { |
25914 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
25915 | let r = _mm_cvtph_epi32(a); |
25916 | let e = _mm_set_epi32(1, 2, 3, 4); |
25917 | assert_eq_m128i(r, e); |
25918 | } |
25919 | |
25920 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25921 | unsafe fn test_mm_mask_cvtph_epi32() { |
25922 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
25923 | let src = _mm_set_epi32(10, 11, 12, 13); |
25924 | let r = _mm_mask_cvtph_epi32(src, 0b0101, a); |
25925 | let e = _mm_set_epi32(10, 2, 12, 4); |
25926 | assert_eq_m128i(r, e); |
25927 | } |
25928 | |
25929 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25930 | unsafe fn test_mm_maskz_cvtph_epi32() { |
25931 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
25932 | let r = _mm_maskz_cvtph_epi32(0b0101, a); |
25933 | let e = _mm_set_epi32(0, 2, 0, 4); |
25934 | assert_eq_m128i(r, e); |
25935 | } |
25936 | |
25937 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25938 | unsafe fn test_mm256_cvtph_epi32() { |
25939 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25940 | let r = _mm256_cvtph_epi32(a); |
25941 | let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
25942 | assert_eq_m256i(r, e); |
25943 | } |
25944 | |
25945 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25946 | unsafe fn test_mm256_mask_cvtph_epi32() { |
25947 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25948 | let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17); |
25949 | let r = _mm256_mask_cvtph_epi32(src, 0b01010101, a); |
25950 | let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8); |
25951 | assert_eq_m256i(r, e); |
25952 | } |
25953 | |
25954 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25955 | unsafe fn test_mm256_maskz_cvtph_epi32() { |
25956 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25957 | let r = _mm256_maskz_cvtph_epi32(0b01010101, a); |
25958 | let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8); |
25959 | assert_eq_m256i(r, e); |
25960 | } |
25961 | |
25962 | #[simd_test(enable = "avx512fp16" )] |
25963 | unsafe fn test_mm512_cvtph_epi32() { |
25964 | let a = _mm256_set_ph( |
25965 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25966 | ); |
25967 | let r = _mm512_cvtph_epi32(a); |
25968 | let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
25969 | assert_eq_m512i(r, e); |
25970 | } |
25971 | |
25972 | #[simd_test(enable = "avx512fp16" )] |
25973 | unsafe fn test_mm512_mask_cvtph_epi32() { |
25974 | let a = _mm256_set_ph( |
25975 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25976 | ); |
25977 | let src = _mm512_set_epi32( |
25978 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
25979 | ); |
25980 | let r = _mm512_mask_cvtph_epi32(src, 0b0101010101010101, a); |
25981 | let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
25982 | assert_eq_m512i(r, e); |
25983 | } |
25984 | |
25985 | #[simd_test(enable = "avx512fp16" )] |
25986 | unsafe fn test_mm512_maskz_cvtph_epi32() { |
25987 | let a = _mm256_set_ph( |
25988 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25989 | ); |
25990 | let r = _mm512_maskz_cvtph_epi32(0b0101010101010101, a); |
25991 | let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
25992 | assert_eq_m512i(r, e); |
25993 | } |
25994 | |
25995 | #[simd_test(enable = "avx512fp16" )] |
25996 | unsafe fn test_mm512_cvt_roundph_epi32() { |
25997 | let a = _mm256_set_ph( |
25998 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25999 | ); |
26000 | let r = _mm512_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
26001 | let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
26002 | assert_eq_m512i(r, e); |
26003 | } |
26004 | |
26005 | #[simd_test(enable = "avx512fp16" )] |
26006 | unsafe fn test_mm512_mask_cvt_roundph_epi32() { |
26007 | let a = _mm256_set_ph( |
26008 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26009 | ); |
26010 | let src = _mm512_set_epi32( |
26011 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
26012 | ); |
26013 | let r = _mm512_mask_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
26014 | src, |
26015 | 0b0101010101010101, |
26016 | a, |
26017 | ); |
26018 | let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
26019 | assert_eq_m512i(r, e); |
26020 | } |
26021 | |
26022 | #[simd_test(enable = "avx512fp16" )] |
26023 | unsafe fn test_mm512_maskz_cvt_roundph_epi32() { |
26024 | let a = _mm256_set_ph( |
26025 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26026 | ); |
26027 | let r = _mm512_maskz_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
26028 | 0b0101010101010101, |
26029 | a, |
26030 | ); |
26031 | let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
26032 | assert_eq_m512i(r, e); |
26033 | } |
26034 | |
26035 | #[simd_test(enable = "avx512fp16" )] |
26036 | unsafe fn test_mm_cvtsh_i32() { |
26037 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26038 | let r = _mm_cvtsh_i32(a); |
26039 | assert_eq!(r, 1); |
26040 | } |
26041 | |
26042 | #[simd_test(enable = "avx512fp16" )] |
26043 | unsafe fn test_mm_cvt_roundsh_i32() { |
26044 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26045 | let r = _mm_cvt_roundsh_i32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
26046 | assert_eq!(r, 1); |
26047 | } |
26048 | |
26049 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26050 | unsafe fn test_mm_cvtph_epu32() { |
26051 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26052 | let r = _mm_cvtph_epu32(a); |
26053 | let e = _mm_set_epi32(1, 2, 3, 4); |
26054 | assert_eq_m128i(r, e); |
26055 | } |
26056 | |
26057 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26058 | unsafe fn test_mm_mask_cvtph_epu32() { |
26059 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26060 | let src = _mm_set_epi32(10, 11, 12, 13); |
26061 | let r = _mm_mask_cvtph_epu32(src, 0b0101, a); |
26062 | let e = _mm_set_epi32(10, 2, 12, 4); |
26063 | assert_eq_m128i(r, e); |
26064 | } |
26065 | |
26066 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26067 | unsafe fn test_mm_maskz_cvtph_epu32() { |
26068 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26069 | let r = _mm_maskz_cvtph_epu32(0b0101, a); |
26070 | let e = _mm_set_epi32(0, 2, 0, 4); |
26071 | assert_eq_m128i(r, e); |
26072 | } |
26073 | |
26074 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26075 | unsafe fn test_mm256_cvtph_epu32() { |
26076 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26077 | let r = _mm256_cvtph_epu32(a); |
26078 | let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
26079 | assert_eq_m256i(r, e); |
26080 | } |
26081 | |
26082 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26083 | unsafe fn test_mm256_mask_cvtph_epu32() { |
26084 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26085 | let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17); |
26086 | let r = _mm256_mask_cvtph_epu32(src, 0b01010101, a); |
26087 | let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8); |
26088 | assert_eq_m256i(r, e); |
26089 | } |
26090 | |
26091 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26092 | unsafe fn test_mm256_maskz_cvtph_epu32() { |
26093 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26094 | let r = _mm256_maskz_cvtph_epu32(0b01010101, a); |
26095 | let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8); |
26096 | assert_eq_m256i(r, e); |
26097 | } |
26098 | |
26099 | #[simd_test(enable = "avx512fp16" )] |
26100 | unsafe fn test_mm512_cvtph_epu32() { |
26101 | let a = _mm256_set_ph( |
26102 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26103 | ); |
26104 | let r = _mm512_cvtph_epu32(a); |
26105 | let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
26106 | assert_eq_m512i(r, e); |
26107 | } |
26108 | |
26109 | #[simd_test(enable = "avx512fp16" )] |
26110 | unsafe fn test_mm512_mask_cvtph_epu32() { |
26111 | let a = _mm256_set_ph( |
26112 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26113 | ); |
26114 | let src = _mm512_set_epi32( |
26115 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
26116 | ); |
26117 | let r = _mm512_mask_cvtph_epu32(src, 0b0101010101010101, a); |
26118 | let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
26119 | assert_eq_m512i(r, e); |
26120 | } |
26121 | |
26122 | #[simd_test(enable = "avx512fp16" )] |
26123 | unsafe fn test_mm512_maskz_cvtph_epu32() { |
26124 | let a = _mm256_set_ph( |
26125 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26126 | ); |
26127 | let r = _mm512_maskz_cvtph_epu32(0b0101010101010101, a); |
26128 | let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
26129 | assert_eq_m512i(r, e); |
26130 | } |
26131 | |
26132 | #[simd_test(enable = "avx512fp16" )] |
26133 | unsafe fn test_mm512_cvt_roundph_epu32() { |
26134 | let a = _mm256_set_ph( |
26135 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26136 | ); |
26137 | let r = _mm512_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
26138 | let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
26139 | assert_eq_m512i(r, e); |
26140 | } |
26141 | |
26142 | #[simd_test(enable = "avx512fp16" )] |
26143 | unsafe fn test_mm512_mask_cvt_roundph_epu32() { |
26144 | let a = _mm256_set_ph( |
26145 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26146 | ); |
26147 | let src = _mm512_set_epi32( |
26148 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
26149 | ); |
26150 | let r = _mm512_mask_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
26151 | src, |
26152 | 0b0101010101010101, |
26153 | a, |
26154 | ); |
26155 | let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
26156 | assert_eq_m512i(r, e); |
26157 | } |
26158 | |
26159 | #[simd_test(enable = "avx512fp16" )] |
26160 | unsafe fn test_mm512_maskz_cvt_roundph_epu32() { |
26161 | let a = _mm256_set_ph( |
26162 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26163 | ); |
26164 | let r = _mm512_maskz_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
26165 | 0b0101010101010101, |
26166 | a, |
26167 | ); |
26168 | let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
26169 | assert_eq_m512i(r, e); |
26170 | } |
26171 | |
26172 | #[simd_test(enable = "avx512fp16" )] |
26173 | unsafe fn test_mm_cvtsh_u32() { |
26174 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26175 | let r = _mm_cvtsh_u32(a); |
26176 | assert_eq!(r, 1); |
26177 | } |
26178 | |
26179 | #[simd_test(enable = "avx512fp16" )] |
26180 | unsafe fn test_mm_cvt_roundsh_u32() { |
26181 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26182 | let r = _mm_cvt_roundsh_u32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
26183 | assert_eq!(r, 1); |
26184 | } |
26185 | |
26186 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26187 | unsafe fn test_mm_cvttph_epi32() { |
26188 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26189 | let r = _mm_cvttph_epi32(a); |
26190 | let e = _mm_set_epi32(1, 2, 3, 4); |
26191 | assert_eq_m128i(r, e); |
26192 | } |
26193 | |
26194 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26195 | unsafe fn test_mm_mask_cvttph_epi32() { |
26196 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26197 | let src = _mm_set_epi32(10, 11, 12, 13); |
26198 | let r = _mm_mask_cvttph_epi32(src, 0b0101, a); |
26199 | let e = _mm_set_epi32(10, 2, 12, 4); |
26200 | assert_eq_m128i(r, e); |
26201 | } |
26202 | |
26203 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26204 | unsafe fn test_mm_maskz_cvttph_epi32() { |
26205 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26206 | let r = _mm_maskz_cvttph_epi32(0b0101, a); |
26207 | let e = _mm_set_epi32(0, 2, 0, 4); |
26208 | assert_eq_m128i(r, e); |
26209 | } |
26210 | |
26211 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26212 | unsafe fn test_mm256_cvttph_epi32() { |
26213 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26214 | let r = _mm256_cvttph_epi32(a); |
26215 | let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
26216 | assert_eq_m256i(r, e); |
26217 | } |
26218 | |
26219 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26220 | unsafe fn test_mm256_mask_cvttph_epi32() { |
26221 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26222 | let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17); |
26223 | let r = _mm256_mask_cvttph_epi32(src, 0b01010101, a); |
26224 | let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8); |
26225 | assert_eq_m256i(r, e); |
26226 | } |
26227 | |
26228 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26229 | unsafe fn test_mm256_maskz_cvttph_epi32() { |
26230 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26231 | let r = _mm256_maskz_cvttph_epi32(0b01010101, a); |
26232 | let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8); |
26233 | assert_eq_m256i(r, e); |
26234 | } |
26235 | |
26236 | #[simd_test(enable = "avx512fp16" )] |
26237 | unsafe fn test_mm512_cvttph_epi32() { |
26238 | let a = _mm256_set_ph( |
26239 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26240 | ); |
26241 | let r = _mm512_cvttph_epi32(a); |
26242 | let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
26243 | assert_eq_m512i(r, e); |
26244 | } |
26245 | |
26246 | #[simd_test(enable = "avx512fp16" )] |
26247 | unsafe fn test_mm512_mask_cvttph_epi32() { |
26248 | let a = _mm256_set_ph( |
26249 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26250 | ); |
26251 | let src = _mm512_set_epi32( |
26252 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
26253 | ); |
26254 | let r = _mm512_mask_cvttph_epi32(src, 0b0101010101010101, a); |
26255 | let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
26256 | assert_eq_m512i(r, e); |
26257 | } |
26258 | |
26259 | #[simd_test(enable = "avx512fp16" )] |
26260 | unsafe fn test_mm512_maskz_cvttph_epi32() { |
26261 | let a = _mm256_set_ph( |
26262 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26263 | ); |
26264 | let r = _mm512_maskz_cvttph_epi32(0b0101010101010101, a); |
26265 | let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
26266 | assert_eq_m512i(r, e); |
26267 | } |
26268 | |
26269 | #[simd_test(enable = "avx512fp16" )] |
26270 | unsafe fn test_mm512_cvtt_roundph_epi32() { |
26271 | let a = _mm256_set_ph( |
26272 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26273 | ); |
26274 | let r = _mm512_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(a); |
26275 | let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
26276 | assert_eq_m512i(r, e); |
26277 | } |
26278 | |
26279 | #[simd_test(enable = "avx512fp16" )] |
26280 | unsafe fn test_mm512_mask_cvtt_roundph_epi32() { |
26281 | let a = _mm256_set_ph( |
26282 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26283 | ); |
26284 | let src = _mm512_set_epi32( |
26285 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
26286 | ); |
26287 | let r = _mm512_mask_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a); |
26288 | let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
26289 | assert_eq_m512i(r, e); |
26290 | } |
26291 | |
26292 | #[simd_test(enable = "avx512fp16" )] |
26293 | unsafe fn test_mm512_maskz_cvtt_roundph_epi32() { |
26294 | let a = _mm256_set_ph( |
26295 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26296 | ); |
26297 | let r = _mm512_maskz_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(0b0101010101010101, a); |
26298 | let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
26299 | assert_eq_m512i(r, e); |
26300 | } |
26301 | |
26302 | #[simd_test(enable = "avx512fp16" )] |
26303 | unsafe fn test_mm_cvttsh_i32() { |
26304 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26305 | let r = _mm_cvttsh_i32(a); |
26306 | assert_eq!(r, 1); |
26307 | } |
26308 | |
26309 | #[simd_test(enable = "avx512fp16" )] |
26310 | unsafe fn test_mm_cvtt_roundsh_i32() { |
26311 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26312 | let r = _mm_cvtt_roundsh_i32::<_MM_FROUND_NO_EXC>(a); |
26313 | assert_eq!(r, 1); |
26314 | } |
26315 | |
26316 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26317 | unsafe fn test_mm_cvttph_epu32() { |
26318 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26319 | let r = _mm_cvttph_epu32(a); |
26320 | let e = _mm_set_epi32(1, 2, 3, 4); |
26321 | assert_eq_m128i(r, e); |
26322 | } |
26323 | |
26324 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26325 | unsafe fn test_mm_mask_cvttph_epu32() { |
26326 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26327 | let src = _mm_set_epi32(10, 11, 12, 13); |
26328 | let r = _mm_mask_cvttph_epu32(src, 0b0101, a); |
26329 | let e = _mm_set_epi32(10, 2, 12, 4); |
26330 | assert_eq_m128i(r, e); |
26331 | } |
26332 | |
26333 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26334 | unsafe fn test_mm_maskz_cvttph_epu32() { |
26335 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26336 | let r = _mm_maskz_cvttph_epu32(0b0101, a); |
26337 | let e = _mm_set_epi32(0, 2, 0, 4); |
26338 | assert_eq_m128i(r, e); |
26339 | } |
26340 | |
26341 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26342 | unsafe fn test_mm256_cvttph_epu32() { |
26343 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26344 | let r = _mm256_cvttph_epu32(a); |
26345 | let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
26346 | assert_eq_m256i(r, e); |
26347 | } |
26348 | |
26349 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26350 | unsafe fn test_mm256_mask_cvttph_epu32() { |
26351 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26352 | let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17); |
26353 | let r = _mm256_mask_cvttph_epu32(src, 0b01010101, a); |
26354 | let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8); |
26355 | assert_eq_m256i(r, e); |
26356 | } |
26357 | |
26358 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26359 | unsafe fn test_mm256_maskz_cvttph_epu32() { |
26360 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26361 | let r = _mm256_maskz_cvttph_epu32(0b01010101, a); |
26362 | let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8); |
26363 | assert_eq_m256i(r, e); |
26364 | } |
26365 | |
26366 | #[simd_test(enable = "avx512fp16" )] |
26367 | unsafe fn test_mm512_cvttph_epu32() { |
26368 | let a = _mm256_set_ph( |
26369 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26370 | ); |
26371 | let r = _mm512_cvttph_epu32(a); |
26372 | let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
26373 | assert_eq_m512i(r, e); |
26374 | } |
26375 | |
26376 | #[simd_test(enable = "avx512fp16" )] |
26377 | unsafe fn test_mm512_mask_cvttph_epu32() { |
26378 | let a = _mm256_set_ph( |
26379 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26380 | ); |
26381 | let src = _mm512_set_epi32( |
26382 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
26383 | ); |
26384 | let r = _mm512_mask_cvttph_epu32(src, 0b0101010101010101, a); |
26385 | let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
26386 | assert_eq_m512i(r, e); |
26387 | } |
26388 | |
26389 | #[simd_test(enable = "avx512fp16" )] |
26390 | unsafe fn test_mm512_maskz_cvttph_epu32() { |
26391 | let a = _mm256_set_ph( |
26392 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26393 | ); |
26394 | let r = _mm512_maskz_cvttph_epu32(0b0101010101010101, a); |
26395 | let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
26396 | assert_eq_m512i(r, e); |
26397 | } |
26398 | |
26399 | #[simd_test(enable = "avx512fp16" )] |
26400 | unsafe fn test_mm512_cvtt_roundph_epu32() { |
26401 | let a = _mm256_set_ph( |
26402 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26403 | ); |
26404 | let r = _mm512_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(a); |
26405 | let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
26406 | assert_eq_m512i(r, e); |
26407 | } |
26408 | |
26409 | #[simd_test(enable = "avx512fp16" )] |
26410 | unsafe fn test_mm512_mask_cvtt_roundph_epu32() { |
26411 | let a = _mm256_set_ph( |
26412 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26413 | ); |
26414 | let src = _mm512_set_epi32( |
26415 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
26416 | ); |
26417 | let r = _mm512_mask_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a); |
26418 | let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
26419 | assert_eq_m512i(r, e); |
26420 | } |
26421 | |
26422 | #[simd_test(enable = "avx512fp16" )] |
26423 | unsafe fn test_mm512_maskz_cvtt_roundph_epu32() { |
26424 | let a = _mm256_set_ph( |
26425 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26426 | ); |
26427 | let r = _mm512_maskz_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(0b0101010101010101, a); |
26428 | let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
26429 | assert_eq_m512i(r, e); |
26430 | } |
26431 | |
26432 | #[simd_test(enable = "avx512fp16" )] |
26433 | unsafe fn test_mm_cvttsh_u32() { |
26434 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26435 | let r = _mm_cvttsh_u32(a); |
26436 | assert_eq!(r, 1); |
26437 | } |
26438 | |
26439 | #[simd_test(enable = "avx512fp16" )] |
26440 | unsafe fn test_mm_cvtt_roundsh_u32() { |
26441 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26442 | let r = _mm_cvtt_roundsh_u32::<_MM_FROUND_NO_EXC>(a); |
26443 | assert_eq!(r, 1); |
26444 | } |
26445 | |
26446 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26447 | unsafe fn test_mm_cvtph_epi64() { |
26448 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26449 | let r = _mm_cvtph_epi64(a); |
26450 | let e = _mm_set_epi64x(1, 2); |
26451 | assert_eq_m128i(r, e); |
26452 | } |
26453 | |
26454 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26455 | unsafe fn test_mm_mask_cvtph_epi64() { |
26456 | let src = _mm_set_epi64x(3, 4); |
26457 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26458 | let r = _mm_mask_cvtph_epi64(src, 0b01, a); |
26459 | let e = _mm_set_epi64x(3, 2); |
26460 | assert_eq_m128i(r, e); |
26461 | } |
26462 | |
26463 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26464 | unsafe fn test_mm_maskz_cvtph_epi64() { |
26465 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26466 | let r = _mm_maskz_cvtph_epi64(0b01, a); |
26467 | let e = _mm_set_epi64x(0, 2); |
26468 | assert_eq_m128i(r, e); |
26469 | } |
26470 | |
26471 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26472 | unsafe fn test_mm256_cvtph_epi64() { |
26473 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26474 | let r = _mm256_cvtph_epi64(a); |
26475 | let e = _mm256_set_epi64x(1, 2, 3, 4); |
26476 | assert_eq_m256i(r, e); |
26477 | } |
26478 | |
26479 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26480 | unsafe fn test_mm256_mask_cvtph_epi64() { |
26481 | let src = _mm256_set_epi64x(5, 6, 7, 8); |
26482 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26483 | let r = _mm256_mask_cvtph_epi64(src, 0b0101, a); |
26484 | let e = _mm256_set_epi64x(5, 2, 7, 4); |
26485 | assert_eq_m256i(r, e); |
26486 | } |
26487 | |
26488 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26489 | unsafe fn test_mm256_maskz_cvtph_epi64() { |
26490 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26491 | let r = _mm256_maskz_cvtph_epi64(0b0101, a); |
26492 | let e = _mm256_set_epi64x(0, 2, 0, 4); |
26493 | assert_eq_m256i(r, e); |
26494 | } |
26495 | |
26496 | #[simd_test(enable = "avx512fp16" )] |
26497 | unsafe fn test_mm512_cvtph_epi64() { |
26498 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26499 | let r = _mm512_cvtph_epi64(a); |
26500 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
26501 | assert_eq_m512i(r, e); |
26502 | } |
26503 | |
26504 | #[simd_test(enable = "avx512fp16" )] |
26505 | unsafe fn test_mm512_mask_cvtph_epi64() { |
26506 | let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
26507 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26508 | let r = _mm512_mask_cvtph_epi64(src, 0b01010101, a); |
26509 | let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); |
26510 | assert_eq_m512i(r, e); |
26511 | } |
26512 | |
26513 | #[simd_test(enable = "avx512fp16" )] |
26514 | unsafe fn test_mm512_maskz_cvtph_epi64() { |
26515 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26516 | let r = _mm512_maskz_cvtph_epi64(0b01010101, a); |
26517 | let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); |
26518 | assert_eq_m512i(r, e); |
26519 | } |
26520 | |
26521 | #[simd_test(enable = "avx512fp16" )] |
26522 | unsafe fn test_mm512_cvt_roundph_epi64() { |
26523 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26524 | let r = _mm512_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
26525 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
26526 | assert_eq_m512i(r, e); |
26527 | } |
26528 | |
26529 | #[simd_test(enable = "avx512fp16" )] |
26530 | unsafe fn test_mm512_mask_cvt_roundph_epi64() { |
26531 | let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
26532 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26533 | let r = _mm512_mask_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
26534 | src, 0b01010101, a, |
26535 | ); |
26536 | let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); |
26537 | assert_eq_m512i(r, e); |
26538 | } |
26539 | |
26540 | #[simd_test(enable = "avx512fp16" )] |
26541 | unsafe fn test_mm512_maskz_cvt_roundph_epi64() { |
26542 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26543 | let r = _mm512_maskz_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
26544 | 0b01010101, a, |
26545 | ); |
26546 | let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); |
26547 | assert_eq_m512i(r, e); |
26548 | } |
26549 | |
26550 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26551 | unsafe fn test_mm_cvtph_epu64() { |
26552 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26553 | let r = _mm_cvtph_epu64(a); |
26554 | let e = _mm_set_epi64x(1, 2); |
26555 | assert_eq_m128i(r, e); |
26556 | } |
26557 | |
26558 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26559 | unsafe fn test_mm_mask_cvtph_epu64() { |
26560 | let src = _mm_set_epi64x(3, 4); |
26561 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26562 | let r = _mm_mask_cvtph_epu64(src, 0b01, a); |
26563 | let e = _mm_set_epi64x(3, 2); |
26564 | assert_eq_m128i(r, e); |
26565 | } |
26566 | |
26567 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26568 | unsafe fn test_mm_maskz_cvtph_epu64() { |
26569 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26570 | let r = _mm_maskz_cvtph_epu64(0b01, a); |
26571 | let e = _mm_set_epi64x(0, 2); |
26572 | assert_eq_m128i(r, e); |
26573 | } |
26574 | |
26575 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26576 | unsafe fn test_mm256_cvtph_epu64() { |
26577 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26578 | let r = _mm256_cvtph_epu64(a); |
26579 | let e = _mm256_set_epi64x(1, 2, 3, 4); |
26580 | assert_eq_m256i(r, e); |
26581 | } |
26582 | |
26583 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26584 | unsafe fn test_mm256_mask_cvtph_epu64() { |
26585 | let src = _mm256_set_epi64x(5, 6, 7, 8); |
26586 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26587 | let r = _mm256_mask_cvtph_epu64(src, 0b0101, a); |
26588 | let e = _mm256_set_epi64x(5, 2, 7, 4); |
26589 | assert_eq_m256i(r, e); |
26590 | } |
26591 | |
26592 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26593 | unsafe fn test_mm256_maskz_cvtph_epu64() { |
26594 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26595 | let r = _mm256_maskz_cvtph_epu64(0b0101, a); |
26596 | let e = _mm256_set_epi64x(0, 2, 0, 4); |
26597 | assert_eq_m256i(r, e); |
26598 | } |
26599 | |
26600 | #[simd_test(enable = "avx512fp16" )] |
26601 | unsafe fn test_mm512_cvtph_epu64() { |
26602 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26603 | let r = _mm512_cvtph_epu64(a); |
26604 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
26605 | assert_eq_m512i(r, e); |
26606 | } |
26607 | |
26608 | #[simd_test(enable = "avx512fp16" )] |
26609 | unsafe fn test_mm512_mask_cvtph_epu64() { |
26610 | let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
26611 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26612 | let r = _mm512_mask_cvtph_epu64(src, 0b01010101, a); |
26613 | let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); |
26614 | assert_eq_m512i(r, e); |
26615 | } |
26616 | |
26617 | #[simd_test(enable = "avx512fp16" )] |
26618 | unsafe fn test_mm512_maskz_cvtph_epu64() { |
26619 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26620 | let r = _mm512_maskz_cvtph_epu64(0b01010101, a); |
26621 | let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); |
26622 | assert_eq_m512i(r, e); |
26623 | } |
26624 | |
26625 | #[simd_test(enable = "avx512fp16" )] |
26626 | unsafe fn test_mm512_cvt_roundph_epu64() { |
26627 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26628 | let r = _mm512_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
26629 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
26630 | assert_eq_m512i(r, e); |
26631 | } |
26632 | |
26633 | #[simd_test(enable = "avx512fp16" )] |
26634 | unsafe fn test_mm512_mask_cvt_roundph_epu64() { |
26635 | let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
26636 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26637 | let r = _mm512_mask_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
26638 | src, 0b01010101, a, |
26639 | ); |
26640 | let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); |
26641 | assert_eq_m512i(r, e); |
26642 | } |
26643 | |
26644 | #[simd_test(enable = "avx512fp16" )] |
26645 | unsafe fn test_mm512_maskz_cvt_roundph_epu64() { |
26646 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26647 | let r = _mm512_maskz_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
26648 | 0b01010101, a, |
26649 | ); |
26650 | let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); |
26651 | assert_eq_m512i(r, e); |
26652 | } |
26653 | |
26654 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26655 | unsafe fn test_mm_cvttph_epi64() { |
26656 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26657 | let r = _mm_cvttph_epi64(a); |
26658 | let e = _mm_set_epi64x(1, 2); |
26659 | assert_eq_m128i(r, e); |
26660 | } |
26661 | |
26662 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26663 | unsafe fn test_mm_mask_cvttph_epi64() { |
26664 | let src = _mm_set_epi64x(3, 4); |
26665 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26666 | let r = _mm_mask_cvttph_epi64(src, 0b01, a); |
26667 | let e = _mm_set_epi64x(3, 2); |
26668 | assert_eq_m128i(r, e); |
26669 | } |
26670 | |
26671 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26672 | unsafe fn test_mm_maskz_cvttph_epi64() { |
26673 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26674 | let r = _mm_maskz_cvttph_epi64(0b01, a); |
26675 | let e = _mm_set_epi64x(0, 2); |
26676 | assert_eq_m128i(r, e); |
26677 | } |
26678 | |
26679 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26680 | unsafe fn test_mm256_cvttph_epi64() { |
26681 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26682 | let r = _mm256_cvttph_epi64(a); |
26683 | let e = _mm256_set_epi64x(1, 2, 3, 4); |
26684 | assert_eq_m256i(r, e); |
26685 | } |
26686 | |
26687 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26688 | unsafe fn test_mm256_mask_cvttph_epi64() { |
26689 | let src = _mm256_set_epi64x(5, 6, 7, 8); |
26690 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26691 | let r = _mm256_mask_cvttph_epi64(src, 0b0101, a); |
26692 | let e = _mm256_set_epi64x(5, 2, 7, 4); |
26693 | assert_eq_m256i(r, e); |
26694 | } |
26695 | |
26696 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26697 | unsafe fn test_mm256_maskz_cvttph_epi64() { |
26698 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26699 | let r = _mm256_maskz_cvttph_epi64(0b0101, a); |
26700 | let e = _mm256_set_epi64x(0, 2, 0, 4); |
26701 | assert_eq_m256i(r, e); |
26702 | } |
26703 | |
26704 | #[simd_test(enable = "avx512fp16" )] |
26705 | unsafe fn test_mm512_cvttph_epi64() { |
26706 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26707 | let r = _mm512_cvttph_epi64(a); |
26708 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
26709 | assert_eq_m512i(r, e); |
26710 | } |
26711 | |
26712 | #[simd_test(enable = "avx512fp16" )] |
26713 | unsafe fn test_mm512_mask_cvttph_epi64() { |
26714 | let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
26715 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26716 | let r = _mm512_mask_cvttph_epi64(src, 0b01010101, a); |
26717 | let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); |
26718 | assert_eq_m512i(r, e); |
26719 | } |
26720 | |
26721 | #[simd_test(enable = "avx512fp16" )] |
26722 | unsafe fn test_mm512_maskz_cvttph_epi64() { |
26723 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26724 | let r = _mm512_maskz_cvttph_epi64(0b01010101, a); |
26725 | let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); |
26726 | assert_eq_m512i(r, e); |
26727 | } |
26728 | |
26729 | #[simd_test(enable = "avx512fp16" )] |
26730 | unsafe fn test_mm512_cvtt_roundph_epi64() { |
26731 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26732 | let r = _mm512_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(a); |
26733 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
26734 | assert_eq_m512i(r, e); |
26735 | } |
26736 | |
26737 | #[simd_test(enable = "avx512fp16" )] |
26738 | unsafe fn test_mm512_mask_cvtt_roundph_epi64() { |
26739 | let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
26740 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26741 | let r = _mm512_mask_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(src, 0b01010101, a); |
26742 | let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); |
26743 | assert_eq_m512i(r, e); |
26744 | } |
26745 | |
26746 | #[simd_test(enable = "avx512fp16" )] |
26747 | unsafe fn test_mm512_maskz_cvtt_roundph_epi64() { |
26748 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26749 | let r = _mm512_maskz_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(0b01010101, a); |
26750 | let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); |
26751 | assert_eq_m512i(r, e); |
26752 | } |
26753 | |
26754 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26755 | unsafe fn test_mm_cvttph_epu64() { |
26756 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26757 | let r = _mm_cvttph_epu64(a); |
26758 | let e = _mm_set_epi64x(1, 2); |
26759 | assert_eq_m128i(r, e); |
26760 | } |
26761 | |
26762 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26763 | unsafe fn test_mm_mask_cvttph_epu64() { |
26764 | let src = _mm_set_epi64x(3, 4); |
26765 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26766 | let r = _mm_mask_cvttph_epu64(src, 0b01, a); |
26767 | let e = _mm_set_epi64x(3, 2); |
26768 | assert_eq_m128i(r, e); |
26769 | } |
26770 | |
26771 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26772 | unsafe fn test_mm_maskz_cvttph_epu64() { |
26773 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26774 | let r = _mm_maskz_cvttph_epu64(0b01, a); |
26775 | let e = _mm_set_epi64x(0, 2); |
26776 | assert_eq_m128i(r, e); |
26777 | } |
26778 | |
26779 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26780 | unsafe fn test_mm256_cvttph_epu64() { |
26781 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26782 | let r = _mm256_cvttph_epu64(a); |
26783 | let e = _mm256_set_epi64x(1, 2, 3, 4); |
26784 | assert_eq_m256i(r, e); |
26785 | } |
26786 | |
26787 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26788 | unsafe fn test_mm256_mask_cvttph_epu64() { |
26789 | let src = _mm256_set_epi64x(5, 6, 7, 8); |
26790 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26791 | let r = _mm256_mask_cvttph_epu64(src, 0b0101, a); |
26792 | let e = _mm256_set_epi64x(5, 2, 7, 4); |
26793 | assert_eq_m256i(r, e); |
26794 | } |
26795 | |
26796 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26797 | unsafe fn test_mm256_maskz_cvttph_epu64() { |
26798 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26799 | let r = _mm256_maskz_cvttph_epu64(0b0101, a); |
26800 | let e = _mm256_set_epi64x(0, 2, 0, 4); |
26801 | assert_eq_m256i(r, e); |
26802 | } |
26803 | |
26804 | #[simd_test(enable = "avx512fp16" )] |
26805 | unsafe fn test_mm512_cvttph_epu64() { |
26806 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26807 | let r = _mm512_cvttph_epu64(a); |
26808 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
26809 | assert_eq_m512i(r, e); |
26810 | } |
26811 | |
26812 | #[simd_test(enable = "avx512fp16" )] |
26813 | unsafe fn test_mm512_mask_cvttph_epu64() { |
26814 | let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
26815 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26816 | let r = _mm512_mask_cvttph_epu64(src, 0b01010101, a); |
26817 | let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); |
26818 | assert_eq_m512i(r, e); |
26819 | } |
26820 | |
26821 | #[simd_test(enable = "avx512fp16" )] |
26822 | unsafe fn test_mm512_maskz_cvttph_epu64() { |
26823 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26824 | let r = _mm512_maskz_cvttph_epu64(0b01010101, a); |
26825 | let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); |
26826 | assert_eq_m512i(r, e); |
26827 | } |
26828 | |
26829 | #[simd_test(enable = "avx512fp16" )] |
26830 | unsafe fn test_mm512_cvtt_roundph_epu64() { |
26831 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26832 | let r = _mm512_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(a); |
26833 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
26834 | assert_eq_m512i(r, e); |
26835 | } |
26836 | |
26837 | #[simd_test(enable = "avx512fp16" )] |
26838 | unsafe fn test_mm512_mask_cvtt_roundph_epu64() { |
26839 | let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
26840 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26841 | let r = _mm512_mask_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(src, 0b01010101, a); |
26842 | let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); |
26843 | assert_eq_m512i(r, e); |
26844 | } |
26845 | |
26846 | #[simd_test(enable = "avx512fp16" )] |
26847 | unsafe fn test_mm512_maskz_cvtt_roundph_epu64() { |
26848 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26849 | let r = _mm512_maskz_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(0b01010101, a); |
26850 | let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); |
26851 | assert_eq_m512i(r, e); |
26852 | } |
26853 | |
26854 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26855 | unsafe fn test_mm_cvtxph_ps() { |
26856 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26857 | let r = _mm_cvtxph_ps(a); |
26858 | let e = _mm_set_ps(1.0, 2.0, 3.0, 4.0); |
26859 | assert_eq_m128(r, e); |
26860 | } |
26861 | |
26862 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26863 | unsafe fn test_mm_mask_cvtxph_ps() { |
26864 | let src = _mm_set_ps(10.0, 11.0, 12.0, 13.0); |
26865 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26866 | let r = _mm_mask_cvtxph_ps(src, 0b0101, a); |
26867 | let e = _mm_set_ps(10.0, 2.0, 12.0, 4.0); |
26868 | assert_eq_m128(r, e); |
26869 | } |
26870 | |
26871 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26872 | unsafe fn test_mm_maskz_cvtxph_ps() { |
26873 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26874 | let r = _mm_maskz_cvtxph_ps(0b0101, a); |
26875 | let e = _mm_set_ps(0.0, 2.0, 0.0, 4.0); |
26876 | assert_eq_m128(r, e); |
26877 | } |
26878 | |
26879 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26880 | unsafe fn test_mm256_cvtxph_ps() { |
26881 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26882 | let r = _mm256_cvtxph_ps(a); |
26883 | let e = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26884 | assert_eq_m256(r, e); |
26885 | } |
26886 | |
26887 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26888 | unsafe fn test_mm256_mask_cvtxph_ps() { |
26889 | let src = _mm256_set_ps(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0); |
26890 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26891 | let r = _mm256_mask_cvtxph_ps(src, 0b01010101, a); |
26892 | let e = _mm256_set_ps(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0); |
26893 | assert_eq_m256(r, e); |
26894 | } |
26895 | |
26896 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26897 | unsafe fn test_mm256_maskz_cvtxph_ps() { |
26898 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26899 | let r = _mm256_maskz_cvtxph_ps(0b01010101, a); |
26900 | let e = _mm256_set_ps(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0); |
26901 | assert_eq_m256(r, e); |
26902 | } |
26903 | |
26904 | #[simd_test(enable = "avx512fp16" )] |
26905 | unsafe fn test_mm512_cvtxph_ps() { |
26906 | let a = _mm256_set_ph( |
26907 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26908 | ); |
26909 | let r = _mm512_cvtxph_ps(a); |
26910 | let e = _mm512_set_ps( |
26911 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26912 | ); |
26913 | assert_eq_m512(r, e); |
26914 | } |
26915 | |
26916 | #[simd_test(enable = "avx512fp16" )] |
26917 | unsafe fn test_mm512_mask_cvtxph_ps() { |
26918 | let src = _mm512_set_ps( |
26919 | 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, |
26920 | 24.0, 25.0, |
26921 | ); |
26922 | let a = _mm256_set_ph( |
26923 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26924 | ); |
26925 | let r = _mm512_mask_cvtxph_ps(src, 0b0101010101010101, a); |
26926 | let e = _mm512_set_ps( |
26927 | 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0, |
26928 | 16.0, |
26929 | ); |
26930 | assert_eq_m512(r, e); |
26931 | } |
26932 | |
26933 | #[simd_test(enable = "avx512fp16" )] |
26934 | unsafe fn test_mm512_maskz_cvtxph_ps() { |
26935 | let a = _mm256_set_ph( |
26936 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26937 | ); |
26938 | let r = _mm512_maskz_cvtxph_ps(0b0101010101010101, a); |
26939 | let e = _mm512_set_ps( |
26940 | 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, |
26941 | ); |
26942 | assert_eq_m512(r, e); |
26943 | } |
26944 | |
26945 | #[simd_test(enable = "avx512fp16" )] |
26946 | unsafe fn test_mm512_cvtx_roundph_ps() { |
26947 | let a = _mm256_set_ph( |
26948 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26949 | ); |
26950 | let r = _mm512_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(a); |
26951 | let e = _mm512_set_ps( |
26952 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26953 | ); |
26954 | assert_eq_m512(r, e); |
26955 | } |
26956 | |
26957 | #[simd_test(enable = "avx512fp16" )] |
26958 | unsafe fn test_mm512_mask_cvtx_roundph_ps() { |
26959 | let src = _mm512_set_ps( |
26960 | 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, |
26961 | 24.0, 25.0, |
26962 | ); |
26963 | let a = _mm256_set_ph( |
26964 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26965 | ); |
26966 | let r = _mm512_mask_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a); |
26967 | let e = _mm512_set_ps( |
26968 | 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0, |
26969 | 16.0, |
26970 | ); |
26971 | assert_eq_m512(r, e); |
26972 | } |
26973 | |
26974 | #[simd_test(enable = "avx512fp16" )] |
26975 | unsafe fn test_mm512_maskz_cvtx_roundph_ps() { |
26976 | let a = _mm256_set_ph( |
26977 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26978 | ); |
26979 | let r = _mm512_maskz_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(0b0101010101010101, a); |
26980 | let e = _mm512_set_ps( |
26981 | 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, |
26982 | ); |
26983 | assert_eq_m512(r, e); |
26984 | } |
26985 | |
26986 | #[simd_test(enable = "avx512fp16" )] |
26987 | unsafe fn test_mm_cvtsh_ss() { |
26988 | let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0); |
26989 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
26990 | let r = _mm_cvtsh_ss(a, b); |
26991 | let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0); |
26992 | assert_eq_m128(r, e); |
26993 | } |
26994 | |
26995 | #[simd_test(enable = "avx512fp16" )] |
26996 | unsafe fn test_mm_mask_cvtsh_ss() { |
26997 | let src = _mm_setr_ps(3.0, 11.0, 12.0, 13.0); |
26998 | let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0); |
26999 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27000 | let r = _mm_mask_cvtsh_ss(src, 0, a, b); |
27001 | let e = _mm_setr_ps(3.0, 20.0, 21.0, 22.0); |
27002 | assert_eq_m128(r, e); |
27003 | let r = _mm_mask_cvtsh_ss(src, 1, a, b); |
27004 | let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0); |
27005 | assert_eq_m128(r, e); |
27006 | } |
27007 | |
27008 | #[simd_test(enable = "avx512fp16" )] |
27009 | unsafe fn test_mm_maskz_cvtsh_ss() { |
27010 | let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0); |
27011 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27012 | let r = _mm_maskz_cvtsh_ss(0, a, b); |
27013 | let e = _mm_setr_ps(0.0, 20.0, 21.0, 22.0); |
27014 | assert_eq_m128(r, e); |
27015 | let r = _mm_maskz_cvtsh_ss(1, a, b); |
27016 | let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0); |
27017 | assert_eq_m128(r, e); |
27018 | } |
27019 | |
27020 | #[simd_test(enable = "avx512fp16" )] |
27021 | unsafe fn test_mm_cvt_roundsh_ss() { |
27022 | let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0); |
27023 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27024 | let r = _mm_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(a, b); |
27025 | let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0); |
27026 | assert_eq_m128(r, e); |
27027 | } |
27028 | |
27029 | #[simd_test(enable = "avx512fp16" )] |
27030 | unsafe fn test_mm_mask_cvt_roundsh_ss() { |
27031 | let src = _mm_setr_ps(3.0, 11.0, 12.0, 13.0); |
27032 | let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0); |
27033 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27034 | let r = _mm_mask_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(src, 0, a, b); |
27035 | let e = _mm_setr_ps(3.0, 20.0, 21.0, 22.0); |
27036 | assert_eq_m128(r, e); |
27037 | let r = _mm_mask_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(src, 1, a, b); |
27038 | let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0); |
27039 | assert_eq_m128(r, e); |
27040 | } |
27041 | |
27042 | #[simd_test(enable = "avx512fp16" )] |
27043 | unsafe fn test_mm_maskz_cvt_roundsh_ss() { |
27044 | let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0); |
27045 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27046 | let r = _mm_maskz_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(0, a, b); |
27047 | let e = _mm_setr_ps(0.0, 20.0, 21.0, 22.0); |
27048 | assert_eq_m128(r, e); |
27049 | let r = _mm_maskz_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(1, a, b); |
27050 | let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0); |
27051 | assert_eq_m128(r, e); |
27052 | } |
27053 | |
27054 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
27055 | unsafe fn test_mm_cvtph_pd() { |
27056 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
27057 | let r = _mm_cvtph_pd(a); |
27058 | let e = _mm_set_pd(1.0, 2.0); |
27059 | assert_eq_m128d(r, e); |
27060 | } |
27061 | |
27062 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
27063 | unsafe fn test_mm_mask_cvtph_pd() { |
27064 | let src = _mm_set_pd(10.0, 11.0); |
27065 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
27066 | let r = _mm_mask_cvtph_pd(src, 0b01, a); |
27067 | let e = _mm_set_pd(10.0, 2.0); |
27068 | assert_eq_m128d(r, e); |
27069 | } |
27070 | |
27071 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
27072 | unsafe fn test_mm_maskz_cvtph_pd() { |
27073 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
27074 | let r = _mm_maskz_cvtph_pd(0b01, a); |
27075 | let e = _mm_set_pd(0.0, 2.0); |
27076 | assert_eq_m128d(r, e); |
27077 | } |
27078 | |
27079 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
27080 | unsafe fn test_mm256_cvtph_pd() { |
27081 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
27082 | let r = _mm256_cvtph_pd(a); |
27083 | let e = _mm256_set_pd(1.0, 2.0, 3.0, 4.0); |
27084 | assert_eq_m256d(r, e); |
27085 | } |
27086 | |
27087 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
27088 | unsafe fn test_mm256_mask_cvtph_pd() { |
27089 | let src = _mm256_set_pd(10.0, 11.0, 12.0, 13.0); |
27090 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
27091 | let r = _mm256_mask_cvtph_pd(src, 0b0101, a); |
27092 | let e = _mm256_set_pd(10.0, 2.0, 12.0, 4.0); |
27093 | assert_eq_m256d(r, e); |
27094 | } |
27095 | |
27096 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
27097 | unsafe fn test_mm256_maskz_cvtph_pd() { |
27098 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
27099 | let r = _mm256_maskz_cvtph_pd(0b0101, a); |
27100 | let e = _mm256_set_pd(0.0, 2.0, 0.0, 4.0); |
27101 | assert_eq_m256d(r, e); |
27102 | } |
27103 | |
27104 | #[simd_test(enable = "avx512fp16" )] |
27105 | unsafe fn test_mm512_cvtph_pd() { |
27106 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
27107 | let r = _mm512_cvtph_pd(a); |
27108 | let e = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
27109 | assert_eq_m512d(r, e); |
27110 | } |
27111 | |
27112 | #[simd_test(enable = "avx512fp16" )] |
27113 | unsafe fn test_mm512_mask_cvtph_pd() { |
27114 | let src = _mm512_set_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0); |
27115 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
27116 | let r = _mm512_mask_cvtph_pd(src, 0b01010101, a); |
27117 | let e = _mm512_set_pd(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0); |
27118 | assert_eq_m512d(r, e); |
27119 | } |
27120 | |
27121 | #[simd_test(enable = "avx512fp16" )] |
27122 | unsafe fn test_mm512_maskz_cvtph_pd() { |
27123 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
27124 | let r = _mm512_maskz_cvtph_pd(0b01010101, a); |
27125 | let e = _mm512_set_pd(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0); |
27126 | assert_eq_m512d(r, e); |
27127 | } |
27128 | |
27129 | #[simd_test(enable = "avx512fp16" )] |
27130 | unsafe fn test_mm512_cvt_roundph_pd() { |
27131 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
27132 | let r = _mm512_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(a); |
27133 | let e = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
27134 | assert_eq_m512d(r, e); |
27135 | } |
27136 | |
27137 | #[simd_test(enable = "avx512fp16" )] |
27138 | unsafe fn test_mm512_mask_cvt_roundph_pd() { |
27139 | let src = _mm512_set_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0); |
27140 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
27141 | let r = _mm512_mask_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(src, 0b01010101, a); |
27142 | let e = _mm512_set_pd(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0); |
27143 | assert_eq_m512d(r, e); |
27144 | } |
27145 | |
27146 | #[simd_test(enable = "avx512fp16" )] |
27147 | unsafe fn test_mm512_maskz_cvt_roundph_pd() { |
27148 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
27149 | let r = _mm512_maskz_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(0b01010101, a); |
27150 | let e = _mm512_set_pd(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0); |
27151 | assert_eq_m512d(r, e); |
27152 | } |
27153 | |
27154 | #[simd_test(enable = "avx512fp16" )] |
27155 | unsafe fn test_mm_cvtsh_sd() { |
27156 | let a = _mm_setr_pd(2.0, 20.0); |
27157 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27158 | let r = _mm_cvtsh_sd(a, b); |
27159 | let e = _mm_setr_pd(1.0, 20.0); |
27160 | assert_eq_m128d(r, e); |
27161 | } |
27162 | |
27163 | #[simd_test(enable = "avx512fp16" )] |
27164 | unsafe fn test_mm_mask_cvtsh_sd() { |
27165 | let src = _mm_setr_pd(3.0, 11.0); |
27166 | let a = _mm_setr_pd(2.0, 20.0); |
27167 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27168 | let r = _mm_mask_cvtsh_sd(src, 0, a, b); |
27169 | let e = _mm_setr_pd(3.0, 20.0); |
27170 | assert_eq_m128d(r, e); |
27171 | let r = _mm_mask_cvtsh_sd(src, 1, a, b); |
27172 | let e = _mm_setr_pd(1.0, 20.0); |
27173 | assert_eq_m128d(r, e); |
27174 | } |
27175 | |
27176 | #[simd_test(enable = "avx512fp16" )] |
27177 | unsafe fn test_mm_maskz_cvtsh_sd() { |
27178 | let a = _mm_setr_pd(2.0, 20.0); |
27179 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27180 | let r = _mm_maskz_cvtsh_sd(0, a, b); |
27181 | let e = _mm_setr_pd(0.0, 20.0); |
27182 | assert_eq_m128d(r, e); |
27183 | let r = _mm_maskz_cvtsh_sd(1, a, b); |
27184 | let e = _mm_setr_pd(1.0, 20.0); |
27185 | assert_eq_m128d(r, e); |
27186 | } |
27187 | |
27188 | #[simd_test(enable = "avx512fp16" )] |
27189 | unsafe fn test_mm_cvt_roundsh_sd() { |
27190 | let a = _mm_setr_pd(2.0, 20.0); |
27191 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27192 | let r = _mm_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(a, b); |
27193 | let e = _mm_setr_pd(1.0, 20.0); |
27194 | assert_eq_m128d(r, e); |
27195 | } |
27196 | |
27197 | #[simd_test(enable = "avx512fp16" )] |
27198 | unsafe fn test_mm_mask_cvt_roundsh_sd() { |
27199 | let src = _mm_setr_pd(3.0, 11.0); |
27200 | let a = _mm_setr_pd(2.0, 20.0); |
27201 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27202 | let r = _mm_mask_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(src, 0, a, b); |
27203 | let e = _mm_setr_pd(3.0, 20.0); |
27204 | assert_eq_m128d(r, e); |
27205 | let r = _mm_mask_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(src, 1, a, b); |
27206 | let e = _mm_setr_pd(1.0, 20.0); |
27207 | assert_eq_m128d(r, e); |
27208 | } |
27209 | |
27210 | #[simd_test(enable = "avx512fp16" )] |
27211 | unsafe fn test_mm_maskz_cvt_roundsh_sd() { |
27212 | let a = _mm_setr_pd(2.0, 20.0); |
27213 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27214 | let r = _mm_maskz_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(0, a, b); |
27215 | let e = _mm_setr_pd(0.0, 20.0); |
27216 | assert_eq_m128d(r, e); |
27217 | let r = _mm_maskz_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(1, a, b); |
27218 | let e = _mm_setr_pd(1.0, 20.0); |
27219 | assert_eq_m128d(r, e); |
27220 | } |
27221 | |
27222 | #[simd_test(enable = "avx512fp16" )] |
27223 | unsafe fn test_mm_cvtsh_h() { |
27224 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 42.0, 5.0, 6.0, 7.0, 8.0); |
27225 | let r = _mm_cvtsh_h(a); |
27226 | assert_eq!(r, 1.0); |
27227 | } |
27228 | |
27229 | #[simd_test(enable = "avx512fp16" )] |
27230 | unsafe fn test_mm256_cvtsh_h() { |
27231 | let a = _mm256_setr_ph( |
27232 | 1.0, 2.0, 3.0, 42.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
27233 | ); |
27234 | let r = _mm256_cvtsh_h(a); |
27235 | assert_eq!(r, 1.0); |
27236 | } |
27237 | |
27238 | #[simd_test(enable = "avx512fp16" )] |
27239 | unsafe fn test_mm512_cvtsh_h() { |
27240 | let a = _mm512_setr_ph( |
27241 | 1.0, 2.0, 3.0, 42.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
27242 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
27243 | 31.0, 32.0, |
27244 | ); |
27245 | let r = _mm512_cvtsh_h(a); |
27246 | assert_eq!(r, 1.0); |
27247 | } |
27248 | |
27249 | #[simd_test(enable = "avx512fp16" )] |
27250 | unsafe fn test_mm_cvtsi128_si16() { |
27251 | let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
27252 | let r = _mm_cvtsi128_si16(a); |
27253 | assert_eq!(r, 1); |
27254 | } |
27255 | |
27256 | #[simd_test(enable = "avx512fp16" )] |
27257 | unsafe fn test_mm_cvtsi16_si128() { |
27258 | let a = 1; |
27259 | let r = _mm_cvtsi16_si128(a); |
27260 | let e = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0); |
27261 | assert_eq_m128i(r, e); |
27262 | } |
27263 | } |
27264 | |