1 | use crate::arch::asm; |
2 | use crate::core_arch::{simd::*, x86::*}; |
3 | use crate::intrinsics::{fmaf16, simd::*}; |
4 | use crate::ptr; |
5 | |
6 | /// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values. |
7 | /// |
8 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_ph) |
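///
/// An illustrative sketch of the argument order (assumes a nightly toolchain with the `f16`
/// type and a caller that enables `avx512fp16`):
///
/// ```ignore
/// // Arguments run from the highest lane (e7) down to the lowest (e0),
/// // so 0.5 ends up in lane 0 and 7.5 in lane 7.
/// let v = _mm_set_ph(7.5, 6.5, 5.5, 4.5, 3.5, 2.5, 1.5, 0.5);
/// ```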
9 | #[inline ] |
10 | #[target_feature (enable = "avx512fp16" )] |
11 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12 | pub fn _mm_set_ph( |
13 | e7: f16, |
14 | e6: f16, |
15 | e5: f16, |
16 | e4: f16, |
17 | e3: f16, |
18 | e2: f16, |
19 | e1: f16, |
20 | e0: f16, |
21 | ) -> __m128h { |
22 | __m128h([e0, e1, e2, e3, e4, e5, e6, e7]) |
23 | } |
24 | |
25 | /// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values. |
26 | /// |
27 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_ph) |
28 | #[inline ] |
29 | #[target_feature (enable = "avx512fp16" )] |
30 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
31 | pub fn _mm256_set_ph( |
32 | e15: f16, |
33 | e14: f16, |
34 | e13: f16, |
35 | e12: f16, |
36 | e11: f16, |
37 | e10: f16, |
38 | e9: f16, |
39 | e8: f16, |
40 | e7: f16, |
41 | e6: f16, |
42 | e5: f16, |
43 | e4: f16, |
44 | e3: f16, |
45 | e2: f16, |
46 | e1: f16, |
47 | e0: f16, |
48 | ) -> __m256h { |
49 | __m256h([ |
50 | e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, |
51 | ]) |
52 | } |
53 | |
54 | /// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values. |
55 | /// |
56 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set_ph) |
57 | #[inline ] |
58 | #[target_feature (enable = "avx512fp16" )] |
59 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
60 | pub fn _mm512_set_ph( |
61 | e31: f16, |
62 | e30: f16, |
63 | e29: f16, |
64 | e28: f16, |
65 | e27: f16, |
66 | e26: f16, |
67 | e25: f16, |
68 | e24: f16, |
69 | e23: f16, |
70 | e22: f16, |
71 | e21: f16, |
72 | e20: f16, |
73 | e19: f16, |
74 | e18: f16, |
75 | e17: f16, |
76 | e16: f16, |
77 | e15: f16, |
78 | e14: f16, |
79 | e13: f16, |
80 | e12: f16, |
81 | e11: f16, |
82 | e10: f16, |
83 | e9: f16, |
84 | e8: f16, |
85 | e7: f16, |
86 | e6: f16, |
87 | e5: f16, |
88 | e4: f16, |
89 | e3: f16, |
90 | e2: f16, |
91 | e1: f16, |
92 | e0: f16, |
93 | ) -> __m512h { |
94 | __m512h([ |
95 | e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19, |
96 | e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, |
97 | ]) |
98 | } |
99 | |
/// Copy the half-precision (16-bit) floating-point element from a to the lower element of dst, and zero
/// the upper 7 elements.
102 | /// |
103 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sh) |
104 | #[inline ] |
105 | #[target_feature (enable = "avx512fp16" )] |
106 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
107 | pub fn _mm_set_sh(a: f16) -> __m128h { |
108 | __m128h([a, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]) |
109 | } |
110 | |
111 | /// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst. |
112 | /// |
113 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_ph) |
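///
/// A small broadcast sketch (assumes a nightly toolchain with the `f16` type and a caller that
/// enables `avx512fp16`):
///
/// ```ignore
/// // Every one of the 8 lanes holds the same value.
/// let ones = _mm_set1_ph(1.0);
/// ```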
114 | #[inline ] |
115 | #[target_feature (enable = "avx512fp16" )] |
116 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
117 | pub fn _mm_set1_ph(a: f16) -> __m128h { |
    unsafe { transmute(f16x8::splat(a)) }
119 | } |
120 | |
121 | /// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst. |
122 | /// |
123 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_ph) |
124 | #[inline ] |
125 | #[target_feature (enable = "avx512fp16" )] |
126 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
127 | pub fn _mm256_set1_ph(a: f16) -> __m256h { |
    unsafe { transmute(f16x16::splat(a)) }
129 | } |
130 | |
131 | /// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst. |
132 | /// |
133 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_ph) |
134 | #[inline ] |
135 | #[target_feature (enable = "avx512fp16" )] |
136 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
137 | pub fn _mm512_set1_ph(a: f16) -> __m512h { |
    unsafe { transmute(f16x32::splat(a)) }
139 | } |
140 | |
141 | /// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order. |
142 | /// |
143 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_ph) |
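///
/// A minimal sketch of the lane order (assumes a nightly toolchain with the `f16` type and a
/// caller that enables `avx512fp16`):
///
/// ```ignore
/// // `_mm_setr_ph` takes its arguments in lane order (e0 first), so these two
/// // calls are expected to build the same vector.
/// let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
/// let b = _mm_set_ph(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0);
/// ```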
144 | #[inline ] |
145 | #[target_feature (enable = "avx512fp16" )] |
146 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
147 | pub fn _mm_setr_ph( |
148 | e0: f16, |
149 | e1: f16, |
150 | e2: f16, |
151 | e3: f16, |
152 | e4: f16, |
153 | e5: f16, |
154 | e6: f16, |
155 | e7: f16, |
156 | ) -> __m128h { |
157 | __m128h([e0, e1, e2, e3, e4, e5, e6, e7]) |
158 | } |
159 | |
160 | /// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order. |
161 | /// |
162 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_ph) |
163 | #[inline ] |
164 | #[target_feature (enable = "avx512fp16" )] |
165 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
166 | pub fn _mm256_setr_ph( |
167 | e0: f16, |
168 | e1: f16, |
169 | e2: f16, |
170 | e3: f16, |
171 | e4: f16, |
172 | e5: f16, |
173 | e6: f16, |
174 | e7: f16, |
175 | e8: f16, |
176 | e9: f16, |
177 | e10: f16, |
178 | e11: f16, |
179 | e12: f16, |
180 | e13: f16, |
181 | e14: f16, |
182 | e15: f16, |
183 | ) -> __m256h { |
184 | __m256h([ |
185 | e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, |
186 | ]) |
187 | } |
188 | |
189 | /// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order. |
190 | /// |
191 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setr_ph) |
192 | #[inline ] |
193 | #[target_feature (enable = "avx512fp16" )] |
194 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
195 | pub fn _mm512_setr_ph( |
196 | e0: f16, |
197 | e1: f16, |
198 | e2: f16, |
199 | e3: f16, |
200 | e4: f16, |
201 | e5: f16, |
202 | e6: f16, |
203 | e7: f16, |
204 | e8: f16, |
205 | e9: f16, |
206 | e10: f16, |
207 | e11: f16, |
208 | e12: f16, |
209 | e13: f16, |
210 | e14: f16, |
211 | e15: f16, |
212 | e16: f16, |
213 | e17: f16, |
214 | e18: f16, |
215 | e19: f16, |
216 | e20: f16, |
217 | e21: f16, |
218 | e22: f16, |
219 | e23: f16, |
220 | e24: f16, |
221 | e25: f16, |
222 | e26: f16, |
223 | e27: f16, |
224 | e28: f16, |
225 | e29: f16, |
226 | e30: f16, |
227 | e31: f16, |
228 | ) -> __m512h { |
229 | __m512h([ |
230 | e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19, |
231 | e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, |
232 | ]) |
233 | } |
234 | |
235 | /// Return vector of type __m128h with all elements set to zero. |
236 | /// |
237 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_ph) |
238 | #[inline ] |
239 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
240 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
241 | pub fn _mm_setzero_ph() -> __m128h { |
    unsafe { transmute(f16x8::ZERO) }
243 | } |
244 | |
245 | /// Return vector of type __m256h with all elements set to zero. |
246 | /// |
247 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setzero_ph) |
248 | #[inline ] |
249 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
250 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
251 | pub fn _mm256_setzero_ph() -> __m256h { |
    unsafe { transmute(f16x16::ZERO) }
253 | } |
254 | |
255 | /// Return vector of type __m512h with all elements set to zero. |
256 | /// |
257 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setzero_ph) |
258 | #[inline ] |
259 | #[target_feature (enable = "avx512fp16" )] |
260 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
261 | pub fn _mm512_setzero_ph() -> __m512h { |
    unsafe { transmute(f16x32::ZERO) }
263 | } |
264 | |
265 | /// Return vector of type `__m128h` with undefined elements. In practice, this returns the all-zero |
266 | /// vector. |
267 | /// |
268 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_ph) |
269 | #[inline ] |
270 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
271 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
272 | pub fn _mm_undefined_ph() -> __m128h { |
    unsafe { transmute(f16x8::ZERO) }
274 | } |
275 | |
276 | /// Return vector of type `__m256h` with undefined elements. In practice, this returns the all-zero |
277 | /// vector. |
278 | /// |
279 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_undefined_ph) |
280 | #[inline ] |
281 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
282 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
283 | pub fn _mm256_undefined_ph() -> __m256h { |
    unsafe { transmute(f16x16::ZERO) }
285 | } |
286 | |
287 | /// Return vector of type `__m512h` with undefined elements. In practice, this returns the all-zero |
288 | /// vector. |
289 | /// |
290 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ph) |
291 | #[inline ] |
292 | #[target_feature (enable = "avx512fp16" )] |
293 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
294 | pub fn _mm512_undefined_ph() -> __m512h { |
    unsafe { transmute(f16x32::ZERO) }
296 | } |
297 | |
298 | /// Cast vector of type `__m128d` to type `__m128h`. This intrinsic is only used for compilation and |
299 | /// does not generate any instructions, thus it has zero latency. |
300 | /// |
301 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ph) |
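///
/// A minimal sketch of the reinterpreting cast (assumes a nightly toolchain with the `f16` type
/// and a caller that enables `avx512fp16`; `_mm_set1_pd` is the usual SSE2 intrinsic):
///
/// ```ignore
/// // Only the type changes: the 128 bits are reinterpreted as 8 `f16` lanes,
/// // no value conversion is performed.
/// let d = _mm_set1_pd(1.0);
/// let h = _mm_castpd_ph(d);
/// ```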
302 | #[inline ] |
303 | #[target_feature (enable = "avx512fp16" )] |
304 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
305 | pub fn _mm_castpd_ph(a: __m128d) -> __m128h { |
    unsafe { transmute(a) }
307 | } |
308 | |
309 | /// Cast vector of type `__m256d` to type `__m256h`. This intrinsic is only used for compilation and |
310 | /// does not generate any instructions, thus it has zero latency. |
311 | /// |
312 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castpd_ph) |
313 | #[inline ] |
314 | #[target_feature (enable = "avx512fp16" )] |
315 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
316 | pub fn _mm256_castpd_ph(a: __m256d) -> __m256h { |
    unsafe { transmute(a) }
318 | } |
319 | |
320 | /// Cast vector of type `__m512d` to type `__m512h`. This intrinsic is only used for compilation and |
321 | /// does not generate any instructions, thus it has zero latency. |
322 | /// |
323 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castpd_ph) |
324 | #[inline ] |
325 | #[target_feature (enable = "avx512fp16" )] |
326 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
327 | pub fn _mm512_castpd_ph(a: __m512d) -> __m512h { |
    unsafe { transmute(a) }
329 | } |
330 | |
331 | /// Cast vector of type `__m128h` to type `__m128d`. This intrinsic is only used for compilation and |
332 | /// does not generate any instructions, thus it has zero latency. |
333 | /// |
334 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_pd) |
335 | #[inline ] |
336 | #[target_feature (enable = "avx512fp16" )] |
337 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
338 | pub fn _mm_castph_pd(a: __m128h) -> __m128d { |
    unsafe { transmute(a) }
340 | } |
341 | |
342 | /// Cast vector of type `__m256h` to type `__m256d`. This intrinsic is only used for compilation and |
343 | /// does not generate any instructions, thus it has zero latency. |
344 | /// |
345 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_pd) |
346 | #[inline ] |
347 | #[target_feature (enable = "avx512fp16" )] |
348 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
349 | pub fn _mm256_castph_pd(a: __m256h) -> __m256d { |
    unsafe { transmute(a) }
351 | } |
352 | |
353 | /// Cast vector of type `__m512h` to type `__m512d`. This intrinsic is only used for compilation and |
354 | /// does not generate any instructions, thus it has zero latency. |
355 | /// |
356 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_pd) |
357 | #[inline ] |
358 | #[target_feature (enable = "avx512fp16" )] |
359 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
360 | pub fn _mm512_castph_pd(a: __m512h) -> __m512d { |
    unsafe { transmute(a) }
362 | } |
363 | |
364 | /// Cast vector of type `__m128` to type `__m128h`. This intrinsic is only used for compilation and |
365 | /// does not generate any instructions, thus it has zero latency. |
366 | /// |
367 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_ph) |
368 | #[inline ] |
369 | #[target_feature (enable = "avx512fp16" )] |
370 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
371 | pub fn _mm_castps_ph(a: __m128) -> __m128h { |
    unsafe { transmute(a) }
373 | } |
374 | |
375 | /// Cast vector of type `__m256` to type `__m256h`. This intrinsic is only used for compilation and |
376 | /// does not generate any instructions, thus it has zero latency. |
377 | /// |
378 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castps_ph) |
379 | #[inline ] |
380 | #[target_feature (enable = "avx512fp16" )] |
381 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
382 | pub fn _mm256_castps_ph(a: __m256) -> __m256h { |
    unsafe { transmute(a) }
384 | } |
385 | |
386 | /// Cast vector of type `__m512` to type `__m512h`. This intrinsic is only used for compilation and |
387 | /// does not generate any instructions, thus it has zero latency. |
388 | /// |
389 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castps_ph) |
390 | #[inline ] |
391 | #[target_feature (enable = "avx512fp16" )] |
392 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
393 | pub fn _mm512_castps_ph(a: __m512) -> __m512h { |
    unsafe { transmute(a) }
395 | } |
396 | |
397 | /// Cast vector of type `__m128h` to type `__m128`. This intrinsic is only used for compilation and |
398 | /// does not generate any instructions, thus it has zero latency. |
399 | /// |
400 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_ps) |
401 | #[inline ] |
402 | #[target_feature (enable = "avx512fp16" )] |
403 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
404 | pub fn _mm_castph_ps(a: __m128h) -> __m128 { |
    unsafe { transmute(a) }
406 | } |
407 | |
408 | /// Cast vector of type `__m256h` to type `__m256`. This intrinsic is only used for compilation and |
409 | /// does not generate any instructions, thus it has zero latency. |
410 | /// |
411 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_ps) |
412 | #[inline ] |
413 | #[target_feature (enable = "avx512fp16" )] |
414 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
415 | pub fn _mm256_castph_ps(a: __m256h) -> __m256 { |
    unsafe { transmute(a) }
417 | } |
418 | |
419 | /// Cast vector of type `__m512h` to type `__m512`. This intrinsic is only used for compilation and |
420 | /// does not generate any instructions, thus it has zero latency. |
421 | /// |
422 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_ps) |
423 | #[inline ] |
424 | #[target_feature (enable = "avx512fp16" )] |
425 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
426 | pub fn _mm512_castph_ps(a: __m512h) -> __m512 { |
    unsafe { transmute(a) }
428 | } |
429 | |
430 | /// Cast vector of type `__m128i` to type `__m128h`. This intrinsic is only used for compilation and |
431 | /// does not generate any instructions, thus it has zero latency. |
432 | /// |
433 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_ph) |
434 | #[inline ] |
435 | #[target_feature (enable = "avx512fp16" )] |
436 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
437 | pub fn _mm_castsi128_ph(a: __m128i) -> __m128h { |
    unsafe { transmute(a) }
439 | } |
440 | |
441 | /// Cast vector of type `__m256i` to type `__m256h`. This intrinsic is only used for compilation and |
442 | /// does not generate any instructions, thus it has zero latency. |
443 | /// |
444 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castsi256_ph) |
445 | #[inline ] |
446 | #[target_feature (enable = "avx512fp16" )] |
447 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
448 | pub fn _mm256_castsi256_ph(a: __m256i) -> __m256h { |
    unsafe { transmute(a) }
450 | } |
451 | |
452 | /// Cast vector of type `__m512i` to type `__m512h`. This intrinsic is only used for compilation and |
453 | /// does not generate any instructions, thus it has zero latency. |
454 | /// |
455 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castsi512_ph) |
456 | #[inline ] |
457 | #[target_feature (enable = "avx512fp16" )] |
458 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
459 | pub fn _mm512_castsi512_ph(a: __m512i) -> __m512h { |
    unsafe { transmute(a) }
461 | } |
462 | |
463 | /// Cast vector of type `__m128h` to type `__m128i`. This intrinsic is only used for compilation and |
464 | /// does not generate any instructions, thus it has zero latency. |
465 | /// |
466 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_si128) |
467 | #[inline ] |
468 | #[target_feature (enable = "avx512fp16" )] |
469 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
470 | pub fn _mm_castph_si128(a: __m128h) -> __m128i { |
    unsafe { transmute(a) }
472 | } |
473 | |
474 | /// Cast vector of type `__m256h` to type `__m256i`. This intrinsic is only used for compilation and |
475 | /// does not generate any instructions, thus it has zero latency. |
476 | /// |
477 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_si256) |
478 | #[inline ] |
479 | #[target_feature (enable = "avx512fp16" )] |
480 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
481 | pub fn _mm256_castph_si256(a: __m256h) -> __m256i { |
    unsafe { transmute(a) }
483 | } |
484 | |
485 | /// Cast vector of type `__m512h` to type `__m512i`. This intrinsic is only used for compilation and |
486 | /// does not generate any instructions, thus it has zero latency. |
487 | /// |
488 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_si512) |
489 | #[inline ] |
490 | #[target_feature (enable = "avx512fp16" )] |
491 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
492 | pub fn _mm512_castph_si512(a: __m512h) -> __m512i { |
    unsafe { transmute(a) }
494 | } |
495 | |
496 | /// Cast vector of type `__m256h` to type `__m128h`. This intrinsic is only used for compilation and |
497 | /// does not generate any instructions, thus it has zero latency. |
498 | /// |
499 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph256_ph128) |
500 | #[inline ] |
501 | #[target_feature (enable = "avx512fp16" )] |
502 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
503 | pub fn _mm256_castph256_ph128(a: __m256h) -> __m128h { |
504 | unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } |
505 | } |
506 | |
507 | /// Cast vector of type `__m512h` to type `__m128h`. This intrinsic is only used for compilation and |
508 | /// does not generate any instructions, thus it has zero latency. |
509 | /// |
510 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph512_ph128) |
511 | #[inline ] |
512 | #[target_feature (enable = "avx512fp16" )] |
513 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
514 | pub fn _mm512_castph512_ph128(a: __m512h) -> __m128h { |
515 | unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } |
516 | } |
517 | |
518 | /// Cast vector of type `__m512h` to type `__m256h`. This intrinsic is only used for compilation and |
519 | /// does not generate any instructions, thus it has zero latency. |
520 | /// |
521 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph512_ph256) |
522 | #[inline ] |
523 | #[target_feature (enable = "avx512fp16" )] |
524 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
525 | pub fn _mm512_castph512_ph256(a: __m512h) -> __m256h { |
526 | unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } |
527 | } |
528 | |
529 | /// Cast vector of type `__m128h` to type `__m256h`. The upper 8 elements of the result are undefined. |
530 | /// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction, |
531 | /// but most of the time it does not generate any instructions. |
532 | /// |
533 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph128_ph256) |
534 | #[inline ] |
535 | #[target_feature (enable = "avx512fp16" )] |
536 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
537 | pub fn _mm256_castph128_ph256(a: __m128h) -> __m256h { |
538 | unsafe { |
539 | simd_shuffle!( |
540 | a, |
541 | _mm_undefined_ph(), |
542 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8] |
543 | ) |
544 | } |
545 | } |
546 | |
547 | /// Cast vector of type `__m128h` to type `__m512h`. The upper 24 elements of the result are undefined. |
548 | /// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction, |
549 | /// but most of the time it does not generate any instructions. |
550 | /// |
551 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph128_ph512) |
552 | #[inline ] |
553 | #[target_feature (enable = "avx512fp16" )] |
554 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
555 | pub fn _mm512_castph128_ph512(a: __m128h) -> __m512h { |
556 | unsafe { |
557 | simd_shuffle!( |
558 | a, |
559 | _mm_undefined_ph(), |
560 | [ |
561 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, |
562 | 8, 8, 8, 8 |
563 | ] |
564 | ) |
565 | } |
566 | } |
567 | |
568 | /// Cast vector of type `__m256h` to type `__m512h`. The upper 16 elements of the result are undefined. |
569 | /// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction, |
570 | /// but most of the time it does not generate any instructions. |
571 | /// |
572 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph256_ph512) |
573 | #[inline ] |
574 | #[target_feature (enable = "avx512fp16" )] |
575 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
576 | pub fn _mm512_castph256_ph512(a: __m256h) -> __m512h { |
577 | unsafe { |
578 | simd_shuffle!( |
579 | a, |
580 | _mm256_undefined_ph(), |
581 | [ |
582 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, |
583 | 16, 16, 16, 16, 16, 16, 16, 16, 16 |
584 | ] |
585 | ) |
586 | } |
587 | } |
588 | |
/// Cast vector of type `__m128h` to type `__m256h`. The upper 8 elements of the result are zeroed.
/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate
/// any instructions.
592 | /// |
593 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zextph128_ph256) |
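///
/// A minimal usage sketch (assumes a nightly toolchain with the `f16` type and a caller that
/// enables `avx512fp16`):
///
/// ```ignore
/// let lo = _mm_set1_ph(2.0);
/// // The low 8 lanes come from `lo`; the high 8 lanes are guaranteed to be zero,
/// // unlike `_mm256_castph128_ph256`, whose upper lanes are undefined.
/// let wide = _mm256_zextph128_ph256(lo);
/// ```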
594 | #[inline ] |
595 | #[target_feature (enable = "avx512fp16" )] |
596 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
597 | pub fn _mm256_zextph128_ph256(a: __m128h) -> __m256h { |
598 | unsafe { |
599 | simd_shuffle!( |
600 | a, |
601 | _mm_setzero_ph(), |
602 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8] |
603 | ) |
604 | } |
605 | } |
606 | |
607 | /// Cast vector of type `__m256h` to type `__m512h`. The upper 16 elements of the result are zeroed. |
608 | /// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate |
609 | /// any instructions. |
610 | /// |
611 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextph256_ph512) |
612 | #[inline ] |
613 | #[target_feature (enable = "avx512fp16" )] |
614 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
615 | pub fn _mm512_zextph256_ph512(a: __m256h) -> __m512h { |
616 | unsafe { |
617 | simd_shuffle!( |
618 | a, |
619 | _mm256_setzero_ph(), |
620 | [ |
621 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, |
622 | 16, 16, 16, 16, 16, 16, 16, 16, 16 |
623 | ] |
624 | ) |
625 | } |
626 | } |
627 | |
628 | /// Cast vector of type `__m128h` to type `__m512h`. The upper 24 elements of the result are zeroed. |
629 | /// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate |
630 | /// any instructions. |
631 | /// |
632 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextph128_ph512) |
633 | #[inline ] |
634 | #[target_feature (enable = "avx512fp16" )] |
635 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
636 | pub fn _mm512_zextph128_ph512(a: __m128h) -> __m512h { |
637 | unsafe { |
638 | simd_shuffle!( |
639 | a, |
640 | _mm_setzero_ph(), |
641 | [ |
642 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, |
643 | 8, 8, 8, 8 |
644 | ] |
645 | ) |
646 | } |
647 | } |
648 | |
649 | macro_rules! cmp_asm { // FIXME: use LLVM intrinsics |
650 | ($mask_type: ty, $reg: ident, $a: expr, $b: expr) => {{ |
651 | let dst: $mask_type; |
652 | asm!( |
653 | "vcmpph {k}, {a}, {b}, {imm8}" , |
654 | k = lateout(kreg) dst, |
655 | a = in($reg) $a, |
656 | b = in($reg) $b, |
657 | imm8 = const IMM5, |
658 | options(pure, nomem, nostack) |
659 | ); |
660 | dst |
661 | }}; |
662 | ($mask_type: ty, $mask: expr, $reg: ident, $a: expr, $b: expr) => {{ |
663 | let dst: $mask_type; |
664 | asm!( |
665 | "vcmpph {k} {{ {mask} }}, {a}, {b}, {imm8}" , |
666 | k = lateout(kreg) dst, |
667 | mask = in(kreg) $mask, |
668 | a = in($reg) $a, |
669 | b = in($reg) $b, |
670 | imm8 = const IMM5, |
671 | options(pure, nomem, nostack) |
672 | ); |
673 | dst |
674 | }}; |
675 | } |
676 | |
677 | /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison |
678 | /// operand specified by imm8, and store the results in mask vector k. |
679 | /// |
680 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ph_mask) |
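///
/// A minimal sketch of a less-than comparison (assumes a nightly toolchain with the `f16` type
/// and a caller that enables `avx512fp16` and `avx512vl`):
///
/// ```ignore
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0);
/// // Bit i of the mask is set where a[i] < b[i].
/// let m = _mm_cmp_ph_mask::<_CMP_LT_OS>(a, b);
/// assert_eq!(m, 0b1100_1100);
/// ```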
681 | #[inline ] |
682 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
683 | #[rustc_legacy_const_generics (2)] |
684 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
685 | pub fn _mm_cmp_ph_mask<const IMM5: i32>(a: __m128h, b: __m128h) -> __mmask8 { |
686 | unsafe { |
687 | static_assert_uimm_bits!(IMM5, 5); |
688 | cmp_asm!(__mmask8, xmm_reg, a, b) |
689 | } |
690 | } |
691 | |
692 | /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison |
/// operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are
694 | /// zeroed out when the corresponding mask bit is not set). |
695 | /// |
696 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ph_mask) |
697 | #[inline ] |
698 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
699 | #[rustc_legacy_const_generics (3)] |
700 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
701 | pub fn _mm_mask_cmp_ph_mask<const IMM5: i32>(k1: __mmask8, a: __m128h, b: __m128h) -> __mmask8 { |
702 | unsafe { |
703 | static_assert_uimm_bits!(IMM5, 5); |
704 | cmp_asm!(__mmask8, k1, xmm_reg, a, b) |
705 | } |
706 | } |
707 | |
708 | /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison |
709 | /// operand specified by imm8, and store the results in mask vector k. |
710 | /// |
711 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_ph_mask) |
712 | #[inline ] |
713 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
714 | #[rustc_legacy_const_generics (2)] |
715 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
716 | pub fn _mm256_cmp_ph_mask<const IMM5: i32>(a: __m256h, b: __m256h) -> __mmask16 { |
717 | unsafe { |
718 | static_assert_uimm_bits!(IMM5, 5); |
719 | cmp_asm!(__mmask16, ymm_reg, a, b) |
720 | } |
721 | } |
722 | |
723 | /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison |
/// operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are
725 | /// zeroed out when the corresponding mask bit is not set). |
726 | /// |
727 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_ph_mask) |
728 | #[inline ] |
729 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
730 | #[rustc_legacy_const_generics (3)] |
731 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
732 | pub fn _mm256_mask_cmp_ph_mask<const IMM5: i32>( |
733 | k1: __mmask16, |
734 | a: __m256h, |
735 | b: __m256h, |
736 | ) -> __mmask16 { |
737 | unsafe { |
738 | static_assert_uimm_bits!(IMM5, 5); |
739 | cmp_asm!(__mmask16, k1, ymm_reg, a, b) |
740 | } |
741 | } |
742 | |
743 | /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison |
744 | /// operand specified by imm8, and store the results in mask vector k. |
745 | /// |
746 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_ph_mask) |
747 | #[inline ] |
748 | #[target_feature (enable = "avx512fp16" )] |
749 | #[rustc_legacy_const_generics (2)] |
750 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
751 | pub fn _mm512_cmp_ph_mask<const IMM5: i32>(a: __m512h, b: __m512h) -> __mmask32 { |
752 | unsafe { |
753 | static_assert_uimm_bits!(IMM5, 5); |
754 | cmp_asm!(__mmask32, zmm_reg, a, b) |
755 | } |
756 | } |
757 | |
758 | /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison |
/// operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are
760 | /// zeroed out when the corresponding mask bit is not set). |
761 | /// |
762 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_ph_mask) |
763 | #[inline ] |
764 | #[target_feature (enable = "avx512fp16" )] |
765 | #[rustc_legacy_const_generics (3)] |
766 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
767 | pub fn _mm512_mask_cmp_ph_mask<const IMM5: i32>( |
768 | k1: __mmask32, |
769 | a: __m512h, |
770 | b: __m512h, |
771 | ) -> __mmask32 { |
772 | unsafe { |
773 | static_assert_uimm_bits!(IMM5, 5); |
774 | cmp_asm!(__mmask32, k1, zmm_reg, a, b) |
775 | } |
776 | } |
777 | |
778 | /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison |
779 | /// operand specified by imm8, and store the results in mask vector k. |
780 | /// |
781 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
782 | /// |
783 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_round_ph_mask) |
784 | #[inline ] |
785 | #[target_feature (enable = "avx512fp16" )] |
786 | #[rustc_legacy_const_generics (2, 3)] |
787 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
788 | pub fn _mm512_cmp_round_ph_mask<const IMM5: i32, const SAE: i32>( |
789 | a: __m512h, |
790 | b: __m512h, |
791 | ) -> __mmask32 { |
792 | unsafe { |
793 | static_assert_uimm_bits!(IMM5, 5); |
794 | static_assert_sae!(SAE); |
795 | if SAE == _MM_FROUND_NO_EXC { |
796 | let dst: __mmask32; |
797 | asm!( |
                "vcmpph {k}, {a}, {b}, {{sae}}, {imm8}",
799 | k = lateout(kreg) dst, |
800 | a = in(zmm_reg) a, |
801 | b = in(zmm_reg) b, |
802 | imm8 = const IMM5, |
803 | options(pure, nomem, nostack) |
804 | ); |
805 | dst |
806 | } else { |
807 | cmp_asm!(__mmask32, zmm_reg, a, b) |
808 | } |
809 | } |
810 | } |
811 | |
812 | /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison |
/// operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are
814 | /// zeroed out when the corresponding mask bit is not set). |
815 | /// |
816 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
817 | /// |
818 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_round_ph_mask) |
819 | #[inline ] |
820 | #[target_feature (enable = "avx512fp16" )] |
821 | #[rustc_legacy_const_generics (3, 4)] |
822 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
823 | pub fn _mm512_mask_cmp_round_ph_mask<const IMM5: i32, const SAE: i32>( |
824 | k1: __mmask32, |
825 | a: __m512h, |
826 | b: __m512h, |
827 | ) -> __mmask32 { |
828 | unsafe { |
829 | static_assert_uimm_bits!(IMM5, 5); |
830 | static_assert_sae!(SAE); |
831 | if SAE == _MM_FROUND_NO_EXC { |
832 | let dst: __mmask32; |
833 | asm!( |
                "vcmpph {k} {{{k1}}}, {a}, {b}, {{sae}}, {imm8}",
835 | k = lateout(kreg) dst, |
836 | k1 = in(kreg) k1, |
837 | a = in(zmm_reg) a, |
838 | b = in(zmm_reg) b, |
839 | imm8 = const IMM5, |
840 | options(pure, nomem, nostack) |
841 | ); |
842 | dst |
843 | } else { |
844 | cmp_asm!(__mmask32, k1, zmm_reg, a, b) |
845 | } |
846 | } |
847 | } |
848 | |
849 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison |
850 | /// operand specified by imm8, and store the result in mask vector k. Exceptions can be suppressed by |
851 | /// passing _MM_FROUND_NO_EXC in the sae parameter. |
852 | /// |
853 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_sh_mask) |
854 | #[inline ] |
855 | #[target_feature (enable = "avx512fp16" )] |
856 | #[rustc_legacy_const_generics (2, 3)] |
857 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
858 | pub fn _mm_cmp_round_sh_mask<const IMM5: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __mmask8 { |
859 | static_assert_uimm_bits!(IMM5, 5); |
860 | static_assert_sae!(SAE); |
    _mm_mask_cmp_round_sh_mask::<IMM5, SAE>(0xff, a, b)
862 | } |
863 | |
864 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison |
865 | /// operand specified by imm8, and store the result in mask vector k using zeromask k1. Exceptions can be |
866 | /// suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
867 | /// |
868 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_sh_mask) |
869 | #[inline ] |
870 | #[target_feature (enable = "avx512fp16" )] |
871 | #[rustc_legacy_const_generics (3, 4)] |
872 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
873 | pub fn _mm_mask_cmp_round_sh_mask<const IMM5: i32, const SAE: i32>( |
874 | k1: __mmask8, |
875 | a: __m128h, |
876 | b: __m128h, |
877 | ) -> __mmask8 { |
878 | unsafe { |
879 | static_assert_uimm_bits!(IMM5, 5); |
880 | static_assert_sae!(SAE); |
        vcmpsh(a, b, IMM5, k1, SAE)
882 | } |
883 | } |
884 | |
885 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison |
886 | /// operand specified by imm8, and store the result in mask vector k. |
887 | /// |
888 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sh_mask) |
889 | #[inline ] |
890 | #[target_feature (enable = "avx512fp16" )] |
891 | #[rustc_legacy_const_generics (2)] |
892 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
893 | pub fn _mm_cmp_sh_mask<const IMM5: i32>(a: __m128h, b: __m128h) -> __mmask8 { |
894 | static_assert_uimm_bits!(IMM5, 5); |
895 | _mm_cmp_round_sh_mask::<IMM5, _MM_FROUND_CUR_DIRECTION>(a, b) |
896 | } |
897 | |
898 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison |
899 | /// operand specified by imm8, and store the result in mask vector k using zeromask k1. |
900 | /// |
901 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_sh_mask) |
902 | #[inline ] |
903 | #[target_feature (enable = "avx512fp16" )] |
904 | #[rustc_legacy_const_generics (3)] |
905 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
906 | pub fn _mm_mask_cmp_sh_mask<const IMM5: i32>(k1: __mmask8, a: __m128h, b: __m128h) -> __mmask8 { |
907 | static_assert_uimm_bits!(IMM5, 5); |
908 | _mm_mask_cmp_round_sh_mask::<IMM5, _MM_FROUND_CUR_DIRECTION>(k1, a, b) |
909 | } |
910 | |
911 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison |
912 | /// operand specified by imm8, and return the boolean result (0 or 1). |
913 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
914 | /// |
915 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_round_sh) |
916 | #[inline ] |
917 | #[target_feature (enable = "avx512fp16" )] |
918 | #[rustc_legacy_const_generics (2, 3)] |
919 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
920 | pub fn _mm_comi_round_sh<const IMM5: i32, const SAE: i32>(a: __m128h, b: __m128h) -> i32 { |
921 | unsafe { |
922 | static_assert_uimm_bits!(IMM5, 5); |
923 | static_assert_sae!(SAE); |
924 | vcomish(a, b, IMM5, SAE) |
925 | } |
926 | } |
927 | |
928 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison |
929 | /// operand specified by imm8, and return the boolean result (0 or 1). |
930 | /// |
931 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_sh) |
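///
/// A minimal usage sketch (assumes a nightly toolchain with the `f16` type and a caller that
/// enables `avx512fp16`):
///
/// ```ignore
/// let a = _mm_set_sh(1.0);
/// let b = _mm_set_sh(1.0);
/// // Only lane 0 is compared; the result is 1 if the predicate holds, 0 otherwise.
/// assert_eq!(_mm_comi_sh::<_CMP_EQ_OS>(a, b), 1);
/// ```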
932 | #[inline ] |
933 | #[target_feature (enable = "avx512fp16" )] |
934 | #[rustc_legacy_const_generics (2)] |
935 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
936 | pub fn _mm_comi_sh<const IMM5: i32>(a: __m128h, b: __m128h) -> i32 { |
937 | static_assert_uimm_bits!(IMM5, 5); |
938 | _mm_comi_round_sh::<IMM5, _MM_FROUND_CUR_DIRECTION>(a, b) |
939 | } |
940 | |
941 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b for equality, and return |
942 | /// the boolean result (0 or 1). |
943 | /// |
944 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sh) |
945 | #[inline ] |
946 | #[target_feature (enable = "avx512fp16" )] |
947 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
948 | pub fn _mm_comieq_sh(a: __m128h, b: __m128h) -> i32 { |
949 | _mm_comi_sh::<_CMP_EQ_OS>(a, b) |
950 | } |
951 | |
952 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than-or-equal, |
953 | /// and return the boolean result (0 or 1). |
954 | /// |
955 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sh) |
956 | #[inline ] |
957 | #[target_feature (enable = "avx512fp16" )] |
958 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
959 | pub fn _mm_comige_sh(a: __m128h, b: __m128h) -> i32 { |
960 | _mm_comi_sh::<_CMP_GE_OS>(a, b) |
961 | } |
962 | |
963 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than, and return |
964 | /// the boolean result (0 or 1). |
965 | /// |
966 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sh) |
967 | #[inline ] |
968 | #[target_feature (enable = "avx512fp16" )] |
969 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
970 | pub fn _mm_comigt_sh(a: __m128h, b: __m128h) -> i32 { |
971 | _mm_comi_sh::<_CMP_GT_OS>(a, b) |
972 | } |
973 | |
974 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than-or-equal, and |
975 | /// return the boolean result (0 or 1). |
976 | /// |
977 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sh) |
978 | #[inline ] |
979 | #[target_feature (enable = "avx512fp16" )] |
980 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
981 | pub fn _mm_comile_sh(a: __m128h, b: __m128h) -> i32 { |
982 | _mm_comi_sh::<_CMP_LE_OS>(a, b) |
983 | } |
984 | |
985 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than, and return |
986 | /// the boolean result (0 or 1). |
987 | /// |
988 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sh) |
989 | #[inline ] |
990 | #[target_feature (enable = "avx512fp16" )] |
991 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
992 | pub fn _mm_comilt_sh(a: __m128h, b: __m128h) -> i32 { |
993 | _mm_comi_sh::<_CMP_LT_OS>(a, b) |
994 | } |
995 | |
996 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b for not-equal, and return |
997 | /// the boolean result (0 or 1). |
998 | /// |
999 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sh) |
1000 | #[inline ] |
1001 | #[target_feature (enable = "avx512fp16" )] |
1002 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1003 | pub fn _mm_comineq_sh(a: __m128h, b: __m128h) -> i32 { |
1004 | _mm_comi_sh::<_CMP_NEQ_OS>(a, b) |
1005 | } |
1006 | |
1007 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b for equality, and |
1008 | /// return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. |
1009 | /// |
1010 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sh) |
1011 | #[inline ] |
1012 | #[target_feature (enable = "avx512fp16" )] |
1013 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1014 | pub fn _mm_ucomieq_sh(a: __m128h, b: __m128h) -> i32 { |
1015 | _mm_comi_sh::<_CMP_EQ_OQ>(a, b) |
1016 | } |
1017 | |
1018 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than-or-equal, |
1019 | /// and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. |
1020 | /// |
1021 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sh) |
1022 | #[inline ] |
1023 | #[target_feature (enable = "avx512fp16" )] |
1024 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1025 | pub fn _mm_ucomige_sh(a: __m128h, b: __m128h) -> i32 { |
1026 | _mm_comi_sh::<_CMP_GE_OQ>(a, b) |
1027 | } |
1028 | |
1029 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than, and return |
1030 | /// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. |
1031 | /// |
1032 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sh) |
1033 | #[inline ] |
1034 | #[target_feature (enable = "avx512fp16" )] |
1035 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1036 | pub fn _mm_ucomigt_sh(a: __m128h, b: __m128h) -> i32 { |
1037 | _mm_comi_sh::<_CMP_GT_OQ>(a, b) |
1038 | } |
1039 | |
1040 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than-or-equal, and |
1041 | /// return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. |
1042 | /// |
1043 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sh) |
1044 | #[inline ] |
1045 | #[target_feature (enable = "avx512fp16" )] |
1046 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1047 | pub fn _mm_ucomile_sh(a: __m128h, b: __m128h) -> i32 { |
1048 | _mm_comi_sh::<_CMP_LE_OQ>(a, b) |
1049 | } |
1050 | |
1051 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than, and return |
1052 | /// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. |
1053 | /// |
1054 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sh) |
1055 | #[inline ] |
1056 | #[target_feature (enable = "avx512fp16" )] |
1057 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1058 | pub fn _mm_ucomilt_sh(a: __m128h, b: __m128h) -> i32 { |
1059 | _mm_comi_sh::<_CMP_LT_OQ>(a, b) |
1060 | } |
1061 | |
1062 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b for not-equal, and return |
1063 | /// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. |
1064 | /// |
1065 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sh) |
1066 | #[inline ] |
1067 | #[target_feature (enable = "avx512fp16" )] |
1068 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1069 | pub fn _mm_ucomineq_sh(a: __m128h, b: __m128h) -> i32 { |
1070 | _mm_comi_sh::<_CMP_NEQ_OQ>(a, b) |
1071 | } |
1072 | |
1073 | /// Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into |
1074 | /// a new vector. The address must be aligned to 16 bytes or a general-protection exception may be generated. |
1075 | /// |
1076 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_ph) |
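///
/// A minimal sketch of an aligned load (assumes a nightly toolchain with the `f16` type and a
/// caller that enables `avx512fp16` and `avx512vl`; `Aligned` is a hypothetical helper used
/// only to satisfy the 16-byte alignment requirement):
///
/// ```ignore
/// #[repr(align(16))]
/// struct Aligned([f16; 8]);
///
/// let data = Aligned([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]);
/// // `data` is 16-byte aligned, so the aligned load is permitted.
/// let v = unsafe { _mm_load_ph(data.0.as_ptr()) };
/// ```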
1077 | #[inline ] |
1078 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1079 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1080 | pub unsafe fn _mm_load_ph(mem_addr: *const f16) -> __m128h { |
1081 | *mem_addr.cast() |
1082 | } |
1083 | |
1084 | /// Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into |
1085 | /// a new vector. The address must be aligned to 32 bytes or a general-protection exception may be generated. |
1086 | /// |
1087 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_load_ph) |
1088 | #[inline ] |
1089 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1090 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1091 | pub unsafe fn _mm256_load_ph(mem_addr: *const f16) -> __m256h { |
1092 | *mem_addr.cast() |
1093 | } |
1094 | |
1095 | /// Load 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from memory into |
1096 | /// a new vector. The address must be aligned to 64 bytes or a general-protection exception may be generated. |
1097 | /// |
1098 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_load_ph) |
1099 | #[inline ] |
1100 | #[target_feature (enable = "avx512fp16" )] |
1101 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1102 | pub unsafe fn _mm512_load_ph(mem_addr: *const f16) -> __m512h { |
1103 | *mem_addr.cast() |
1104 | } |
1105 | |
1106 | /// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector, |
1107 | /// and zero the upper elements |
1108 | /// |
1109 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_sh) |
1110 | #[inline ] |
1111 | #[target_feature (enable = "avx512fp16" )] |
1112 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1113 | pub unsafe fn _mm_load_sh(mem_addr: *const f16) -> __m128h { |
1114 | _mm_set_sh(*mem_addr) |
1115 | } |
1116 | |
1117 | /// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector |
1118 | /// using writemask k (the element is copied from src when mask bit 0 is not set), and zero the upper elements. |
1119 | /// |
1120 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sh) |
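///
/// A minimal usage sketch (assumes a nightly toolchain with the `f16` type and a caller that
/// enables `avx512fp16`):
///
/// ```ignore
/// let src = _mm_set1_ph(5.0);
/// let x: f16 = 9.0;
/// // Mask bit 0 is clear, so lane 0 is taken from `src` rather than from memory;
/// // the upper 7 lanes are zeroed either way.
/// let v = unsafe { _mm_mask_load_sh(src, 0, &x) };
/// ```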
1121 | #[inline ] |
1122 | #[target_feature (enable = "avx512fp16" )] |
1123 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1124 | pub unsafe fn _mm_mask_load_sh(src: __m128h, k: __mmask8, mem_addr: *const f16) -> __m128h { |
1125 | let mut dst: __m128h = src; |
1126 | asm!( |
1127 | vpl!("vmovsh {dst}{{{k}}}" ), |
1128 | dst = inout(xmm_reg) dst, |
1129 | k = in(kreg) k, |
1130 | p = in(reg) mem_addr, |
1131 | options(pure, readonly, nostack, preserves_flags) |
1132 | ); |
1133 | dst |
1134 | } |
1135 | |
1136 | /// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector |
1137 | /// using zeromask k (the element is zeroed out when mask bit 0 is not set), and zero the upper elements. |
1138 | /// |
1139 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sh) |
1140 | #[inline ] |
1141 | #[target_feature (enable = "avx512fp16" )] |
1142 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1143 | pub unsafe fn _mm_maskz_load_sh(k: __mmask8, mem_addr: *const f16) -> __m128h { |
1144 | let mut dst: __m128h; |
1145 | asm!( |
1146 | vpl!("vmovsh {dst}{{{k}}}{{z}}" ), |
1147 | dst = out(xmm_reg) dst, |
1148 | k = in(kreg) k, |
1149 | p = in(reg) mem_addr, |
1150 | options(pure, readonly, nostack, preserves_flags) |
1151 | ); |
1152 | dst |
1153 | } |
1154 | |
1155 | /// Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into |
1156 | /// a new vector. The address does not need to be aligned to any particular boundary. |
1157 | /// |
1158 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_ph) |
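///
/// A minimal sketch of an unaligned load (assumes a nightly toolchain with the `f16` type and a
/// caller that enables `avx512fp16` and `avx512vl`):
///
/// ```ignore
/// let src: [f16; 8] = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0];
/// // No particular alignment is required for the unaligned load.
/// let v = unsafe { _mm_loadu_ph(src.as_ptr()) };
/// ```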
1159 | #[inline ] |
1160 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1161 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1162 | pub unsafe fn _mm_loadu_ph(mem_addr: *const f16) -> __m128h { |
    ptr::read_unaligned(mem_addr.cast())
1164 | } |
1165 | |
1166 | /// Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into |
1167 | /// a new vector. The address does not need to be aligned to any particular boundary. |
1168 | /// |
1169 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu_ph) |
1170 | #[inline ] |
1171 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1172 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1173 | pub unsafe fn _mm256_loadu_ph(mem_addr: *const f16) -> __m256h { |
    ptr::read_unaligned(mem_addr.cast())
1175 | } |
1176 | |
1177 | /// Load 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from memory into |
1178 | /// a new vector. The address does not need to be aligned to any particular boundary. |
1179 | /// |
1180 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ph) |
1181 | #[inline ] |
1182 | #[target_feature (enable = "avx512fp16" )] |
1183 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1184 | pub unsafe fn _mm512_loadu_ph(mem_addr: *const f16) -> __m512h { |
    ptr::read_unaligned(mem_addr.cast())
1186 | } |
1187 | |
1188 | /// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst |
1189 | /// using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper |
1190 | /// 7 packed elements from a to the upper elements of dst. |
1191 | /// |
1192 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_move_sh) |
1193 | #[inline ] |
1194 | #[target_feature (enable = "avx512fp16" )] |
1195 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1196 | pub fn _mm_mask_move_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1197 | unsafe { |
1198 | let mut mov: f16 = simd_extract!(src, 0); |
1199 | if (k & 1) != 0 { |
1200 | mov = simd_extract!(b, 0); |
1201 | } |
1202 | simd_insert!(a, 0, mov) |
1203 | } |
1204 | } |
1205 | |
1206 | /// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst |
1207 | /// using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed |
1208 | /// elements from a to the upper elements of dst. |
1209 | /// |
1210 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_move_sh) |
1211 | #[inline ] |
1212 | #[target_feature (enable = "avx512fp16" )] |
1213 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1214 | pub fn _mm_maskz_move_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1215 | unsafe { |
1216 | let mut mov: f16 = 0.; |
1217 | if (k & 1) != 0 { |
1218 | mov = simd_extract!(b, 0); |
1219 | } |
1220 | simd_insert!(a, 0, mov) |
1221 | } |
1222 | } |
1223 | |
1224 | /// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst, |
1225 | /// and copy the upper 7 packed elements from a to the upper elements of dst. |
1226 | /// |
1227 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sh) |
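///
/// A minimal usage sketch (assumes a nightly toolchain with the `f16` type and a caller that
/// enables `avx512fp16`):
///
/// ```ignore
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set1_ph(2.0);
/// // Lane 0 of the result comes from `b` (2.0); lanes 1..=7 come from `a` (1.0).
/// let r = _mm_move_sh(a, b);
/// ```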
1228 | #[inline ] |
1229 | #[target_feature (enable = "avx512fp16" )] |
1230 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1231 | pub fn _mm_move_sh(a: __m128h, b: __m128h) -> __m128h { |
1232 | unsafe { |
1233 | let mov: f16 = simd_extract!(b, 0); |
1234 | simd_insert!(a, 0, mov) |
1235 | } |
1236 | } |
1237 | |
1238 | /// Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from a into memory. |
1239 | /// The address must be aligned to 16 bytes or a general-protection exception may be generated. |
1240 | /// |
1241 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_ph) |
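///
/// # Example
///
/// A minimal sketch (not a compiled doctest) showing one way to satisfy the 16-byte
/// alignment requirement; `Aligned` and `store_demo` are illustrative names.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[repr(align(16))]
/// struct Aligned([f16; 8]);
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// unsafe fn store_demo(v: __m128h) -> Aligned {
///     let mut out = Aligned([0.0; 8]);
///     // The destination is 16-byte aligned, as the aligned store requires.
///     unsafe { _mm_store_ph(out.0.as_mut_ptr(), v) };
///     out
/// }
/// ```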
1242 | #[inline ] |
1243 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1244 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1245 | pub unsafe fn _mm_store_ph(mem_addr: *mut f16, a: __m128h) { |
1246 | *mem_addr.cast() = a; |
1247 | } |
1248 | |
1249 | /// Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from a into memory. |
1250 | /// The address must be aligned to 32 bytes or a general-protection exception may be generated. |
1251 | /// |
1252 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_store_ph) |
1253 | #[inline ] |
1254 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1255 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1256 | pub unsafe fn _mm256_store_ph(mem_addr: *mut f16, a: __m256h) { |
1257 | *mem_addr.cast() = a; |
1258 | } |
1259 | |
1260 | /// Store 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from a into memory. |
1261 | /// The address must be aligned to 64 bytes or a general-protection exception may be generated. |
1262 | /// |
1263 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_store_ph) |
1264 | #[inline ] |
1265 | #[target_feature (enable = "avx512fp16" )] |
1266 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1267 | pub unsafe fn _mm512_store_ph(mem_addr: *mut f16, a: __m512h) { |
1268 | *mem_addr.cast() = a; |
1269 | } |
1270 | |
1271 | /// Store the lower half-precision (16-bit) floating-point element from a into memory. |
1272 | /// |
1273 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_sh) |
1274 | #[inline ] |
1275 | #[target_feature (enable = "avx512fp16" )] |
1276 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1277 | pub unsafe fn _mm_store_sh(mem_addr: *mut f16, a: __m128h) { |
1278 | *mem_addr = simd_extract!(a, 0); |
1279 | } |
1280 | |
1281 | /// Store the lower half-precision (16-bit) floating-point element from a into memory using writemask k (the element is not stored when mask bit 0 is not set). |
1282 | /// |
1283 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sh) |
1284 | #[inline ] |
1285 | #[target_feature (enable = "avx512fp16" )] |
1286 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1287 | pub unsafe fn _mm_mask_store_sh(mem_addr: *mut f16, k: __mmask8, a: __m128h) { |
1288 | asm!( |
1289 | vps!("vmovdqu16", "{{{k}}}, {src}"), |
1290 | p = in(reg) mem_addr, |
1291 | k = in(kreg) k, |
1292 | src = in(xmm_reg) a, |
1293 | options(nostack, preserves_flags) |
1294 | ); |
1295 | } |
1296 | |
1297 | /// Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from a into memory. |
1298 | /// The address does not need to be aligned to any particular boundary. |
1299 | /// |
1300 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_ph) |
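///
/// # Example
///
/// A minimal sketch (not a compiled doctest); `store_unaligned_demo` is a hypothetical
/// helper under the usual nightly/CPU-feature assumptions.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// unsafe fn store_unaligned_demo(v: __m128h) -> [f16; 8] {
///     let mut out: [f16; 8] = [0.0; 8];
///     // No alignment requirement on the destination buffer.
///     unsafe { _mm_storeu_ph(out.as_mut_ptr(), v) };
///     out
/// }
/// ```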
1301 | #[inline ] |
1302 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1303 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1304 | pub unsafe fn _mm_storeu_ph(mem_addr: *mut f16, a: __m128h) { |
1305 | ptr::write_unaligned(mem_addr.cast(), a); |
1306 | } |
1307 | |
1308 | /// Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from a into memory. |
1309 | /// The address does not need to be aligned to any particular boundary. |
1310 | /// |
1311 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu_ph) |
1312 | #[inline ] |
1313 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1314 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1315 | pub unsafe fn _mm256_storeu_ph(mem_addr: *mut f16, a: __m256h) { |
1316 | ptr::write_unaligned(mem_addr.cast(), a); |
1317 | } |
1318 | |
1319 | /// Store 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from a into memory. |
1320 | /// The address does not need to be aligned to any particular boundary. |
1321 | /// |
1322 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ph) |
1323 | #[inline ] |
1324 | #[target_feature (enable = "avx512fp16" )] |
1325 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1326 | pub unsafe fn _mm512_storeu_ph(mem_addr: *mut f16, a: __m512h) { |
1327 | ptr::write_unaligned(mem_addr.cast(), a); |
1328 | } |
1329 | |
1330 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. |
1331 | /// |
1332 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_ph) |
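///
/// # Example
///
/// A minimal sketch (not a compiled doctest); `add_demo` is a hypothetical helper and the
/// usual nightly/CPU-feature assumptions apply.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn add_demo() -> __m128h {
///     let a = _mm_set1_ph(1.5);
///     let b = _mm_set1_ph(2.0);
///     // Every one of the 8 lanes holds 1.5 + 2.0 = 3.5.
///     _mm_add_ph(a, b)
/// }
/// ```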
1333 | #[inline ] |
1334 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1335 | #[cfg_attr (test, assert_instr(vaddph))] |
1336 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1337 | pub fn _mm_add_ph(a: __m128h, b: __m128h) -> __m128h { |
1338 | unsafe { simd_add(a, b) } |
1339 | } |
1340 | |
1341 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1342 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1343 | /// |
1344 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_ph) |
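///
/// # Example
///
/// A minimal sketch (not a compiled doctest) of the writemask behaviour; `mask_add_demo`
/// is a hypothetical helper.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn mask_add_demo(src: __m128h, a: __m128h, b: __m128h) -> __m128h {
///     // Lanes 0 and 2 receive a + b; every other lane is copied from `src`.
///     _mm_mask_add_ph(src, 0b0000_0101, a, b)
/// }
/// ```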
1345 | #[inline ] |
1346 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1347 | #[cfg_attr (test, assert_instr(vaddph))] |
1348 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1349 | pub fn _mm_mask_add_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1350 | unsafe { |
1351 | let r: __m128h = _mm_add_ph(a, b); |
1352 | simd_select_bitmask(k, r, src) |
1353 | } |
1354 | } |
1355 | |
1356 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1357 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1358 | /// |
1359 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_ph) |
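///
/// # Example
///
/// A minimal sketch (not a compiled doctest) contrasting the zeromask variant with the
/// writemask one above; `maskz_add_demo` is a hypothetical helper.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn maskz_add_demo(a: __m128h, b: __m128h) -> __m128h {
///     // The low four lanes receive a + b; the high four lanes are zeroed.
///     _mm_maskz_add_ph(0b0000_1111, a, b)
/// }
/// ```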
1360 | #[inline ] |
1361 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1362 | #[cfg_attr (test, assert_instr(vaddph))] |
1363 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1364 | pub fn _mm_maskz_add_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1365 | unsafe { |
1366 | let r: __m128h = _mm_add_ph(a, b); |
1367 | simd_select_bitmask(k, r, _mm_setzero_ph()) |
1368 | } |
1369 | } |
1370 | |
1371 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. |
1372 | /// |
1373 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_ph) |
1374 | #[inline ] |
1375 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1376 | #[cfg_attr (test, assert_instr(vaddph))] |
1377 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1378 | pub fn _mm256_add_ph(a: __m256h, b: __m256h) -> __m256h { |
1379 | unsafe { simd_add(a, b) } |
1380 | } |
1381 | |
1382 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1383 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1384 | /// |
1385 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_add_ph) |
1386 | #[inline ] |
1387 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1388 | #[cfg_attr (test, assert_instr(vaddph))] |
1389 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1390 | pub fn _mm256_mask_add_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
1391 | unsafe { |
1392 | let r: __m256h = _mm256_add_ph(a, b); |
1393 | simd_select_bitmask(k, r, src) |
1394 | } |
1395 | } |
1396 | |
1397 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1398 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1399 | /// |
1400 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_add_ph) |
1401 | #[inline ] |
1402 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1403 | #[cfg_attr (test, assert_instr(vaddph))] |
1404 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1405 | pub fn _mm256_maskz_add_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
1406 | unsafe { |
1407 | let r: __m256h = _mm256_add_ph(a, b); |
1408 | simd_select_bitmask(k, r, _mm256_setzero_ph()) |
1409 | } |
1410 | } |
1411 | |
1412 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. |
1413 | /// |
1414 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_ph) |
1415 | #[inline ] |
1416 | #[target_feature (enable = "avx512fp16" )] |
1417 | #[cfg_attr (test, assert_instr(vaddph))] |
1418 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1419 | pub fn _mm512_add_ph(a: __m512h, b: __m512h) -> __m512h { |
1420 | unsafe { simd_add(a, b) } |
1421 | } |
1422 | |
1423 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1424 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1425 | /// |
1426 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_ph) |
1427 | #[inline ] |
1428 | #[target_feature (enable = "avx512fp16" )] |
1429 | #[cfg_attr (test, assert_instr(vaddph))] |
1430 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1431 | pub fn _mm512_mask_add_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
1432 | unsafe { |
1433 | let r: __m512h = _mm512_add_ph(a, b); |
1434 | simd_select_bitmask(k, r, src) |
1435 | } |
1436 | } |
1437 | |
1438 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1439 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1440 | /// |
1441 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_ph) |
1442 | #[inline ] |
1443 | #[target_feature (enable = "avx512fp16" )] |
1444 | #[cfg_attr (test, assert_instr(vaddph))] |
1445 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1446 | pub fn _mm512_maskz_add_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
1447 | unsafe { |
1448 | let r: __m512h = _mm512_add_ph(a, b); |
1449 | simd_select_bitmask(k, r, _mm512_setzero_ph()) |
1450 | } |
1451 | } |
1452 | |
1453 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. |
1454 | /// Rounding is done according to the rounding parameter, which can be one of: |
1455 | /// |
1456 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1457 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1458 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1459 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1460 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1461 | /// |
1462 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_round_ph) |
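///
/// # Example
///
/// A minimal sketch (not a compiled doctest) of passing an explicit rounding mode through
/// the const generic; `add_round_demo` is a hypothetical helper.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn add_round_demo(a: __m512h, b: __m512h) -> __m512h {
///     // Round toward zero and suppress floating-point exceptions.
///     _mm512_add_round_ph::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b)
/// }
/// ```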
1463 | #[inline ] |
1464 | #[target_feature (enable = "avx512fp16" )] |
1465 | #[cfg_attr (test, assert_instr(vaddph, ROUNDING = 8))] |
1466 | #[rustc_legacy_const_generics (2)] |
1467 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1468 | pub fn _mm512_add_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
1469 | unsafe { |
1470 | static_assert_rounding!(ROUNDING); |
1471 | vaddph(a, b, ROUNDING) |
1472 | } |
1473 | } |
1474 | |
1475 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1476 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1477 | /// Rounding is done according to the rounding parameter, which can be one of: |
1478 | /// |
1479 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1480 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1481 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1482 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1483 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1484 | /// |
1485 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_round_ph) |
1486 | #[inline ] |
1487 | #[target_feature (enable = "avx512fp16" )] |
1488 | #[cfg_attr (test, assert_instr(vaddph, ROUNDING = 8))] |
1489 | #[rustc_legacy_const_generics (4)] |
1490 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1491 | pub fn _mm512_mask_add_round_ph<const ROUNDING: i32>( |
1492 | src: __m512h, |
1493 | k: __mmask32, |
1494 | a: __m512h, |
1495 | b: __m512h, |
1496 | ) -> __m512h { |
1497 | unsafe { |
1498 | static_assert_rounding!(ROUNDING); |
1499 | let r: __m512h = _mm512_add_round_ph::<ROUNDING>(a, b); |
1500 | simd_select_bitmask(k, r, src) |
1501 | } |
1502 | } |
1503 | |
1504 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1505 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1506 | /// Rounding is done according to the rounding parameter, which can be one of: |
1507 | /// |
1508 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1509 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1510 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1511 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1512 | /// |
1513 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_round_ph) |
1514 | #[inline ] |
1515 | #[target_feature (enable = "avx512fp16" )] |
1516 | #[cfg_attr (test, assert_instr(vaddph, ROUNDING = 8))] |
1517 | #[rustc_legacy_const_generics (3)] |
1518 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1519 | pub fn _mm512_maskz_add_round_ph<const ROUNDING: i32>( |
1520 | k: __mmask32, |
1521 | a: __m512h, |
1522 | b: __m512h, |
1523 | ) -> __m512h { |
1524 | unsafe { |
1525 | static_assert_rounding!(ROUNDING); |
1526 | let r: __m512h = _mm512_add_round_ph::<ROUNDING>(a, b); |
1527 | simd_select_bitmask(k, r, _mm512_setzero_ph()) |
1528 | } |
1529 | } |
1530 | |
1531 | /// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
1532 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
1533 | /// Rounding is done according to the rounding parameter, which can be one of: |
1534 | /// |
1535 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1536 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1537 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1538 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1539 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1540 | /// |
1541 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_round_sh) |
1542 | #[inline ] |
1543 | #[target_feature (enable = "avx512fp16" )] |
1544 | #[cfg_attr (test, assert_instr(vaddsh, ROUNDING = 8))] |
1545 | #[rustc_legacy_const_generics (2)] |
1546 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1547 | pub fn _mm_add_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
1548 | static_assert_rounding!(ROUNDING); |
1549 | _mm_mask_add_round_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b) |
1550 | } |
1551 | |
1552 | /// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
1553 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
1554 | /// writemask k (the element is copied from src when mask bit 0 is not set). |
1555 | /// Rounding is done according to the rounding parameter, which can be one of: |
1556 | /// |
1557 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1558 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1559 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1560 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1561 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1562 | /// |
1563 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_round_sh) |
1564 | #[inline ] |
1565 | #[target_feature (enable = "avx512fp16" )] |
1566 | #[cfg_attr (test, assert_instr(vaddsh, ROUNDING = 8))] |
1567 | #[rustc_legacy_const_generics (4)] |
1568 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1569 | pub fn _mm_mask_add_round_sh<const ROUNDING: i32>( |
1570 | src: __m128h, |
1571 | k: __mmask8, |
1572 | a: __m128h, |
1573 | b: __m128h, |
1574 | ) -> __m128h { |
1575 | unsafe { |
1576 | static_assert_rounding!(ROUNDING); |
1577 | vaddsh(a, b, src, k, ROUNDING) |
1578 | } |
1579 | } |
1580 | |
1581 | /// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
1582 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
1583 | /// zeromask k (the element is zeroed out when mask bit 0 is not set). |
1584 | /// Rounding is done according to the rounding parameter, which can be one of: |
1585 | /// |
1586 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1587 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1588 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1589 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1590 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1591 | /// |
1592 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_round_sh) |
1593 | #[inline ] |
1594 | #[target_feature (enable = "avx512fp16" )] |
1595 | #[cfg_attr (test, assert_instr(vaddsh, ROUNDING = 8))] |
1596 | #[rustc_legacy_const_generics (3)] |
1597 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1598 | pub fn _mm_maskz_add_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1599 | static_assert_rounding!(ROUNDING); |
1600 | _mm_mask_add_round_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b) |
1601 | } |
1602 | |
1603 | /// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
1604 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
1605 | /// |
1606 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sh) |
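///
/// # Example
///
/// A minimal sketch (not a compiled doctest) of the scalar form; `add_sh_demo` is a
/// hypothetical helper.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn add_sh_demo(a: __m128h, b: __m128h) -> __m128h {
///     // Lane 0 holds a[0] + b[0]; lanes 1..=7 are copied unchanged from `a`.
///     _mm_add_sh(a, b)
/// }
/// ```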
1607 | #[inline ] |
1608 | #[target_feature (enable = "avx512fp16" )] |
1609 | #[cfg_attr (test, assert_instr(vaddsh))] |
1610 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1611 | pub fn _mm_add_sh(a: __m128h, b: __m128h) -> __m128h { |
1612 | _mm_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b) |
1613 | } |
1614 | |
1615 | /// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
1616 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
1617 | /// writemask k (the element is copied from src when mask bit 0 is not set). |
1618 | /// |
1619 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_sh) |
1620 | #[inline ] |
1621 | #[target_feature (enable = "avx512fp16" )] |
1622 | #[cfg_attr (test, assert_instr(vaddsh))] |
1623 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1624 | pub fn _mm_mask_add_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1625 | _mm_mask_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
1626 | } |
1627 | |
1628 | /// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
1629 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
1630 | /// zeromask k (the element is zeroed out when mask bit 0 is not set). |
1631 | /// |
1632 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_sh) |
1633 | #[inline ] |
1634 | #[target_feature (enable = "avx512fp16" )] |
1635 | #[cfg_attr (test, assert_instr(vaddsh))] |
1636 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1637 | pub fn _mm_maskz_add_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1638 | _mm_maskz_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b) |
1639 | } |
1640 | |
1641 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst. |
1642 | /// |
1643 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_ph) |
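///
/// # Example
///
/// A minimal sketch (not a compiled doctest) highlighting the operand order (`b` is
/// subtracted from `a`); `sub_demo` is a hypothetical helper.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn sub_demo() -> __m128h {
///     let a = _mm_set1_ph(5.0);
///     let b = _mm_set1_ph(2.0);
///     // Every lane holds 5.0 - 2.0 = 3.0.
///     _mm_sub_ph(a, b)
/// }
/// ```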
1644 | #[inline ] |
1645 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1646 | #[cfg_attr (test, assert_instr(vsubph))] |
1647 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1648 | pub fn _mm_sub_ph(a: __m128h, b: __m128h) -> __m128h { |
1649 | unsafe { simd_sub(a, b) } |
1650 | } |
1651 | |
1652 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using |
1653 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1654 | /// |
1655 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_ph) |
1656 | #[inline ] |
1657 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1658 | #[cfg_attr (test, assert_instr(vsubph))] |
1659 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1660 | pub fn _mm_mask_sub_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1661 | unsafe { |
1662 | let r: __m128h = _mm_sub_ph(a, b); |
1663 | simd_select_bitmask(k, r, src) |
1664 | } |
1665 | } |
1666 | |
1667 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using |
1668 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1669 | /// |
1670 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_ph) |
1671 | #[inline ] |
1672 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1673 | #[cfg_attr (test, assert_instr(vsubph))] |
1674 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1675 | pub fn _mm_maskz_sub_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1676 | unsafe { |
1677 | let r: __m128h = _mm_sub_ph(a, b); |
1678 | simd_select_bitmask(k, r, _mm_setzero_ph()) |
1679 | } |
1680 | } |
1681 | |
1682 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst. |
1683 | /// |
1684 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_ph) |
1685 | #[inline ] |
1686 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1687 | #[cfg_attr (test, assert_instr(vsubph))] |
1688 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1689 | pub fn _mm256_sub_ph(a: __m256h, b: __m256h) -> __m256h { |
1690 | unsafe { simd_sub(a, b) } |
1691 | } |
1692 | |
1693 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using |
1694 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1695 | /// |
1696 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sub_ph) |
1697 | #[inline ] |
1698 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1699 | #[cfg_attr (test, assert_instr(vsubph))] |
1700 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1701 | pub fn _mm256_mask_sub_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
1702 | unsafe { |
1703 | let r: __m256h = _mm256_sub_ph(a, b); |
1704 | simd_select_bitmask(k, r, src) |
1705 | } |
1706 | } |
1707 | |
1708 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using |
1709 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1710 | /// |
1711 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sub_ph) |
1712 | #[inline ] |
1713 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1714 | #[cfg_attr (test, assert_instr(vsubph))] |
1715 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1716 | pub fn _mm256_maskz_sub_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
1717 | unsafe { |
1718 | let r: __m256h = _mm256_sub_ph(a, b); |
1719 | simd_select_bitmask(k, r, _mm256_setzero_ph()) |
1720 | } |
1721 | } |
1722 | |
1723 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst. |
1724 | /// |
1725 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_ph) |
1726 | #[inline ] |
1727 | #[target_feature (enable = "avx512fp16" )] |
1728 | #[cfg_attr (test, assert_instr(vsubph))] |
1729 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1730 | pub fn _mm512_sub_ph(a: __m512h, b: __m512h) -> __m512h { |
1731 | unsafe { simd_sub(a, b) } |
1732 | } |
1733 | |
1734 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using |
1735 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1736 | /// |
1737 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_ph) |
1738 | #[inline ] |
1739 | #[target_feature (enable = "avx512fp16" )] |
1740 | #[cfg_attr (test, assert_instr(vsubph))] |
1741 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1742 | pub fn _mm512_mask_sub_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
1743 | unsafe { |
1744 | let r: __m512h = _mm512_sub_ph(a, b); |
1745 | simd_select_bitmask(k, r, src) |
1746 | } |
1747 | } |
1748 | |
1749 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using |
1750 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1751 | /// |
1752 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_ph) |
1753 | #[inline ] |
1754 | #[target_feature (enable = "avx512fp16" )] |
1755 | #[cfg_attr (test, assert_instr(vsubph))] |
1756 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1757 | pub fn _mm512_maskz_sub_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
1758 | unsafe { |
1759 | let r: __m512h = _mm512_sub_ph(a, b); |
1760 | simd_select_bitmask(k, r, _mm512_setzero_ph()) |
1761 | } |
1762 | } |
1763 | |
1764 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst. |
1765 | /// Rounding is done according to the rounding parameter, which can be one of: |
1766 | /// |
1767 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1768 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1769 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1770 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1771 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1772 | /// |
1773 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_round_ph) |
1774 | #[inline ] |
1775 | #[target_feature (enable = "avx512fp16" )] |
1776 | #[cfg_attr (test, assert_instr(vsubph, ROUNDING = 8))] |
1777 | #[rustc_legacy_const_generics (2)] |
1778 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1779 | pub fn _mm512_sub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
1780 | unsafe { |
1781 | static_assert_rounding!(ROUNDING); |
1782 | vsubph(a, b, ROUNDING) |
1783 | } |
1784 | } |
1785 | |
1786 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using |
1787 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1788 | /// Rounding is done according to the rounding parameter, which can be one of: |
1789 | /// |
1790 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1791 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1792 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1793 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1794 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1795 | /// |
1796 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_round_ph) |
1797 | #[inline ] |
1798 | #[target_feature (enable = "avx512fp16" )] |
1799 | #[cfg_attr (test, assert_instr(vsubph, ROUNDING = 8))] |
1800 | #[rustc_legacy_const_generics (4)] |
1801 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1802 | pub fn _mm512_mask_sub_round_ph<const ROUNDING: i32>( |
1803 | src: __m512h, |
1804 | k: __mmask32, |
1805 | a: __m512h, |
1806 | b: __m512h, |
1807 | ) -> __m512h { |
1808 | unsafe { |
1809 | static_assert_rounding!(ROUNDING); |
1810 | let r: __m512h = _mm512_sub_round_ph::<ROUNDING>(a, b); |
1811 | simd_select_bitmask(k, r, src) |
1812 | } |
1813 | } |
1814 | |
1815 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using |
1816 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1817 | /// Rounding is done according to the rounding parameter, which can be one of: |
1818 | /// |
1819 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1820 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1821 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1822 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1823 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1824 | /// |
1825 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_round_ph) |
1826 | #[inline ] |
1827 | #[target_feature (enable = "avx512fp16" )] |
1828 | #[cfg_attr (test, assert_instr(vsubph, ROUNDING = 8))] |
1829 | #[rustc_legacy_const_generics (3)] |
1830 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1831 | pub fn _mm512_maskz_sub_round_ph<const ROUNDING: i32>( |
1832 | k: __mmask32, |
1833 | a: __m512h, |
1834 | b: __m512h, |
1835 | ) -> __m512h { |
1836 | unsafe { |
1837 | static_assert_rounding!(ROUNDING); |
1838 | let r: __m512h = _mm512_sub_round_ph::<ROUNDING>(a, b); |
1839 | simd_select_bitmask(k, r, _mm512_setzero_ph()) |
1840 | } |
1841 | } |
1842 | |
1843 | /// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the |
1844 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
1845 | /// Rounding is done according to the rounding parameter, which can be one of: |
1846 | /// |
1847 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1848 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1849 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1850 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1851 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1852 | /// |
1853 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_round_sh) |
1854 | #[inline ] |
1855 | #[target_feature (enable = "avx512fp16" )] |
1856 | #[cfg_attr (test, assert_instr(vsubsh, ROUNDING = 8))] |
1857 | #[rustc_legacy_const_generics (2)] |
1858 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1859 | pub fn _mm_sub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
1860 | static_assert_rounding!(ROUNDING); |
1861 | _mm_mask_sub_round_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b) |
1862 | } |
1863 | |
1864 | /// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the |
1865 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
1866 | /// writemask k (the element is copied from src when mask bit 0 is not set). |
1867 | /// Rounding is done according to the rounding parameter, which can be one of: |
1868 | /// |
1869 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1870 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1871 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1872 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1873 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1874 | /// |
1875 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_round_sh) |
1876 | #[inline ] |
1877 | #[target_feature (enable = "avx512fp16" )] |
1878 | #[cfg_attr (test, assert_instr(vsubsh, ROUNDING = 8))] |
1879 | #[rustc_legacy_const_generics (4)] |
1880 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1881 | pub fn _mm_mask_sub_round_sh<const ROUNDING: i32>( |
1882 | src: __m128h, |
1883 | k: __mmask8, |
1884 | a: __m128h, |
1885 | b: __m128h, |
1886 | ) -> __m128h { |
1887 | unsafe { |
1888 | static_assert_rounding!(ROUNDING); |
1889 | vsubsh(a, b, src, k, ROUNDING) |
1890 | } |
1891 | } |
1892 | |
1893 | /// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the |
1894 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
1895 | /// zeromask k (the element is zeroed out when mask bit 0 is not set). |
1896 | /// Rounding is done according to the rounding parameter, which can be one of: |
1897 | /// |
1898 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1899 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1900 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1901 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1902 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1903 | /// |
1904 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_round_sh) |
1905 | #[inline ] |
1906 | #[target_feature (enable = "avx512fp16" )] |
1907 | #[cfg_attr (test, assert_instr(vsubsh, ROUNDING = 8))] |
1908 | #[rustc_legacy_const_generics (3)] |
1909 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1910 | pub fn _mm_maskz_sub_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1911 | static_assert_rounding!(ROUNDING); |
1912 | _mm_mask_sub_round_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b) |
1913 | } |
1914 | |
1915 | /// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the |
1916 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
1917 | /// |
1918 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sh) |
1919 | #[inline ] |
1920 | #[target_feature (enable = "avx512fp16" )] |
1921 | #[cfg_attr (test, assert_instr(vsubsh))] |
1922 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1923 | pub fn _mm_sub_sh(a: __m128h, b: __m128h) -> __m128h { |
1924 | _mm_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b) |
1925 | } |
1926 | |
1927 | /// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the |
1928 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
1929 | /// writemask k (the element is copied from src when mask bit 0 is not set). |
1930 | /// |
1931 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_sh) |
1932 | #[inline ] |
1933 | #[target_feature (enable = "avx512fp16" )] |
1934 | #[cfg_attr (test, assert_instr(vsubsh))] |
1935 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1936 | pub fn _mm_mask_sub_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1937 | _mm_mask_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
1938 | } |
1939 | |
1940 | /// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the |
1941 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
1942 | /// zeromask k (the element is zeroed out when mask bit 0 is not set). |
1943 | /// |
1944 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_sh) |
1945 | #[inline ] |
1946 | #[target_feature (enable = "avx512fp16" )] |
1947 | #[cfg_attr (test, assert_instr(vsubsh))] |
1948 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1949 | pub fn _mm_maskz_sub_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1950 | _mm_maskz_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b) |
1951 | } |
1952 | |
1953 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. |
1954 | /// |
1955 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_ph) |
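///
/// # Example
///
/// A minimal sketch (not a compiled doctest); `mul_demo` is a hypothetical helper.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn mul_demo() -> __m128h {
///     let a = _mm_set1_ph(1.5);
///     let b = _mm_set1_ph(4.0);
///     // Every lane holds 1.5 * 4.0 = 6.0.
///     _mm_mul_ph(a, b)
/// }
/// ```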
1956 | #[inline ] |
1957 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1958 | #[cfg_attr (test, assert_instr(vmulph))] |
1959 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1960 | pub fn _mm_mul_ph(a: __m128h, b: __m128h) -> __m128h { |
1961 | unsafe { simd_mul(a, b) } |
1962 | } |
1963 | |
1964 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1965 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1966 | /// |
1967 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_ph) |
1968 | #[inline ] |
1969 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1970 | #[cfg_attr (test, assert_instr(vmulph))] |
1971 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1972 | pub fn _mm_mask_mul_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1973 | unsafe { |
1974 | let r: __m128h = _mm_mul_ph(a, b); |
1975 | simd_select_bitmask(k, r, src) |
1976 | } |
1977 | } |
1978 | |
1979 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1980 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1981 | /// |
1982 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_ph) |
1983 | #[inline ] |
1984 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1985 | #[cfg_attr (test, assert_instr(vmulph))] |
1986 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1987 | pub fn _mm_maskz_mul_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1988 | unsafe { |
1989 | let r: __m128h = _mm_mul_ph(a, b); |
1990 | simd_select_bitmask(k, r, _mm_setzero_ph()) |
1991 | } |
1992 | } |
1993 | |
1994 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. |
1995 | /// |
1996 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_ph) |
1997 | #[inline ] |
1998 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1999 | #[cfg_attr (test, assert_instr(vmulph))] |
2000 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2001 | pub fn _mm256_mul_ph(a: __m256h, b: __m256h) -> __m256h { |
2002 | unsafe { simd_mul(a, b) } |
2003 | } |
2004 | |
2005 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
2006 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
2007 | /// |
2008 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_ph) |
2009 | #[inline ] |
2010 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2011 | #[cfg_attr (test, assert_instr(vmulph))] |
2012 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2013 | pub fn _mm256_mask_mul_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
2014 | unsafe { |
2015 | let r: __m256h = _mm256_mul_ph(a, b); |
2016 | simd_select_bitmask(k, r, src) |
2017 | } |
2018 | } |
2019 | |
2020 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
2021 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2022 | /// |
2023 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_ph) |
2024 | #[inline ] |
2025 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2026 | #[cfg_attr (test, assert_instr(vmulph))] |
2027 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2028 | pub fn _mm256_maskz_mul_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
2029 | unsafe { |
2030 | let r: __m256h = _mm256_mul_ph(a, b); |
2031 | simd_select_bitmask(k, r, _mm256_setzero_ph()) |
2032 | } |
2033 | } |
2034 | |
2035 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. |
2036 | /// |
2037 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_ph) |
2038 | #[inline ] |
2039 | #[target_feature (enable = "avx512fp16" )] |
2040 | #[cfg_attr (test, assert_instr(vmulph))] |
2041 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2042 | pub fn _mm512_mul_ph(a: __m512h, b: __m512h) -> __m512h { |
2043 | unsafe { simd_mul(a, b) } |
2044 | } |
2045 | |
2046 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
2047 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
2048 | /// |
2049 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_ph) |
2050 | #[inline ] |
2051 | #[target_feature (enable = "avx512fp16" )] |
2052 | #[cfg_attr (test, assert_instr(vmulph))] |
2053 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2054 | pub fn _mm512_mask_mul_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
2055 | unsafe { |
2056 | let r: __m512h = _mm512_mul_ph(a, b); |
2057 | simd_select_bitmask(k, r, src) |
2058 | } |
2059 | } |
2060 | |
2061 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
2062 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2063 | /// |
2064 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_ph) |
2065 | #[inline ] |
2066 | #[target_feature (enable = "avx512fp16" )] |
2067 | #[cfg_attr (test, assert_instr(vmulph))] |
2068 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2069 | pub fn _mm512_maskz_mul_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
2070 | unsafe { |
2071 | let r: __m512h = _mm512_mul_ph(a, b); |
2072 | simd_select_bitmask(k, r, _mm512_setzero_ph()) |
2073 | } |
2074 | } |
2075 | |
2076 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. |
2077 | /// Rounding is done according to the rounding parameter, which can be one of: |
2078 | /// |
2079 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2080 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2081 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2082 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2083 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2084 | /// |
2085 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_ph) |
2086 | #[inline ] |
2087 | #[target_feature (enable = "avx512fp16" )] |
2088 | #[cfg_attr (test, assert_instr(vmulph, ROUNDING = 8))] |
2089 | #[rustc_legacy_const_generics (2)] |
2090 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2091 | pub fn _mm512_mul_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
2092 | unsafe { |
2093 | static_assert_rounding!(ROUNDING); |
2094 | vmulph(a, b, ROUNDING) |
2095 | } |
2096 | } |
2097 | |
2098 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
2099 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
2100 | /// Rounding is done according to the rounding parameter, which can be one of: |
2101 | /// |
2102 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2103 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2104 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2105 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2106 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2107 | /// |
2108 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_ph) |
2109 | #[inline ] |
2110 | #[target_feature (enable = "avx512fp16" )] |
2111 | #[cfg_attr (test, assert_instr(vmulph, ROUNDING = 8))] |
2112 | #[rustc_legacy_const_generics (4)] |
2113 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2114 | pub fn _mm512_mask_mul_round_ph<const ROUNDING: i32>( |
2115 | src: __m512h, |
2116 | k: __mmask32, |
2117 | a: __m512h, |
2118 | b: __m512h, |
2119 | ) -> __m512h { |
2120 | unsafe { |
2121 | static_assert_rounding!(ROUNDING); |
2122 | let r: __m512h = _mm512_mul_round_ph::<ROUNDING>(a, b); |
2123 | simd_select_bitmask(k, r, src) |
2124 | } |
2125 | } |
2126 | |
2127 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
2128 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2129 | /// Rounding is done according to the rounding parameter, which can be one of: |
2130 | /// |
2131 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2132 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2133 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2134 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2135 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2136 | /// |
2137 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_ph) |
2138 | #[inline ] |
2139 | #[target_feature (enable = "avx512fp16" )] |
2140 | #[cfg_attr (test, assert_instr(vmulph, ROUNDING = 8))] |
2141 | #[rustc_legacy_const_generics (3)] |
2142 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2143 | pub fn _mm512_maskz_mul_round_ph<const ROUNDING: i32>( |
2144 | k: __mmask32, |
2145 | a: __m512h, |
2146 | b: __m512h, |
2147 | ) -> __m512h { |
2148 | unsafe { |
2149 | static_assert_rounding!(ROUNDING); |
2150 | let r: __m512h = _mm512_mul_round_ph::<ROUNDING>(a, b); |
2151 | simd_select_bitmask(k, r, _mm512_setzero_ph()) |
2152 | } |
2153 | } |
2154 | |
2155 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
2156 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
2157 | /// Rounding is done according to the rounding parameter, which can be one of: |
2158 | /// |
2159 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2160 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2161 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2162 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2163 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2164 | /// |
2165 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_round_sh) |
2166 | #[inline ] |
2167 | #[target_feature (enable = "avx512fp16" )] |
2168 | #[cfg_attr (test, assert_instr(vmulsh, ROUNDING = 8))] |
2169 | #[rustc_legacy_const_generics (2)] |
2170 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2171 | pub fn _mm_mul_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
2172 | static_assert_rounding!(ROUNDING); |
2173 | _mm_mask_mul_round_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b) |
2174 | } |
2175 | |
2176 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
2177 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
2178 | /// writemask k (the element is copied from src when mask bit 0 is not set). |
2179 | /// Rounding is done according to the rounding parameter, which can be one of: |
2180 | /// |
2181 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2182 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2183 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2184 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2185 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2186 | /// |
2187 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_round_sh) |
2188 | #[inline ] |
2189 | #[target_feature (enable = "avx512fp16" )] |
2190 | #[cfg_attr (test, assert_instr(vmulsh, ROUNDING = 8))] |
2191 | #[rustc_legacy_const_generics (4)] |
2192 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2193 | pub fn _mm_mask_mul_round_sh<const ROUNDING: i32>( |
2194 | src: __m128h, |
2195 | k: __mmask8, |
2196 | a: __m128h, |
2197 | b: __m128h, |
2198 | ) -> __m128h { |
2199 | unsafe { |
2200 | static_assert_rounding!(ROUNDING); |
2201 | vmulsh(a, b, src, k, ROUNDING) |
2202 | } |
2203 | } |
2204 | |
2205 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
2206 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
2207 | /// zeromask k (the element is zeroed out when mask bit 0 is not set). |
2208 | /// Rounding is done according to the rounding parameter, which can be one of: |
2209 | /// |
2210 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2211 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2212 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2213 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2214 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2215 | /// |
2216 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_round_sh) |
2217 | #[inline ] |
2218 | #[target_feature (enable = "avx512fp16" )] |
2219 | #[cfg_attr (test, assert_instr(vmulsh, ROUNDING = 8))] |
2220 | #[rustc_legacy_const_generics (3)] |
2221 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2222 | pub fn _mm_maskz_mul_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2223 | static_assert_rounding!(ROUNDING); |
2224 | _mm_mask_mul_round_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b) |
2225 | } |
2226 | |
2227 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
2228 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
2229 | /// |
2230 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sh) |
2231 | #[inline ] |
2232 | #[target_feature (enable = "avx512fp16" )] |
2233 | #[cfg_attr (test, assert_instr(vmulsh))] |
2234 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2235 | pub fn _mm_mul_sh(a: __m128h, b: __m128h) -> __m128h { |
2236 | _mm_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b) |
2237 | } |
2238 | |
2239 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
2240 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
2241 | /// writemask k (the element is copied from src when mask bit 0 is not set). |
2242 | /// |
2243 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_sh) |
2244 | #[inline ] |
2245 | #[target_feature (enable = "avx512fp16" )] |
2246 | #[cfg_attr (test, assert_instr(vmulsh))] |
2247 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2248 | pub fn _mm_mask_mul_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2249 | _mm_mask_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
2250 | } |
2251 | |
2252 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
2253 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
2254 | /// zeromask k (the element is zeroed out when mask bit 0 is not set). |
2255 | /// |
2256 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_sh) |
2257 | #[inline ] |
2258 | #[target_feature (enable = "avx512fp16" )] |
2259 | #[cfg_attr (test, assert_instr(vmulsh))] |
2260 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2261 | pub fn _mm_maskz_mul_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2262 | _mm_maskz_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b) |
2263 | } |
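
// Illustrative sketch (not one of the intrinsic definitions): how the scalar-lane
// multiply above behaves. It assumes a nightly toolchain with the `f16` and
// `stdarch_x86_avx512_f16` features enabled and an AVX512-FP16 capable CPU; the
// helper name `mul_sh_example` is made up for this sketch.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn mul_sh_example() {
    let a = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
    let b = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0);
    // Lower lane: 1.0 * 3.0 = 3.0; the upper 7 lanes are copied from `a`.
    let r: [f16; 8] = unsafe { transmute(_mm_mul_sh(a, b)) };
    assert_eq!(r[0], 3.0);
    assert_eq!(r[7], 8.0);
    // With a zeromask whose bit 0 is clear, the lower lane is zeroed instead.
    let z: [f16; 8] = unsafe { transmute(_mm_maskz_mul_sh(0, a, b)) };
    assert_eq!(z[0], 0.0);
}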
2264 | |
2265 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst. |
2266 | /// |
2267 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_ph) |
2268 | #[inline ] |
2269 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2270 | #[cfg_attr (test, assert_instr(vdivph))] |
2271 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2272 | pub fn _mm_div_ph(a: __m128h, b: __m128h) -> __m128h { |
    unsafe { simd_div(a, b) }
2274 | } |
2275 | |
2276 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using |
2277 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
2278 | /// |
2279 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_ph) |
2280 | #[inline ] |
2281 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2282 | #[cfg_attr (test, assert_instr(vdivph))] |
2283 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2284 | pub fn _mm_mask_div_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2285 | unsafe { |
        let r = _mm_div_ph(a, b);
        simd_select_bitmask(k, r, src)
2288 | } |
2289 | } |
2290 | |
2291 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using |
2292 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2293 | /// |
2294 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_ph) |
2295 | #[inline ] |
2296 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2297 | #[cfg_attr (test, assert_instr(vdivph))] |
2298 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2299 | pub fn _mm_maskz_div_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2300 | unsafe { |
        let r = _mm_div_ph(a, b);
        simd_select_bitmask(k, r, _mm_setzero_ph())
2303 | } |
2304 | } |
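
// Illustrative sketch of the per-lane mask behaviour for the packed divide above
// (hypothetical helper; same nightly `f16`/`stdarch_x86_avx512_f16` assumptions,
// plus an AVX512-FP16 + AVX512-VL capable CPU):
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn div_ph_mask_example() {
    let a = _mm_set1_ph(8.0);
    let b = _mm_set1_ph(2.0);
    let src = _mm_set1_ph(-1.0);
    // Mask 0b0000_0101: lanes 0 and 2 take 8.0 / 2.0 = 4.0, the rest keep `src`.
    let r: [f16; 8] = unsafe { transmute(_mm_mask_div_ph(src, 0b0000_0101, a, b)) };
    assert_eq!(r[0], 4.0);
    assert_eq!(r[1], -1.0);
    assert_eq!(r[2], 4.0);
}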
2305 | |
2306 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst. |
2307 | /// |
2308 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_div_ph) |
2309 | #[inline ] |
2310 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2311 | #[cfg_attr (test, assert_instr(vdivph))] |
2312 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2313 | pub fn _mm256_div_ph(a: __m256h, b: __m256h) -> __m256h { |
    unsafe { simd_div(a, b) }
2315 | } |
2316 | |
2317 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using |
2318 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
2319 | /// |
2320 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_div_ph) |
2321 | #[inline ] |
2322 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2323 | #[cfg_attr (test, assert_instr(vdivph))] |
2324 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2325 | pub fn _mm256_mask_div_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
2326 | unsafe { |
        let r = _mm256_div_ph(a, b);
        simd_select_bitmask(k, r, src)
2329 | } |
2330 | } |
2331 | |
2332 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using |
2333 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2334 | /// |
2335 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_div_ph) |
2336 | #[inline ] |
2337 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2338 | #[cfg_attr (test, assert_instr(vdivph))] |
2339 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2340 | pub fn _mm256_maskz_div_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
2341 | unsafe { |
        let r = _mm256_div_ph(a, b);
        simd_select_bitmask(k, r, _mm256_setzero_ph())
2344 | } |
2345 | } |
2346 | |
2347 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst. |
2348 | /// |
2349 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_ph) |
2350 | #[inline ] |
2351 | #[target_feature (enable = "avx512fp16" )] |
2352 | #[cfg_attr (test, assert_instr(vdivph))] |
2353 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2354 | pub fn _mm512_div_ph(a: __m512h, b: __m512h) -> __m512h { |
    unsafe { simd_div(a, b) }
2356 | } |
2357 | |
2358 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using |
2359 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
2360 | /// |
2361 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_ph) |
2362 | #[inline ] |
2363 | #[target_feature (enable = "avx512fp16" )] |
2364 | #[cfg_attr (test, assert_instr(vdivph))] |
2365 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2366 | pub fn _mm512_mask_div_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
2367 | unsafe { |
        let r = _mm512_div_ph(a, b);
        simd_select_bitmask(k, r, src)
2370 | } |
2371 | } |
2372 | |
2373 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using |
2374 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2375 | /// |
2376 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_ph) |
2377 | #[inline ] |
2378 | #[target_feature (enable = "avx512fp16" )] |
2379 | #[cfg_attr (test, assert_instr(vdivph))] |
2380 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2381 | pub fn _mm512_maskz_div_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
2382 | unsafe { |
        let r = _mm512_div_ph(a, b);
        simd_select_bitmask(k, r, _mm512_setzero_ph())
2385 | } |
2386 | } |
2387 | |
2388 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst. |
2389 | /// Rounding is done according to the rounding parameter, which can be one of: |
2390 | /// |
2391 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2392 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2393 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2394 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2395 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2396 | /// |
2397 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_round_ph) |
2398 | #[inline ] |
2399 | #[target_feature (enable = "avx512fp16" )] |
2400 | #[cfg_attr (test, assert_instr(vdivph, ROUNDING = 8))] |
2401 | #[rustc_legacy_const_generics (2)] |
2402 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2403 | pub fn _mm512_div_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
2404 | unsafe { |
2405 | static_assert_rounding!(ROUNDING); |
2406 | vdivph(a, b, ROUNDING) |
2407 | } |
2408 | } |
2409 | |
2410 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using |
2411 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
2412 | /// Rounding is done according to the rounding parameter, which can be one of: |
2413 | /// |
2414 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2415 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2416 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2417 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2418 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2419 | /// |
2420 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_round_ph) |
2421 | #[inline ] |
2422 | #[target_feature (enable = "avx512fp16" )] |
2423 | #[cfg_attr (test, assert_instr(vdivph, ROUNDING = 8))] |
2424 | #[rustc_legacy_const_generics (4)] |
2425 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2426 | pub fn _mm512_mask_div_round_ph<const ROUNDING: i32>( |
2427 | src: __m512h, |
2428 | k: __mmask32, |
2429 | a: __m512h, |
2430 | b: __m512h, |
2431 | ) -> __m512h { |
2432 | unsafe { |
2433 | static_assert_rounding!(ROUNDING); |
        let r = _mm512_div_round_ph::<ROUNDING>(a, b);
        simd_select_bitmask(k, r, src)
2436 | } |
2437 | } |
2438 | |
2439 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using |
2440 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2441 | /// Rounding is done according to the rounding parameter, which can be one of: |
2442 | /// |
2443 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2444 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2445 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2446 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2447 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2448 | /// |
2449 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_round_ph) |
2450 | #[inline ] |
2451 | #[target_feature (enable = "avx512fp16" )] |
2452 | #[cfg_attr (test, assert_instr(vdivph, ROUNDING = 8))] |
2453 | #[rustc_legacy_const_generics (3)] |
2454 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2455 | pub fn _mm512_maskz_div_round_ph<const ROUNDING: i32>( |
2456 | k: __mmask32, |
2457 | a: __m512h, |
2458 | b: __m512h, |
2459 | ) -> __m512h { |
2460 | unsafe { |
2461 | static_assert_rounding!(ROUNDING); |
        let r = _mm512_div_round_ph::<ROUNDING>(a, b);
        simd_select_bitmask(k, r, _mm512_setzero_ph())
2464 | } |
2465 | } |
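
// Illustrative sketch of passing an explicit rounding mode to the 512-bit divide
// above (hypothetical helper; same nightly-feature assumptions as this module):
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn div_round_down_example(a: __m512h, b: __m512h) -> __m512h {
    // Round toward negative infinity and suppress exceptions, rather than using
    // whatever MXCSR.RC currently selects.
    _mm512_div_round_ph::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b)
}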
2466 | |
2467 | /// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the |
2468 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
2469 | /// Rounding is done according to the rounding parameter, which can be one of: |
2470 | /// |
2471 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2472 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2473 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2474 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2475 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2476 | /// |
2477 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_round_sh) |
2478 | #[inline ] |
2479 | #[target_feature (enable = "avx512fp16" )] |
2480 | #[cfg_attr (test, assert_instr(vdivsh, ROUNDING = 8))] |
2481 | #[rustc_legacy_const_generics (2)] |
2482 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2483 | pub fn _mm_div_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
2484 | static_assert_rounding!(ROUNDING); |
    _mm_mask_div_round_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
2486 | } |
2487 | |
2488 | /// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the |
2489 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
2490 | /// writemask k (the element is copied from src when mask bit 0 is not set). |
2491 | /// Rounding is done according to the rounding parameter, which can be one of: |
2492 | /// |
2493 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2494 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2495 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2496 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2497 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2498 | /// |
2499 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_round_sh) |
2500 | #[inline ] |
2501 | #[target_feature (enable = "avx512fp16" )] |
2502 | #[cfg_attr (test, assert_instr(vdivsh, ROUNDING = 8))] |
2503 | #[rustc_legacy_const_generics (4)] |
2504 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2505 | pub fn _mm_mask_div_round_sh<const ROUNDING: i32>( |
2506 | src: __m128h, |
2507 | k: __mmask8, |
2508 | a: __m128h, |
2509 | b: __m128h, |
2510 | ) -> __m128h { |
2511 | unsafe { |
2512 | static_assert_rounding!(ROUNDING); |
2513 | vdivsh(a, b, src, k, ROUNDING) |
2514 | } |
2515 | } |
2516 | |
2517 | /// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the |
2518 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
2519 | /// zeromask k (the element is zeroed out when mask bit 0 is not set). |
2520 | /// Rounding is done according to the rounding parameter, which can be one of: |
2521 | /// |
2522 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2523 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2524 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2525 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2526 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2527 | /// |
2528 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_round_sh) |
2529 | #[inline ] |
2530 | #[target_feature (enable = "avx512fp16" )] |
2531 | #[cfg_attr (test, assert_instr(vdivsh, ROUNDING = 8))] |
2532 | #[rustc_legacy_const_generics (3)] |
2533 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2534 | pub fn _mm_maskz_div_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2535 | static_assert_rounding!(ROUNDING); |
    _mm_mask_div_round_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b)
2537 | } |
2538 | |
2539 | /// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the |
2540 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
2541 | /// |
2542 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sh) |
2543 | #[inline ] |
2544 | #[target_feature (enable = "avx512fp16" )] |
2545 | #[cfg_attr (test, assert_instr(vdivsh))] |
2546 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2547 | pub fn _mm_div_sh(a: __m128h, b: __m128h) -> __m128h { |
2548 | _mm_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b) |
2549 | } |
2550 | |
2551 | /// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the |
2552 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
2553 | /// writemask k (the element is copied from src when mask bit 0 is not set). |
2554 | /// |
2555 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_sh) |
2556 | #[inline ] |
2557 | #[target_feature (enable = "avx512fp16" )] |
2558 | #[cfg_attr (test, assert_instr(vdivsh))] |
2559 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2560 | pub fn _mm_mask_div_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2561 | _mm_mask_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
2562 | } |
2563 | |
2564 | /// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the |
2565 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
2566 | /// zeromask k (the element is zeroed out when mask bit 0 is not set). |
2567 | /// |
2568 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_sh) |
2569 | #[inline ] |
2570 | #[target_feature (enable = "avx512fp16" )] |
2571 | #[cfg_attr (test, assert_instr(vdivsh))] |
2572 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2573 | pub fn _mm_maskz_div_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2574 | _mm_maskz_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b) |
2575 | } |
2576 | |
2577 | /// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is |
2578 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
2579 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2580 | /// |
2581 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pch) |
2582 | #[inline ] |
2583 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2584 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2585 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2586 | pub fn _mm_mul_pch(a: __m128h, b: __m128h) -> __m128h { |
    _mm_mask_mul_pch(_mm_undefined_ph(), 0xff, a, b)
2588 | } |
2589 | |
2590 | /// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element |
2591 | /// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2592 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2593 | /// |
2594 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_pch) |
2595 | #[inline ] |
2596 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2597 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2598 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2599 | pub fn _mm_mask_mul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
    unsafe { transmute(vfmulcph_128(transmute(a), transmute(b), transmute(src), k)) }
2601 | } |
2602 | |
2603 | /// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element |
2604 | /// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2605 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2606 | /// |
2607 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_pch) |
2608 | #[inline ] |
2609 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2610 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2611 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2612 | pub fn _mm_maskz_mul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
    _mm_mask_mul_pch(_mm_setzero_ph(), k, a, b)
2614 | } |
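
// Illustrative sketch of the (real, imaginary) lane pairing used by the `*_pch`
// intrinsics above (hypothetical helper; same nightly-feature assumptions, plus
// an AVX512-FP16 + AVX512-VL capable CPU):
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn mul_pch_example() {
    // Four complex numbers per __m128h; every pair here is 1 + 2i and 3 + 4i.
    let a = _mm_set_ph(2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0);
    let b = _mm_set_ph(4.0, 3.0, 4.0, 3.0, 4.0, 3.0, 4.0, 3.0);
    // (1 + 2i) * (3 + 4i) = -5 + 10i in every complex lane.
    let r: [f16; 8] = unsafe { transmute(_mm_mul_pch(a, b)) };
    assert_eq!(r[0], -5.0);
    assert_eq!(r[1], 10.0);
}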
2615 | |
2616 | /// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is |
2617 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
2618 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2619 | /// |
2620 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_pch) |
2621 | #[inline ] |
2622 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2623 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2624 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2625 | pub fn _mm256_mul_pch(a: __m256h, b: __m256h) -> __m256h { |
    _mm256_mask_mul_pch(_mm256_undefined_ph(), 0xff, a, b)
2627 | } |
2628 | |
2629 | /// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element |
2630 | /// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2631 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2632 | /// |
2633 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_pch) |
2634 | #[inline ] |
2635 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2636 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2637 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2638 | pub fn _mm256_mask_mul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h { |
    unsafe { transmute(vfmulcph_256(transmute(a), transmute(b), transmute(src), k)) }
2640 | } |
2641 | |
2642 | /// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element |
2643 | /// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2644 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2645 | /// |
2646 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_pch) |
2647 | #[inline ] |
2648 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2649 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2650 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2651 | pub fn _mm256_maskz_mul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h { |
    _mm256_mask_mul_pch(_mm256_setzero_ph(), k, a, b)
2653 | } |
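
// Illustrative sketch: for the complex intrinsics, each writemask bit governs one
// complex number (a pair of f16 lanes), not an individual lane (hypothetical
// helper; same nightly-feature assumptions, plus AVX512-FP16 + AVX512-VL):
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn maskz_mul_pch_example() {
    let a = _mm256_set1_ph(1.0);
    let b = _mm256_set1_ph(2.0);
    // Only mask bit 0 is set: complex lane 0 (f16 lanes 0 and 1) is computed as
    // (1 + 1i) * (2 + 2i) = 0 + 4i, and the other 7 complex lanes are zeroed.
    let r: [f16; 16] = unsafe { transmute(_mm256_maskz_mul_pch(0b0000_0001, a, b)) };
    assert_eq!(r[1], 4.0);
    assert_eq!(r[2], 0.0);
}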
2654 | |
2655 | /// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is |
2656 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
2657 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2658 | /// |
2659 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_pch) |
2660 | #[inline ] |
2661 | #[target_feature (enable = "avx512fp16" )] |
2662 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2663 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2664 | pub fn _mm512_mul_pch(a: __m512h, b: __m512h) -> __m512h { |
    _mm512_mask_mul_pch(_mm512_undefined_ph(), 0xffff, a, b)
2666 | } |
2667 | |
2668 | /// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element |
2669 | /// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2670 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2671 | /// |
2672 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_pch) |
2673 | #[inline ] |
2674 | #[target_feature (enable = "avx512fp16" )] |
2675 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2676 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2677 | pub fn _mm512_mask_mul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h { |
2678 | _mm512_mask_mul_round_pch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
2679 | } |
2680 | |
2681 | /// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element |
2682 | /// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2683 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2684 | /// |
2685 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_pch) |
2686 | #[inline ] |
2687 | #[target_feature (enable = "avx512fp16" )] |
2688 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2689 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2690 | pub fn _mm512_maskz_mul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h { |
    _mm512_mask_mul_pch(_mm512_setzero_ph(), k, a, b)
2692 | } |
2693 | |
2694 | /// Multiply the packed complex numbers in a and b, and store the results in dst. Each complex number is |
2695 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
2696 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2697 | /// |
2698 | /// Rounding is done according to the rounding parameter, which can be one of: |
2699 | /// |
2700 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2701 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2702 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2703 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2704 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2705 | /// |
2706 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_pch) |
2707 | #[inline ] |
2708 | #[target_feature (enable = "avx512fp16" )] |
2709 | #[cfg_attr (test, assert_instr(vfmulcph, ROUNDING = 8))] |
2710 | #[rustc_legacy_const_generics (2)] |
2711 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2712 | pub fn _mm512_mul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
2713 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_mul_round_pch::<ROUNDING>(_mm512_undefined_ph(), 0xffff, a, b)
2715 | } |
2716 | |
2717 | /// Multiply the packed complex numbers in a and b, and store the results in dst using writemask k (the element |
2718 | /// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2719 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2720 | /// |
2721 | /// Rounding is done according to the rounding parameter, which can be one of: |
2722 | /// |
2723 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2724 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2725 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2726 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2727 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2728 | /// |
2729 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_pch) |
2730 | #[inline ] |
2731 | #[target_feature (enable = "avx512fp16" )] |
2732 | #[cfg_attr (test, assert_instr(vfmulcph, ROUNDING = 8))] |
2733 | #[rustc_legacy_const_generics (4)] |
2734 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2735 | pub fn _mm512_mask_mul_round_pch<const ROUNDING: i32>( |
2736 | src: __m512h, |
2737 | k: __mmask16, |
2738 | a: __m512h, |
2739 | b: __m512h, |
2740 | ) -> __m512h { |
2741 | unsafe { |
2742 | static_assert_rounding!(ROUNDING); |
        transmute(vfmulcph_512(
            transmute(a),
            transmute(b),
            transmute(src),
            k,
            ROUNDING,
        ))
2750 | } |
2751 | } |
2752 | |
2753 | /// Multiply the packed complex numbers in a and b, and store the results in dst using zeromask k (the element |
2754 | /// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2755 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2756 | /// |
2757 | /// Rounding is done according to the rounding parameter, which can be one of: |
2758 | /// |
2759 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2760 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2761 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2762 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2763 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2764 | /// |
2765 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_pch) |
2766 | #[inline ] |
2767 | #[target_feature (enable = "avx512fp16" )] |
2768 | #[cfg_attr (test, assert_instr(vfmulcph, ROUNDING = 8))] |
2769 | #[rustc_legacy_const_generics (3)] |
2770 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2771 | pub fn _mm512_maskz_mul_round_pch<const ROUNDING: i32>( |
2772 | k: __mmask16, |
2773 | a: __m512h, |
2774 | b: __m512h, |
2775 | ) -> __m512h { |
2776 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_mul_round_pch::<ROUNDING>(_mm512_setzero_ph(), k, a, b)
2778 | } |
2779 | |
2780 | /// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst, |
2781 | /// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is |
2782 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
2783 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2784 | /// |
2785 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sch) |
2786 | #[inline ] |
2787 | #[target_feature (enable = "avx512fp16" )] |
2788 | #[cfg_attr (test, assert_instr(vfmulcsh))] |
2789 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2790 | pub fn _mm_mul_sch(a: __m128h, b: __m128h) -> __m128h { |
    _mm_mask_mul_sch(_mm_undefined_ph(), 0xff, a, b)
2792 | } |
2793 | |
2794 | /// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using |
2795 | /// writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 6 packed |
2796 | /// elements from a to the upper elements of dst. Each complex number is composed of two adjacent |
2797 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2798 | /// |
2799 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_sch) |
2800 | #[inline ] |
2801 | #[target_feature (enable = "avx512fp16" )] |
2802 | #[cfg_attr (test, assert_instr(vfmulcsh))] |
2803 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2804 | pub fn _mm_mask_mul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2805 | _mm_mask_mul_round_sch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
2806 | } |
2807 | |
2808 | /// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using |
2809 | /// zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements |
2810 | /// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision |
2811 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2812 | /// |
2813 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_sch) |
2814 | #[inline ] |
2815 | #[target_feature (enable = "avx512fp16" )] |
2816 | #[cfg_attr (test, assert_instr(vfmulcsh))] |
2817 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2818 | pub fn _mm_maskz_mul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
    _mm_mask_mul_sch(_mm_setzero_ph(), k, a, b)
2820 | } |
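
// Illustrative sketch for the scalar complex multiply above: only the lowest
// complex pair is multiplied and the remaining 6 f16 lanes come from `a`
// (hypothetical helper; same nightly-feature assumptions as this module):
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn mul_sch_example() {
    let a = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
    let b = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 3.0);
    // Lower pair: (1 + 2i) * (3 + 4i) = -5 + 10i; lanes 2..=7 are copied from `a`.
    let r: [f16; 8] = unsafe { transmute(_mm_mul_sch(a, b)) };
    assert_eq!(r[0], -5.0);
    assert_eq!(r[1], 10.0);
    assert_eq!(r[2], 3.0);
}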
2821 | |
2822 | /// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst, |
2823 | /// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is |
2824 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
2825 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2826 | /// |
2827 | /// Rounding is done according to the rounding parameter, which can be one of: |
2828 | /// |
2829 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2830 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2831 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2832 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2833 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2834 | /// |
2835 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_round_sch) |
2836 | #[inline ] |
2837 | #[target_feature (enable = "avx512fp16" )] |
2838 | #[cfg_attr (test, assert_instr(vfmulcsh, ROUNDING = 8))] |
2839 | #[rustc_legacy_const_generics (2)] |
2840 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2841 | pub fn _mm_mul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
2842 | static_assert_rounding!(ROUNDING); |
    _mm_mask_mul_round_sch::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
2844 | } |
2845 | |
2846 | /// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using |
2847 | /// writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 6 packed |
2848 | /// elements from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision |
2849 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2850 | /// |
2851 | /// Rounding is done according to the rounding parameter, which can be one of: |
2852 | /// |
2853 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2854 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2855 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2856 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2857 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2858 | /// |
2859 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_round_sch) |
2860 | #[inline ] |
2861 | #[target_feature (enable = "avx512fp16" )] |
2862 | #[cfg_attr (test, assert_instr(vfmulcsh, ROUNDING = 8))] |
2863 | #[rustc_legacy_const_generics (4)] |
2864 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2865 | pub fn _mm_mask_mul_round_sch<const ROUNDING: i32>( |
2866 | src: __m128h, |
2867 | k: __mmask8, |
2868 | a: __m128h, |
2869 | b: __m128h, |
2870 | ) -> __m128h { |
2871 | unsafe { |
2872 | static_assert_rounding!(ROUNDING); |
        transmute(vfmulcsh(
            transmute(a),
            transmute(b),
            transmute(src),
            k,
            ROUNDING,
        ))
2880 | } |
2881 | } |
2882 | |
2883 | /// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using |
2884 | /// zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements |
2885 | /// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision |
2886 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2887 | /// |
2888 | /// Rounding is done according to the rounding parameter, which can be one of: |
2889 | /// |
2890 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2891 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2892 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2893 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2894 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2895 | /// |
2896 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_round_sch) |
2897 | #[inline ] |
2898 | #[target_feature (enable = "avx512fp16" )] |
2899 | #[cfg_attr (test, assert_instr(vfmulcsh, ROUNDING = 8))] |
2900 | #[rustc_legacy_const_generics (3)] |
2901 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2902 | pub fn _mm_maskz_mul_round_sch<const ROUNDING: i32>( |
2903 | k: __mmask8, |
2904 | a: __m128h, |
2905 | b: __m128h, |
2906 | ) -> __m128h { |
2907 | static_assert_rounding!(ROUNDING); |
    _mm_mask_mul_round_sch::<ROUNDING>(_mm_setzero_ph(), k, a, b)
2909 | } |
2910 | |
2911 | /// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is |
2912 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
2913 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2914 | /// |
2915 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_pch) |
2916 | #[inline ] |
2917 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2918 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2919 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2920 | pub fn _mm_fmul_pch(a: __m128h, b: __m128h) -> __m128h { |
2921 | _mm_mul_pch(a, b) |
2922 | } |
2923 | |
2924 | /// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element |
2925 | /// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2926 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2927 | /// |
2928 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_pch) |
2929 | #[inline ] |
2930 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2931 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2932 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2933 | pub fn _mm_mask_fmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2934 | _mm_mask_mul_pch(src, k, a, b) |
2935 | } |
2936 | |
2937 | /// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element |
2938 | /// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision |
2939 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2940 | /// |
2941 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_pch) |
2942 | #[inline ] |
2943 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2944 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2945 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2946 | pub fn _mm_maskz_fmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2947 | _mm_maskz_mul_pch(k, a, b) |
2948 | } |
2949 | |
2950 | /// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is |
2951 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
2952 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2953 | /// |
2954 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmul_pch) |
2955 | #[inline ] |
2956 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2957 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2958 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2959 | pub fn _mm256_fmul_pch(a: __m256h, b: __m256h) -> __m256h { |
2960 | _mm256_mul_pch(a, b) |
2961 | } |
2962 | |
2963 | /// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element |
2964 | /// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision |
2965 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2966 | /// |
2967 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmul_pch) |
2968 | #[inline ] |
2969 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2970 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2971 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2972 | pub fn _mm256_mask_fmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h { |
2973 | _mm256_mask_mul_pch(src, k, a, b) |
2974 | } |
2975 | |
2976 | /// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element |
2977 | /// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision |
2978 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2979 | /// |
2980 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmul_pch) |
2981 | #[inline ] |
2982 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2983 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2984 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2985 | pub fn _mm256_maskz_fmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h { |
2986 | _mm256_maskz_mul_pch(k, a, b) |
2987 | } |
2988 | |
2989 | /// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is composed |
2990 | /// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2991 | /// |
2992 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmul_pch) |
2993 | #[inline ] |
2994 | #[target_feature (enable = "avx512fp16" )] |
2995 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2996 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2997 | pub fn _mm512_fmul_pch(a: __m512h, b: __m512h) -> __m512h { |
2998 | _mm512_mul_pch(a, b) |
2999 | } |
3000 | |
3001 | /// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element |
3002 | /// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision |
3003 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3004 | /// |
3005 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmul_pch) |
3006 | #[inline ] |
3007 | #[target_feature (enable = "avx512fp16" )] |
3008 | #[cfg_attr (test, assert_instr(vfmulcph))] |
3009 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3010 | pub fn _mm512_mask_fmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h { |
3011 | _mm512_mask_mul_pch(src, k, a, b) |
3012 | } |
3013 | |
3014 | /// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element |
3015 | /// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision |
3016 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3017 | /// |
3018 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmul_pch) |
3019 | #[inline ] |
3020 | #[target_feature (enable = "avx512fp16" )] |
3021 | #[cfg_attr (test, assert_instr(vfmulcph))] |
3022 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3023 | pub fn _mm512_maskz_fmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h { |
3024 | _mm512_maskz_mul_pch(k, a, b) |
3025 | } |
3026 | |
3027 | /// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is composed |
3028 | /// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3029 | /// Rounding is done according to the rounding parameter, which can be one of: |
3030 | /// |
3031 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3032 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3033 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3034 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3035 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3036 | /// |
3037 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmul_round_pch) |
3038 | #[inline ] |
3039 | #[target_feature (enable = "avx512fp16" )] |
3040 | #[cfg_attr (test, assert_instr(vfmulcph, ROUNDING = 8))] |
3041 | #[rustc_legacy_const_generics (2)] |
3042 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3043 | pub fn _mm512_fmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
3044 | static_assert_rounding!(ROUNDING); |
3045 | _mm512_mul_round_pch::<ROUNDING>(a, b) |
3046 | } |
3047 | |
3048 | /// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element |
3049 | /// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision |
3050 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3051 | /// Rounding is done according to the rounding parameter, which can be one of: |
3052 | /// |
3053 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3054 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3055 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3056 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3057 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3058 | /// |
3059 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmul_round_pch) |
3060 | #[inline ] |
3061 | #[target_feature (enable = "avx512fp16" )] |
3062 | #[cfg_attr (test, assert_instr(vfmulcph, ROUNDING = 8))] |
3063 | #[rustc_legacy_const_generics (4)] |
3064 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3065 | pub fn _mm512_mask_fmul_round_pch<const ROUNDING: i32>( |
3066 | src: __m512h, |
3067 | k: __mmask16, |
3068 | a: __m512h, |
3069 | b: __m512h, |
3070 | ) -> __m512h { |
3071 | static_assert_rounding!(ROUNDING); |
3072 | _mm512_mask_mul_round_pch::<ROUNDING>(src, k, a, b) |
3073 | } |
3074 | |
3075 | /// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element |
3076 | /// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision |
3077 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3078 | /// Rounding is done according to the rounding parameter, which can be one of: |
3079 | /// |
3080 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3081 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3082 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3083 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3084 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3085 | /// |
3086 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmul_round_pch) |
3087 | #[inline ] |
3088 | #[target_feature (enable = "avx512fp16" )] |
3089 | #[cfg_attr (test, assert_instr(vfmulcph, ROUNDING = 8))] |
3090 | #[rustc_legacy_const_generics (3)] |
3091 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3092 | pub fn _mm512_maskz_fmul_round_pch<const ROUNDING: i32>( |
3093 | k: __mmask16, |
3094 | a: __m512h, |
3095 | b: __m512h, |
3096 | ) -> __m512h { |
3097 | static_assert_rounding!(ROUNDING); |
3098 | _mm512_maskz_mul_round_pch::<ROUNDING>(k, a, b) |
3099 | } |
3100 | |
3101 | /// Multiply the lower complex numbers in a and b, and store the results in dst. Each complex number is |
3102 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
3103 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3104 | /// |
3105 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_sch) |
3106 | #[inline ] |
3107 | #[target_feature (enable = "avx512fp16" )] |
3108 | #[cfg_attr (test, assert_instr(vfmulcsh))] |
3109 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3110 | pub fn _mm_fmul_sch(a: __m128h, b: __m128h) -> __m128h { |
3111 | _mm_mul_sch(a, b) |
3112 | } |
3113 | |
3114 | /// Multiply the lower complex numbers in a and b, and store the results in dst using writemask k (the element |
3115 | /// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision |
3116 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3117 | /// |
3118 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_sch) |
3119 | #[inline ] |
3120 | #[target_feature (enable = "avx512fp16" )] |
3121 | #[cfg_attr (test, assert_instr(vfmulcsh))] |
3122 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3123 | pub fn _mm_mask_fmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
3124 | _mm_mask_mul_sch(src, k, a, b) |
3125 | } |
3126 | |
3127 | /// Multiply the lower complex numbers in a and b, and store the results in dst using zeromask k (the element |
3128 | /// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision |
3129 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3130 | /// |
3131 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_sch) |
3132 | #[inline ] |
3133 | #[target_feature (enable = "avx512fp16" )] |
3134 | #[cfg_attr (test, assert_instr(vfmulcsh))] |
3135 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3136 | pub fn _mm_maskz_fmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
3137 | _mm_maskz_mul_sch(k, a, b) |
3138 | } |
3139 | |
3140 | /// Multiply the lower complex numbers in a and b, and store the results in dst. Each complex number is composed |
3141 | /// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3142 | /// |
3143 | /// Rounding is done according to the rounding parameter, which can be one of: |
3144 | /// |
3145 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3146 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3147 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3148 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3149 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3150 | /// |
3151 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_round_sch) |
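///
/// A minimal sketch of supplying the rounding mode as the const generic, given `__m128h`
/// vectors `a` and `b` (assuming the `avx512fp16` target feature is enabled):
///
/// ```ignore
/// // Round to nearest and suppress exceptions while multiplying the lower complex numbers.
/// let r = _mm_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
/// ```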
3152 | #[inline ] |
3153 | #[target_feature (enable = "avx512fp16" )] |
3154 | #[cfg_attr (test, assert_instr(vfmulcsh, ROUNDING = 8))] |
3155 | #[rustc_legacy_const_generics (2)] |
3156 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3157 | pub fn _mm_fmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
3158 | static_assert_rounding!(ROUNDING); |
3159 | _mm_mul_round_sch::<ROUNDING>(a, b) |
3160 | } |
3161 | |
3162 | /// Multiply the lower complex numbers in a and b, and store the results in dst using writemask k (the element |
3163 | /// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision |
3164 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3165 | /// |
3166 | /// Rounding is done according to the rounding parameter, which can be one of: |
3167 | /// |
3168 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3169 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3170 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3171 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3172 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3173 | /// |
3174 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_round_sch) |
3175 | #[inline ] |
3176 | #[target_feature (enable = "avx512fp16" )] |
3177 | #[cfg_attr (test, assert_instr(vfmulcsh, ROUNDING = 8))] |
3178 | #[rustc_legacy_const_generics (4)] |
3179 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3180 | pub fn _mm_mask_fmul_round_sch<const ROUNDING: i32>( |
3181 | src: __m128h, |
3182 | k: __mmask8, |
3183 | a: __m128h, |
3184 | b: __m128h, |
3185 | ) -> __m128h { |
3186 | static_assert_rounding!(ROUNDING); |
3187 | _mm_mask_mul_round_sch::<ROUNDING>(src, k, a, b) |
3188 | } |
3189 | |
3190 | /// Multiply the lower complex numbers in a and b, and store the results in dst using zeromask k (the element |
3191 | /// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision |
3192 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3193 | /// |
3194 | /// Rounding is done according to the rounding parameter, which can be one of: |
3195 | /// |
3196 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3197 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3198 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3199 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3200 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3201 | /// |
3202 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_round_sch) |
3203 | #[inline ] |
3204 | #[target_feature (enable = "avx512fp16" )] |
3205 | #[cfg_attr (test, assert_instr(vfmulcsh, ROUNDING = 8))] |
3206 | #[rustc_legacy_const_generics (3)] |
3207 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3208 | pub fn _mm_maskz_fmul_round_sch<const ROUNDING: i32>( |
3209 | k: __mmask8, |
3210 | a: __m128h, |
3211 | b: __m128h, |
3212 | ) -> __m128h { |
3213 | static_assert_rounding!(ROUNDING); |
3214 | _mm_maskz_mul_round_sch::<ROUNDING>(k, a, b) |
3215 | } |
3216 | |
3217 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3218 | /// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
3219 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3220 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3221 | /// |
3222 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_pch) |
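///
/// A minimal usage sketch showing one conjugate product (assuming the `avx512fp16`
/// and `avx512vl` target features are enabled):
///
/// ```ignore
/// // First complex pair: a = 1 + 2i, b = 3 + 4i (remaining pairs are zero).
/// let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0);
/// let b = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 3.0);
/// // a * conj(b) = (1 + 2i) * (3 - 4i) = 11 + 2i, so dst.fp16[0] == 11.0 and dst.fp16[1] == 2.0
/// let r = _mm_cmul_pch(a, b);
/// ```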
3223 | #[inline ] |
3224 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3225 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3226 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3227 | pub fn _mm_cmul_pch(a: __m128h, b: __m128h) -> __m128h { |
_mm_mask_cmul_pch(_mm_undefined_ph(), 0xff, a, b)
3229 | } |
3230 | |
3231 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3232 | /// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). |
3233 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3234 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3235 | /// |
3236 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_pch) |
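///
/// A minimal sketch of the writemask behaviour, given `__m128h` vectors `src`, `a` and `b`
/// (assuming the `avx512fp16` and `avx512vl` target features are enabled):
///
/// ```ignore
/// // Only mask bit 0 is set, so only the first complex product is computed;
/// // the other three complex elements of dst are copied from `src`.
/// let r = _mm_mask_cmul_pch(src, 0b0001, a, b);
/// ```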
3237 | #[inline ] |
3238 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3239 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3240 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3241 | pub fn _mm_mask_cmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
unsafe { transmute(vfcmulcph_128(transmute(a), transmute(b), transmute(src), k)) }
3243 | } |
3244 | |
3245 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3246 | /// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). |
3247 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3248 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3249 | /// |
3250 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_pch) |
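///
/// A minimal sketch of the zeromask behaviour, given `__m128h` vectors `a` and `b`
/// (assuming the `avx512fp16` and `avx512vl` target features are enabled):
///
/// ```ignore
/// // Only mask bit 0 is set, so only the first complex product is kept;
/// // the other three complex elements of dst are zeroed.
/// let r = _mm_maskz_cmul_pch(0b0001, a, b);
/// ```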
3251 | #[inline ] |
3252 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3253 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3254 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3255 | pub fn _mm_maskz_cmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
_mm_mask_cmul_pch(_mm_setzero_ph(), k, a, b)
3257 | } |
3258 | |
3259 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3260 | /// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
3261 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3262 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3263 | /// |
3264 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmul_pch) |
3265 | #[inline ] |
3266 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3267 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3268 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3269 | pub fn _mm256_cmul_pch(a: __m256h, b: __m256h) -> __m256h { |
_mm256_mask_cmul_pch(_mm256_undefined_ph(), 0xff, a, b)
3271 | } |
3272 | |
3273 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3274 | /// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). |
3275 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3276 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3277 | /// |
3278 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmul_pch) |
3279 | #[inline ] |
3280 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3281 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3282 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3283 | pub fn _mm256_mask_cmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h { |
unsafe { transmute(vfcmulcph_256(transmute(a), transmute(b), transmute(src), k)) }
3285 | } |
3286 | |
3287 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3288 | /// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). |
3289 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3290 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3291 | /// |
3292 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cmul_pch) |
3293 | #[inline ] |
3294 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3295 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3296 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3297 | pub fn _mm256_maskz_cmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h { |
_mm256_mask_cmul_pch(_mm256_setzero_ph(), k, a, b)
3299 | } |
3300 | |
3301 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3302 | /// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
3303 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3304 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3305 | /// |
3306 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmul_pch) |
3307 | #[inline ] |
3308 | #[target_feature (enable = "avx512fp16" )] |
3309 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3310 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3311 | pub fn _mm512_cmul_pch(a: __m512h, b: __m512h) -> __m512h { |
_mm512_mask_cmul_pch(_mm512_undefined_ph(), 0xffff, a, b)
3313 | } |
3314 | |
3315 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3316 | /// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). |
3317 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3318 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3319 | /// |
3320 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmul_pch) |
3321 | #[inline ] |
3322 | #[target_feature (enable = "avx512fp16" )] |
3323 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3324 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3325 | pub fn _mm512_mask_cmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h { |
3326 | _mm512_mask_cmul_round_pch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
3327 | } |
3328 | |
3329 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3330 | /// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). |
3331 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3332 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3333 | /// |
3334 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cmul_pch) |
3335 | #[inline ] |
3336 | #[target_feature (enable = "avx512fp16" )] |
3337 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3338 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3339 | pub fn _mm512_maskz_cmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h { |
_mm512_mask_cmul_pch(_mm512_setzero_ph(), k, a, b)
3341 | } |
3342 | |
3343 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3344 | /// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
3345 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3346 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3347 | /// |
3348 | /// Rounding is done according to the rounding parameter, which can be one of: |
3349 | /// |
3350 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3351 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3352 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3353 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3354 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3355 | /// |
3356 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmul_round_pch) |
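///
/// A minimal sketch of choosing the rounding mode at compile time, given `__m512h`
/// vectors `a` and `b` (assuming the `avx512fp16` target feature is enabled):
///
/// ```ignore
/// // Truncate toward zero and suppress exceptions for all 16 conjugate products.
/// let r = _mm512_cmul_round_pch::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
/// ```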
3357 | #[inline ] |
3358 | #[target_feature (enable = "avx512fp16" )] |
3359 | #[cfg_attr (test, assert_instr(vfcmulcph, ROUNDING = 8))] |
3360 | #[rustc_legacy_const_generics (2)] |
3361 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3362 | pub fn _mm512_cmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
3363 | static_assert_rounding!(ROUNDING); |
_mm512_mask_cmul_round_pch::<ROUNDING>(_mm512_undefined_ph(), 0xffff, a, b)
3365 | } |
3366 | |
3367 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3368 | /// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). |
3369 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3370 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3371 | /// |
3372 | /// Rounding is done according to the rounding parameter, which can be one of: |
3373 | /// |
3374 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3375 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3376 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3377 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3378 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3379 | /// |
3380 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmul_round_pch) |
3381 | #[inline ] |
3382 | #[target_feature (enable = "avx512fp16" )] |
3383 | #[cfg_attr (test, assert_instr(vfcmulcph, ROUNDING = 8))] |
3384 | #[rustc_legacy_const_generics (4)] |
3385 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3386 | pub fn _mm512_mask_cmul_round_pch<const ROUNDING: i32>( |
3387 | src: __m512h, |
3388 | k: __mmask16, |
3389 | a: __m512h, |
3390 | b: __m512h, |
3391 | ) -> __m512h { |
3392 | unsafe { |
3393 | static_assert_rounding!(ROUNDING); |
transmute(vfcmulcph_512(
transmute(a),
transmute(b),
transmute(src),
k,
ROUNDING,
))
3401 | } |
3402 | } |
3403 | |
3404 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3405 | /// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). |
3406 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3407 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3408 | /// |
3409 | /// Rounding is done according to the rounding parameter, which can be one of: |
3410 | /// |
3411 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3412 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3413 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3414 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3415 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3416 | /// |
3417 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cmul_round_pch) |
3418 | #[inline ] |
3419 | #[target_feature (enable = "avx512fp16" )] |
3420 | #[cfg_attr (test, assert_instr(vfcmulcph, ROUNDING = 8))] |
3421 | #[rustc_legacy_const_generics (3)] |
3422 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3423 | pub fn _mm512_maskz_cmul_round_pch<const ROUNDING: i32>( |
3424 | k: __mmask16, |
3425 | a: __m512h, |
3426 | b: __m512h, |
3427 | ) -> __m512h { |
3428 | static_assert_rounding!(ROUNDING); |
_mm512_mask_cmul_round_pch::<ROUNDING>(_mm512_setzero_ph(), k, a, b)
3430 | } |
3431 | |
3432 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3433 | /// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3435 | /// |
3436 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_sch) |
3437 | #[inline ] |
3438 | #[target_feature (enable = "avx512fp16" )] |
3439 | #[cfg_attr (test, assert_instr(vfcmulcsh))] |
3440 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3441 | pub fn _mm_cmul_sch(a: __m128h, b: __m128h) -> __m128h { |
_mm_mask_cmul_sch(_mm_undefined_ph(), 0xff, a, b)
3443 | } |
3444 | |
3445 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3446 | /// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set). |
3447 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3449 | /// |
3450 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_sch) |
3451 | #[inline ] |
3452 | #[target_feature (enable = "avx512fp16" )] |
3453 | #[cfg_attr (test, assert_instr(vfcmulcsh))] |
3454 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3455 | pub fn _mm_mask_cmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
3456 | _mm_mask_cmul_round_sch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
3457 | } |
3458 | |
3459 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3460 | /// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set). |
3461 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3463 | /// |
3464 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_sch) |
3465 | #[inline ] |
3466 | #[target_feature (enable = "avx512fp16" )] |
3467 | #[cfg_attr (test, assert_instr(vfcmulcsh))] |
3468 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3469 | pub fn _mm_maskz_cmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
_mm_mask_cmul_sch(_mm_setzero_ph(), k, a, b)
3471 | } |
3472 | |
3473 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3474 | /// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3476 | /// |
3477 | /// Rounding is done according to the rounding parameter, which can be one of: |
3478 | /// |
3479 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3480 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3481 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3482 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3483 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3484 | /// |
3485 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_round_sch) |
3486 | #[inline ] |
3487 | #[target_feature (enable = "avx512fp16" )] |
3488 | #[cfg_attr (test, assert_instr(vfcmulcsh, ROUNDING = 8))] |
3489 | #[rustc_legacy_const_generics (2)] |
3490 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3491 | pub fn _mm_cmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
3492 | static_assert_rounding!(ROUNDING); |
_mm_mask_cmul_round_sch::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
3494 | } |
3495 | |
3496 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3497 | /// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set). |
3498 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3499 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3500 | /// |
3501 | /// Rounding is done according to the rounding parameter, which can be one of: |
3502 | /// |
3503 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3504 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3505 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3506 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3507 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3508 | /// |
3509 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_round_sch) |
3510 | #[inline ] |
3511 | #[target_feature (enable = "avx512fp16" )] |
3512 | #[cfg_attr (test, assert_instr(vfcmulcsh, ROUNDING = 8))] |
3513 | #[rustc_legacy_const_generics (4)] |
3514 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3515 | pub fn _mm_mask_cmul_round_sch<const ROUNDING: i32>( |
3516 | src: __m128h, |
3517 | k: __mmask8, |
3518 | a: __m128h, |
3519 | b: __m128h, |
3520 | ) -> __m128h { |
3521 | unsafe { |
3522 | static_assert_rounding!(ROUNDING); |
transmute(vfcmulcsh(
transmute(a),
transmute(b),
transmute(src),
k,
ROUNDING,
))
3530 | } |
3531 | } |
3532 | |
3533 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3534 | /// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set). |
3535 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3536 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3537 | /// |
3538 | /// Rounding is done according to the rounding parameter, which can be one of: |
3539 | /// |
3540 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3541 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3542 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3543 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3544 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3545 | /// |
3546 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_round_sch) |
3547 | #[inline ] |
3548 | #[target_feature (enable = "avx512fp16" )] |
3549 | #[cfg_attr (test, assert_instr(vfcmulcsh, ROUNDING = 8))] |
3550 | #[rustc_legacy_const_generics (3)] |
3551 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3552 | pub fn _mm_maskz_cmul_round_sch<const ROUNDING: i32>( |
3553 | k: __mmask8, |
3554 | a: __m128h, |
3555 | b: __m128h, |
3556 | ) -> __m128h { |
3557 | static_assert_rounding!(ROUNDING); |
_mm_mask_cmul_round_sch::<ROUNDING>(_mm_setzero_ph(), k, a, b)
3559 | } |
3560 | |
3561 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3562 | /// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
3563 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3564 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3565 | /// |
3566 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_pch) |
3567 | #[inline ] |
3568 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3569 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3570 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3571 | pub fn _mm_fcmul_pch(a: __m128h, b: __m128h) -> __m128h { |
3572 | _mm_cmul_pch(a, b) |
3573 | } |
3574 | |
3575 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3576 | /// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). |
3577 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3578 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3579 | /// |
3580 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_pch) |
3581 | #[inline ] |
3582 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3583 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3584 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3585 | pub fn _mm_mask_fcmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
3586 | _mm_mask_cmul_pch(src, k, a, b) |
3587 | } |
3588 | |
3589 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3590 | /// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). |
3591 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3592 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3593 | /// |
3594 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_pch) |
3595 | #[inline ] |
3596 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3597 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3598 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3599 | pub fn _mm_maskz_fcmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
3600 | _mm_maskz_cmul_pch(k, a, b) |
3601 | } |
3602 | |
3603 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3604 | /// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
3605 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3606 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3607 | /// |
3608 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fcmul_pch) |
3609 | #[inline ] |
3610 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3611 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3612 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3613 | pub fn _mm256_fcmul_pch(a: __m256h, b: __m256h) -> __m256h { |
3614 | _mm256_cmul_pch(a, b) |
3615 | } |
3616 | |
3617 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3618 | /// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). |
3619 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3620 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3621 | /// |
3622 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fcmul_pch) |
3623 | #[inline ] |
3624 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3625 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3626 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3627 | pub fn _mm256_mask_fcmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h { |
3628 | _mm256_mask_cmul_pch(src, k, a, b) |
3629 | } |
3630 | |
3631 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3632 | /// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). |
3633 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3634 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3635 | /// |
3636 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fcmul_pch) |
3637 | #[inline ] |
3638 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3639 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3640 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3641 | pub fn _mm256_maskz_fcmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h { |
3642 | _mm256_maskz_cmul_pch(k, a, b) |
3643 | } |
3644 | |
3645 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3646 | /// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
3647 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3648 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3649 | /// |
3650 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmul_pch) |
3651 | #[inline ] |
3652 | #[target_feature (enable = "avx512fp16" )] |
3653 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3654 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3655 | pub fn _mm512_fcmul_pch(a: __m512h, b: __m512h) -> __m512h { |
3656 | _mm512_cmul_pch(a, b) |
3657 | } |
3658 | |
3659 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3660 | /// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). |
3661 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3662 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3663 | /// |
3664 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmul_pch) |
3665 | #[inline ] |
3666 | #[target_feature (enable = "avx512fp16" )] |
3667 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3668 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3669 | pub fn _mm512_mask_fcmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h { |
3670 | _mm512_mask_cmul_pch(src, k, a, b) |
3671 | } |
3672 | |
3673 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3674 | /// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). |
3675 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3676 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3677 | /// |
3678 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmul_pch) |
3679 | #[inline ] |
3680 | #[target_feature (enable = "avx512fp16" )] |
3681 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3682 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3683 | pub fn _mm512_maskz_fcmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h { |
3684 | _mm512_maskz_cmul_pch(k, a, b) |
3685 | } |
3686 | |
3687 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3688 | /// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3690 | /// |
3691 | /// Rounding is done according to the rounding parameter, which can be one of: |
3692 | /// |
3693 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3694 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3695 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3696 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3697 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3698 | /// |
3699 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmul_round_pch) |
3700 | #[inline ] |
3701 | #[target_feature (enable = "avx512fp16" )] |
3702 | #[cfg_attr (test, assert_instr(vfcmulcph, ROUNDING = 8))] |
3703 | #[rustc_legacy_const_generics (2)] |
3704 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3705 | pub fn _mm512_fcmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
3706 | static_assert_rounding!(ROUNDING); |
3707 | _mm512_cmul_round_pch::<ROUNDING>(a, b) |
3708 | } |
3709 | |
3710 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3711 | /// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). |
3712 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3713 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3714 | /// |
3715 | /// Rounding is done according to the rounding parameter, which can be one of: |
3716 | /// |
3717 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3718 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3719 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3720 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3721 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3722 | /// |
3723 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmul_round_pch) |
3724 | #[inline ] |
3725 | #[target_feature (enable = "avx512fp16" )] |
3726 | #[cfg_attr (test, assert_instr(vfcmulcph, ROUNDING = 8))] |
3727 | #[rustc_legacy_const_generics (4)] |
3728 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3729 | pub fn _mm512_mask_fcmul_round_pch<const ROUNDING: i32>( |
3730 | src: __m512h, |
3731 | k: __mmask16, |
3732 | a: __m512h, |
3733 | b: __m512h, |
3734 | ) -> __m512h { |
3735 | static_assert_rounding!(ROUNDING); |
3736 | _mm512_mask_cmul_round_pch::<ROUNDING>(src, k, a, b) |
3737 | } |
3738 | |
3739 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3740 | /// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). |
3741 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3742 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3743 | /// |
3744 | /// Rounding is done according to the rounding parameter, which can be one of: |
3745 | /// |
3746 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3747 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3748 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3749 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3750 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3751 | /// |
3752 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmul_round_pch) |
3753 | #[inline ] |
3754 | #[target_feature (enable = "avx512fp16" )] |
3755 | #[cfg_attr (test, assert_instr(vfcmulcph, ROUNDING = 8))] |
3756 | #[rustc_legacy_const_generics (3)] |
3757 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3758 | pub fn _mm512_maskz_fcmul_round_pch<const ROUNDING: i32>( |
3759 | k: __mmask16, |
3760 | a: __m512h, |
3761 | b: __m512h, |
3762 | ) -> __m512h { |
3763 | static_assert_rounding!(ROUNDING); |
3764 | _mm512_maskz_cmul_round_pch::<ROUNDING>(k, a, b) |
3765 | } |
3766 | |
3767 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3768 | /// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
3769 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3770 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3771 | /// |
3772 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_sch) |
3773 | #[inline ] |
3774 | #[target_feature (enable = "avx512fp16" )] |
3775 | #[cfg_attr (test, assert_instr(vfcmulcsh))] |
3776 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3777 | pub fn _mm_fcmul_sch(a: __m128h, b: __m128h) -> __m128h { |
3778 | _mm_cmul_sch(a, b) |
3779 | } |
3780 | |
3781 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3782 | /// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set). |
3783 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3784 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3785 | /// |
3786 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_sch) |
3787 | #[inline ] |
3788 | #[target_feature (enable = "avx512fp16" )] |
3789 | #[cfg_attr (test, assert_instr(vfcmulcsh))] |
3790 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3791 | pub fn _mm_mask_fcmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
3792 | _mm_mask_cmul_sch(src, k, a, b) |
3793 | } |
3794 | |
3795 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3796 | /// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set). |
3797 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3798 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3799 | /// |
3800 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_sch) |
3801 | #[inline ] |
3802 | #[target_feature (enable = "avx512fp16" )] |
3803 | #[cfg_attr (test, assert_instr(vfcmulcsh))] |
3804 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3805 | pub fn _mm_maskz_fcmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
3806 | _mm_maskz_cmul_sch(k, a, b) |
3807 | } |
3808 | |
3809 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3810 | /// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3812 | /// |
3813 | /// Rounding is done according to the rounding parameter, which can be one of: |
3814 | /// |
3815 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3816 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3817 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3818 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3819 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3820 | /// |
3821 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_round_sch) |
3822 | #[inline ] |
3823 | #[target_feature (enable = "avx512fp16" )] |
3824 | #[cfg_attr (test, assert_instr(vfcmulcsh, ROUNDING = 8))] |
3825 | #[rustc_legacy_const_generics (2)] |
3826 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3827 | pub fn _mm_fcmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
3828 | static_assert_rounding!(ROUNDING); |
3829 | _mm_cmul_round_sch::<ROUNDING>(a, b) |
3830 | } |
3831 | |
3832 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3833 | /// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set). |
3834 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3835 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3836 | /// |
3837 | /// Rounding is done according to the rounding parameter, which can be one of: |
3838 | /// |
3839 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3840 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3841 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3842 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3843 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3844 | /// |
3845 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_round_sch) |
3846 | #[inline ] |
3847 | #[target_feature (enable = "avx512fp16" )] |
3848 | #[cfg_attr (test, assert_instr(vfcmulcsh, ROUNDING = 8))] |
3849 | #[rustc_legacy_const_generics (4)] |
3850 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3851 | pub fn _mm_mask_fcmul_round_sch<const ROUNDING: i32>( |
3852 | src: __m128h, |
3853 | k: __mmask8, |
3854 | a: __m128h, |
3855 | b: __m128h, |
3856 | ) -> __m128h { |
3857 | static_assert_rounding!(ROUNDING); |
3858 | _mm_mask_cmul_round_sch::<ROUNDING>(src, k, a, b) |
3859 | } |
3860 | |
3861 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3862 | /// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set). |
3863 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3864 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3865 | /// |
3866 | /// Rounding is done according to the rounding parameter, which can be one of: |
3867 | /// |
3868 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3869 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3870 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3871 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3872 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3873 | /// |
3874 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_round_sch) |
3875 | #[inline ] |
3876 | #[target_feature (enable = "avx512fp16" )] |
3877 | #[cfg_attr (test, assert_instr(vfcmulcsh, ROUNDING = 8))] |
3878 | #[rustc_legacy_const_generics (3)] |
3879 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3880 | pub fn _mm_maskz_fcmul_round_sch<const ROUNDING: i32>( |
3881 | k: __mmask8, |
3882 | a: __m128h, |
3883 | b: __m128h, |
3884 | ) -> __m128h { |
3885 | static_assert_rounding!(ROUNDING); |
3886 | _mm_maskz_cmul_round_sch::<ROUNDING>(k, a, b) |
3887 | } |
3888 | |
3889 | /// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing |
3890 | /// the results in dst. |
3891 | /// |
3892 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_ph) |
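///
/// A minimal usage sketch (assuming the `avx512fp16` and `avx512vl` target features
/// are enabled); the absolute value is obtained by clearing the sign bit of each element:
///
/// ```ignore
/// let v = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.5, -1.5);
/// // r.fp16[0] == 1.5 and r.fp16[1] == 2.5
/// let r = _mm_abs_ph(v);
/// ```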
3893 | #[inline ] |
3894 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3895 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3896 | pub fn _mm_abs_ph(v2: __m128h) -> __m128h { |
unsafe { transmute(_mm_and_si128(transmute(v2), _mm_set1_epi16(i16::MAX))) }
3898 | } |
3899 | |
3900 | /// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing |
3901 | /// the result in dst. |
3902 | /// |
3903 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_ph) |
3904 | #[inline ] |
3905 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3906 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3907 | pub fn _mm256_abs_ph(v2: __m256h) -> __m256h { |
unsafe { transmute(_mm256_and_si256(transmute(v2), _mm256_set1_epi16(i16::MAX))) }
3909 | } |
3910 | |
3911 | /// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing |
3912 | /// the result in dst. |
3913 | /// |
3914 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_ph) |
3915 | #[inline ] |
3916 | #[target_feature (enable = "avx512fp16" )] |
3917 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3918 | pub fn _mm512_abs_ph(v2: __m512h) -> __m512h { |
unsafe { transmute(_mm512_and_si512(transmute(v2), _mm512_set1_epi16(i16::MAX))) }
3920 | } |
3921 | |
3922 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex |
3923 | /// number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines |
3924 | /// the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate |
3925 | /// `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3926 | /// |
3927 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_conj_pch) |
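///
/// A minimal usage sketch (assuming the `avx512fp16` and `avx512vl` target features
/// are enabled); the conjugate is formed by flipping the sign bit of each imaginary element:
///
/// ```ignore
/// // First complex pair: 1 + 2i (remaining pairs are zero).
/// let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0);
/// // conj(1 + 2i) = 1 - 2i, so r.fp16[0] == 1.0 and r.fp16[1] == -2.0
/// let r = _mm_conj_pch(a);
/// ```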
3928 | #[inline ] |
3929 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3930 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3931 | pub fn _mm_conj_pch(a: __m128h) -> __m128h { |
unsafe { transmute(_mm_xor_si128(transmute(a), _mm_set1_epi32(i32::MIN))) }
3933 | } |
3934 | |
3935 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k |
3936 | /// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two |
3937 | /// adjacent half-precision (16-bit) floating-point elements, which defines the complex number |
3938 | /// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3939 | /// |
3940 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_conj_pch) |
3941 | #[inline ] |
3942 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3943 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3944 | pub fn _mm_mask_conj_pch(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { |
3945 | unsafe { |
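// The writemask carries one bit per complex number, so the select below operates on
// 32-bit (f32-sized) lanes, each holding one real/imaginary pair.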
let r: __m128 = transmute(_mm_conj_pch(a));
transmute(simd_select_bitmask(k, r, transmute(src)))
3948 | } |
3949 | } |
3950 | |
3951 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k |
3952 | /// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent |
3953 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3954 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3955 | /// |
3956 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_conj_pch) |
3957 | #[inline ] |
3958 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3959 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3960 | pub fn _mm_maskz_conj_pch(k: __mmask8, a: __m128h) -> __m128h { |
_mm_mask_conj_pch(_mm_setzero_ph(), k, a)
3962 | } |
3963 | |
3964 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex number |
3965 | /// is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
3966 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3967 | /// |
3968 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_conj_pch) |
3969 | #[inline ] |
3970 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3971 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3972 | pub fn _mm256_conj_pch(a: __m256h) -> __m256h { |
unsafe { transmute(_mm256_xor_si256(transmute(a), _mm256_set1_epi32(i32::MIN))) }
3974 | } |
3975 | |
3976 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k |
3977 | /// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two |
3978 | /// adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3979 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3980 | /// |
3981 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_conj_pch) |
3982 | #[inline ] |
3983 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3984 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3985 | pub fn _mm256_mask_conj_pch(src: __m256h, k: __mmask8, a: __m256h) -> __m256h { |
3986 | unsafe { |
let r: __m256 = transmute(_mm256_conj_pch(a));
transmute(simd_select_bitmask(k, r, transmute(src)))
3989 | } |
3990 | } |
3991 | |
3992 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k |
3993 | /// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent |
3994 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3995 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3996 | /// |
3997 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_conj_pch) |
3998 | #[inline ] |
3999 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4000 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4001 | pub fn _mm256_maskz_conj_pch(k: __mmask8, a: __m256h) -> __m256h { |
_mm256_mask_conj_pch(_mm256_setzero_ph(), k, a)
4003 | } |
4004 | |
4005 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex number |
4006 | /// is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
4007 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4008 | /// |
4009 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_conj_pch) |
4010 | #[inline ] |
4011 | #[target_feature (enable = "avx512fp16" )] |
4012 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4013 | pub fn _mm512_conj_pch(a: __m512h) -> __m512h { |
unsafe { transmute(_mm512_xor_si512(transmute(a), _mm512_set1_epi32(i32::MIN))) }
4015 | } |
4016 | |
4017 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k |
4018 | /// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two |
4019 | /// adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4020 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4021 | /// |
4022 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_conj_pch) |
4023 | #[inline ] |
4024 | #[target_feature (enable = "avx512fp16" )] |
4025 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4026 | pub fn _mm512_mask_conj_pch(src: __m512h, k: __mmask16, a: __m512h) -> __m512h { |
4027 | unsafe { |
let r: __m512 = transmute(_mm512_conj_pch(a));
transmute(simd_select_bitmask(k, r, transmute(src)))
4030 | } |
4031 | } |
4032 | |
4033 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k |
4034 | /// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent |
4035 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4036 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4037 | /// |
4038 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_conj_pch) |
4039 | #[inline ] |
4040 | #[target_feature (enable = "avx512fp16" )] |
4041 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4042 | pub fn _mm512_maskz_conj_pch(k: __mmask16, a: __m512h) -> __m512h { |
_mm512_mask_conj_pch(_mm512_setzero_ph(), k, a)
4044 | } |
4045 | |
4046 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4047 | /// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
4048 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4049 | /// |
4050 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_pch) |
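///
/// # Examples
///
/// A minimal usage sketch (illustrative only, not part of the original documentation;
/// it assumes the `avx512fp16` and `avx512vl` target features are available):
///
/// ```ignore
/// // a holds the complex numbers (1 + 2i), (3 + 4i), (5 + 6i), (7 + 8i);
/// // b holds (2 + 0i) in every slot and c is all zeros, so the result is
/// // (2 + 4i), (6 + 8i), (10 + 12i), (14 + 16i).
/// let a = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
/// let b = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0);
/// let c = _mm_setzero_ph();
/// let r = _mm_fmadd_pch(a, b, c);
/// ```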
4051 | #[inline ] |
4052 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4053 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4054 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4055 | pub fn _mm_fmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
_mm_mask3_fmadd_pch(a, b, c, 0xff)
4057 | } |
4058 | |
4059 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4060 | /// and store the results in dst using writemask k (the element is copied from a when the corresponding |
4061 | /// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) |
4062 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4063 | /// |
4064 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_pch) |
4065 | #[inline ] |
4066 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4067 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4068 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4069 | pub fn _mm_mask_fmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
4070 | unsafe { |
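// Each complex number occupies one 32-bit lane, so the vectors are reinterpreted as
// `__m128` and each bit of `k` selects a whole real/imaginary pair. The extra blend
// implements the copy-from-`a` writemask on top of the merge-from-`c` intrinsic below.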
let r: __m128 = transmute(_mm_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
transmute(simd_select_bitmask(k, r, transmute(a)))
4073 | } |
4074 | } |
4075 | |
4076 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4077 | /// and store the results in dst using writemask k (the element is copied from c when the corresponding |
4078 | /// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) |
4079 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4080 | /// |
4081 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_pch) |
4082 | #[inline ] |
4083 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4084 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4085 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4086 | pub fn _mm_mask3_fmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
4087 | unsafe { |
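// The masked intrinsic itself implements the mask3 behaviour: complex numbers whose
// mask bit is clear are taken from `c`, so no additional blend is needed here.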
transmute(vfmaddcph_mask3_128(
transmute(a),
transmute(b),
transmute(c),
4092 | k, |
4093 | )) |
4094 | } |
4095 | } |
4096 | |
4097 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4098 | /// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask |
4099 | /// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point |
4100 | /// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4101 | /// |
4102 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_pch) |
4103 | #[inline ] |
4104 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4105 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4106 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4107 | pub fn _mm_maskz_fmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
4108 | unsafe { |
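// The zeroing form of the intrinsic clears complex numbers whose mask bit is not set,
// so the zeromask semantics need no further post-processing.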
transmute(vfmaddcph_maskz_128(
transmute(a),
transmute(b),
transmute(c),
4113 | k, |
4114 | )) |
4115 | } |
4116 | } |
4117 | |
4118 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4119 | /// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
4120 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4121 | /// |
4122 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_pch) |
4123 | #[inline ] |
4124 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4125 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4126 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4127 | pub fn _mm256_fmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
_mm256_mask3_fmadd_pch(a, b, c, 0xff)
4129 | } |
4130 | |
4131 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4132 | /// and store the results in dst using writemask k (the element is copied from a when the corresponding mask |
4133 | /// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point |
4134 | /// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4135 | /// |
4136 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmadd_pch) |
4137 | #[inline ] |
4138 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4139 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4140 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4141 | pub fn _mm256_mask_fmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h { |
4142 | unsafe { |
let r: __m256 = transmute(_mm256_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
transmute(simd_select_bitmask(k, r, transmute(a)))
4145 | } |
4146 | } |
4147 | |
4148 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4149 | /// and store the results in dst using writemask k (the element is copied from c when the corresponding |
4150 | /// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) |
4151 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4152 | /// |
4153 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmadd_pch) |
4154 | #[inline ] |
4155 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4156 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4157 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4158 | pub fn _mm256_mask3_fmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h { |
4159 | unsafe { |
transmute(vfmaddcph_mask3_256(
transmute(a),
transmute(b),
transmute(c),
4164 | k, |
4165 | )) |
4166 | } |
4167 | } |
4168 | |
4169 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4170 | /// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask |
4171 | /// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point |
4172 | /// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4173 | /// |
4174 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmadd_pch) |
4175 | #[inline ] |
4176 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4177 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4178 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4179 | pub fn _mm256_maskz_fmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
4180 | unsafe { |
transmute(vfmaddcph_maskz_256(
transmute(a),
transmute(b),
transmute(c),
4185 | k, |
4186 | )) |
4187 | } |
4188 | } |
4189 | |
4190 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4191 | /// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
4192 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4193 | /// |
4194 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_pch) |
4195 | #[inline ] |
4196 | #[target_feature (enable = "avx512fp16" )] |
4197 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4198 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4199 | pub fn _mm512_fmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
4200 | _mm512_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c) |
4201 | } |
4202 | |
4203 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4204 | /// and store the results in dst using writemask k (the element is copied from a when the corresponding mask |
4205 | /// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point |
4206 | /// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4207 | /// |
4208 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_pch) |
4209 | #[inline ] |
4210 | #[target_feature (enable = "avx512fp16" )] |
4211 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4212 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4213 | pub fn _mm512_mask_fmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h { |
4214 | _mm512_mask_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c) |
4215 | } |
4216 | |
4217 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4218 | /// and store the results in dst using writemask k (the element is copied from c when the corresponding |
4219 | /// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) |
4220 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4221 | /// |
4222 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_pch) |
4223 | #[inline ] |
4224 | #[target_feature (enable = "avx512fp16" )] |
4225 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4226 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4227 | pub fn _mm512_mask3_fmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h { |
4228 | _mm512_mask3_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k) |
4229 | } |
4230 | |
4231 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4232 | /// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask |
4233 | /// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point |
4234 | /// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4235 | /// |
4236 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_pch) |
4237 | #[inline ] |
4238 | #[target_feature (enable = "avx512fp16" )] |
4239 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4240 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4241 | pub fn _mm512_maskz_fmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
4242 | _mm512_maskz_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c) |
4243 | } |
4244 | |
4245 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4246 | /// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
4247 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4248 | /// |
4249 | /// Rounding is done according to the rounding parameter, which can be one of: |
4250 | /// |
4251 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4252 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4253 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4254 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4255 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4256 | /// |
4257 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_round_pch) |
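///
/// # Examples
///
/// A minimal sketch of passing an explicit rounding mode through the const parameter
/// (illustrative only; `a`, `b` and `c` stand for existing `__m512h` values):
///
/// ```ignore
/// // Round to nearest and suppress exceptions.
/// let r = _mm512_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
/// ```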
4258 | #[inline ] |
4259 | #[target_feature (enable = "avx512fp16" )] |
4260 | #[cfg_attr (test, assert_instr(vfmaddcph, ROUNDING = 8))] |
4261 | #[rustc_legacy_const_generics (3)] |
4262 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4263 | pub fn _mm512_fmadd_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
4264 | static_assert_rounding!(ROUNDING); |
_mm512_mask3_fmadd_round_pch::<ROUNDING>(a, b, c, 0xffff)
4266 | } |
4267 | |
4268 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4269 | /// and store the results in dst using writemask k (the element is copied from a when the corresponding mask |
4270 | /// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point |
4271 | /// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4272 | /// |
4273 | /// Rounding is done according to the rounding parameter, which can be one of: |
4274 | /// |
4275 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4276 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4277 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4278 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4279 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4280 | /// |
4281 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_round_pch) |
4282 | #[inline ] |
4283 | #[target_feature (enable = "avx512fp16" )] |
4284 | #[cfg_attr (test, assert_instr(vfmaddcph, ROUNDING = 8))] |
4285 | #[rustc_legacy_const_generics (4)] |
4286 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4287 | pub fn _mm512_mask_fmadd_round_pch<const ROUNDING: i32>( |
4288 | a: __m512h, |
4289 | k: __mmask16, |
4290 | b: __m512h, |
4291 | c: __m512h, |
4292 | ) -> __m512h { |
4293 | unsafe { |
4294 | static_assert_rounding!(ROUNDING); |
let r: __m512 = transmute(_mm512_mask3_fmadd_round_pch::<ROUNDING>(a, b, c, k)); // using `0xffff` would have been fine here, but this is what Clang does
transmute(simd_select_bitmask(k, r, transmute(a)))
4297 | } |
4298 | } |
4299 | |
4300 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4301 | /// and store the results in dst using writemask k (the element is copied from c when the corresponding |
4302 | /// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) |
4303 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4304 | /// |
4305 | /// Rounding is done according to the rounding parameter, which can be one of: |
4306 | /// |
4307 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4308 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4309 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4310 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4311 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4312 | /// |
4313 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_round_pch) |
4314 | #[inline ] |
4315 | #[target_feature (enable = "avx512fp16" )] |
4316 | #[cfg_attr (test, assert_instr(vfmaddcph, ROUNDING = 8))] |
4317 | #[rustc_legacy_const_generics (4)] |
4318 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4319 | pub fn _mm512_mask3_fmadd_round_pch<const ROUNDING: i32>( |
4320 | a: __m512h, |
4321 | b: __m512h, |
4322 | c: __m512h, |
4323 | k: __mmask16, |
4324 | ) -> __m512h { |
4325 | unsafe { |
4326 | static_assert_rounding!(ROUNDING); |
transmute(vfmaddcph_mask3_512(
transmute(a),
transmute(b),
transmute(c),
4331 | k, |
4332 | ROUNDING, |
4333 | )) |
4334 | } |
4335 | } |
4336 | |
4337 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4338 | /// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask |
4339 | /// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point |
4340 | /// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4341 | /// |
4342 | /// Rounding is done according to the rounding parameter, which can be one of: |
4343 | /// |
4344 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4345 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4346 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4347 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4348 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4349 | /// |
4350 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_round_pch) |
4351 | #[inline ] |
4352 | #[target_feature (enable = "avx512fp16" )] |
4353 | #[cfg_attr (test, assert_instr(vfmaddcph, ROUNDING = 8))] |
4354 | #[rustc_legacy_const_generics (4)] |
4355 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4356 | pub fn _mm512_maskz_fmadd_round_pch<const ROUNDING: i32>( |
4357 | k: __mmask16, |
4358 | a: __m512h, |
4359 | b: __m512h, |
4360 | c: __m512h, |
4361 | ) -> __m512h { |
4362 | unsafe { |
4363 | static_assert_rounding!(ROUNDING); |
transmute(vfmaddcph_maskz_512(
transmute(a),
transmute(b),
transmute(c),
4368 | k, |
4369 | ROUNDING, |
4370 | )) |
4371 | } |
4372 | } |
4373 | |
4374 | /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and |
4375 | /// store the result in the lower elements of dst, and copy the upper 6 packed elements from a to the |
4376 | /// upper elements of dst. Each complex number is composed of two adjacent half-precision (16-bit) |
4377 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4378 | /// |
4379 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sch) |
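///
/// # Examples
///
/// A minimal sketch (illustrative only; assumes `avx512fp16` is available). Only the
/// lowest complex number is computed; the upper six elements are copied from `a`:
///
/// ```ignore
/// let a = _mm_set_ph(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 2.0, 1.0); // lower complex number: 1 + 2i
/// let b = _mm_set_sh(3.0); // lower complex number: 3 + 0i
/// let c = _mm_setzero_ph();
/// // The lower complex number of r is (1 + 2i) * 3 = 3 + 6i.
/// let r = _mm_fmadd_sch(a, b, c);
/// ```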
4380 | #[inline ] |
4381 | #[target_feature (enable = "avx512fp16" )] |
4382 | #[cfg_attr (test, assert_instr(vfmaddcsh))] |
4383 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4384 | pub fn _mm_fmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
4385 | _mm_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c) |
4386 | } |
4387 | |
4388 | /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and |
4389 | /// store the result in the lower elements of dst using writemask k (elements are copied from a when |
4390 | /// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. |
4391 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, |
4392 | /// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4393 | /// |
4394 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_sch) |
4395 | #[inline ] |
4396 | #[target_feature (enable = "avx512fp16" )] |
4397 | #[cfg_attr (test, assert_instr(vfmaddcsh))] |
4398 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4399 | pub fn _mm_mask_fmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
4400 | _mm_mask_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c) |
4401 | } |
4402 | |
4403 | /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and |
4404 | /// store the result in the lower elements of dst using writemask k (elements are copied from c when |
4405 | /// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. |
4406 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, |
4407 | /// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4408 | /// |
4409 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_sch) |
4410 | #[inline ] |
4411 | #[target_feature (enable = "avx512fp16" )] |
4412 | #[cfg_attr (test, assert_instr(vfmaddcsh))] |
4413 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4414 | pub fn _mm_mask3_fmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
4415 | _mm_mask3_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k) |
4416 | } |
4417 | |
4418 | /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and |
4419 | /// store the result in the lower elements of dst using zeromask k (elements are zeroed out when mask |
4420 | /// bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. Each |
4421 | /// complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
4422 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4423 | /// |
4424 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_sch) |
4425 | #[inline ] |
4426 | #[target_feature (enable = "avx512fp16" )] |
4427 | #[cfg_attr (test, assert_instr(vfmaddcsh))] |
4428 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4429 | pub fn _mm_maskz_fmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
4430 | _mm_maskz_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c) |
4431 | } |
4432 | |
4433 | /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and |
4434 | /// store the result in the lower elements of dst. Each complex number is composed of two adjacent |
4435 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4436 | /// |
4437 | /// Rounding is done according to the rounding parameter, which can be one of: |
4438 | /// |
4439 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4440 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4441 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4442 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4443 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4444 | /// |
4445 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_sch) |
4446 | #[inline ] |
4447 | #[target_feature (enable = "avx512fp16" )] |
4448 | #[cfg_attr (test, assert_instr(vfmaddcsh, ROUNDING = 8))] |
4449 | #[rustc_legacy_const_generics (3)] |
4450 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4451 | pub fn _mm_fmadd_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
4452 | unsafe { |
4453 | static_assert_rounding!(ROUNDING); |
transmute(vfmaddcsh_mask(
transmute(a),
transmute(b),
transmute(c),
0xff,
4459 | ROUNDING, |
4460 | )) |
4461 | } |
4462 | } |
4463 | |
4464 | /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and |
4465 | /// store the result in the lower elements of dst using writemask k (elements are copied from a when |
4466 | /// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. |
4467 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, |
4468 | /// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4469 | /// |
4470 | /// Rounding is done according to the rounding parameter, which can be one of: |
4471 | /// |
4472 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4473 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4474 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4475 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4476 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4477 | /// |
4478 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_sch) |
4479 | #[inline ] |
4480 | #[target_feature (enable = "avx512fp16" )] |
4481 | #[cfg_attr (test, assert_instr(vfmaddcsh, ROUNDING = 8))] |
4482 | #[rustc_legacy_const_generics (4)] |
4483 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4484 | pub fn _mm_mask_fmadd_round_sch<const ROUNDING: i32>( |
4485 | a: __m128h, |
4486 | k: __mmask8, |
4487 | b: __m128h, |
4488 | c: __m128h, |
4489 | ) -> __m128h { |
4490 | unsafe { |
4491 | static_assert_rounding!(ROUNDING); |
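// `_mm_mask_move_ss` keeps the freshly computed complex number only when bit 0 of `k`
// is set (otherwise the low 32 bits fall back to `a`) and copies the upper elements
// from `a`, matching the writemask-from-`a` semantics documented above.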
let a: __m128 = transmute(a);
let r: __m128 = vfmaddcsh_mask(a, transmute(b), transmute(c), k, ROUNDING); // using `0xff` would have been fine here, but this is what Clang does
transmute(_mm_mask_move_ss(a, k, a, r))
4495 | } |
4496 | } |
4497 | |
4498 | /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and |
4499 | /// store the result in the lower elements of dst using writemask k (elements are copied from c when |
4500 | /// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. |
4501 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, |
4502 | /// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4503 | /// |
4504 | /// Rounding is done according to the rounding parameter, which can be one of: |
4505 | /// |
4506 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4507 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4508 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4509 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4510 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4511 | /// |
4512 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_round_sch) |
4513 | #[inline ] |
4514 | #[target_feature (enable = "avx512fp16" )] |
4515 | #[cfg_attr (test, assert_instr(vfmaddcsh, ROUNDING = 8))] |
4516 | #[rustc_legacy_const_generics (4)] |
4517 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4518 | pub fn _mm_mask3_fmadd_round_sch<const ROUNDING: i32>( |
4519 | a: __m128h, |
4520 | b: __m128h, |
4521 | c: __m128h, |
4522 | k: __mmask8, |
4523 | ) -> __m128h { |
4524 | unsafe { |
4525 | static_assert_rounding!(ROUNDING); |
let c: __m128 = transmute(c);
let r: __m128 = vfmaddcsh_mask(transmute(a), transmute(b), c, k, ROUNDING);
transmute(_mm_move_ss(c, r))
4529 | } |
4530 | } |
4531 | |
4532 | /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and |
4533 | /// store the result in the lower elements of dst using zeromask k (elements are zeroed out when mask |
4534 | /// bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. Each |
4535 | /// complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
4536 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4537 | /// |
4538 | /// Rounding is done according to the rounding parameter, which can be one of: |
4539 | /// |
4540 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4541 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4542 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4543 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4544 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4545 | /// |
4546 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_sch) |
4547 | #[inline ] |
4548 | #[target_feature (enable = "avx512fp16" )] |
4549 | #[cfg_attr (test, assert_instr(vfmaddcsh, ROUNDING = 8))] |
4550 | #[rustc_legacy_const_generics (4)] |
4551 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4552 | pub fn _mm_maskz_fmadd_round_sch<const ROUNDING: i32>( |
4553 | k: __mmask8, |
4554 | a: __m128h, |
4555 | b: __m128h, |
4556 | c: __m128h, |
4557 | ) -> __m128h { |
4558 | unsafe { |
4559 | static_assert_rounding!(ROUNDING); |
let a: __m128 = transmute(a);
let r: __m128 = vfmaddcsh_maskz(a, transmute(b), transmute(c), k, ROUNDING);
transmute(_mm_move_ss(a, r)) // FIXME: If `k == 0`, LLVM optimizes `vfmaddcsh_maskz` to output an all-zero vector, which is incorrect
4563 | } |
4564 | } |
4565 | |
4566 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4567 | /// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed |
4568 | /// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number |
4569 | /// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4570 | /// |
4571 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_pch) |
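///
/// # Examples
///
/// A minimal sketch of the conjugate multiply-add (illustrative only; assumes the
/// `avx512fp16` and `avx512vl` target features are available):
///
/// ```ignore
/// // With a = (1 + 2i) and b = (0 + 1i) in every complex slot and c = 0, each
/// // result slot is a * conj(b) = (1 + 2i) * (0 - 1i) = 2 - 1i.
/// let a = _mm_set_ph(2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0);
/// let b = _mm_set_ph(1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0);
/// let c = _mm_setzero_ph();
/// let r = _mm_fcmadd_pch(a, b, c);
/// ```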
4572 | #[inline ] |
4573 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4574 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4575 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4576 | pub fn _mm_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
_mm_mask3_fcmadd_pch(a, b, c, 0xff)
4578 | } |
4579 | |
4580 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4581 | /// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is |
4582 | /// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4583 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4584 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4585 | /// |
4586 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_pch) |
4587 | #[inline ] |
4588 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4589 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4590 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4591 | pub fn _mm_mask_fcmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
4592 | unsafe { |
let r: __m128 = transmute(_mm_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
transmute(simd_select_bitmask(k, r, transmute(a)))
4595 | } |
4596 | } |
4597 | |
4598 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4599 | /// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is |
4600 | /// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4601 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4602 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4603 | /// |
4604 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_pch) |
4605 | #[inline ] |
4606 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4607 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4608 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4609 | pub fn _mm_mask3_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
4610 | unsafe { |
transmute(vfcmaddcph_mask3_128(
transmute(a),
transmute(b),
transmute(c),
4615 | k, |
4616 | )) |
4617 | } |
4618 | } |
4619 | |
4620 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4621 | /// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is |
4622 | /// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4623 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4624 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4625 | /// |
4626 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_pch) |
4627 | #[inline ] |
4628 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4629 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4630 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4631 | pub fn _mm_maskz_fcmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
4632 | unsafe { |
transmute(vfcmaddcph_maskz_128(
transmute(a),
transmute(b),
transmute(c),
4637 | k, |
4638 | )) |
4639 | } |
4640 | } |
4641 | |
4642 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4643 | /// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed |
4644 | /// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number |
4645 | /// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4646 | /// |
4647 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fcmadd_pch) |
4648 | #[inline ] |
4649 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4650 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4651 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4652 | pub fn _mm256_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
_mm256_mask3_fcmadd_pch(a, b, c, 0xff)
4654 | } |
4655 | |
4656 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4657 | /// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is |
4658 | /// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4659 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4660 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4661 | /// |
4662 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fcmadd_pch) |
4663 | #[inline ] |
4664 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4665 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4666 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4667 | pub fn _mm256_mask_fcmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h { |
4668 | unsafe { |
let r: __m256 = transmute(_mm256_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
transmute(simd_select_bitmask(k, r, transmute(a)))
4671 | } |
4672 | } |
4673 | |
4674 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4675 | /// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is |
4676 | /// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4677 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4678 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4679 | /// |
4680 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fcmadd_pch) |
4681 | #[inline ] |
4682 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4683 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4684 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4685 | pub fn _mm256_mask3_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h { |
4686 | unsafe { |
transmute(vfcmaddcph_mask3_256(
transmute(a),
transmute(b),
transmute(c),
4691 | k, |
4692 | )) |
4693 | } |
4694 | } |
4695 | |
4696 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4697 | /// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is |
4698 | /// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4699 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4700 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4701 | /// |
4702 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fcmadd_pch) |
4703 | #[inline ] |
4704 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4705 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4706 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4707 | pub fn _mm256_maskz_fcmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
4708 | unsafe { |
transmute(vfcmaddcph_maskz_256(
transmute(a),
transmute(b),
transmute(c),
4713 | k, |
4714 | )) |
4715 | } |
4716 | } |
4717 | |
4718 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4719 | /// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed |
4720 | /// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number |
4721 | /// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4722 | /// |
4723 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_pch) |
4724 | #[inline ] |
4725 | #[target_feature (enable = "avx512fp16" )] |
4726 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4727 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4728 | pub fn _mm512_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
4729 | _mm512_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c) |
4730 | } |
4731 | |
4732 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4733 | /// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is |
4734 | /// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4735 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4736 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4737 | /// |
4738 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_pch) |
4739 | #[inline ] |
4740 | #[target_feature (enable = "avx512fp16" )] |
4741 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4742 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4743 | pub fn _mm512_mask_fcmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h { |
4744 | _mm512_mask_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c) |
4745 | } |
4746 | |
4747 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4748 | /// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is |
4749 | /// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4750 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4751 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4752 | /// |
4753 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fcmadd_pch) |
4754 | #[inline ] |
4755 | #[target_feature (enable = "avx512fp16" )] |
4756 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4757 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4758 | pub fn _mm512_mask3_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h { |
4759 | _mm512_mask3_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k) |
4760 | } |
4761 | |
4762 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4763 | /// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is |
4764 | /// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4765 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4766 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4767 | /// |
4768 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_pch) |
4769 | #[inline ] |
4770 | #[target_feature (enable = "avx512fp16" )] |
4771 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4772 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4773 | pub fn _mm512_maskz_fcmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
4774 | _mm512_maskz_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c) |
4775 | } |
4776 | |
4777 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4778 | /// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed |
4779 | /// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number |
4780 | /// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4781 | /// |
4782 | /// Rounding is done according to the rounding parameter, which can be one of: |
4783 | /// |
4784 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4785 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4786 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4787 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4788 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4789 | /// |
4790 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_round_pch) |
4791 | #[inline ] |
4792 | #[target_feature (enable = "avx512fp16" )] |
4793 | #[cfg_attr (test, assert_instr(vfcmaddcph, ROUNDING = 8))] |
4794 | #[rustc_legacy_const_generics (3)] |
4795 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4796 | pub fn _mm512_fcmadd_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
4797 | static_assert_rounding!(ROUNDING); |
_mm512_mask3_fcmadd_round_pch::<ROUNDING>(a, b, c, 0xffff)
4799 | } |
4800 | |
4801 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4802 | /// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is |
4803 | /// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4804 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4805 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4806 | /// |
4807 | /// Rounding is done according to the rounding parameter, which can be one of: |
4808 | /// |
4809 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4810 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4811 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4812 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4813 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4814 | /// |
4815 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_round_pch) |
4816 | #[inline ] |
4817 | #[target_feature (enable = "avx512fp16" )] |
4818 | #[cfg_attr (test, assert_instr(vfcmaddcph, ROUNDING = 8))] |
4819 | #[rustc_legacy_const_generics (4)] |
4820 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4821 | pub fn _mm512_mask_fcmadd_round_pch<const ROUNDING: i32>( |
4822 | a: __m512h, |
4823 | k: __mmask16, |
4824 | b: __m512h, |
4825 | c: __m512h, |
4826 | ) -> __m512h { |
4827 | unsafe { |
4828 | static_assert_rounding!(ROUNDING); |
let r: __m512 = transmute(_mm512_mask3_fcmadd_round_pch::<ROUNDING>(a, b, c, k)); // using `0xffff` would have been fine here, but this is what Clang does
transmute(simd_select_bitmask(k, r, transmute(a)))
4831 | } |
4832 | } |
4833 | |
4834 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4835 | /// to the corresponding complex numbers in c using writemask k (the element is copied from c when the corresponding |
4836 | /// mask bit is not set), and store the results in dst. Each complex number is composed of two adjacent half-precision |
/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex
4838 | /// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4839 | /// |
4840 | /// Rounding is done according to the rounding parameter, which can be one of: |
4841 | /// |
4842 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4843 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4844 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4845 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4846 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4847 | /// |
4848 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fcmadd_round_pch) |
4849 | #[inline ] |
4850 | #[target_feature (enable = "avx512fp16" )] |
4851 | #[cfg_attr (test, assert_instr(vfcmaddcph, ROUNDING = 8))] |
4852 | #[rustc_legacy_const_generics (4)] |
4853 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4854 | pub fn _mm512_mask3_fcmadd_round_pch<const ROUNDING: i32>( |
4855 | a: __m512h, |
4856 | b: __m512h, |
4857 | c: __m512h, |
4858 | k: __mmask16, |
4859 | ) -> __m512h { |
4860 | unsafe { |
4861 | static_assert_rounding!(ROUNDING); |
transmute(vfcmaddcph_mask3_512(
transmute(a),
transmute(b),
transmute(c),
4866 | k, |
4867 | ROUNDING, |
4868 | )) |
4869 | } |
4870 | } |
4871 | |
4872 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4873 | /// to the corresponding complex numbers in c using zeromask k (the element is zeroed out when the corresponding |
4874 | /// mask bit is not set), and store the results in dst. Each complex number is composed of two adjacent half-precision |
/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex
4876 | /// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4877 | /// |
4878 | /// Rounding is done according to the rounding parameter, which can be one of: |
4879 | /// |
4880 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4881 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4882 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4883 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4884 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4885 | /// |
4886 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_round_pch) |
4887 | #[inline ] |
4888 | #[target_feature (enable = "avx512fp16" )] |
4889 | #[cfg_attr (test, assert_instr(vfcmaddcph, ROUNDING = 8))] |
4890 | #[rustc_legacy_const_generics (4)] |
4891 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4892 | pub fn _mm512_maskz_fcmadd_round_pch<const ROUNDING: i32>( |
4893 | k: __mmask16, |
4894 | a: __m512h, |
4895 | b: __m512h, |
4896 | c: __m512h, |
4897 | ) -> __m512h { |
4898 | unsafe { |
4899 | static_assert_rounding!(ROUNDING); |
transmute(vfcmaddcph_maskz_512(
transmute(a),
transmute(b),
transmute(c),
4904 | k, |
4905 | ROUNDING, |
4906 | )) |
4907 | } |
4908 | } |
4909 | |
4910 | /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, |
4911 | /// accumulate to the lower complex number in c, and store the result in the lower elements of dst, |
4912 | /// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is |
4913 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
4914 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4915 | /// |
4916 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_sch) |
4917 | #[inline ] |
4918 | #[target_feature (enable = "avx512fp16" )] |
4919 | #[cfg_attr (test, assert_instr(vfcmaddcsh))] |
4920 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4921 | pub fn _mm_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
4922 | _mm_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c) |
4923 | } |
4924 | |
4925 | /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, |
4926 | /// accumulate to the lower complex number in c, and store the result in the lower elements of dst using |
4927 | /// writemask k (the element is copied from a when the corresponding mask bit is not set), and copy the upper |
4928 | /// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent |
4929 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4930 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4931 | /// |
4932 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_sch) |
4933 | #[inline ] |
4934 | #[target_feature (enable = "avx512fp16" )] |
4935 | #[cfg_attr (test, assert_instr(vfcmaddcsh))] |
4936 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4937 | pub fn _mm_mask_fcmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
4938 | _mm_mask_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c) |
4939 | } |
4940 | |
4941 | /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, |
4942 | /// accumulate to the lower complex number in c, and store the result in the lower elements of dst using |
4943 | /// writemask k (the element is copied from c when the corresponding mask bit is not set), and copy the upper |
4944 | /// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent |
4945 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4946 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4947 | /// |
4948 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_sch) |
4949 | #[inline ] |
4950 | #[target_feature (enable = "avx512fp16" )] |
4951 | #[cfg_attr (test, assert_instr(vfcmaddcsh))] |
4952 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4953 | pub fn _mm_mask3_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
4954 | _mm_mask3_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k) |
4955 | } |
4956 | |
4957 | /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, |
4958 | /// accumulate to the lower complex number in c, and store the result in the lower elements of dst using |
4959 | /// zeromask k (the element is zeroed out when the corresponding mask bit is not set), and copy the upper |
4960 | /// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent |
4961 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4962 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4963 | /// |
4964 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_sch) |
4965 | #[inline ] |
4966 | #[target_feature (enable = "avx512fp16" )] |
4967 | #[cfg_attr (test, assert_instr(vfcmaddcsh))] |
4968 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4969 | pub fn _mm_maskz_fcmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
4970 | _mm_maskz_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c) |
4971 | } |
4972 | |
4973 | /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, |
4974 | /// accumulate to the lower complex number in c, and store the result in the lower elements of dst, |
4975 | /// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is |
4976 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
4977 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4978 | /// |
4979 | /// Rounding is done according to the rounding parameter, which can be one of: |
4980 | /// |
4981 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4982 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4983 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4984 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4985 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4986 | /// |
4987 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_round_sch) |
4988 | #[inline ] |
4989 | #[target_feature (enable = "avx512fp16" )] |
4990 | #[cfg_attr (test, assert_instr(vfcmaddcsh, ROUNDING = 8))] |
4991 | #[rustc_legacy_const_generics (3)] |
4992 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4993 | pub fn _mm_fcmadd_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
4994 | unsafe { |
4995 | static_assert_rounding!(ROUNDING); |
        transmute(vfcmaddcsh_mask(
            transmute(a),
            transmute(b),
            transmute(c),
            0xff,
            ROUNDING,
        ))
5003 | } |
5004 | } |
5005 | |
5006 | /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, |
5007 | /// accumulate to the lower complex number in c, and store the result in the lower elements of dst using |
5008 | /// writemask k (the element is copied from a when the corresponding mask bit is not set), and copy the upper |
5009 | /// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent |
5010 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
5011 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
5012 | /// |
5013 | /// Rounding is done according to the rounding parameter, which can be one of: |
5014 | /// |
5015 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5016 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5017 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5018 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5019 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5020 | /// |
5021 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_round_sch) |
5022 | #[inline ] |
5023 | #[target_feature (enable = "avx512fp16" )] |
5024 | #[cfg_attr (test, assert_instr(vfcmaddcsh, ROUNDING = 8))] |
5025 | #[rustc_legacy_const_generics (4)] |
5026 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5027 | pub fn _mm_mask_fcmadd_round_sch<const ROUNDING: i32>( |
5028 | a: __m128h, |
5029 | k: __mmask8, |
5030 | b: __m128h, |
5031 | c: __m128h, |
5032 | ) -> __m128h { |
5033 | unsafe { |
5034 | static_assert_rounding!(ROUNDING); |
        let a: __m128 = transmute(a);
        let r: __m128 = vfcmaddcsh_mask(a, transmute(b), transmute(c), k, ROUNDING);
        transmute(_mm_mask_move_ss(a, k, a, r))
5038 | } |
5039 | } |
5040 | |
5041 | /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, |
5042 | /// accumulate to the lower complex number in c, and store the result in the lower elements of dst using |
5043 | /// writemask k (the element is copied from c when the corresponding mask bit is not set), and copy the upper |
5044 | /// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent |
5045 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
5046 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
5047 | /// |
5048 | /// Rounding is done according to the rounding parameter, which can be one of: |
5049 | /// |
5050 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5051 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5052 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5053 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5054 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5055 | /// |
5056 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_round_sch) |
5057 | #[inline ] |
5058 | #[target_feature (enable = "avx512fp16" )] |
5059 | #[cfg_attr (test, assert_instr(vfcmaddcsh, ROUNDING = 8))] |
5060 | #[rustc_legacy_const_generics (4)] |
5061 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5062 | pub fn _mm_mask3_fcmadd_round_sch<const ROUNDING: i32>( |
5063 | a: __m128h, |
5064 | b: __m128h, |
5065 | c: __m128h, |
5066 | k: __mmask8, |
5067 | ) -> __m128h { |
5068 | unsafe { |
5069 | static_assert_rounding!(ROUNDING); |
        let c: __m128 = transmute(c);
        let r: __m128 = vfcmaddcsh_mask(transmute(a), transmute(b), c, k, ROUNDING);
        transmute(_mm_move_ss(c, r))
5073 | } |
5074 | } |
5075 | |
5076 | /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, |
5077 | /// accumulate to the lower complex number in c using zeromask k (the element is zeroed out when the corresponding |
5078 | /// mask bit is not set), and store the result in the lower elements of dst, and copy the upper 6 packed elements |
5079 | /// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision (16-bit) |
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex
5081 | /// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
5082 | /// |
5083 | /// Rounding is done according to the rounding parameter, which can be one of: |
5084 | /// |
5085 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5086 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5087 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5088 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5089 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5090 | /// |
5091 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_round_sch) |
5092 | #[inline ] |
5093 | #[target_feature (enable = "avx512fp16" )] |
5094 | #[cfg_attr (test, assert_instr(vfcmaddcsh, ROUNDING = 8))] |
5095 | #[rustc_legacy_const_generics (4)] |
5096 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5097 | pub fn _mm_maskz_fcmadd_round_sch<const ROUNDING: i32>( |
5098 | k: __mmask8, |
5099 | a: __m128h, |
5100 | b: __m128h, |
5101 | c: __m128h, |
5102 | ) -> __m128h { |
5103 | unsafe { |
5104 | static_assert_rounding!(ROUNDING); |
        let a: __m128 = transmute(a);
        let r: __m128 = vfcmaddcsh_maskz(a, transmute(b), transmute(c), k, ROUNDING);
        transmute(_mm_move_ss(a, r)) // FIXME: If `k == 0`, LLVM optimizes `vfcmaddcsh_maskz` to output an all-zero vector, which is incorrect
5108 | } |
5109 | } |
5110 | |
5111 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5112 | /// result to packed elements in c, and store the results in dst. |
5113 | /// |
5114 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_ph) |
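///
/// A minimal sketch of the packed form (illustrative only; assumes nightly with
/// `feature(stdarch_x86_avx512_f16)` and AVX512-FP16/AVX512VL support; `demo` is a hypothetical wrapper):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// unsafe fn demo() -> __m128h {
///     let a = _mm_set1_ph(2.0);
///     let b = _mm_set1_ph(3.0);
///     let c = _mm_set1_ph(1.0);
///     // Every lane becomes 2.0 * 3.0 + 1.0 = 7.0, with a single rounding of the result.
///     _mm_fmadd_ph(a, b, c)
/// }
/// ```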
5115 | #[inline ] |
5116 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5117 | #[cfg_attr (test, assert_instr(vfmadd))] |
5118 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5119 | pub fn _mm_fmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_fma(a, b, c) }
5121 | } |
5122 | |
5123 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5124 | /// result to packed elements in c, and store the results in dst using writemask k (the element is copied |
5125 | /// from a when the corresponding mask bit is not set). |
5126 | /// |
5127 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_ph) |
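///
/// A masking sketch (illustrative only; the mask value is arbitrary and `demo` is hypothetical):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// unsafe fn demo(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
///     // Lanes 0 and 2 receive a * b + c; every other lane keeps its value from `a`.
///     _mm_mask_fmadd_ph(a, 0b0000_0101, b, c)
/// }
/// ```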
5128 | #[inline ] |
5129 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5130 | #[cfg_attr (test, assert_instr(vfmadd))] |
5131 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5132 | pub fn _mm_mask_fmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), a) }
5134 | } |
5135 | |
5136 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5137 | /// result to packed elements in c, and store the results in dst using writemask k (the element is copied |
5138 | /// from c when the corresponding mask bit is not set). |
5139 | /// |
5140 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_ph) |
5141 | #[inline ] |
5142 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5143 | #[cfg_attr (test, assert_instr(vfmadd))] |
5144 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5145 | pub fn _mm_mask3_fmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), c) }
5147 | } |
5148 | |
5149 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5150 | /// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed |
5151 | /// out when the corresponding mask bit is not set). |
5152 | /// |
5153 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_ph) |
5154 | #[inline ] |
5155 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5156 | #[cfg_attr (test, assert_instr(vfmadd))] |
5157 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5158 | pub fn _mm_maskz_fmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), _mm_setzero_ph()) }
5160 | } |
5161 | |
5162 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5163 | /// result to packed elements in c, and store the results in dst. |
5164 | /// |
5165 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_ph) |
5166 | #[inline ] |
5167 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5168 | #[cfg_attr (test, assert_instr(vfmadd))] |
5169 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5170 | pub fn _mm256_fmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_fma(a, b, c) }
5172 | } |
5173 | |
5174 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5175 | /// result to packed elements in c, and store the results in dst using writemask k (the element is copied |
5176 | /// from a when the corresponding mask bit is not set). |
5177 | /// |
5178 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmadd_ph) |
5179 | #[inline ] |
5180 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5181 | #[cfg_attr (test, assert_instr(vfmadd))] |
5182 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5183 | pub fn _mm256_mask_fmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), a) }
5185 | } |
5186 | |
5187 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5188 | /// result to packed elements in c, and store the results in dst using writemask k (the element is copied |
5189 | /// from c when the corresponding mask bit is not set). |
5190 | /// |
5191 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmadd_ph) |
5192 | #[inline ] |
5193 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5194 | #[cfg_attr (test, assert_instr(vfmadd))] |
5195 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5196 | pub fn _mm256_mask3_fmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), c) }
5198 | } |
5199 | |
5200 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5201 | /// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed |
5202 | /// out when the corresponding mask bit is not set). |
5203 | /// |
5204 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmadd_ph) |
5205 | #[inline ] |
5206 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5207 | #[cfg_attr (test, assert_instr(vfmadd))] |
5208 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5209 | pub fn _mm256_maskz_fmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), _mm256_setzero_ph()) }
5211 | } |
5212 | |
5213 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5214 | /// result to packed elements in c, and store the results in dst. |
5215 | /// |
5216 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_ph) |
5217 | #[inline ] |
5218 | #[target_feature (enable = "avx512fp16" )] |
5219 | #[cfg_attr (test, assert_instr(vfmadd))] |
5220 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5221 | pub fn _mm512_fmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_fma(a, b, c) }
5223 | } |
5224 | |
5225 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5226 | /// result to packed elements in c, and store the results in dst using writemask k (the element is copied |
5227 | /// from a when the corresponding mask bit is not set). |
5228 | /// |
5229 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_ph) |
5230 | #[inline ] |
5231 | #[target_feature (enable = "avx512fp16" )] |
5232 | #[cfg_attr (test, assert_instr(vfmadd))] |
5233 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5234 | pub fn _mm512_mask_fmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), a) }
5236 | } |
5237 | |
5238 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5239 | /// result to packed elements in c, and store the results in dst using writemask k (the element is copied |
5240 | /// from c when the corresponding mask bit is not set). |
5241 | /// |
5242 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_ph) |
5243 | #[inline ] |
5244 | #[target_feature (enable = "avx512fp16" )] |
5245 | #[cfg_attr (test, assert_instr(vfmadd))] |
5246 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5247 | pub fn _mm512_mask3_fmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), c) }
5249 | } |
5250 | |
5251 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5252 | /// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed |
5253 | /// out when the corresponding mask bit is not set). |
5254 | /// |
5255 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_ph) |
5256 | #[inline ] |
5257 | #[target_feature (enable = "avx512fp16" )] |
5258 | #[cfg_attr (test, assert_instr(vfmadd))] |
5259 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5260 | pub fn _mm512_maskz_fmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), _mm512_setzero_ph()) }
5262 | } |
5263 | |
5264 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5265 | /// result to packed elements in c, and store the results in dst. |
5266 | /// |
5267 | /// Rounding is done according to the rounding parameter, which can be one of: |
5268 | /// |
5269 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5270 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5271 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5272 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5273 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5274 | /// |
5275 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_round_ph) |
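///
/// A sketch of selecting an explicit rounding mode (illustrative only; `demo` is a hypothetical
/// wrapper and a nightly toolchain with `feature(stdarch_x86_avx512_f16)` is assumed):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// unsafe fn demo(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
///     // Round toward zero and suppress exceptions instead of using MXCSR.RC.
///     _mm512_fmadd_round_ph::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c)
/// }
/// ```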
5276 | #[inline ] |
5277 | #[target_feature (enable = "avx512fp16" )] |
5278 | #[cfg_attr (test, assert_instr(vfmadd, ROUNDING = 8))] |
5279 | #[rustc_legacy_const_generics (3)] |
5280 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5281 | pub fn _mm512_fmadd_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
5282 | unsafe { |
5283 | static_assert_rounding!(ROUNDING); |
5284 | vfmaddph_512(a, b, c, ROUNDING) |
5285 | } |
5286 | } |
5287 | |
5288 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5289 | /// result to packed elements in c, and store the results in dst using writemask k (the element is copied |
5290 | /// from a when the corresponding mask bit is not set). |
5291 | /// |
5292 | /// Rounding is done according to the rounding parameter, which can be one of: |
5293 | /// |
5294 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5295 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5296 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5297 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5298 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5299 | /// |
5300 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_round_ph) |
5301 | #[inline ] |
5302 | #[target_feature (enable = "avx512fp16" )] |
5303 | #[cfg_attr (test, assert_instr(vfmadd, ROUNDING = 8))] |
5304 | #[rustc_legacy_const_generics (4)] |
5305 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5306 | pub fn _mm512_mask_fmadd_round_ph<const ROUNDING: i32>( |
5307 | a: __m512h, |
5308 | k: __mmask32, |
5309 | b: __m512h, |
5310 | c: __m512h, |
5311 | ) -> __m512h { |
5312 | unsafe { |
5313 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_fmadd_round_ph::<ROUNDING>(a, b, c), a)
5315 | } |
5316 | } |
5317 | |
5318 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5319 | /// result to packed elements in c, and store the results in dst using writemask k (the element is copied |
5320 | /// from c when the corresponding mask bit is not set). |
5321 | /// |
5322 | /// Rounding is done according to the rounding parameter, which can be one of: |
5323 | /// |
5324 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5325 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5326 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5327 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5328 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5329 | /// |
5330 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_round_ph) |
5331 | #[inline ] |
5332 | #[target_feature (enable = "avx512fp16" )] |
5333 | #[cfg_attr (test, assert_instr(vfmadd, ROUNDING = 8))] |
5334 | #[rustc_legacy_const_generics (4)] |
5335 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5336 | pub fn _mm512_mask3_fmadd_round_ph<const ROUNDING: i32>( |
5337 | a: __m512h, |
5338 | b: __m512h, |
5339 | c: __m512h, |
5340 | k: __mmask32, |
5341 | ) -> __m512h { |
5342 | unsafe { |
5343 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_fmadd_round_ph::<ROUNDING>(a, b, c), c)
5345 | } |
5346 | } |
5347 | |
5348 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5349 | /// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed |
5350 | /// out when the corresponding mask bit is not set). |
5351 | /// |
5352 | /// Rounding is done according to the rounding parameter, which can be one of: |
5353 | /// |
5354 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5355 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5356 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5357 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5358 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5359 | /// |
5360 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_round_ph) |
5361 | #[inline ] |
5362 | #[target_feature (enable = "avx512fp16" )] |
5363 | #[cfg_attr (test, assert_instr(vfmadd, ROUNDING = 8))] |
5364 | #[rustc_legacy_const_generics (4)] |
5365 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5366 | pub fn _mm512_maskz_fmadd_round_ph<const ROUNDING: i32>( |
5367 | k: __mmask32, |
5368 | a: __m512h, |
5369 | b: __m512h, |
5370 | c: __m512h, |
5371 | ) -> __m512h { |
5372 | unsafe { |
5373 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(
            k,
            _mm512_fmadd_round_ph::<ROUNDING>(a, b, c),
            _mm512_setzero_ph(),
        )
5379 | } |
5380 | } |
5381 | |
5382 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate |
5383 | /// result to the lower element in c. Store the result in the lower element of dst, and copy the upper |
5384 | /// 7 packed elements from a to the upper elements of dst. |
5385 | /// |
5386 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sh) |
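///
/// A scalar sketch (illustrative only; `demo` is a hypothetical wrapper):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// unsafe fn demo(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
///     // Lane 0 becomes a[0] * b[0] + c[0]; lanes 1..=7 are copied from `a` unchanged.
///     _mm_fmadd_sh(a, b, c)
/// }
/// ```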
5387 | #[inline ] |
5388 | #[target_feature (enable = "avx512fp16" )] |
5389 | #[cfg_attr (test, assert_instr(vfmadd))] |
5390 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5391 | pub fn _mm_fmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
5392 | unsafe { |
5393 | let extracta: f16 = simd_extract!(a, 0); |
5394 | let extractb: f16 = simd_extract!(b, 0); |
5395 | let extractc: f16 = simd_extract!(c, 0); |
        let r: f16 = fmaf16(extracta, extractb, extractc);
5397 | simd_insert!(a, 0, r) |
5398 | } |
5399 | } |
5400 | |
5401 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate |
5402 | /// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element |
5403 | /// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the |
5404 | /// upper elements of dst. |
5405 | /// |
5406 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_sh) |
5407 | #[inline ] |
5408 | #[target_feature (enable = "avx512fp16" )] |
5409 | #[cfg_attr (test, assert_instr(vfmadd))] |
5410 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5411 | pub fn _mm_mask_fmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
5412 | unsafe { |
5413 | let mut fmadd: f16 = simd_extract!(a, 0); |
5414 | if k & 1 != 0 { |
5415 | let extractb: f16 = simd_extract!(b, 0); |
5416 | let extractc: f16 = simd_extract!(c, 0); |
            fmadd = fmaf16(fmadd, extractb, extractc);
5418 | } |
5419 | simd_insert!(a, 0, fmadd) |
5420 | } |
5421 | } |
5422 | |
5423 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate |
5424 | /// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element |
5425 | /// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the |
5426 | /// upper elements of dst. |
5427 | /// |
5428 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_sh) |
5429 | #[inline ] |
5430 | #[target_feature (enable = "avx512fp16" )] |
5431 | #[cfg_attr (test, assert_instr(vfmadd))] |
5432 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5433 | pub fn _mm_mask3_fmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
5434 | unsafe { |
5435 | let mut fmadd: f16 = simd_extract!(c, 0); |
5436 | if k & 1 != 0 { |
5437 | let extracta: f16 = simd_extract!(a, 0); |
5438 | let extractb: f16 = simd_extract!(b, 0); |
            fmadd = fmaf16(extracta, extractb, fmadd);
5440 | } |
5441 | simd_insert!(c, 0, fmadd) |
5442 | } |
5443 | } |
5444 | |
5445 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate |
5446 | /// result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element |
5447 | /// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the |
5448 | /// upper elements of dst. |
5449 | /// |
5450 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_sh) |
5451 | #[inline ] |
5452 | #[target_feature (enable = "avx512fp16" )] |
5453 | #[cfg_attr (test, assert_instr(vfmadd))] |
5454 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5455 | pub fn _mm_maskz_fmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
5456 | unsafe { |
5457 | let mut fmadd: f16 = 0.0; |
5458 | if k & 1 != 0 { |
5459 | let extracta: f16 = simd_extract!(a, 0); |
5460 | let extractb: f16 = simd_extract!(b, 0); |
5461 | let extractc: f16 = simd_extract!(c, 0); |
            fmadd = fmaf16(extracta, extractb, extractc);
5463 | } |
5464 | simd_insert!(a, 0, fmadd) |
5465 | } |
5466 | } |
5467 | |
5468 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate |
5469 | /// result to the lower element in c. Store the result in the lower element of dst, and copy the upper |
5470 | /// 7 packed elements from a to the upper elements of dst. |
5471 | /// |
5472 | /// Rounding is done according to the rounding parameter, which can be one of: |
5473 | /// |
5474 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5475 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5476 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5477 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5478 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5479 | /// |
5480 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_sh) |
5481 | #[inline ] |
5482 | #[target_feature (enable = "avx512fp16" )] |
5483 | #[cfg_attr (test, assert_instr(vfmadd, ROUNDING = 8))] |
5484 | #[rustc_legacy_const_generics (3)] |
5485 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5486 | pub fn _mm_fmadd_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
5487 | unsafe { |
5488 | static_assert_rounding!(ROUNDING); |
5489 | let extracta: f16 = simd_extract!(a, 0); |
5490 | let extractb: f16 = simd_extract!(b, 0); |
5491 | let extractc: f16 = simd_extract!(c, 0); |
        let r: f16 = vfmaddsh(extracta, extractb, extractc, ROUNDING);
5493 | simd_insert!(a, 0, r) |
5494 | } |
5495 | } |
5496 | |
5497 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate |
5498 | /// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element |
5499 | /// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the |
5500 | /// upper elements of dst. |
5501 | /// |
5502 | /// Rounding is done according to the rounding parameter, which can be one of: |
5503 | /// |
5504 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5505 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5506 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5507 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5508 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5509 | /// |
5510 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_sh) |
5511 | #[inline ] |
5512 | #[target_feature (enable = "avx512fp16" )] |
5513 | #[cfg_attr (test, assert_instr(vfmadd, ROUNDING = 8))] |
5514 | #[rustc_legacy_const_generics (4)] |
5515 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5516 | pub fn _mm_mask_fmadd_round_sh<const ROUNDING: i32>( |
5517 | a: __m128h, |
5518 | k: __mmask8, |
5519 | b: __m128h, |
5520 | c: __m128h, |
5521 | ) -> __m128h { |
5522 | unsafe { |
5523 | static_assert_rounding!(ROUNDING); |
5524 | let mut fmadd: f16 = simd_extract!(a, 0); |
5525 | if k & 1 != 0 { |
5526 | let extractb: f16 = simd_extract!(b, 0); |
5527 | let extractc: f16 = simd_extract!(c, 0); |
            fmadd = vfmaddsh(fmadd, extractb, extractc, ROUNDING);
5529 | } |
5530 | simd_insert!(a, 0, fmadd) |
5531 | } |
5532 | } |
5533 | |
5534 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate |
5535 | /// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element |
5536 | /// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the |
5537 | /// upper elements of dst. |
5538 | /// |
5539 | /// Rounding is done according to the rounding parameter, which can be one of: |
5540 | /// |
5541 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5542 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5543 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5544 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5545 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5546 | /// |
5547 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_round_sh) |
5548 | #[inline ] |
5549 | #[target_feature (enable = "avx512fp16" )] |
5550 | #[cfg_attr (test, assert_instr(vfmadd, ROUNDING = 8))] |
5551 | #[rustc_legacy_const_generics (4)] |
5552 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5553 | pub fn _mm_mask3_fmadd_round_sh<const ROUNDING: i32>( |
5554 | a: __m128h, |
5555 | b: __m128h, |
5556 | c: __m128h, |
5557 | k: __mmask8, |
5558 | ) -> __m128h { |
5559 | unsafe { |
5560 | static_assert_rounding!(ROUNDING); |
5561 | let mut fmadd: f16 = simd_extract!(c, 0); |
5562 | if k & 1 != 0 { |
5563 | let extracta: f16 = simd_extract!(a, 0); |
5564 | let extractb: f16 = simd_extract!(b, 0); |
            fmadd = vfmaddsh(extracta, extractb, fmadd, ROUNDING);
5566 | } |
5567 | simd_insert!(c, 0, fmadd) |
5568 | } |
5569 | } |
5570 | |
5571 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate |
5572 | /// result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element |
5573 | /// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the |
5574 | /// upper elements of dst. |
5575 | /// |
5576 | /// Rounding is done according to the rounding parameter, which can be one of: |
5577 | /// |
5578 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5579 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5580 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5581 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5582 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5583 | /// |
5584 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_sh) |
5585 | #[inline ] |
5586 | #[target_feature (enable = "avx512fp16" )] |
5587 | #[cfg_attr (test, assert_instr(vfmadd, ROUNDING = 8))] |
5588 | #[rustc_legacy_const_generics (4)] |
5589 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5590 | pub fn _mm_maskz_fmadd_round_sh<const ROUNDING: i32>( |
5591 | k: __mmask8, |
5592 | a: __m128h, |
5593 | b: __m128h, |
5594 | c: __m128h, |
5595 | ) -> __m128h { |
5596 | unsafe { |
5597 | static_assert_rounding!(ROUNDING); |
5598 | let mut fmadd: f16 = 0.0; |
5599 | if k & 1 != 0 { |
5600 | let extracta: f16 = simd_extract!(a, 0); |
5601 | let extractb: f16 = simd_extract!(b, 0); |
5602 | let extractc: f16 = simd_extract!(c, 0); |
            fmadd = vfmaddsh(extracta, extractb, extractc, ROUNDING);
5604 | } |
5605 | simd_insert!(a, 0, fmadd) |
5606 | } |
5607 | } |
5608 | |
5609 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5610 | /// in c from the intermediate result, and store the results in dst. |
5612 | /// |
5613 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_ph) |
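///
/// A minimal sketch (illustrative only; assumes nightly with `feature(stdarch_x86_avx512_f16)`
/// and AVX512-FP16/AVX512VL support; `demo` is a hypothetical wrapper):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// unsafe fn demo() -> __m128h {
///     let a = _mm_set1_ph(2.0);
///     let b = _mm_set1_ph(3.0);
///     let c = _mm_set1_ph(1.0);
///     // Every lane becomes 2.0 * 3.0 - 1.0 = 5.0.
///     _mm_fmsub_ph(a, b, c)
/// }
/// ```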
5614 | #[inline ] |
5615 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5616 | #[cfg_attr (test, assert_instr(vfmsub))] |
5617 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5618 | pub fn _mm_fmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_fma(a, b, simd_neg(c)) }
5620 | } |
5621 | |
5622 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5623 | /// in c from the intermediate result, and store the results in dst using writemask k (the element is copied |
5624 | /// from a when the corresponding mask bit is not set). |
5625 | /// |
5626 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_ph) |
5627 | #[inline ] |
5628 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5629 | #[cfg_attr (test, assert_instr(vfmsub))] |
5630 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5631 | pub fn _mm_mask_fmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), a) }
5633 | } |
5634 | |
5635 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5636 | /// in c from the intermediate result, and store the results in dst using writemask k (the element is copied |
5637 | /// from c when the corresponding mask bit is not set). |
5638 | /// |
5639 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_ph) |
5640 | #[inline ] |
5641 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5642 | #[cfg_attr (test, assert_instr(vfmsub))] |
5643 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5644 | pub fn _mm_mask3_fmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), c) }
5646 | } |
5647 | |
5648 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5649 | /// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed |
5650 | /// out when the corresponding mask bit is not set). |
5651 | /// |
5652 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_ph) |
5653 | #[inline ] |
5654 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5655 | #[cfg_attr (test, assert_instr(vfmsub))] |
5656 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5657 | pub fn _mm_maskz_fmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), _mm_setzero_ph()) }
5659 | } |
5660 | |
5661 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5662 | /// in c from the intermediate result, and store the results in dst. |
5663 | /// |
5664 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsub_ph) |
5665 | #[inline ] |
5666 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5667 | #[cfg_attr (test, assert_instr(vfmsub))] |
5668 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5669 | pub fn _mm256_fmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_fma(a, b, simd_neg(c)) }
5671 | } |
5672 | |
5673 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5674 | /// in c from the intermediate result, and store the results in dst using writemask k (the element is copied |
5675 | /// from a when the corresponding mask bit is not set). |
5676 | /// |
5677 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmsub_ph) |
5678 | #[inline ] |
5679 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5680 | #[cfg_attr (test, assert_instr(vfmsub))] |
5681 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5682 | pub fn _mm256_mask_fmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), a) }
5684 | } |
5685 | |
5686 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5687 | /// in c from the intermediate result, and store the results in dst using writemask k (the element is copied |
5688 | /// from c when the corresponding mask bit is not set). |
5689 | /// |
5690 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmsub_ph) |
5691 | #[inline ] |
5692 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5693 | #[cfg_attr (test, assert_instr(vfmsub))] |
5694 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5695 | pub fn _mm256_mask3_fmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), c) }
5697 | } |
5698 | |
5699 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5700 | /// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed |
5701 | /// out when the corresponding mask bit is not set). |
5702 | /// |
5703 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmsub_ph) |
5704 | #[inline ] |
5705 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5706 | #[cfg_attr (test, assert_instr(vfmsub))] |
5707 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5708 | pub fn _mm256_maskz_fmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), _mm256_setzero_ph()) }
5710 | } |
5711 | |
5712 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5713 | /// in c from the intermediate result, and store the results in dst. |
5714 | /// |
5715 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_ph) |
5716 | #[inline ] |
5717 | #[target_feature (enable = "avx512fp16" )] |
5718 | #[cfg_attr (test, assert_instr(vfmsub))] |
5719 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5720 | pub fn _mm512_fmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_fma(a, b, simd_neg(c)) }
5722 | } |
5723 | |
5724 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5725 | /// in c from the intermediate result, and store the results in dst using writemask k (the element is copied |
5726 | /// from a when the corresponding mask bit is not set). |
5727 | /// |
5728 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_ph) |
5729 | #[inline ] |
5730 | #[target_feature (enable = "avx512fp16" )] |
5731 | #[cfg_attr (test, assert_instr(vfmsub))] |
5732 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5733 | pub fn _mm512_mask_fmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), a) }
5735 | } |
5736 | |
5737 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5738 | /// in c from the intermediate result, and store the results in dst using writemask k (the element is copied |
5739 | /// from c when the corresponding mask bit is not set). |
5740 | /// |
5741 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_ph) |
5742 | #[inline ] |
5743 | #[target_feature (enable = "avx512fp16" )] |
5744 | #[cfg_attr (test, assert_instr(vfmsub))] |
5745 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5746 | pub fn _mm512_mask3_fmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), c) }
5748 | } |
5749 | |
5750 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5751 | /// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed |
5752 | /// out when the corresponding mask bit is not set). |
5753 | /// |
5754 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_ph) |
5755 | #[inline ] |
5756 | #[target_feature (enable = "avx512fp16" )] |
5757 | #[cfg_attr (test, assert_instr(vfmsub))] |
5758 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5759 | pub fn _mm512_maskz_fmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), _mm512_setzero_ph()) }
5761 | } |
5762 | |
5763 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5764 | /// in c from the intermediate result, and store the results in dst. |
5765 | /// |
5766 | /// Rounding is done according to the rounding parameter, which can be one of: |
5767 | /// |
5768 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5769 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5770 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5771 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5772 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5773 | /// |
5774 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_round_ph) |
5775 | #[inline ] |
5776 | #[target_feature (enable = "avx512fp16" )] |
5777 | #[cfg_attr (test, assert_instr(vfmsub, ROUNDING = 8))] |
5778 | #[rustc_legacy_const_generics (3)] |
5779 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5780 | pub fn _mm512_fmsub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
5781 | unsafe { |
5782 | static_assert_rounding!(ROUNDING); |
        vfmaddph_512(a, b, simd_neg(c), ROUNDING)
5784 | } |
5785 | } |
5786 | |
5787 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5788 | /// in c from the intermediate result, and store the results in dst using writemask k (the element is copied |
5789 | /// from a when the corresponding mask bit is not set). |
5790 | /// |
5791 | /// Rounding is done according to the rounding parameter, which can be one of: |
5792 | /// |
5793 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5794 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5795 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5796 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5797 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5798 | /// |
5799 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_round_ph) |
5800 | #[inline ] |
5801 | #[target_feature (enable = "avx512fp16" )] |
5802 | #[cfg_attr (test, assert_instr(vfmsub, ROUNDING = 8))] |
5803 | #[rustc_legacy_const_generics (4)] |
5804 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5805 | pub fn _mm512_mask_fmsub_round_ph<const ROUNDING: i32>( |
5806 | a: __m512h, |
5807 | k: __mmask32, |
5808 | b: __m512h, |
5809 | c: __m512h, |
5810 | ) -> __m512h { |
5811 | unsafe { |
5812 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_fmsub_round_ph::<ROUNDING>(a, b, c), a)
5814 | } |
5815 | } |
5816 | |
5817 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5818 | /// in c from the intermediate result, and store the results in dst using writemask k (the element is copied |
5819 | /// from c when the corresponding mask bit is not set). |
5820 | /// |
5821 | /// Rounding is done according to the rounding parameter, which can be one of: |
5822 | /// |
5823 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5824 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5825 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5826 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5827 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5828 | /// |
5829 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_round_ph) |
5830 | #[inline ] |
5831 | #[target_feature (enable = "avx512fp16" )] |
5832 | #[cfg_attr (test, assert_instr(vfmsub, ROUNDING = 8))] |
5833 | #[rustc_legacy_const_generics (4)] |
5834 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5835 | pub fn _mm512_mask3_fmsub_round_ph<const ROUNDING: i32>( |
5836 | a: __m512h, |
5837 | b: __m512h, |
5838 | c: __m512h, |
5839 | k: __mmask32, |
5840 | ) -> __m512h { |
5841 | unsafe { |
5842 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_fmsub_round_ph::<ROUNDING>(a, b, c), c)
5844 | } |
5845 | } |
5846 | |
5847 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5848 | /// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed |
5849 | /// out when the corresponding mask bit is not set). |
5850 | /// |
5851 | /// Rounding is done according to the rounding parameter, which can be one of: |
5852 | /// |
5853 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5854 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5855 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5856 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5857 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5858 | /// |
5859 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_round_ph) |
5860 | #[inline ] |
5861 | #[target_feature (enable = "avx512fp16" )] |
5862 | #[cfg_attr (test, assert_instr(vfmsub, ROUNDING = 8))] |
5863 | #[rustc_legacy_const_generics (4)] |
5864 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5865 | pub fn _mm512_maskz_fmsub_round_ph<const ROUNDING: i32>( |
5866 | k: __mmask32, |
5867 | a: __m512h, |
5868 | b: __m512h, |
5869 | c: __m512h, |
5870 | ) -> __m512h { |
5871 | unsafe { |
5872 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(
            k,
            _mm512_fmsub_round_ph::<ROUNDING>(a, b, c),
            _mm512_setzero_ph(),
        )
5878 | } |
5879 | } |
5880 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper
5883 | /// 7 packed elements from a to the upper elements of dst. |
5884 | /// |
5885 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_sh) |
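///
/// A scalar sketch (illustrative only; `demo` is a hypothetical wrapper):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// unsafe fn demo(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
///     // Lane 0 becomes a[0] * b[0] - c[0]; lanes 1..=7 are copied from `a`.
///     _mm_fmsub_sh(a, b, c)
/// }
/// ```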
5886 | #[inline ] |
5887 | #[target_feature (enable = "avx512fp16" )] |
5888 | #[cfg_attr (test, assert_instr(vfmsub))] |
5889 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5890 | pub fn _mm_fmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
5891 | unsafe { |
5892 | let extracta: f16 = simd_extract!(a, 0); |
5893 | let extractb: f16 = simd_extract!(b, 0); |
5894 | let extractc: f16 = simd_extract!(c, 0); |
        let r: f16 = fmaf16(extracta, extractb, -extractc);
5896 | simd_insert!(a, 0, r) |
5897 | } |
5898 | } |
5899 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst using writemask k
/// (the element is copied from a when mask bit 0 is not set), and copy the upper 7 packed elements from a to
/// the upper elements of dst.
5904 | /// |
5905 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_sh) |
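///
/// # Examples
///
/// An illustrative sketch of the writemask behaviour (not from the original source; same toolchain
/// and CPU assumptions as the example above):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo(a: __m128h, b: __m128h, c: __m128h) -> (__m128h, __m128h) {
///     // Mask bit 0 set: lower lane holds a[0] * b[0] - c[0].
///     let fused = _mm_mask_fmsub_sh(a, 0b1, b, c);
///     // Mask bit 0 clear: lower lane is copied unchanged from `a`.
///     let passthrough = _mm_mask_fmsub_sh(a, 0b0, b, c);
///     (fused, passthrough)
/// }
/// ```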
5906 | #[inline ] |
5907 | #[target_feature (enable = "avx512fp16" )] |
5908 | #[cfg_attr (test, assert_instr(vfmsub))] |
5909 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5910 | pub fn _mm_mask_fmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
5911 | unsafe { |
5912 | let mut fmsub: f16 = simd_extract!(a, 0); |
5913 | if k & 1 != 0 { |
5914 | let extractb: f16 = simd_extract!(b, 0); |
5915 | let extractc: f16 = simd_extract!(c, 0); |
            fmsub = fmaf16(fmsub, extractb, -extractc);
5917 | } |
5918 | simd_insert!(a, 0, fmsub) |
5919 | } |
5920 | } |
5921 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst using writemask k
/// (the element is copied from c when mask bit 0 is not set), and copy the upper 7 packed elements from c to
/// the upper elements of dst.
5926 | /// |
5927 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_sh) |
5928 | #[inline ] |
5929 | #[target_feature (enable = "avx512fp16" )] |
5930 | #[cfg_attr (test, assert_instr(vfmsub))] |
5931 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5932 | pub fn _mm_mask3_fmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
5933 | unsafe { |
5934 | let mut fmsub: f16 = simd_extract!(c, 0); |
5935 | if k & 1 != 0 { |
5936 | let extracta: f16 = simd_extract!(a, 0); |
5937 | let extractb: f16 = simd_extract!(b, 0); |
            fmsub = fmaf16(extracta, extractb, -fmsub);
5939 | } |
5940 | simd_insert!(c, 0, fmsub) |
5941 | } |
5942 | } |
5943 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst using zeromask k
/// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to
/// the upper elements of dst.
5948 | /// |
5949 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_sh) |
5950 | #[inline ] |
5951 | #[target_feature (enable = "avx512fp16" )] |
5952 | #[cfg_attr (test, assert_instr(vfmsub))] |
5953 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5954 | pub fn _mm_maskz_fmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
5955 | unsafe { |
5956 | let mut fmsub: f16 = 0.0; |
5957 | if k & 1 != 0 { |
5958 | let extracta: f16 = simd_extract!(a, 0); |
5959 | let extractb: f16 = simd_extract!(b, 0); |
5960 | let extractc: f16 = simd_extract!(c, 0); |
            fmsub = fmaf16(extracta, extractb, -extractc);
5962 | } |
5963 | simd_insert!(a, 0, fmsub) |
5964 | } |
5965 | } |
5966 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper
/// 7 packed elements from a to the upper elements of dst.
5970 | /// |
5971 | /// Rounding is done according to the rounding parameter, which can be one of: |
5972 | /// |
5973 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5974 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5975 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5976 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5977 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5978 | /// |
5979 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_round_sh) |
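///
/// # Examples
///
/// A sketch of selecting an explicit rounding mode through the const parameter (illustrative only;
/// same toolchain and CPU assumptions as the examples above):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
///     // Round toward zero and suppress exceptions for the lower-lane fmsub.
///     _mm_fmsub_round_sh::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c)
/// }
/// ```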
5980 | #[inline ] |
5981 | #[target_feature (enable = "avx512fp16" )] |
5982 | #[cfg_attr (test, assert_instr(vfmsub, ROUNDING = 8))] |
5983 | #[rustc_legacy_const_generics (3)] |
5984 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5985 | pub fn _mm_fmsub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
5986 | unsafe { |
5987 | static_assert_rounding!(ROUNDING); |
5988 | let extracta: f16 = simd_extract!(a, 0); |
5989 | let extractb: f16 = simd_extract!(b, 0); |
5990 | let extractc: f16 = simd_extract!(c, 0); |
        let r: f16 = vfmaddsh(extracta, extractb, -extractc, ROUNDING);
5992 | simd_insert!(a, 0, r) |
5993 | } |
5994 | } |
5995 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst using writemask k
/// (the element is copied from a when mask bit 0 is not set), and copy the upper 7 packed elements from a to
/// the upper elements of dst.
6000 | /// |
6001 | /// Rounding is done according to the rounding parameter, which can be one of: |
6002 | /// |
6003 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6004 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6005 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6006 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6007 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6008 | /// |
6009 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_round_sh) |
6010 | #[inline ] |
6011 | #[target_feature (enable = "avx512fp16" )] |
6012 | #[cfg_attr (test, assert_instr(vfmsub, ROUNDING = 8))] |
6013 | #[rustc_legacy_const_generics (4)] |
6014 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6015 | pub fn _mm_mask_fmsub_round_sh<const ROUNDING: i32>( |
6016 | a: __m128h, |
6017 | k: __mmask8, |
6018 | b: __m128h, |
6019 | c: __m128h, |
6020 | ) -> __m128h { |
6021 | unsafe { |
6022 | static_assert_rounding!(ROUNDING); |
6023 | let mut fmsub: f16 = simd_extract!(a, 0); |
6024 | if k & 1 != 0 { |
6025 | let extractb: f16 = simd_extract!(b, 0); |
6026 | let extractc: f16 = simd_extract!(c, 0); |
            fmsub = vfmaddsh(fmsub, extractb, -extractc, ROUNDING);
6028 | } |
6029 | simd_insert!(a, 0, fmsub) |
6030 | } |
6031 | } |
6032 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst using writemask k
/// (the element is copied from c when mask bit 0 is not set), and copy the upper 7 packed elements from c to
/// the upper elements of dst.
6037 | /// |
6038 | /// Rounding is done according to the rounding parameter, which can be one of: |
6039 | /// |
6040 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6041 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6042 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6043 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6044 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6045 | /// |
6046 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_round_sh) |
6047 | #[inline ] |
6048 | #[target_feature (enable = "avx512fp16" )] |
6049 | #[cfg_attr (test, assert_instr(vfmsub, ROUNDING = 8))] |
6050 | #[rustc_legacy_const_generics (4)] |
6051 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6052 | pub fn _mm_mask3_fmsub_round_sh<const ROUNDING: i32>( |
6053 | a: __m128h, |
6054 | b: __m128h, |
6055 | c: __m128h, |
6056 | k: __mmask8, |
6057 | ) -> __m128h { |
6058 | unsafe { |
6059 | static_assert_rounding!(ROUNDING); |
6060 | let mut fmsub: f16 = simd_extract!(c, 0); |
6061 | if k & 1 != 0 { |
6062 | let extracta: f16 = simd_extract!(a, 0); |
6063 | let extractb: f16 = simd_extract!(b, 0); |
            fmsub = vfmaddsh(extracta, extractb, -fmsub, ROUNDING);
6065 | } |
6066 | simd_insert!(c, 0, fmsub) |
6067 | } |
6068 | } |
6069 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst using zeromask k
/// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to
/// the upper elements of dst.
6074 | /// |
6075 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_round_sh) |
6076 | #[inline ] |
6077 | #[target_feature (enable = "avx512fp16" )] |
6078 | #[cfg_attr (test, assert_instr(vfmsub, ROUNDING = 8))] |
6079 | #[rustc_legacy_const_generics (4)] |
6080 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6081 | pub fn _mm_maskz_fmsub_round_sh<const ROUNDING: i32>( |
6082 | k: __mmask8, |
6083 | a: __m128h, |
6084 | b: __m128h, |
6085 | c: __m128h, |
6086 | ) -> __m128h { |
6087 | unsafe { |
6088 | static_assert_rounding!(ROUNDING); |
6089 | let mut fmsub: f16 = 0.0; |
6090 | if k & 1 != 0 { |
6091 | let extracta: f16 = simd_extract!(a, 0); |
6092 | let extractb: f16 = simd_extract!(b, 0); |
6093 | let extractc: f16 = simd_extract!(c, 0); |
            fmsub = vfmaddsh(extracta, extractb, -extractc, ROUNDING);
6095 | } |
6096 | simd_insert!(a, 0, fmsub) |
6097 | } |
6098 | } |
6099 | |
6100 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6101 | /// result from packed elements in c, and store the results in dst. |
6102 | /// |
6103 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_ph) |
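///
/// # Examples
///
/// A sketch of the per-lane computation `c - a * b` (illustrative only, not part of the original
/// source; assumes `avx512fp16` and `avx512vl` are available):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() -> __m128h {
///     let a = _mm_set1_ph(2.0);
///     let b = _mm_set1_ph(3.0);
///     let c = _mm_set1_ph(10.0);
///     // Every lane becomes 10.0 - 2.0 * 3.0 = 4.0.
///     _mm_fnmadd_ph(a, b, c)
/// }
/// ```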
6104 | #[inline ] |
6105 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6106 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6107 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6108 | pub fn _mm_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_fma(simd_neg(a), b, c) }
6110 | } |
6111 | |
6112 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6113 | /// result from packed elements in c, and store the results in dst using writemask k (the element is copied |
6114 | /// from a when the corresponding mask bit is not set). |
6115 | /// |
6116 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_ph) |
6117 | #[inline ] |
6118 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6119 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6120 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6121 | pub fn _mm_mask_fnmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), a) }
6123 | } |
6124 | |
6125 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6126 | /// result from packed elements in c, and store the results in dst using writemask k (the element is copied |
6127 | /// from c when the corresponding mask bit is not set). |
6128 | /// |
6129 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_ph) |
6130 | #[inline ] |
6131 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6132 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6133 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6134 | pub fn _mm_mask3_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), c) }
6136 | } |
6137 | |
6138 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6139 | /// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed |
6140 | /// out when the corresponding mask bit is not set). |
6141 | /// |
6142 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_ph) |
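///
/// # Examples
///
/// A sketch of the zeromask behaviour (illustrative only; same feature assumptions as the
/// example above):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
///     // Only lanes 0 and 2 receive c - a * b; every other lane is zeroed.
///     _mm_maskz_fnmadd_ph(0b0000_0101, a, b, c)
/// }
/// ```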
6143 | #[inline ] |
6144 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6145 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6146 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6147 | pub fn _mm_maskz_fnmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), _mm_setzero_ph()) }
6149 | } |
6150 | |
6151 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6152 | /// result from packed elements in c, and store the results in dst. |
6153 | /// |
6154 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmadd_ph) |
6155 | #[inline ] |
6156 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6157 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6158 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6159 | pub fn _mm256_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_fma(simd_neg(a), b, c) }
6161 | } |
6162 | |
6163 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6164 | /// result from packed elements in c, and store the results in dst using writemask k (the element is copied |
6165 | /// from a when the corresponding mask bit is not set). |
6166 | /// |
6167 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fnmadd_ph) |
6168 | #[inline ] |
6169 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6170 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6171 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6172 | pub fn _mm256_mask_fnmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), a) }
6174 | } |
6175 | |
6176 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6177 | /// result from packed elements in c, and store the results in dst using writemask k (the element is copied |
6178 | /// from c when the corresponding mask bit is not set). |
6179 | /// |
6180 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fnmadd_ph) |
6181 | #[inline ] |
6182 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6183 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6184 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6185 | pub fn _mm256_mask3_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), c) }
6187 | } |
6188 | |
6189 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6190 | /// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed |
6191 | /// out when the corresponding mask bit is not set). |
6192 | /// |
6193 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fnmadd_ph) |
6194 | #[inline ] |
6195 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6196 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6197 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6198 | pub fn _mm256_maskz_fnmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), _mm256_setzero_ph()) }
6200 | } |
6201 | |
6202 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6203 | /// result from packed elements in c, and store the results in dst. |
6204 | /// |
6205 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_ph) |
6206 | #[inline ] |
6207 | #[target_feature (enable = "avx512fp16" )] |
6208 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6209 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6210 | pub fn _mm512_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_fma(simd_neg(a), b, c) }
6212 | } |
6213 | |
6214 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6215 | /// result from packed elements in c, and store the results in dst using writemask k (the element is copied |
6216 | /// from a when the corresponding mask bit is not set). |
6217 | /// |
6218 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_ph) |
6219 | #[inline ] |
6220 | #[target_feature (enable = "avx512fp16" )] |
6221 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6222 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6223 | pub fn _mm512_mask_fnmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), a) }
6225 | } |
6226 | |
6227 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6228 | /// result from packed elements in c, and store the results in dst using writemask k (the element is copied |
6229 | /// from c when the corresponding mask bit is not set). |
6230 | /// |
6231 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_ph) |
6232 | #[inline ] |
6233 | #[target_feature (enable = "avx512fp16" )] |
6234 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6235 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6236 | pub fn _mm512_mask3_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), c) }
6238 | } |
6239 | |
6240 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6241 | /// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed |
6242 | /// out when the corresponding mask bit is not set). |
6243 | /// |
6244 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_ph) |
6245 | #[inline ] |
6246 | #[target_feature (enable = "avx512fp16" )] |
6247 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6248 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6249 | pub fn _mm512_maskz_fnmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), _mm512_setzero_ph()) }
6251 | } |
6252 | |
6253 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6254 | /// result from packed elements in c, and store the results in dst. |
6255 | /// |
6256 | /// Rounding is done according to the rounding parameter, which can be one of: |
6257 | /// |
6258 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6259 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6260 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6261 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6262 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6263 | /// |
6264 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_round_ph) |
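///
/// # Examples
///
/// A sketch of a full-width fnmadd with an explicit rounding mode (illustrative only, not part of
/// the original source; assumes `avx512fp16`):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
///     // Each lane becomes c - a * b, rounded toward negative infinity.
///     _mm512_fnmadd_round_ph::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b, c)
/// }
/// ```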
6265 | #[inline ] |
6266 | #[target_feature (enable = "avx512fp16" )] |
6267 | #[cfg_attr (test, assert_instr(vfnmadd, ROUNDING = 8))] |
6268 | #[rustc_legacy_const_generics (3)] |
6269 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6270 | pub fn _mm512_fnmadd_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
6271 | unsafe { |
6272 | static_assert_rounding!(ROUNDING); |
        vfmaddph_512(simd_neg(a), b, c, ROUNDING)
6274 | } |
6275 | } |
6276 | |
6277 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6278 | /// result from packed elements in c, and store the results in dst using writemask k (the element is copied |
6279 | /// from a when the corresponding mask bit is not set). |
6280 | /// |
6281 | /// Rounding is done according to the rounding parameter, which can be one of: |
6282 | /// |
6283 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6284 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6285 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6286 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6287 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6288 | /// |
6289 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_round_ph) |
6290 | #[inline ] |
6291 | #[target_feature (enable = "avx512fp16" )] |
6292 | #[cfg_attr (test, assert_instr(vfnmadd, ROUNDING = 8))] |
6293 | #[rustc_legacy_const_generics (4)] |
6294 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6295 | pub fn _mm512_mask_fnmadd_round_ph<const ROUNDING: i32>( |
6296 | a: __m512h, |
6297 | k: __mmask32, |
6298 | b: __m512h, |
6299 | c: __m512h, |
6300 | ) -> __m512h { |
6301 | unsafe { |
6302 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_fnmadd_round_ph::<ROUNDING>(a, b, c), a)
6304 | } |
6305 | } |
6306 | |
6307 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6308 | /// result from packed elements in c, and store the results in dst using writemask k (the element is copied |
6309 | /// from c when the corresponding mask bit is not set). |
6310 | /// |
6311 | /// Rounding is done according to the rounding parameter, which can be one of: |
6312 | /// |
6313 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6314 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6315 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6316 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6317 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6318 | /// |
6319 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_round_ph) |
6320 | #[inline ] |
6321 | #[target_feature (enable = "avx512fp16" )] |
6322 | #[cfg_attr (test, assert_instr(vfnmadd, ROUNDING = 8))] |
6323 | #[rustc_legacy_const_generics (4)] |
6324 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6325 | pub fn _mm512_mask3_fnmadd_round_ph<const ROUNDING: i32>( |
6326 | a: __m512h, |
6327 | b: __m512h, |
6328 | c: __m512h, |
6329 | k: __mmask32, |
6330 | ) -> __m512h { |
6331 | unsafe { |
6332 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_fnmadd_round_ph::<ROUNDING>(a, b, c), c)
6334 | } |
6335 | } |
6336 | |
6337 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6338 | /// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed |
6339 | /// out when the corresponding mask bit is not set). |
6340 | /// |
6341 | /// Rounding is done according to the rounding parameter, which can be one of: |
6342 | /// |
6343 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6344 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6345 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6346 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6347 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6348 | /// |
6349 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_round_ph) |
6350 | #[inline ] |
6351 | #[target_feature (enable = "avx512fp16" )] |
6352 | #[cfg_attr (test, assert_instr(vfnmadd, ROUNDING = 8))] |
6353 | #[rustc_legacy_const_generics (4)] |
6354 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6355 | pub fn _mm512_maskz_fnmadd_round_ph<const ROUNDING: i32>( |
6356 | k: __mmask32, |
6357 | a: __m512h, |
6358 | b: __m512h, |
6359 | c: __m512h, |
6360 | ) -> __m512h { |
6361 | unsafe { |
6362 | static_assert_rounding!(ROUNDING); |
6363 | simd_select_bitmask( |
            k,
            _mm512_fnmadd_round_ph::<ROUNDING>(a, b, c),
            _mm512_setzero_ph(),
6367 | ) |
6368 | } |
6369 | } |
6370 | |
6371 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate |
6372 | /// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed |
6373 | /// elements from a to the upper elements of dst. |
6374 | /// |
6375 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_sh) |
6376 | #[inline ] |
6377 | #[target_feature (enable = "avx512fp16" )] |
6378 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6379 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6380 | pub fn _mm_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
6381 | unsafe { |
6382 | let extracta: f16 = simd_extract!(a, 0); |
6383 | let extractb: f16 = simd_extract!(b, 0); |
6384 | let extractc: f16 = simd_extract!(c, 0); |
        let r: f16 = fmaf16(-extracta, extractb, extractc);
6386 | simd_insert!(a, 0, r) |
6387 | } |
6388 | } |
6389 | |
6390 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate |
6391 | /// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element |
6392 | /// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
6393 | /// elements of dst. |
6394 | /// |
6395 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_sh) |
6396 | #[inline ] |
6397 | #[target_feature (enable = "avx512fp16" )] |
6398 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6399 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6400 | pub fn _mm_mask_fnmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
6401 | unsafe { |
6402 | let mut fnmadd: f16 = simd_extract!(a, 0); |
6403 | if k & 1 != 0 { |
6404 | let extractb: f16 = simd_extract!(b, 0); |
6405 | let extractc: f16 = simd_extract!(c, 0); |
            fnmadd = fmaf16(-fnmadd, extractb, extractc);
6407 | } |
6408 | simd_insert!(a, 0, fnmadd) |
6409 | } |
6410 | } |
6411 | |
6412 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate |
6413 | /// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element |
6414 | /// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper |
6415 | /// elements of dst. |
6416 | /// |
6417 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_sh) |
6418 | #[inline ] |
6419 | #[target_feature (enable = "avx512fp16" )] |
6420 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6421 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6422 | pub fn _mm_mask3_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
6423 | unsafe { |
6424 | let mut fnmadd: f16 = simd_extract!(c, 0); |
6425 | if k & 1 != 0 { |
6426 | let extracta: f16 = simd_extract!(a, 0); |
6427 | let extractb: f16 = simd_extract!(b, 0); |
            fnmadd = fmaf16(-extracta, extractb, fnmadd);
6429 | } |
6430 | simd_insert!(c, 0, fnmadd) |
6431 | } |
6432 | } |
6433 | |
6434 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate |
6435 | /// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element |
6436 | /// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
6437 | /// elements of dst. |
6438 | /// |
6439 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_sh) |
6440 | #[inline ] |
6441 | #[target_feature (enable = "avx512fp16" )] |
6442 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6443 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6444 | pub fn _mm_maskz_fnmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
6445 | unsafe { |
6446 | let mut fnmadd: f16 = 0.0; |
6447 | if k & 1 != 0 { |
6448 | let extracta: f16 = simd_extract!(a, 0); |
6449 | let extractb: f16 = simd_extract!(b, 0); |
6450 | let extractc: f16 = simd_extract!(c, 0); |
            fnmadd = fmaf16(-extracta, extractb, extractc);
6452 | } |
6453 | simd_insert!(a, 0, fnmadd) |
6454 | } |
6455 | } |
6456 | |
6457 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate |
6458 | /// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed |
6459 | /// elements from a to the upper elements of dst. |
6460 | /// |
6461 | /// Rounding is done according to the rounding parameter, which can be one of: |
6462 | /// |
6463 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6464 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6465 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6466 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6467 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6468 | /// |
6469 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_round_sh) |
6470 | #[inline ] |
6471 | #[target_feature (enable = "avx512fp16" )] |
6472 | #[cfg_attr (test, assert_instr(vfnmadd, ROUNDING = 8))] |
6473 | #[rustc_legacy_const_generics (3)] |
6474 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6475 | pub fn _mm_fnmadd_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
6476 | unsafe { |
6477 | static_assert_rounding!(ROUNDING); |
6478 | let extracta: f16 = simd_extract!(a, 0); |
6479 | let extractb: f16 = simd_extract!(b, 0); |
6480 | let extractc: f16 = simd_extract!(c, 0); |
        let r: f16 = vfmaddsh(-extracta, extractb, extractc, ROUNDING);
6482 | simd_insert!(a, 0, r) |
6483 | } |
6484 | } |
6485 | |
6486 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate |
6487 | /// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element |
6488 | /// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
6489 | /// elements of dst. |
6490 | /// |
6491 | /// Rounding is done according to the rounding parameter, which can be one of: |
6492 | /// |
6493 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6494 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6495 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6496 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6497 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6498 | /// |
6499 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_round_sh) |
6500 | #[inline ] |
6501 | #[target_feature (enable = "avx512fp16" )] |
6502 | #[cfg_attr (test, assert_instr(vfnmadd, ROUNDING = 8))] |
6503 | #[rustc_legacy_const_generics (4)] |
6504 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6505 | pub fn _mm_mask_fnmadd_round_sh<const ROUNDING: i32>( |
6506 | a: __m128h, |
6507 | k: __mmask8, |
6508 | b: __m128h, |
6509 | c: __m128h, |
6510 | ) -> __m128h { |
6511 | unsafe { |
6512 | static_assert_rounding!(ROUNDING); |
6513 | let mut fnmadd: f16 = simd_extract!(a, 0); |
6514 | if k & 1 != 0 { |
6515 | let extractb: f16 = simd_extract!(b, 0); |
6516 | let extractc: f16 = simd_extract!(c, 0); |
            fnmadd = vfmaddsh(-fnmadd, extractb, extractc, ROUNDING);
6518 | } |
6519 | simd_insert!(a, 0, fnmadd) |
6520 | } |
6521 | } |
6522 | |
6523 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate |
6524 | /// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element |
6525 | /// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper |
6526 | /// elements of dst. |
6527 | /// |
6528 | /// Rounding is done according to the rounding parameter, which can be one of: |
6529 | /// |
6530 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6531 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6532 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6533 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6534 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6535 | /// |
6536 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_round_sh) |
6537 | #[inline ] |
6538 | #[target_feature (enable = "avx512fp16" )] |
6539 | #[cfg_attr (test, assert_instr(vfnmadd, ROUNDING = 8))] |
6540 | #[rustc_legacy_const_generics (4)] |
6541 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6542 | pub fn _mm_mask3_fnmadd_round_sh<const ROUNDING: i32>( |
6543 | a: __m128h, |
6544 | b: __m128h, |
6545 | c: __m128h, |
6546 | k: __mmask8, |
6547 | ) -> __m128h { |
6548 | unsafe { |
6549 | static_assert_rounding!(ROUNDING); |
6550 | let mut fnmadd: f16 = simd_extract!(c, 0); |
6551 | if k & 1 != 0 { |
6552 | let extracta: f16 = simd_extract!(a, 0); |
6553 | let extractb: f16 = simd_extract!(b, 0); |
            fnmadd = vfmaddsh(-extracta, extractb, fnmadd, ROUNDING);
6555 | } |
6556 | simd_insert!(c, 0, fnmadd) |
6557 | } |
6558 | } |
6559 | |
6560 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate |
6561 | /// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element |
6562 | /// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
6563 | /// elements of dst. |
6564 | /// |
6565 | /// Rounding is done according to the rounding parameter, which can be one of: |
6566 | /// |
6567 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6568 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6569 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6570 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6571 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6572 | /// |
6573 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_round_sh) |
6574 | #[inline ] |
6575 | #[target_feature (enable = "avx512fp16" )] |
6576 | #[cfg_attr (test, assert_instr(vfnmadd, ROUNDING = 8))] |
6577 | #[rustc_legacy_const_generics (4)] |
6578 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6579 | pub fn _mm_maskz_fnmadd_round_sh<const ROUNDING: i32>( |
6580 | k: __mmask8, |
6581 | a: __m128h, |
6582 | b: __m128h, |
6583 | c: __m128h, |
6584 | ) -> __m128h { |
6585 | unsafe { |
6586 | static_assert_rounding!(ROUNDING); |
6587 | let mut fnmadd: f16 = 0.0; |
6588 | if k & 1 != 0 { |
6589 | let extracta: f16 = simd_extract!(a, 0); |
6590 | let extractb: f16 = simd_extract!(b, 0); |
6591 | let extractc: f16 = simd_extract!(c, 0); |
            fnmadd = vfmaddsh(-extracta, extractb, extractc, ROUNDING);
6593 | } |
6594 | simd_insert!(a, 0, fnmadd) |
6595 | } |
6596 | } |
6597 | |
6598 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6599 | /// in c from the negated intermediate result, and store the results in dst. |
6600 | /// |
6601 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_ph) |
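///
/// # Examples
///
/// A sketch of the per-lane computation `-(a * b) - c` (illustrative only, not part of the original
/// source; assumes `avx512fp16` and `avx512vl`):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() -> __m128h {
///     let a = _mm_set1_ph(2.0);
///     let b = _mm_set1_ph(3.0);
///     let c = _mm_set1_ph(1.0);
///     // Every lane becomes -(2.0 * 3.0) - 1.0 = -7.0.
///     _mm_fnmsub_ph(a, b, c)
/// }
/// ```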
6602 | #[inline ] |
6603 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6604 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6605 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6606 | pub fn _mm_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
6608 | } |
6609 | |
6610 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6611 | /// in c from the negated intermediate result, and store the results in dst using writemask k (the element is |
6612 | /// copied from a when the corresponding mask bit is not set). |
6613 | /// |
6614 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_ph) |
6615 | #[inline ] |
6616 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6617 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6618 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6619 | pub fn _mm_mask_fnmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), a) }
6621 | } |
6622 | |
6623 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6624 | /// in c from the negated intermediate result, and store the results in dst using writemask k (the element is |
6625 | /// copied from c when the corresponding mask bit is not set). |
6626 | /// |
6627 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_ph) |
6628 | #[inline ] |
6629 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6630 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6631 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6632 | pub fn _mm_mask3_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), c) }
6634 | } |
6635 | |
6636 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6637 | /// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is |
6638 | /// zeroed out when the corresponding mask bit is not set). |
6639 | /// |
6640 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_ph) |
6641 | #[inline ] |
6642 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6643 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6644 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6645 | pub fn _mm_maskz_fnmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), _mm_setzero_ph()) }
6647 | } |
6648 | |
6649 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6650 | /// in c from the negated intermediate result, and store the results in dst. |
6651 | /// |
6652 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmsub_ph) |
6653 | #[inline ] |
6654 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6655 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6656 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6657 | pub fn _mm256_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
6659 | } |
6660 | |
6661 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6662 | /// in c from the negated intermediate result, and store the results in dst using writemask k (the element is |
6663 | /// copied from a when the corresponding mask bit is not set). |
6664 | /// |
6665 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fnmsub_ph) |
6666 | #[inline ] |
6667 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6668 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6669 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6670 | pub fn _mm256_mask_fnmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), a) }
6672 | } |
6673 | |
6674 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6675 | /// in c from the negated intermediate result, and store the results in dst using writemask k (the element is |
6676 | /// copied from c when the corresponding mask bit is not set). |
6677 | /// |
6678 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fnmsub_ph) |
6679 | #[inline ] |
6680 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6681 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6682 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6683 | pub fn _mm256_mask3_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), c) }
6685 | } |
6686 | |
6687 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6688 | /// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is |
6689 | /// zeroed out when the corresponding mask bit is not set). |
6690 | /// |
6691 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fnmsub_ph) |
6692 | #[inline ] |
6693 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6694 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6695 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6696 | pub fn _mm256_maskz_fnmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), _mm256_setzero_ph()) }
6698 | } |
6699 | |
6700 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6701 | /// in c from the negated intermediate result, and store the results in dst. |
6702 | /// |
6703 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_ph) |
6704 | #[inline ] |
6705 | #[target_feature (enable = "avx512fp16" )] |
6706 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6707 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6708 | pub fn _mm512_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
6710 | } |
6711 | |
6712 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6713 | /// in c from the negated intermediate result, and store the results in dst using writemask k (the element is |
6714 | /// copied from a when the corresponding mask bit is not set). |
6715 | /// |
6716 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_ph) |
6717 | #[inline ] |
6718 | #[target_feature (enable = "avx512fp16" )] |
6719 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6720 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6721 | pub fn _mm512_mask_fnmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), a) }
6723 | } |
6724 | |
6725 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6726 | /// in c from the negated intermediate result, and store the results in dst using writemask k (the element is |
6727 | /// copied from c when the corresponding mask bit is not set). |
6728 | /// |
6729 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_ph) |
6730 | #[inline ] |
6731 | #[target_feature (enable = "avx512fp16" )] |
6732 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6733 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6734 | pub fn _mm512_mask3_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), c) }
6736 | } |
6737 | |
6738 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6739 | /// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is |
6740 | /// zeroed out when the corresponding mask bit is not set). |
6741 | /// |
6742 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_ph) |
6743 | #[inline ] |
6744 | #[target_feature (enable = "avx512fp16" )] |
6745 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6746 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6747 | pub fn _mm512_maskz_fnmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), _mm512_setzero_ph()) }
6749 | } |
6750 | |
6751 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6752 | /// in c from the negated intermediate result, and store the results in dst. |
6753 | /// |
6754 | /// Rounding is done according to the rounding parameter, which can be one of: |
6755 | /// |
6756 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6757 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6758 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6759 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6760 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6761 | /// |
6762 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_round_ph) |
6763 | #[inline ] |
6764 | #[target_feature (enable = "avx512fp16" )] |
6765 | #[cfg_attr (test, assert_instr(vfnmsub, ROUNDING = 8))] |
6766 | #[rustc_legacy_const_generics (3)] |
6767 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6768 | pub fn _mm512_fnmsub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
6769 | unsafe { |
6770 | static_assert_rounding!(ROUNDING); |
        vfmaddph_512(simd_neg(a), b, simd_neg(c), ROUNDING)
6772 | } |
6773 | } |
6774 | |
6775 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6776 | /// in c from the negated intermediate result, and store the results in dst using writemask k (the element is |
6777 | /// copied from a when the corresponding mask bit is not set). |
6778 | /// |
6779 | /// Rounding is done according to the rounding parameter, which can be one of: |
6780 | /// |
6781 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6782 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6783 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6784 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6785 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6786 | /// |
6787 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_round_ph) |
6788 | #[inline ] |
6789 | #[target_feature (enable = "avx512fp16" )] |
6790 | #[cfg_attr (test, assert_instr(vfnmsub, ROUNDING = 8))] |
6791 | #[rustc_legacy_const_generics (4)] |
6792 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6793 | pub fn _mm512_mask_fnmsub_round_ph<const ROUNDING: i32>( |
6794 | a: __m512h, |
6795 | k: __mmask32, |
6796 | b: __m512h, |
6797 | c: __m512h, |
6798 | ) -> __m512h { |
6799 | unsafe { |
6800 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_fnmsub_round_ph::<ROUNDING>(a, b, c), a)
6802 | } |
6803 | } |
6804 | |
6805 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6806 | /// in c from the negated intermediate result, and store the results in dst using writemask k (the element is |
6807 | /// copied from c when the corresponding mask bit is not set). |
6808 | /// |
6809 | /// Rounding is done according to the rounding parameter, which can be one of: |
6810 | /// |
6811 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6812 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6813 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6814 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6815 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6816 | /// |
6817 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_round_ph) |
6818 | #[inline ] |
6819 | #[target_feature (enable = "avx512fp16" )] |
6820 | #[cfg_attr (test, assert_instr(vfnmsub, ROUNDING = 8))] |
6821 | #[rustc_legacy_const_generics (4)] |
6822 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6823 | pub fn _mm512_mask3_fnmsub_round_ph<const ROUNDING: i32>( |
6824 | a: __m512h, |
6825 | b: __m512h, |
6826 | c: __m512h, |
6827 | k: __mmask32, |
6828 | ) -> __m512h { |
6829 | unsafe { |
6830 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_fnmsub_round_ph::<ROUNDING>(a, b, c), c)
6832 | } |
6833 | } |
6834 | |
6835 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6836 | /// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is |
6837 | /// zeroed out when the corresponding mask bit is not set). |
6838 | /// |
6839 | /// Rounding is done according to the rounding parameter, which can be one of: |
6840 | /// |
6841 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6842 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6843 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6844 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6845 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6846 | /// |
6847 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_round_ph) |
6848 | #[inline ] |
6849 | #[target_feature (enable = "avx512fp16" )] |
6850 | #[cfg_attr (test, assert_instr(vfnmsub, ROUNDING = 8))] |
6851 | #[rustc_legacy_const_generics (4)] |
6852 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6853 | pub fn _mm512_maskz_fnmsub_round_ph<const ROUNDING: i32>( |
6854 | k: __mmask32, |
6855 | a: __m512h, |
6856 | b: __m512h, |
6857 | c: __m512h, |
6858 | ) -> __m512h { |
6859 | unsafe { |
6860 | static_assert_rounding!(ROUNDING); |
6861 | simd_select_bitmask( |
            k,
            _mm512_fnmsub_round_ph::<ROUNDING>(a, b, c),
            _mm512_setzero_ph(),
6865 | ) |
6866 | } |
6867 | } |
6868 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the negated intermediate result. Store the result in the lower element of dst, and copy
/// the upper 7 packed elements from a to the upper elements of dst.
6872 | /// |
6873 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_sh) |
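///
/// # Examples
///
/// A sketch of the lower-lane computation `-(a[0] * b[0]) - c[0]` (illustrative only, not part of
/// the original source; assumes `avx512fp16`):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() -> __m128h {
///     let a = _mm_set_sh(2.0);
///     let b = _mm_set_sh(3.0);
///     let c = _mm_set_sh(1.0);
///     // Lower lane becomes -(2.0 * 3.0) - 1.0 = -7.0; the upper 7 lanes are copied from `a`.
///     _mm_fnmsub_sh(a, b, c)
/// }
/// ```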
6874 | #[inline ] |
6875 | #[target_feature (enable = "avx512fp16" )] |
6876 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6877 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6878 | pub fn _mm_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
6879 | unsafe { |
6880 | let extracta: f16 = simd_extract!(a, 0); |
6881 | let extractb: f16 = simd_extract!(b, 0); |
6882 | let extractc: f16 = simd_extract!(c, 0); |
        let r: f16 = fmaf16(-extracta, extractb, -extractc);
6884 | simd_insert!(a, 0, r) |
6885 | } |
6886 | } |
6887 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the negated intermediate result. Store the result in the lower element of dst using
/// writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 7 packed elements
/// from a to the upper elements of dst.
6892 | /// |
6893 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_sh) |
6894 | #[inline ] |
6895 | #[target_feature (enable = "avx512fp16" )] |
6896 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6897 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6898 | pub fn _mm_mask_fnmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
6899 | unsafe { |
6900 | let mut fnmsub: f16 = simd_extract!(a, 0); |
6901 | if k & 1 != 0 { |
6902 | let extractb: f16 = simd_extract!(b, 0); |
6903 | let extractc: f16 = simd_extract!(c, 0); |
fnmsub = fmaf16(-fnmsub, extractb, -extractc);
6905 | } |
6906 | simd_insert!(a, 0, fnmsub) |
6907 | } |
6908 | } |
6909 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the negated intermediate result. Store the result in the lower element of dst using
/// writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 7 packed
/// elements from c to the upper elements of dst.
6914 | /// |
6915 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_sh) |
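///
/// A minimal usage sketch, not taken from Intel's guide; it assumes avx512fp16 support at runtime.
/// It highlights that the pass-through value and the upper lanes both come from c:
///
/// ```ignore
/// // Assumes the caller has verified avx512fp16 support.
/// let a = _mm_set1_ph(2.0);
/// let b = _mm_set1_ph(3.0);
/// let c = _mm_set1_ph(1.0);
/// let merged = _mm_mask3_fnmsub_sh(a, b, c, 0b1);
/// // Lane 0 == -(2.0 * 3.0) - 1.0 == -7.0; lanes 1..=7 == 1.0 (from c).
/// let passthrough = _mm_mask3_fnmsub_sh(a, b, c, 0b0);
/// // Every lane == 1.0: with mask bit 0 clear, lane 0 is also taken from c.
/// ```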
6916 | #[inline ] |
6917 | #[target_feature (enable = "avx512fp16" )] |
6918 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6919 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6920 | pub fn _mm_mask3_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
6921 | unsafe { |
6922 | let mut fnmsub: f16 = simd_extract!(c, 0); |
6923 | if k & 1 != 0 { |
6924 | let extracta: f16 = simd_extract!(a, 0); |
6925 | let extractb: f16 = simd_extract!(b, 0); |
fnmsub = fmaf16(-extracta, extractb, -fnmsub);
6927 | } |
6928 | simd_insert!(c, 0, fnmsub) |
6929 | } |
6930 | } |
6931 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the negated intermediate result. Store the result in the lower element of dst using
/// zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements
/// from a to the upper elements of dst.
6936 | /// |
6937 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_sh) |
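///
/// A minimal usage sketch, not taken from Intel's guide; it assumes avx512fp16 support at runtime.
/// Even when mask bit 0 is clear, the upper lanes still come from a; only lane 0 is zeroed:
///
/// ```ignore
/// // Assumes the caller has verified avx512fp16 support.
/// let a = _mm_set1_ph(2.0);
/// let b = _mm_set1_ph(3.0);
/// let c = _mm_set1_ph(1.0);
/// let r = _mm_maskz_fnmsub_sh(0b0, a, b, c);
/// // Lane 0 == 0.0; lanes 1..=7 == 2.0 (from a).
/// ```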
6938 | #[inline ] |
6939 | #[target_feature (enable = "avx512fp16" )] |
6940 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6941 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6942 | pub fn _mm_maskz_fnmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
6943 | unsafe { |
6944 | let mut fnmsub: f16 = 0.0; |
6945 | if k & 1 != 0 { |
6946 | let extracta: f16 = simd_extract!(a, 0); |
6947 | let extractb: f16 = simd_extract!(b, 0); |
6948 | let extractc: f16 = simd_extract!(c, 0); |
fnmsub = fmaf16(-extracta, extractb, -extractc);
6950 | } |
6951 | simd_insert!(a, 0, fnmsub) |
6952 | } |
6953 | } |
6954 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the negated intermediate result. Store the result in the lower element of dst, and copy
/// the upper 7 packed elements from a to the upper elements of dst.
6958 | /// |
6959 | /// Rounding is done according to the rounding parameter, which can be one of: |
6960 | /// |
6961 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6962 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6963 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6964 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6965 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6966 | /// |
6967 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_round_sh) |
6968 | #[inline ] |
6969 | #[target_feature (enable = "avx512fp16" )] |
6970 | #[cfg_attr (test, assert_instr(vfnmsub, ROUNDING = 8))] |
6971 | #[rustc_legacy_const_generics (3)] |
6972 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6973 | pub fn _mm_fnmsub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
6974 | unsafe { |
6975 | static_assert_rounding!(ROUNDING); |
6976 | let extracta: f16 = simd_extract!(a, 0); |
6977 | let extractb: f16 = simd_extract!(b, 0); |
6978 | let extractc: f16 = simd_extract!(c, 0); |
let r: f16 = vfmaddsh(-extracta, extractb, -extractc, ROUNDING);
6980 | simd_insert!(a, 0, r) |
6981 | } |
6982 | } |
6983 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the negated intermediate result. Store the result in the lower element of dst using
/// writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 7 packed
/// elements from a to the upper elements of dst.
6988 | /// |
6989 | /// Rounding is done according to the rounding parameter, which can be one of: |
6990 | /// |
6991 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6992 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6993 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6994 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6995 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6996 | /// |
6997 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_round_sh) |
6998 | #[inline ] |
6999 | #[target_feature (enable = "avx512fp16" )] |
7000 | #[cfg_attr (test, assert_instr(vfnmsub, ROUNDING = 8))] |
7001 | #[rustc_legacy_const_generics (4)] |
7002 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7003 | pub fn _mm_mask_fnmsub_round_sh<const ROUNDING: i32>( |
7004 | a: __m128h, |
7005 | k: __mmask8, |
7006 | b: __m128h, |
7007 | c: __m128h, |
7008 | ) -> __m128h { |
7009 | unsafe { |
7010 | static_assert_rounding!(ROUNDING); |
7011 | let mut fnmsub: f16 = simd_extract!(a, 0); |
7012 | if k & 1 != 0 { |
7013 | let extractb: f16 = simd_extract!(b, 0); |
7014 | let extractc: f16 = simd_extract!(c, 0); |
fnmsub = vfmaddsh(-fnmsub, extractb, -extractc, ROUNDING);
7016 | } |
7017 | simd_insert!(a, 0, fnmsub) |
7018 | } |
7019 | } |
7020 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the negated intermediate result. Store the result in the lower element of dst using
/// writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 7 packed
/// elements from c to the upper elements of dst.
7025 | /// |
7026 | /// Rounding is done according to the rounding parameter, which can be one of: |
7027 | /// |
7028 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7029 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7030 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7031 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7032 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7033 | /// |
7034 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_round_sh) |
7035 | #[inline ] |
7036 | #[target_feature (enable = "avx512fp16" )] |
7037 | #[cfg_attr (test, assert_instr(vfnmsub, ROUNDING = 8))] |
7038 | #[rustc_legacy_const_generics (4)] |
7039 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7040 | pub fn _mm_mask3_fnmsub_round_sh<const ROUNDING: i32>( |
7041 | a: __m128h, |
7042 | b: __m128h, |
7043 | c: __m128h, |
7044 | k: __mmask8, |
7045 | ) -> __m128h { |
7046 | unsafe { |
7047 | static_assert_rounding!(ROUNDING); |
7048 | let mut fnmsub: f16 = simd_extract!(c, 0); |
7049 | if k & 1 != 0 { |
7050 | let extracta: f16 = simd_extract!(a, 0); |
7051 | let extractb: f16 = simd_extract!(b, 0); |
fnmsub = vfmaddsh(-extracta, extractb, -fnmsub, ROUNDING);
7053 | } |
7054 | simd_insert!(c, 0, fnmsub) |
7055 | } |
7056 | } |
7057 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the negated intermediate result. Store the result in the lower element of dst using
/// zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements
/// from a to the upper elements of dst.
7062 | /// |
7063 | /// Rounding is done according to the rounding parameter, which can be one of: |
7064 | /// |
7065 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7066 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7067 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7068 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7069 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7070 | /// |
7071 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_round_sh) |
7072 | #[inline ] |
7073 | #[target_feature (enable = "avx512fp16" )] |
7074 | #[cfg_attr (test, assert_instr(vfnmsub, ROUNDING = 8))] |
7075 | #[rustc_legacy_const_generics (4)] |
7076 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7077 | pub fn _mm_maskz_fnmsub_round_sh<const ROUNDING: i32>( |
7078 | k: __mmask8, |
7079 | a: __m128h, |
7080 | b: __m128h, |
7081 | c: __m128h, |
7082 | ) -> __m128h { |
7083 | unsafe { |
7084 | static_assert_rounding!(ROUNDING); |
7085 | let mut fnmsub: f16 = 0.0; |
7086 | if k & 1 != 0 { |
7087 | let extracta: f16 = simd_extract!(a, 0); |
7088 | let extractb: f16 = simd_extract!(b, 0); |
7089 | let extractc: f16 = simd_extract!(c, 0); |
fnmsub = vfmaddsh(-extracta, extractb, -extractc, ROUNDING);
7091 | } |
7092 | simd_insert!(a, 0, fnmsub) |
7093 | } |
7094 | } |
7095 | |
7096 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7097 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst. |
7098 | /// |
7099 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmaddsub_ph) |
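///
/// A minimal usage sketch, not taken from Intel's guide; it assumes avx512fp16 support at runtime.
/// Even-indexed lanes compute a*b - c and odd-indexed lanes compute a*b + c:
///
/// ```ignore
/// // Assumes the caller has verified avx512fp16 support.
/// let a = _mm_set1_ph(2.0);
/// let b = _mm_set1_ph(3.0);
/// let c = _mm_set1_ph(1.0);
/// let r = _mm_fmaddsub_ph(a, b, c);
/// // Even lanes == 2.0 * 3.0 - 1.0 == 5.0; odd lanes == 2.0 * 3.0 + 1.0 == 7.0.
/// ```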
7100 | #[inline ] |
7101 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7102 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7103 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7104 | pub fn _mm_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
7105 | unsafe { vfmaddsubph_128(a, b, c) } |
7106 | } |
7107 | |
7108 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7109 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7110 | /// (the element is copied from a when the corresponding mask bit is not set). |
7111 | /// |
7112 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmaddsub_ph) |
7113 | #[inline ] |
7114 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7115 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7116 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7117 | pub fn _mm_mask_fmaddsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), a) }
7119 | } |
7120 | |
7121 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7122 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7123 | /// (the element is copied from c when the corresponding mask bit is not set). |
7124 | /// |
7125 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmaddsub_ph) |
7126 | #[inline ] |
7127 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7128 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7129 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7130 | pub fn _mm_mask3_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), c) }
7132 | } |
7133 | |
7134 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7135 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k |
7136 | /// (the element is zeroed out when the corresponding mask bit is not set). |
7137 | /// |
7138 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmaddsub_ph) |
7139 | #[inline ] |
7140 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7141 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7142 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7143 | pub fn _mm_maskz_fmaddsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), _mm_setzero_ph()) }
7145 | } |
7146 | |
7147 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7148 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst. |
7149 | /// |
7150 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmaddsub_ph) |
7151 | #[inline ] |
7152 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7153 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7154 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7155 | pub fn _mm256_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
7156 | unsafe { vfmaddsubph_256(a, b, c) } |
7157 | } |
7158 | |
7159 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7160 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7161 | /// (the element is copied from a when the corresponding mask bit is not set). |
7162 | /// |
7163 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmaddsub_ph) |
7164 | #[inline ] |
7165 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7166 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7167 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7168 | pub fn _mm256_mask_fmaddsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { |
unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), a) }
7170 | } |
7171 | |
7172 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7173 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7174 | /// (the element is copied from c when the corresponding mask bit is not set). |
7175 | /// |
7176 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmaddsub_ph) |
7177 | #[inline ] |
7178 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7179 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7180 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7181 | pub fn _mm256_mask3_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { |
unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), c) }
7183 | } |
7184 | |
7185 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7186 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k |
7187 | /// (the element is zeroed out when the corresponding mask bit is not set). |
7188 | /// |
7189 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmaddsub_ph) |
7190 | #[inline ] |
7191 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7192 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7193 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7194 | pub fn _mm256_maskz_fmaddsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), _mm256_setzero_ph()) }
7196 | } |
7197 | |
7198 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7199 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst. |
7200 | /// |
7201 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_ph) |
7202 | #[inline ] |
7203 | #[target_feature (enable = "avx512fp16" )] |
7204 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7205 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7206 | pub fn _mm512_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
7207 | _mm512_fmaddsub_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b, c) |
7208 | } |
7209 | |
7210 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7211 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7212 | /// (the element is copied from a when the corresponding mask bit is not set). |
7213 | /// |
7214 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_ph) |
7215 | #[inline ] |
7216 | #[target_feature (enable = "avx512fp16" )] |
7217 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7218 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7219 | pub fn _mm512_mask_fmaddsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { |
unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), a) }
7221 | } |
7222 | |
7223 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7224 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7225 | /// (the element is copied from c when the corresponding mask bit is not set). |
7226 | /// |
7227 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_ph) |
7228 | #[inline ] |
7229 | #[target_feature (enable = "avx512fp16" )] |
7230 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7231 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7232 | pub fn _mm512_mask3_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { |
unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), c) }
7234 | } |
7235 | |
7236 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7237 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k |
7238 | /// (the element is zeroed out when the corresponding mask bit is not set). |
7239 | /// |
7240 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_ph) |
7241 | #[inline ] |
7242 | #[target_feature (enable = "avx512fp16" )] |
7243 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7244 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7245 | pub fn _mm512_maskz_fmaddsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), _mm512_setzero_ph()) }
7247 | } |
7248 | |
7249 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7250 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst. |
7251 | /// |
7252 | /// Rounding is done according to the rounding parameter, which can be one of: |
7253 | /// |
7254 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7255 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7256 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7257 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7258 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7259 | /// |
7260 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_round_ph) |
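///
/// A minimal usage sketch, not taken from Intel's guide; it assumes avx512fp16 support at runtime
/// and uses the `_mm512_set1_ph` splat from this module:
///
/// ```ignore
/// // Assumes the caller has verified avx512fp16 support.
/// let a = _mm512_set1_ph(2.0);
/// let b = _mm512_set1_ph(3.0);
/// let c = _mm512_set1_ph(1.0);
/// let r = _mm512_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
/// // Even lanes == 2.0 * 3.0 - 1.0 == 5.0; odd lanes == 2.0 * 3.0 + 1.0 == 7.0.
/// ```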
7261 | #[inline ] |
7262 | #[target_feature (enable = "avx512fp16" )] |
7263 | #[cfg_attr (test, assert_instr(vfmaddsub, ROUNDING = 8))] |
7264 | #[rustc_legacy_const_generics (3)] |
7265 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7266 | pub fn _mm512_fmaddsub_round_ph<const ROUNDING: i32>( |
7267 | a: __m512h, |
7268 | b: __m512h, |
7269 | c: __m512h, |
7270 | ) -> __m512h { |
7271 | unsafe { |
7272 | static_assert_rounding!(ROUNDING); |
7273 | vfmaddsubph_512(a, b, c, ROUNDING) |
7274 | } |
7275 | } |
7276 | |
7277 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7278 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7279 | /// (the element is copied from a when the corresponding mask bit is not set). |
7280 | /// |
7281 | /// Rounding is done according to the rounding parameter, which can be one of: |
7282 | /// |
7283 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7284 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7285 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7286 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7287 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7288 | /// |
7289 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_round_ph) |
7290 | #[inline ] |
7291 | #[target_feature (enable = "avx512fp16" )] |
7292 | #[cfg_attr (test, assert_instr(vfmaddsub, ROUNDING = 8))] |
7293 | #[rustc_legacy_const_generics (4)] |
7294 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7295 | pub fn _mm512_mask_fmaddsub_round_ph<const ROUNDING: i32>( |
7296 | a: __m512h, |
7297 | k: __mmask32, |
7298 | b: __m512h, |
7299 | c: __m512h, |
7300 | ) -> __m512h { |
7301 | unsafe { |
7302 | static_assert_rounding!(ROUNDING); |
simd_select_bitmask(k, _mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c), a)
7304 | } |
7305 | } |
7306 | |
7307 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7308 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7309 | /// (the element is copied from c when the corresponding mask bit is not set). |
7310 | /// |
7311 | /// Rounding is done according to the rounding parameter, which can be one of: |
7312 | /// |
7313 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7314 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7315 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7316 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7317 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7318 | /// |
7319 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_round_ph) |
7320 | #[inline ] |
7321 | #[target_feature (enable = "avx512fp16" )] |
7322 | #[cfg_attr (test, assert_instr(vfmaddsub, ROUNDING = 8))] |
7323 | #[rustc_legacy_const_generics (4)] |
7324 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7325 | pub fn _mm512_mask3_fmaddsub_round_ph<const ROUNDING: i32>( |
7326 | a: __m512h, |
7327 | b: __m512h, |
7328 | c: __m512h, |
7329 | k: __mmask32, |
7330 | ) -> __m512h { |
7331 | unsafe { |
7332 | static_assert_rounding!(ROUNDING); |
simd_select_bitmask(k, _mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c), c)
7334 | } |
7335 | } |
7336 | |
7337 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7338 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k |
7339 | /// (the element is zeroed out when the corresponding mask bit is not set). |
7340 | /// |
7341 | /// Rounding is done according to the rounding parameter, which can be one of: |
7342 | /// |
7343 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7344 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7345 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7346 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7347 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7348 | /// |
7349 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_round_ph) |
7350 | #[inline ] |
7351 | #[target_feature (enable = "avx512fp16" )] |
7352 | #[cfg_attr (test, assert_instr(vfmaddsub, ROUNDING = 8))] |
7353 | #[rustc_legacy_const_generics (4)] |
7354 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7355 | pub fn _mm512_maskz_fmaddsub_round_ph<const ROUNDING: i32>( |
7356 | k: __mmask32, |
7357 | a: __m512h, |
7358 | b: __m512h, |
7359 | c: __m512h, |
7360 | ) -> __m512h { |
7361 | unsafe { |
7362 | static_assert_rounding!(ROUNDING); |
7363 | simd_select_bitmask( |
k,
_mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c),
_mm512_setzero_ph(),
7367 | ) |
7368 | } |
7369 | } |
7370 | |
7371 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7372 | /// and add packed elements in c to/from the intermediate result, and store the results in dst. |
7373 | /// |
7374 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsubadd_ph) |
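///
/// A minimal usage sketch, not taken from Intel's guide; it assumes avx512fp16 support at runtime.
/// The alternation is the mirror image of `_mm_fmaddsub_ph`: even-indexed lanes compute a*b + c and
/// odd-indexed lanes compute a*b - c:
///
/// ```ignore
/// // Assumes the caller has verified avx512fp16 support.
/// let a = _mm_set1_ph(2.0);
/// let b = _mm_set1_ph(3.0);
/// let c = _mm_set1_ph(1.0);
/// let r = _mm_fmsubadd_ph(a, b, c);
/// // Even lanes == 2.0 * 3.0 + 1.0 == 7.0; odd lanes == 2.0 * 3.0 - 1.0 == 5.0.
/// ```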
7375 | #[inline ] |
7376 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7377 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7378 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7379 | pub fn _mm_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
unsafe { vfmaddsubph_128(a, b, simd_neg(c)) }
7381 | } |
7382 | |
7383 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7384 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7385 | /// (the element is copied from a when the corresponding mask bit is not set). |
7386 | /// |
7387 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsubadd_ph) |
7388 | #[inline ] |
7389 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7390 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7391 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7392 | pub fn _mm_mask_fmsubadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), a) }
7394 | } |
7395 | |
7396 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7397 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7398 | /// (the element is copied from c when the corresponding mask bit is not set). |
7399 | /// |
7400 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsubadd_ph) |
7401 | #[inline ] |
7402 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7403 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7404 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7405 | pub fn _mm_mask3_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), c) }
7407 | } |
7408 | |
7409 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7410 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k |
7411 | /// (the element is zeroed out when the corresponding mask bit is not set). |
7412 | /// |
7413 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsubadd_ph) |
7414 | #[inline ] |
7415 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7416 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7417 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7418 | pub fn _mm_maskz_fmsubadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), _mm_setzero_ph()) }
7420 | } |
7421 | |
7422 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7423 | /// and add packed elements in c to/from the intermediate result, and store the results in dst. |
7424 | /// |
7425 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsubadd_ph) |
7426 | #[inline ] |
7427 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7428 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7429 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7430 | pub fn _mm256_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
unsafe { vfmaddsubph_256(a, b, simd_neg(c)) }
7432 | } |
7433 | |
7434 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7435 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7436 | /// (the element is copied from a when the corresponding mask bit is not set). |
7437 | /// |
7438 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmsubadd_ph) |
7439 | #[inline ] |
7440 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7441 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7442 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7443 | pub fn _mm256_mask_fmsubadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { |
unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), a) }
7445 | } |
7446 | |
7447 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7448 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7449 | /// (the element is copied from c when the corresponding mask bit is not set). |
7450 | /// |
7451 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmsubadd_ph) |
7452 | #[inline ] |
7453 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7454 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7455 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7456 | pub fn _mm256_mask3_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { |
unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), c) }
7458 | } |
7459 | |
7460 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7461 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k |
7462 | /// (the element is zeroed out when the corresponding mask bit is not set). |
7463 | /// |
7464 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmsubadd_ph) |
7465 | #[inline ] |
7466 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7467 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7468 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7469 | pub fn _mm256_maskz_fmsubadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), _mm256_setzero_ph()) }
7471 | } |
7472 | |
7473 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7474 | /// and add packed elements in c to/from the intermediate result, and store the results in dst. |
7475 | /// |
7476 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_ph) |
7477 | #[inline ] |
7478 | #[target_feature (enable = "avx512fp16" )] |
7479 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7480 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7481 | pub fn _mm512_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
7482 | _mm512_fmsubadd_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b, c) |
7483 | } |
7484 | |
7485 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7486 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7487 | /// (the element is copied from a when the corresponding mask bit is not set). |
7488 | /// |
7489 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_ph) |
7490 | #[inline ] |
7491 | #[target_feature (enable = "avx512fp16" )] |
7492 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7493 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7494 | pub fn _mm512_mask_fmsubadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { |
unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), a) }
7496 | } |
7497 | |
7498 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7499 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7500 | /// (the element is copied from c when the corresponding mask bit is not set). |
7501 | /// |
7502 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_ph) |
7503 | #[inline ] |
7504 | #[target_feature (enable = "avx512fp16" )] |
7505 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7506 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7507 | pub fn _mm512_mask3_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { |
unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), c) }
7509 | } |
7510 | |
7511 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7512 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k |
7513 | /// (the element is zeroed out when the corresponding mask bit is not set). |
7514 | /// |
7515 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_ph) |
7516 | #[inline ] |
7517 | #[target_feature (enable = "avx512fp16" )] |
7518 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7519 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7520 | pub fn _mm512_maskz_fmsubadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), _mm512_setzero_ph()) }
7522 | } |
7523 | |
7524 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7525 | /// and add packed elements in c to/from the intermediate result, and store the results in dst. |
7526 | /// |
7527 | /// Rounding is done according to the rounding parameter, which can be one of: |
7528 | /// |
7529 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7530 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7531 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7532 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7533 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7534 | /// |
7535 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_round_ph) |
7536 | #[inline ] |
7537 | #[target_feature (enable = "avx512fp16" )] |
7538 | #[cfg_attr (test, assert_instr(vfmsubadd, ROUNDING = 8))] |
7539 | #[rustc_legacy_const_generics (3)] |
7540 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7541 | pub fn _mm512_fmsubadd_round_ph<const ROUNDING: i32>( |
7542 | a: __m512h, |
7543 | b: __m512h, |
7544 | c: __m512h, |
7545 | ) -> __m512h { |
7546 | unsafe { |
7547 | static_assert_rounding!(ROUNDING); |
vfmaddsubph_512(a, b, simd_neg(c), ROUNDING)
7549 | } |
7550 | } |
7551 | |
7552 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7553 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7554 | /// (the element is copied from a when the corresponding mask bit is not set). |
7555 | /// |
7556 | /// Rounding is done according to the rounding parameter, which can be one of: |
7557 | /// |
7558 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7559 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7560 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7561 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7562 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7563 | /// |
7564 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_round_ph) |
7565 | #[inline ] |
7566 | #[target_feature (enable = "avx512fp16" )] |
7567 | #[cfg_attr (test, assert_instr(vfmsubadd, ROUNDING = 8))] |
7568 | #[rustc_legacy_const_generics (4)] |
7569 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7570 | pub fn _mm512_mask_fmsubadd_round_ph<const ROUNDING: i32>( |
7571 | a: __m512h, |
7572 | k: __mmask32, |
7573 | b: __m512h, |
7574 | c: __m512h, |
7575 | ) -> __m512h { |
7576 | unsafe { |
7577 | static_assert_rounding!(ROUNDING); |
simd_select_bitmask(k, _mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c), a)
7579 | } |
7580 | } |
7581 | |
7582 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7583 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7584 | /// (the element is copied from c when the corresponding mask bit is not set). |
7585 | /// |
7586 | /// Rounding is done according to the rounding parameter, which can be one of: |
7587 | /// |
7588 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7589 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7590 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7591 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7592 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7593 | /// |
7594 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_round_ph) |
7595 | #[inline ] |
7596 | #[target_feature (enable = "avx512fp16" )] |
7597 | #[cfg_attr (test, assert_instr(vfmsubadd, ROUNDING = 8))] |
7598 | #[rustc_legacy_const_generics (4)] |
7599 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7600 | pub fn _mm512_mask3_fmsubadd_round_ph<const ROUNDING: i32>( |
7601 | a: __m512h, |
7602 | b: __m512h, |
7603 | c: __m512h, |
7604 | k: __mmask32, |
7605 | ) -> __m512h { |
7606 | unsafe { |
7607 | static_assert_rounding!(ROUNDING); |
simd_select_bitmask(k, _mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c), c)
7609 | } |
7610 | } |
7611 | |
7612 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7613 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k |
7614 | /// (the element is zeroed out when the corresponding mask bit is not set). |
7615 | /// |
7616 | /// Rounding is done according to the rounding parameter, which can be one of: |
7617 | /// |
7618 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7619 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7620 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7621 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7622 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7623 | /// |
7624 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_round_ph) |
7625 | #[inline ] |
7626 | #[target_feature (enable = "avx512fp16" )] |
7627 | #[cfg_attr (test, assert_instr(vfmsubadd, ROUNDING = 8))] |
7628 | #[rustc_legacy_const_generics (4)] |
7629 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7630 | pub fn _mm512_maskz_fmsubadd_round_ph<const ROUNDING: i32>( |
7631 | k: __mmask32, |
7632 | a: __m512h, |
7633 | b: __m512h, |
7634 | c: __m512h, |
7635 | ) -> __m512h { |
7636 | unsafe { |
7637 | static_assert_rounding!(ROUNDING); |
7638 | simd_select_bitmask( |
k,
_mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c),
_mm512_setzero_ph(),
7642 | ) |
7643 | } |
7644 | } |
7645 | |
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store the results in dst.
7647 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7648 | /// |
7649 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ph) |
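///
/// A minimal usage sketch, not taken from Intel's guide; it assumes avx512fp16 support at runtime:
///
/// ```ignore
/// // Assumes the caller has verified avx512fp16 support.
/// let a = _mm_set1_ph(4.0);
/// let r = _mm_rcp_ph(a);
/// // Every lane is approximately 0.25 (relative error below 1.5*2^-12).
/// ```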
7650 | #[inline ] |
7651 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7652 | #[cfg_attr (test, assert_instr(vrcpph))] |
7653 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7654 | pub fn _mm_rcp_ph(a: __m128h) -> __m128h { |
_mm_mask_rcp_ph(_mm_undefined_ph(), 0xff, a)
7656 | } |
7657 | |
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store
/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7660 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7661 | /// |
7662 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_ph) |
7663 | #[inline ] |
7664 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7665 | #[cfg_attr (test, assert_instr(vrcpph))] |
7666 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7667 | pub fn _mm_mask_rcp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { |
7668 | unsafe { vrcpph_128(a, src, k) } |
7669 | } |
7670 | |
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store
/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7673 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7674 | /// |
7675 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_ph) |
7676 | #[inline ] |
7677 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7678 | #[cfg_attr (test, assert_instr(vrcpph))] |
7679 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7680 | pub fn _mm_maskz_rcp_ph(k: __mmask8, a: __m128h) -> __m128h { |
_mm_mask_rcp_ph(_mm_setzero_ph(), k, a)
7682 | } |
7683 | |
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store the results in dst.
7685 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7686 | /// |
7687 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rcp_ph) |
7688 | #[inline ] |
7689 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7690 | #[cfg_attr (test, assert_instr(vrcpph))] |
7691 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7692 | pub fn _mm256_rcp_ph(a: __m256h) -> __m256h { |
_mm256_mask_rcp_ph(_mm256_undefined_ph(), 0xffff, a)
7694 | } |
7695 | |
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store
/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7698 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7699 | /// |
7700 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rcp_ph) |
7701 | #[inline ] |
7702 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7703 | #[cfg_attr (test, assert_instr(vrcpph))] |
7704 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7705 | pub fn _mm256_mask_rcp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { |
7706 | unsafe { vrcpph_256(a, src, k) } |
7707 | } |
7708 | |
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store
/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7711 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7712 | /// |
7713 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rcp_ph) |
7714 | #[inline ] |
7715 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7716 | #[cfg_attr (test, assert_instr(vrcpph))] |
7717 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7718 | pub fn _mm256_maskz_rcp_ph(k: __mmask16, a: __m256h) -> __m256h { |
_mm256_mask_rcp_ph(_mm256_setzero_ph(), k, a)
7720 | } |
7721 | |
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store the results in dst.
7723 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7724 | /// |
7725 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rcp_ph) |
7726 | #[inline ] |
7727 | #[target_feature (enable = "avx512fp16" )] |
7728 | #[cfg_attr (test, assert_instr(vrcpph))] |
7729 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7730 | pub fn _mm512_rcp_ph(a: __m512h) -> __m512h { |
_mm512_mask_rcp_ph(_mm512_undefined_ph(), 0xffffffff, a)
7732 | } |
7733 | |
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store
/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7736 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7737 | /// |
7738 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rcp_ph) |
7739 | #[inline ] |
7740 | #[target_feature (enable = "avx512fp16" )] |
7741 | #[cfg_attr (test, assert_instr(vrcpph))] |
7742 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7743 | pub fn _mm512_mask_rcp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { |
7744 | unsafe { vrcpph_512(a, src, k) } |
7745 | } |
7746 | |
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store
/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7749 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7750 | /// |
7751 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rcp_ph) |
7752 | #[inline ] |
7753 | #[target_feature (enable = "avx512fp16" )] |
7754 | #[cfg_attr (test, assert_instr(vrcpph))] |
7755 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7756 | pub fn _mm512_maskz_rcp_ph(k: __mmask32, a: __m512h) -> __m512h { |
_mm512_mask_rcp_ph(_mm512_setzero_ph(), k, a)
7758 | } |
7759 | |
7760 | /// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b, |
7761 | /// store the result in the lower element of dst, and copy the upper 7 packed elements from a to the |
7762 | /// upper elements of dst. |
7763 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7764 | /// |
7765 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_sh) |
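///
/// A minimal usage sketch, not taken from Intel's guide; it assumes avx512fp16 support at runtime.
/// Only the lower lane of b is inverted; the upper lanes are copied from a:
///
/// ```ignore
/// // Assumes the caller has verified avx512fp16 support.
/// let a = _mm_set1_ph(9.0);
/// let b = _mm_set1_ph(4.0);
/// let r = _mm_rcp_sh(a, b);
/// // Lane 0 is approximately 0.25; lanes 1..=7 == 9.0 (from a).
/// ```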
7766 | #[inline ] |
7767 | #[target_feature (enable = "avx512fp16" )] |
7768 | #[cfg_attr (test, assert_instr(vrcpsh))] |
7769 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7770 | pub fn _mm_rcp_sh(a: __m128h, b: __m128h) -> __m128h { |
_mm_mask_rcp_sh(_mm_undefined_ph(), 0xff, a, b)
7772 | } |
7773 | |
7774 | /// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b, |
7775 | /// store the result in the lower element of dst using writemask k (the element is copied from src when |
7776 | /// mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
7777 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7778 | /// |
7779 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_sh) |
7780 | #[inline ] |
7781 | #[target_feature (enable = "avx512fp16" )] |
7782 | #[cfg_attr (test, assert_instr(vrcpsh))] |
7783 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7784 | pub fn _mm_mask_rcp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
7785 | unsafe { vrcpsh(a, b, src, k) } |
7786 | } |
7787 | |
7788 | /// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b, |
7789 | /// store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 |
7790 | /// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
7791 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7792 | /// |
7793 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_sh) |
7794 | #[inline ] |
7795 | #[target_feature (enable = "avx512fp16" )] |
7796 | #[cfg_attr (test, assert_instr(vrcpsh))] |
7797 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7798 | pub fn _mm_maskz_rcp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
_mm_mask_rcp_sh(_mm_setzero_ph(), k, a, b)
7800 | } |
7801 | |
7802 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7803 | /// elements in a, and store the results in dst. |
7804 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7805 | /// |
7806 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ph) |
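///
/// A minimal usage sketch, not taken from Intel's guide; it assumes avx512fp16 support at runtime:
///
/// ```ignore
/// // Assumes the caller has verified avx512fp16 support.
/// let a = _mm_set1_ph(16.0);
/// let r = _mm_rsqrt_ph(a);
/// // Every lane is approximately 0.25 (relative error below 1.5*2^-12).
/// ```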
7807 | #[inline ] |
7808 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7809 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7810 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7811 | pub fn _mm_rsqrt_ph(a: __m128h) -> __m128h { |
_mm_mask_rsqrt_ph(_mm_undefined_ph(), 0xff, a)
7813 | } |
7814 | |
7815 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7816 | /// elements in a, and store the results in dst using writemask k (elements are copied from src when |
7817 | /// the corresponding mask bit is not set). |
7818 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7819 | /// |
7820 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt_ph) |
7821 | #[inline ] |
7822 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7823 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7824 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7825 | pub fn _mm_mask_rsqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { |
7826 | unsafe { vrsqrtph_128(a, src, k) } |
7827 | } |
7828 | |
7829 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7830 | /// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the |
7831 | /// corresponding mask bit is not set). |
7832 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7833 | /// |
7834 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rsqrt_ph) |
7835 | #[inline ] |
7836 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7837 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7838 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7839 | pub fn _mm_maskz_rsqrt_ph(k: __mmask8, a: __m128h) -> __m128h { |
_mm_mask_rsqrt_ph(_mm_setzero_ph(), k, a)
7841 | } |
7842 | |
7843 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7844 | /// elements in a, and store the results in dst. |
7845 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7846 | /// |
7847 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rsqrt_ph) |
7848 | #[inline ] |
7849 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7850 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7851 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7852 | pub fn _mm256_rsqrt_ph(a: __m256h) -> __m256h { |
7853 | _mm256_mask_rsqrt_ph(_mm256_undefined_ph(), 0xffff, a) |
7854 | } |
7855 | |
7856 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7857 | /// elements in a, and store the results in dst using writemask k (elements are copied from src when |
7858 | /// the corresponding mask bit is not set). |
7859 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7860 | /// |
7861 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rsqrt_ph) |
7862 | #[inline ] |
7863 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7864 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7865 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7866 | pub fn _mm256_mask_rsqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { |
7867 | unsafe { vrsqrtph_256(a, src, k) } |
7868 | } |
7869 | |
7870 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7871 | /// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the |
7872 | /// corresponding mask bit is not set). |
7873 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7874 | /// |
7875 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rsqrt_ph) |
7876 | #[inline ] |
7877 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7878 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7879 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7880 | pub fn _mm256_maskz_rsqrt_ph(k: __mmask16, a: __m256h) -> __m256h { |
7881 | _mm256_mask_rsqrt_ph(_mm256_setzero_ph(), k, a) |
7882 | } |
7883 | |
7884 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7885 | /// elements in a, and store the results in dst. |
7886 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7887 | /// |
7888 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rsqrt_ph) |
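/// # Example
///
/// A minimal sketch (illustrative; assumes `avx512fp16` support):
///
/// ```ignore
/// let a = _mm512_set1_ph(2.0);
/// let r = _mm512_rsqrt_ph(a); // all 32 lanes ≈ 0.7071
/// ```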
7889 | #[inline ] |
7890 | #[target_feature (enable = "avx512fp16" )] |
7891 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7892 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7893 | pub fn _mm512_rsqrt_ph(a: __m512h) -> __m512h { |
7894 | _mm512_mask_rsqrt_ph(_mm512_undefined_ph(), 0xffffffff, a) |
7895 | } |
7896 | |
7897 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7898 | /// elements in a, and store the results in dst using writemask k (elements are copied from src when |
7899 | /// the corresponding mask bit is not set). |
7900 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7901 | /// |
7902 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rsqrt_ph) |
7903 | #[inline ] |
7904 | #[target_feature (enable = "avx512fp16" )] |
7905 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7906 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7907 | pub fn _mm512_mask_rsqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { |
7908 | unsafe { vrsqrtph_512(a, src, k) } |
7909 | } |
7910 | |
7911 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7912 | /// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the |
7913 | /// corresponding mask bit is not set). |
7914 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7915 | /// |
7916 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rsqrt_ph) |
7917 | #[inline ] |
7918 | #[target_feature (enable = "avx512fp16" )] |
7919 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7920 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7921 | pub fn _mm512_maskz_rsqrt_ph(k: __mmask32, a: __m512h) -> __m512h { |
7922 | _mm512_mask_rsqrt_ph(_mm512_setzero_ph(), k, a) |
7923 | } |
7924 | |
7925 | /// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point |
7926 | /// element in b, store the result in the lower element of dst, and copy the upper 7 packed elements from a |
7927 | /// to the upper elements of dst. |
7928 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7929 | /// |
7930 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_sh) |
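/// # Example
///
/// A minimal sketch (illustrative values; assumes `avx512fp16` support):
///
/// ```ignore
/// let a = _mm_set1_ph(3.0);
/// let b = _mm_set_sh(25.0);
/// let r = _mm_rsqrt_sh(a, b); // lane 0 ≈ 0.2; lanes 1..=7 come from `a`, i.e. 3.0
/// ```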
7931 | #[inline ] |
7932 | #[target_feature (enable = "avx512fp16" )] |
7933 | #[cfg_attr (test, assert_instr(vrsqrtsh))] |
7934 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7935 | pub fn _mm_rsqrt_sh(a: __m128h, b: __m128h) -> __m128h { |
7936 | _mm_mask_rsqrt_sh(_mm_undefined_ph(), 0xff, a, b) |
7937 | } |
7938 | |
7939 | /// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point |
7940 | /// element in b, store the result in the lower element of dst using writemask k (the element is copied from src |
7941 | /// when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
7942 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7943 | /// |
7944 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt_sh) |
7945 | #[inline ] |
7946 | #[target_feature (enable = "avx512fp16" )] |
7947 | #[cfg_attr (test, assert_instr(vrsqrtsh))] |
7948 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7949 | pub fn _mm_mask_rsqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
7950 | unsafe { vrsqrtsh(a, b, src, k) } |
7951 | } |
7952 | |
7953 | /// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point |
7954 | /// element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when |
7955 | /// mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
7956 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7957 | /// |
7958 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rsqrt_sh) |
7959 | #[inline ] |
7960 | #[target_feature (enable = "avx512fp16" )] |
7961 | #[cfg_attr (test, assert_instr(vrsqrtsh))] |
7962 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7963 | pub fn _mm_maskz_rsqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
7964 | _mm_mask_rsqrt_sh(_mm_setzero_ph(), k, a, b) |
7965 | } |
7966 | |
7967 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
7968 | /// results in dst. |
7969 | /// |
7970 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_ph) |
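/// # Example
///
/// A minimal sketch (illustrative; assumes `avx512fp16` and `avx512vl`):
///
/// ```ignore
/// let a = _mm_set_ph(64.0, 49.0, 36.0, 25.0, 16.0, 9.0, 4.0, 1.0);
/// let r = _mm_sqrt_ph(a); // lanes 0..=7 hold 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0
/// ```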
7971 | #[inline ] |
7972 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7973 | #[cfg_attr (test, assert_instr(vsqrtph))] |
7974 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7975 | pub fn _mm_sqrt_ph(a: __m128h) -> __m128h { |
7976 | unsafe { simd_fsqrt(a) } |
7977 | } |
7978 | |
7979 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
7980 | /// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7981 | /// |
7982 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_ph) |
7983 | #[inline ] |
7984 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7985 | #[cfg_attr (test, assert_instr(vsqrtph))] |
7986 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7987 | pub fn _mm_mask_sqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { |
7988 | unsafe { simd_select_bitmask(k, _mm_sqrt_ph(a), src) } |
7989 | } |
7990 | |
7991 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
7992 | /// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
7993 | /// |
7994 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_ph) |
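/// # Example
///
/// An illustrative sketch of the zeromask behaviour (assumed feature set as above):
///
/// ```ignore
/// let a = _mm_set1_ph(9.0);
/// let r = _mm_maskz_sqrt_ph(0b1111_0000, a);
/// // lanes 4..=7 hold 3.0; lanes 0..=3 are zeroed because their mask bits are clear
/// ```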
7995 | #[inline ] |
7996 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7997 | #[cfg_attr (test, assert_instr(vsqrtph))] |
7998 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7999 | pub fn _mm_maskz_sqrt_ph(k: __mmask8, a: __m128h) -> __m128h { |
8000 | unsafe { simd_select_bitmask(k, _mm_sqrt_ph(a), _mm_setzero_ph()) } |
8001 | } |
8002 | |
8003 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8004 | /// results in dst. |
8005 | /// |
8006 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sqrt_ph) |
8007 | #[inline ] |
8008 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8009 | #[cfg_attr (test, assert_instr(vsqrtph))] |
8010 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8011 | pub fn _mm256_sqrt_ph(a: __m256h) -> __m256h { |
8012 | unsafe { simd_fsqrt(a) } |
8013 | } |
8014 | |
8015 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8016 | /// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8017 | /// |
8018 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sqrt_ph) |
8019 | #[inline ] |
8020 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8021 | #[cfg_attr (test, assert_instr(vsqrtph))] |
8022 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8023 | pub fn _mm256_mask_sqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { |
8024 | unsafe { simd_select_bitmask(k, _mm256_sqrt_ph(a), src) } |
8025 | } |
8026 | |
8027 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8028 | /// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8029 | /// |
8030 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sqrt_ph) |
8031 | #[inline ] |
8032 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8033 | #[cfg_attr (test, assert_instr(vsqrtph))] |
8034 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8035 | pub fn _mm256_maskz_sqrt_ph(k: __mmask16, a: __m256h) -> __m256h { |
8036 | unsafe { simd_select_bitmask(k, _mm256_sqrt_ph(a), _mm256_setzero_ph()) } |
8037 | } |
8038 | |
8039 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8040 | /// results in dst. |
8041 | /// |
8042 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_ph) |
8043 | #[inline ] |
8044 | #[target_feature (enable = "avx512fp16" )] |
8045 | #[cfg_attr (test, assert_instr(vsqrtph))] |
8046 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8047 | pub fn _mm512_sqrt_ph(a: __m512h) -> __m512h { |
8048 | unsafe { simd_fsqrt(a) } |
8049 | } |
8050 | |
8051 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8052 | /// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8053 | /// |
8054 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_ph) |
8055 | #[inline ] |
8056 | #[target_feature (enable = "avx512fp16" )] |
8057 | #[cfg_attr (test, assert_instr(vsqrtph))] |
8058 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8059 | pub fn _mm512_mask_sqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { |
8060 | unsafe { simd_select_bitmask(k, _mm512_sqrt_ph(a), src) } |
8061 | } |
8062 | |
8063 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8064 | /// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8065 | /// |
8066 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_ph) |
8067 | #[inline ] |
8068 | #[target_feature (enable = "avx512fp16" )] |
8069 | #[cfg_attr (test, assert_instr(vsqrtph))] |
8070 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8071 | pub fn _mm512_maskz_sqrt_ph(k: __mmask32, a: __m512h) -> __m512h { |
8072 | unsafe { simd_select_bitmask(k, _mm512_sqrt_ph(a), _mm512_setzero_ph()) } |
8073 | } |
8074 | |
8075 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8076 | /// results in dst. |
8077 | /// Rounding is done according to the rounding parameter, which can be one of: |
8078 | /// |
8079 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
8080 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
8081 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
8082 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
8083 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
8084 | /// |
8085 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_round_ph) |
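/// # Example
///
/// A minimal sketch showing how a rounding mode is passed as a const generic
/// (illustrative; assumes `avx512fp16` support):
///
/// ```ignore
/// let a = _mm512_set1_ph(2.0);
/// let r = _mm512_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
/// // every lane ≈ 1.4142
/// ```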
8086 | #[inline ] |
8087 | #[target_feature (enable = "avx512fp16" )] |
8088 | #[cfg_attr (test, assert_instr(vsqrtph, ROUNDING = 8))] |
8089 | #[rustc_legacy_const_generics (1)] |
8090 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8091 | pub fn _mm512_sqrt_round_ph<const ROUNDING: i32>(a: __m512h) -> __m512h { |
8092 | unsafe { |
8093 | static_assert_rounding!(ROUNDING); |
8094 | vsqrtph_512(a, ROUNDING) |
8095 | } |
8096 | } |
8097 | |
8098 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8099 | /// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8100 | /// Rounding is done according to the rounding parameter, which can be one of: |
8101 | /// |
8102 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
8103 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
8104 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
8105 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
8106 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
8107 | /// |
8108 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_round_ph) |
8109 | #[inline ] |
8110 | #[target_feature (enable = "avx512fp16" )] |
8111 | #[cfg_attr (test, assert_instr(vsqrtph, ROUNDING = 8))] |
8112 | #[rustc_legacy_const_generics (3)] |
8113 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8114 | pub fn _mm512_mask_sqrt_round_ph<const ROUNDING: i32>( |
8115 | src: __m512h, |
8116 | k: __mmask32, |
8117 | a: __m512h, |
8118 | ) -> __m512h { |
8119 | unsafe { |
8120 | static_assert_rounding!(ROUNDING); |
8121 | simd_select_bitmask(k, _mm512_sqrt_round_ph::<ROUNDING>(a), src) |
8122 | } |
8123 | } |
8124 | |
8125 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8126 | /// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8127 | /// Rounding is done according to the rounding parameter, which can be one of: |
8128 | /// |
8129 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
8130 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
8131 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
8132 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
8133 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
8134 | /// |
8135 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_round_ph) |
8136 | #[inline ] |
8137 | #[target_feature (enable = "avx512fp16" )] |
8138 | #[cfg_attr (test, assert_instr(vsqrtph, ROUNDING = 8))] |
8139 | #[rustc_legacy_const_generics (2)] |
8140 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8141 | pub fn _mm512_maskz_sqrt_round_ph<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512h { |
8142 | unsafe { |
8143 | static_assert_rounding!(ROUNDING); |
8144 | simd_select_bitmask(k, _mm512_sqrt_round_ph::<ROUNDING>(a), _mm512_setzero_ph()) |
8145 | } |
8146 | } |
8147 | |
8148 | /// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store |
8149 | /// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper |
8150 | /// elements of dst. |
8151 | /// |
8152 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sh) |
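/// # Example
///
/// A minimal sketch (illustrative; assumes `avx512fp16` support):
///
/// ```ignore
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set_sh(9.0);
/// let r = _mm_sqrt_sh(a, b); // lane 0 = 3.0; lanes 1..=7 come from `a`, i.e. 1.0
/// ```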
8153 | #[inline ] |
8154 | #[target_feature (enable = "avx512fp16" )] |
8155 | #[cfg_attr (test, assert_instr(vsqrtsh))] |
8156 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8157 | pub fn _mm_sqrt_sh(a: __m128h, b: __m128h) -> __m128h { |
8158 | _mm_mask_sqrt_sh(_mm_undefined_ph(), 0xff, a, b) |
8159 | } |
8160 | |
8161 | /// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store |
8162 | /// the result in the lower element of dst using writemask k (the element is copied from src when mask |
8163 | /// bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
8164 | /// |
8165 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_sh) |
8166 | #[inline ] |
8167 | #[target_feature (enable = "avx512fp16" )] |
8168 | #[cfg_attr (test, assert_instr(vsqrtsh))] |
8169 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8170 | pub fn _mm_mask_sqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8171 | _mm_mask_sqrt_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
8172 | } |
8173 | |
8174 | /// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store |
8175 | /// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 |
8176 | /// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
8177 | /// |
8178 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_sh) |
8179 | #[inline ] |
8180 | #[target_feature (enable = "avx512fp16" )] |
8181 | #[cfg_attr (test, assert_instr(vsqrtsh))] |
8182 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8183 | pub fn _mm_maskz_sqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8184 | _mm_mask_sqrt_sh(_mm_setzero_ph(), k, a, b) |
8185 | } |
8186 | |
8187 | /// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store |
8188 | /// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper |
8189 | /// elements of dst. |
8190 | /// Rounding is done according to the rounding parameter, which can be one of: |
8191 | /// |
8192 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
8193 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
8194 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
8195 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
8196 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
8197 | /// |
8198 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_round_sh) |
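/// # Example
///
/// A minimal sketch with an explicit rounding mode (illustrative; assumes `avx512fp16`):
///
/// ```ignore
/// let a = _mm_setzero_ph();
/// let b = _mm_set_sh(16.0);
/// let r = _mm_sqrt_round_sh::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
/// // lane 0 = 4.0; lanes 1..=7 come from `a`, i.e. 0.0
/// ```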
8199 | #[inline ] |
8200 | #[target_feature (enable = "avx512fp16" )] |
8201 | #[cfg_attr (test, assert_instr(vsqrtsh, ROUNDING = 8))] |
8202 | #[rustc_legacy_const_generics (2)] |
8203 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8204 | pub fn _mm_sqrt_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
8205 | static_assert_rounding!(ROUNDING); |
8206 | _mm_mask_sqrt_round_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b) |
8207 | } |
8208 | |
8209 | /// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store |
8210 | /// the result in the lower element of dst using writemask k (the element is copied from src when mask |
8211 | /// bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
8212 | /// Rounding is done according to the rounding parameter, which can be one of: |
8213 | /// |
8214 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
8215 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
8216 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
8217 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
8218 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
8219 | /// |
8220 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_round_sh) |
8221 | #[inline ] |
8222 | #[target_feature (enable = "avx512fp16" )] |
8223 | #[cfg_attr (test, assert_instr(vsqrtsh, ROUNDING = 8))] |
8224 | #[rustc_legacy_const_generics (4)] |
8225 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8226 | pub fn _mm_mask_sqrt_round_sh<const ROUNDING: i32>( |
8227 | src: __m128h, |
8228 | k: __mmask8, |
8229 | a: __m128h, |
8230 | b: __m128h, |
8231 | ) -> __m128h { |
8232 | unsafe { |
8233 | static_assert_rounding!(ROUNDING); |
8234 | vsqrtsh(a, b, src, k, ROUNDING) |
8235 | } |
8236 | } |
8237 | |
8238 | /// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store |
8239 | /// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 |
8240 | /// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
8241 | /// Rounding is done according to the rounding parameter, which can be one of: |
8242 | /// |
8243 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
8244 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
8245 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
8246 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
8247 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
8248 | /// |
8249 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_round_sh) |
8250 | #[inline ] |
8251 | #[target_feature (enable = "avx512fp16" )] |
8252 | #[cfg_attr (test, assert_instr(vsqrtsh, ROUNDING = 8))] |
8253 | #[rustc_legacy_const_generics (3)] |
8254 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8255 | pub fn _mm_maskz_sqrt_round_sh<const ROUNDING: i32>( |
8256 | k: __mmask8, |
8257 | a: __m128h, |
8258 | b: __m128h, |
8259 | ) -> __m128h { |
8260 | static_assert_rounding!(ROUNDING); |
8261 | _mm_mask_sqrt_round_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b) |
8262 | } |
8263 | |
8264 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8265 | /// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum |
8266 | /// value when inputs are NaN or signed-zero values. |
8267 | /// |
8268 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_ph) |
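/// # Example
///
/// A minimal sketch (illustrative; assumes `avx512fp16` and `avx512vl`; note the
/// non-IEEE handling of NaN and signed-zero inputs described above):
///
/// ```ignore
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set1_ph(2.0);
/// let r = _mm_max_ph(a, b); // every lane = 2.0
/// ```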
8269 | #[inline ] |
8270 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8271 | #[cfg_attr (test, assert_instr(vmaxph))] |
8272 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8273 | pub fn _mm_max_ph(a: __m128h, b: __m128h) -> __m128h { |
8274 | unsafe { vmaxph_128(a, b) } |
8275 | } |
8276 | |
8277 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8278 | /// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8279 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are |
8280 | /// NaN or signed-zero values. |
8281 | /// |
8282 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_ph) |
8283 | #[inline ] |
8284 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8285 | #[cfg_attr (test, assert_instr(vmaxph))] |
8286 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8287 | pub fn _mm_mask_max_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8288 | unsafe { simd_select_bitmask(k, _mm_max_ph(a, b), src) } |
8289 | } |
8290 | |
8291 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8292 | /// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8293 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are |
8294 | /// NaN or signed-zero values. |
8295 | /// |
8296 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_ph) |
8297 | #[inline ] |
8298 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8299 | #[cfg_attr (test, assert_instr(vmaxph))] |
8300 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8301 | pub fn _mm_maskz_max_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8302 | unsafe { simd_select_bitmask(k, _mm_max_ph(a, b), _mm_setzero_ph()) } |
8303 | } |
8304 | |
8305 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8306 | /// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum |
8307 | /// value when inputs are NaN or signed-zero values. |
8308 | /// |
8309 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_ph) |
8310 | #[inline ] |
8311 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8312 | #[cfg_attr (test, assert_instr(vmaxph))] |
8313 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8314 | pub fn _mm256_max_ph(a: __m256h, b: __m256h) -> __m256h { |
8315 | unsafe { vmaxph_256(a, b) } |
8316 | } |
8317 | |
8318 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8319 | /// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8320 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are |
8321 | /// NaN or signed-zero values. |
8322 | /// |
8323 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_max_ph) |
8324 | #[inline ] |
8325 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8326 | #[cfg_attr (test, assert_instr(vmaxph))] |
8327 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8328 | pub fn _mm256_mask_max_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
8329 | unsafe { simd_select_bitmask(k, _mm256_max_ph(a, b), src) } |
8330 | } |
8331 | |
8332 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8333 | /// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8334 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are |
8335 | /// NaN or signed-zero values. |
8336 | /// |
8337 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_ph) |
8338 | #[inline ] |
8339 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8340 | #[cfg_attr (test, assert_instr(vmaxph))] |
8341 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8342 | pub fn _mm256_maskz_max_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
8343 | unsafe { simd_select_bitmask(k, _mm256_max_ph(a, b), _mm256_setzero_ph()) } |
8344 | } |
8345 | |
8346 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8347 | /// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum |
8348 | /// value when inputs are NaN or signed-zero values. |
8349 | /// |
8350 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_ph) |
8351 | #[inline ] |
8352 | #[target_feature (enable = "avx512fp16" )] |
8353 | #[cfg_attr (test, assert_instr(vmaxph))] |
8354 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8355 | pub fn _mm512_max_ph(a: __m512h, b: __m512h) -> __m512h { |
8356 | _mm512_max_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b) |
8357 | } |
8358 | |
8359 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8360 | /// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8361 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are |
8362 | /// NaN or signed-zero values. |
8363 | /// |
8364 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_ph) |
8365 | #[inline ] |
8366 | #[target_feature (enable = "avx512fp16" )] |
8367 | #[cfg_attr (test, assert_instr(vmaxph))] |
8368 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8369 | pub fn _mm512_mask_max_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
8370 | unsafe { simd_select_bitmask(k, _mm512_max_ph(a, b), src) } |
8371 | } |
8372 | |
8373 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8374 | /// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8375 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are |
8376 | /// NaN or signed-zero values. |
8377 | /// |
8378 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_ph) |
8379 | #[inline ] |
8380 | #[target_feature (enable = "avx512fp16" )] |
8381 | #[cfg_attr (test, assert_instr(vmaxph))] |
8382 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8383 | pub fn _mm512_maskz_max_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
8384 | unsafe { simd_select_bitmask(k, _mm512_max_ph(a, b), _mm512_setzero_ph()) } |
8385 | } |
8386 | |
8387 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8388 | /// values in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
8389 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are |
8390 | /// NaN or signed-zero values. |
8391 | /// |
8392 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_round_ph) |
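/// # Example
///
/// A minimal sketch passing `_MM_FROUND_NO_EXC` as the `SAE` const generic
/// (illustrative; assumes `avx512fp16` support):
///
/// ```ignore
/// let a = _mm512_set1_ph(-0.5);
/// let b = _mm512_set1_ph(0.25);
/// let r = _mm512_max_round_ph::<_MM_FROUND_NO_EXC>(a, b); // every lane = 0.25
/// ```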
8393 | #[inline ] |
8394 | #[target_feature (enable = "avx512fp16" )] |
8395 | #[cfg_attr (test, assert_instr(vmaxph, SAE = 8))] |
8396 | #[rustc_legacy_const_generics (2)] |
8397 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8398 | pub fn _mm512_max_round_ph<const SAE: i32>(a: __m512h, b: __m512h) -> __m512h { |
8399 | unsafe { |
8400 | static_assert_sae!(SAE); |
8401 | vmaxph_512(a, b, SAE) |
8402 | } |
8403 | } |
8404 | |
8405 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8406 | /// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8407 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the |
8408 | /// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values. |
8409 | /// |
8410 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_round_ph) |
8411 | #[inline ] |
8412 | #[target_feature (enable = "avx512fp16" )] |
8413 | #[cfg_attr (test, assert_instr(vmaxph, SAE = 8))] |
8414 | #[rustc_legacy_const_generics (4)] |
8415 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8416 | pub fn _mm512_mask_max_round_ph<const SAE: i32>( |
8417 | src: __m512h, |
8418 | k: __mmask32, |
8419 | a: __m512h, |
8420 | b: __m512h, |
8421 | ) -> __m512h { |
8422 | unsafe { |
8423 | static_assert_sae!(SAE); |
8424 | simd_select_bitmask(k, _mm512_max_round_ph::<SAE>(a, b), src) |
8425 | } |
8426 | } |
8427 | |
8428 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8429 | /// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8430 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the |
8431 | /// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values. |
8432 | /// |
8433 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_round_ph) |
8434 | #[inline ] |
8435 | #[target_feature (enable = "avx512fp16" )] |
8436 | #[cfg_attr (test, assert_instr(vmaxph, SAE = 8))] |
8437 | #[rustc_legacy_const_generics (3)] |
8438 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8439 | pub fn _mm512_maskz_max_round_ph<const SAE: i32>(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
8440 | unsafe { |
8441 | static_assert_sae!(SAE); |
8442 | simd_select_bitmask(k, _mm512_max_round_ph::<SAE>(a, b), _mm512_setzero_ph()) |
8443 | } |
8444 | } |
8445 | |
8446 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum |
8447 | /// value in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements |
8448 | /// of dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value |
8449 | /// when inputs are NaN or signed-zero values. |
8450 | /// |
8451 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sh) |
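/// # Example
///
/// A minimal sketch (illustrative; assumed feature set as above):
///
/// ```ignore
/// let a = _mm_set_sh(1.0);
/// let b = _mm_set_sh(2.0);
/// let r = _mm_max_sh(a, b); // lane 0 = 2.0; lanes 1..=7 come from `a`, i.e. 0.0
/// ```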
8452 | #[inline ] |
8453 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8454 | #[cfg_attr (test, assert_instr(vmaxsh))] |
8455 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8456 | pub fn _mm_max_sh(a: __m128h, b: __m128h) -> __m128h { |
8457 | _mm_mask_max_sh(_mm_undefined_ph(), 0xff, a, b) |
8458 | } |
8459 | |
8460 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum |
8461 | /// value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 |
8462 | /// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. Does not follow |
8463 | /// the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values. |
8464 | /// |
8465 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_sh) |
8466 | #[inline ] |
8467 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8468 | #[cfg_attr (test, assert_instr(vmaxsh))] |
8469 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8470 | pub fn _mm_mask_max_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8471 | _mm_mask_max_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
8472 | } |
8473 | |
8474 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value |
8475 | /// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and |
8476 | /// copy the upper 7 packed elements from a to the upper elements of dst. Does not follow the IEEE Standard |
8477 | /// for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values. |
8478 | /// |
8479 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_sh) |
8480 | #[inline ] |
8481 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8482 | #[cfg_attr (test, assert_instr(vmaxsh))] |
8483 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8484 | pub fn _mm_maskz_max_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8485 | _mm_mask_max_sh(_mm_setzero_ph(), k, a, b) |
8486 | } |
8487 | |
8488 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value |
8489 | /// in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
8490 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the |
8491 | /// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values. |
8492 | /// |
8493 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_round_sh) |
8494 | #[inline ] |
8495 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8496 | #[cfg_attr (test, assert_instr(vmaxsh, SAE = 8))] |
8497 | #[rustc_legacy_const_generics (2)] |
8498 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8499 | pub fn _mm_max_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h { |
8500 | static_assert_sae!(SAE); |
8501 | _mm_mask_max_round_sh::<SAE>(_mm_undefined_ph(), 0xff, a, b) |
8502 | } |
8503 | |
8504 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value |
8505 | /// in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), |
8506 | /// and copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by |
8507 | /// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic |
8508 | /// (IEEE 754) maximum value when inputs are NaN or signed-zero values. |
8509 | /// |
8510 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_round_sh) |
8511 | #[inline ] |
8512 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8513 | #[cfg_attr (test, assert_instr(vmaxsh, SAE = 8))] |
8514 | #[rustc_legacy_const_generics (4)] |
8515 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8516 | pub fn _mm_mask_max_round_sh<const SAE: i32>( |
8517 | src: __m128h, |
8518 | k: __mmask8, |
8519 | a: __m128h, |
8520 | b: __m128h, |
8521 | ) -> __m128h { |
8522 | unsafe { |
8523 | static_assert_sae!(SAE); |
8524 | vmaxsh(a, b, src, k, SAE) |
8525 | } |
8526 | } |
8527 | |
8528 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value |
8529 | /// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and |
8530 | /// copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by |
8531 | /// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic |
8532 | /// (IEEE 754) maximum value when inputs are NaN or signed-zero values. |
8533 | /// |
8534 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_round_sh) |
8535 | #[inline ] |
8536 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8537 | #[cfg_attr (test, assert_instr(vmaxsh, SAE = 8))] |
8538 | #[rustc_legacy_const_generics (3)] |
8539 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8540 | pub fn _mm_maskz_max_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8541 | static_assert_sae!(SAE); |
8542 | _mm_mask_max_round_sh::<SAE>(_mm_setzero_ph(), k, a, b) |
8543 | } |
8544 | |
8545 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8546 | /// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value |
8547 | /// when inputs are NaN or signed-zero values. |
8548 | /// |
8549 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_ph) |
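/// # Example
///
/// A minimal sketch (illustrative; assumes `avx512fp16` and `avx512vl`):
///
/// ```ignore
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set1_ph(2.0);
/// let r = _mm_min_ph(a, b); // every lane = 1.0
/// ```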
8550 | #[inline ] |
8551 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8552 | #[cfg_attr (test, assert_instr(vminph))] |
8553 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8554 | pub fn _mm_min_ph(a: __m128h, b: __m128h) -> __m128h { |
8555 | unsafe { vminph_128(a, b) } |
8556 | } |
8557 | |
8558 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8559 | /// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8560 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are |
8561 | /// NaN or signed-zero values. |
8562 | /// |
8563 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_ph) |
8564 | #[inline ] |
8565 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8566 | #[cfg_attr (test, assert_instr(vminph))] |
8567 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8568 | pub fn _mm_mask_min_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8569 | unsafe { simd_select_bitmask(k, _mm_min_ph(a, b), src) } |
8570 | } |
8571 | |
8572 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8573 | /// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8574 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are |
8575 | /// NaN or signed-zero values. |
8576 | /// |
8577 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_ph) |
8578 | #[inline ] |
8579 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8580 | #[cfg_attr (test, assert_instr(vminph))] |
8581 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8582 | pub fn _mm_maskz_min_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8583 | unsafe { simd_select_bitmask(k, _mm_min_ph(a, b), _mm_setzero_ph()) } |
8584 | } |
8585 | |
8586 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8587 | /// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value |
8588 | /// when inputs are NaN or signed-zero values. |
8589 | /// |
8590 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_ph) |
8591 | #[inline ] |
8592 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8593 | #[cfg_attr (test, assert_instr(vminph))] |
8594 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8595 | pub fn _mm256_min_ph(a: __m256h, b: __m256h) -> __m256h { |
8596 | unsafe { vminph_256(a, b) } |
8597 | } |
8598 | |
8599 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8600 | /// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8601 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are |
8602 | /// NaN or signed-zero values. |
8603 | /// |
8604 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_min_ph) |
8605 | #[inline ] |
8606 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8607 | #[cfg_attr (test, assert_instr(vminph))] |
8608 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8609 | pub fn _mm256_mask_min_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
8610 | unsafe { simd_select_bitmask(k, _mm256_min_ph(a, b), src) } |
8611 | } |
8612 | |
8613 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8614 | /// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8615 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are |
8616 | /// NaN or signed-zero values. |
8617 | /// |
8618 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_min_ph) |
8619 | #[inline ] |
8620 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8621 | #[cfg_attr (test, assert_instr(vminph))] |
8622 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8623 | pub fn _mm256_maskz_min_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
8624 | unsafe { simd_select_bitmask(k, _mm256_min_ph(a, b), _mm256_setzero_ph()) } |
8625 | } |
8626 | |
8627 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8628 | /// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value |
8629 | /// when inputs are NaN or signed-zero values. |
8630 | /// |
8631 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_ph) |
8632 | #[inline ] |
8633 | #[target_feature (enable = "avx512fp16" )] |
8634 | #[cfg_attr (test, assert_instr(vminph))] |
8635 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8636 | pub fn _mm512_min_ph(a: __m512h, b: __m512h) -> __m512h { |
8637 | _mm512_min_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b) |
8638 | } |
8639 | |
8640 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8641 | /// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8642 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are |
8643 | /// NaN or signed-zero values. |
8644 | /// |
8645 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_ph) |
8646 | #[inline ] |
8647 | #[target_feature (enable = "avx512fp16" )] |
8648 | #[cfg_attr (test, assert_instr(vminph))] |
8649 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8650 | pub fn _mm512_mask_min_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
8651 | unsafe { simd_select_bitmask(k, _mm512_min_ph(a, b), src) } |
8652 | } |
8653 | |
8654 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8655 | /// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8656 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are |
8657 | /// NaN or signed-zero values. |
8658 | /// |
8659 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_ph) |
8660 | #[inline ] |
8661 | #[target_feature (enable = "avx512fp16" )] |
8662 | #[cfg_attr (test, assert_instr(vminph))] |
8663 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8664 | pub fn _mm512_maskz_min_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
8665 | unsafe { simd_select_bitmask(k, _mm512_min_ph(a, b), _mm512_setzero_ph()) } |
8666 | } |
8667 | |
8668 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8669 | /// values in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not |
8670 | /// follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values. |
8671 | /// |
8672 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_round_ph) |
8673 | #[inline ] |
8674 | #[target_feature (enable = "avx512fp16" )] |
8675 | #[cfg_attr (test, assert_instr(vminph, SAE = 8))] |
8676 | #[rustc_legacy_const_generics (2)] |
8677 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8678 | pub fn _mm512_min_round_ph<const SAE: i32>(a: __m512h, b: __m512h) -> __m512h { |
8679 | unsafe { |
8680 | static_assert_sae!(SAE); |
8681 | vminph_512(a, b, SAE) |
8682 | } |
8683 | } |
8684 | |
8685 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8686 | /// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8687 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the |
8688 | /// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values. |
8689 | /// |
8690 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_round_ph) |
8691 | #[inline ] |
8692 | #[target_feature (enable = "avx512fp16" )] |
8693 | #[cfg_attr (test, assert_instr(vminph, SAE = 8))] |
8694 | #[rustc_legacy_const_generics (4)] |
8695 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8696 | pub fn _mm512_mask_min_round_ph<const SAE: i32>( |
8697 | src: __m512h, |
8698 | k: __mmask32, |
8699 | a: __m512h, |
8700 | b: __m512h, |
8701 | ) -> __m512h { |
8702 | unsafe { |
8703 | static_assert_sae!(SAE); |
8704 | simd_select_bitmask(k, _mm512_min_round_ph::<SAE>(a, b), src) |
8705 | } |
8706 | } |
8707 | |
8708 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8709 | /// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8710 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the |
8711 | /// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values. |
8712 | /// |
8713 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_round_ph) |
8714 | #[inline ] |
8715 | #[target_feature (enable = "avx512fp16" )] |
8716 | #[cfg_attr (test, assert_instr(vminph, SAE = 8))] |
8717 | #[rustc_legacy_const_generics (3)] |
8718 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8719 | pub fn _mm512_maskz_min_round_ph<const SAE: i32>(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
8720 | unsafe { |
8721 | static_assert_sae!(SAE); |
8722 | simd_select_bitmask(k, _mm512_min_round_ph::<SAE>(a, b), _mm512_setzero_ph()) |
8723 | } |
8724 | } |
8725 | |
8726 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum |
8727 | /// value in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements |
8728 | /// of dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when |
8729 | /// inputs are NaN or signed-zero values. |
8730 | /// |
8731 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sh) |
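/// # Example
///
/// A minimal sketch (illustrative; assumed feature set as above):
///
/// ```ignore
/// let a = _mm_set_sh(5.0);
/// let b = _mm_set_sh(-3.0);
/// let r = _mm_min_sh(a, b); // lane 0 = -3.0; lanes 1..=7 come from `a`, i.e. 0.0
/// ```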
8732 | #[inline ] |
8733 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8734 | #[cfg_attr (test, assert_instr(vminsh))] |
8735 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8736 | pub fn _mm_min_sh(a: __m128h, b: __m128h) -> __m128h { |
8737 | _mm_mask_min_sh(_mm_undefined_ph(), 0xff, a, b) |
8738 | } |
8739 | |
8740 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum |
8741 | /// value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 |
8742 | /// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. Does not follow |
8743 | /// the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values. |
8744 | /// |
8745 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_sh) |
8746 | #[inline ] |
8747 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8748 | #[cfg_attr (test, assert_instr(vminsh))] |
8749 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8750 | pub fn _mm_mask_min_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8751 | _mm_mask_min_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
8752 | } |
8753 | |
8754 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value |
8755 | /// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and |
8756 | /// copy the upper 7 packed elements from a to the upper elements of dst. Does not follow the IEEE Standard |
8757 | /// for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values. |
8758 | /// |
8759 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_sh) |
8760 | #[inline ] |
8761 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8762 | #[cfg_attr (test, assert_instr(vminsh))] |
8763 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8764 | pub fn _mm_maskz_min_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8765 | _mm_mask_min_sh(_mm_setzero_ph(), k, a, b) |
8766 | } |
8767 | |
8768 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value |
8769 | /// in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
8770 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the |
8771 | /// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values. |
8772 | /// |
8773 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_round_sh) |
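///
/// Illustrative sketch (assumes `avx512fp16` is available; values are arbitrary); passing
/// `_MM_FROUND_NO_EXC` suppresses floating-point exception reporting for the comparison:
///
/// ```ignore
/// let a = _mm_set_sh(2.0);
/// let b = _mm_set_sh(-1.0);
/// let r = _mm_min_round_sh::<_MM_FROUND_NO_EXC>(a, b);
/// // lane 0 of r is -1.0; the upper lanes are copied from a
/// ```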
8774 | #[inline ] |
8775 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8776 | #[cfg_attr (test, assert_instr(vminsh, SAE = 8))] |
8777 | #[rustc_legacy_const_generics (2)] |
8778 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8779 | pub fn _mm_min_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h { |
8780 | static_assert_sae!(SAE); |
    _mm_mask_min_round_sh::<SAE>(_mm_undefined_ph(), 0xff, a, b)
8782 | } |
8783 | |
8784 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value |
8785 | /// in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), |
8786 | /// and copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by |
8787 | /// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic |
8788 | /// (IEEE 754) minimum value when inputs are NaN or signed-zero values. |
8789 | /// |
8790 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_round_sh) |
8791 | #[inline ] |
8792 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8793 | #[cfg_attr (test, assert_instr(vminsh, SAE = 8))] |
8794 | #[rustc_legacy_const_generics (4)] |
8795 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8796 | pub fn _mm_mask_min_round_sh<const SAE: i32>( |
8797 | src: __m128h, |
8798 | k: __mmask8, |
8799 | a: __m128h, |
8800 | b: __m128h, |
8801 | ) -> __m128h { |
8802 | unsafe { |
8803 | static_assert_sae!(SAE); |
8804 | vminsh(a, b, src, k, SAE) |
8805 | } |
8806 | } |
8807 | |
8808 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value |
8809 | /// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and |
8810 | /// copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by |
8811 | /// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic |
8812 | /// (IEEE 754) minimum value when inputs are NaN or signed-zero values. |
8813 | /// |
8814 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_round_sh) |
8815 | #[inline ] |
8816 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8817 | #[cfg_attr (test, assert_instr(vminsh, SAE = 8))] |
8818 | #[rustc_legacy_const_generics (3)] |
8819 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8820 | pub fn _mm_maskz_min_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8821 | static_assert_sae!(SAE); |
    _mm_mask_min_round_sh::<SAE>(_mm_setzero_ph(), k, a, b)
8823 | } |
8824 | |
8825 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8826 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst. |
8827 | /// This intrinsic essentially calculates `floor(log2(x))` for each element. |
8828 | /// |
8829 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_ph) |
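///
/// Illustrative sketch (assumes `avx512fp16` and `avx512vl` are available; values are
/// arbitrary):
///
/// ```ignore
/// let a = _mm_set1_ph(8.0);
/// let r = _mm_getexp_ph(a);
/// // every lane of r is 3.0, since floor(log2(8.0)) == 3
/// ```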
8830 | #[inline ] |
8831 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8832 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8833 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8834 | pub fn _mm_getexp_ph(a: __m128h) -> __m128h { |
    _mm_mask_getexp_ph(_mm_undefined_ph(), 0xff, a)
8836 | } |
8837 | |
8838 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8839 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k |
8840 | /// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates |
8841 | /// `floor(log2(x))` for each element. |
8842 | /// |
8843 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_ph) |
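///
/// Illustrative sketch of the writemask behaviour (assumes `avx512fp16` and `avx512vl` are
/// available; values are arbitrary):
///
/// ```ignore
/// let src = _mm_set1_ph(-1.0);
/// let a = _mm_set1_ph(8.0);
/// let r = _mm_mask_getexp_ph(src, 0b0000_0011, a);
/// // lanes 0 and 1 hold the computed exponent 3.0; lanes 2..=7 are copied from src (-1.0)
/// ```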
8844 | #[inline ] |
8845 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8846 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8847 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8848 | pub fn _mm_mask_getexp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { |
8849 | unsafe { vgetexpph_128(a, src, k) } |
8850 | } |
8851 | |
8852 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8853 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask |
8854 | /// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates |
8855 | /// `floor(log2(x))` for each element. |
8856 | /// |
8857 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_ph) |
8858 | #[inline ] |
8859 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8860 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8861 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8862 | pub fn _mm_maskz_getexp_ph(k: __mmask8, a: __m128h) -> __m128h { |
    _mm_mask_getexp_ph(_mm_setzero_ph(), k, a)
8864 | } |
8865 | |
8866 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8867 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst. |
8868 | /// This intrinsic essentially calculates `floor(log2(x))` for each element. |
8869 | /// |
8870 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getexp_ph) |
8871 | #[inline ] |
8872 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8873 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8874 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8875 | pub fn _mm256_getexp_ph(a: __m256h) -> __m256h { |
    _mm256_mask_getexp_ph(_mm256_undefined_ph(), 0xffff, a)
8877 | } |
8878 | |
8879 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8880 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k |
8881 | /// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates |
8882 | /// `floor(log2(x))` for each element. |
8883 | /// |
8884 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getexp_ph) |
8885 | #[inline ] |
8886 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8887 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8888 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8889 | pub fn _mm256_mask_getexp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { |
8890 | unsafe { vgetexpph_256(a, src, k) } |
8891 | } |
8892 | |
8893 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8894 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask |
8895 | /// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates |
8896 | /// `floor(log2(x))` for each element. |
8897 | /// |
8898 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getexp_ph) |
8899 | #[inline ] |
8900 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8901 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8902 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8903 | pub fn _mm256_maskz_getexp_ph(k: __mmask16, a: __m256h) -> __m256h { |
    _mm256_mask_getexp_ph(_mm256_setzero_ph(), k, a)
8905 | } |
8906 | |
8907 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8908 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst. |
8909 | /// This intrinsic essentially calculates `floor(log2(x))` for each element. |
8910 | /// |
8911 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_ph) |
8912 | #[inline ] |
8913 | #[target_feature (enable = "avx512fp16" )] |
8914 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8915 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8916 | pub fn _mm512_getexp_ph(a: __m512h) -> __m512h { |
    _mm512_mask_getexp_ph(_mm512_undefined_ph(), 0xffffffff, a)
8918 | } |
8919 | |
8920 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8921 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k |
8922 | /// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates |
8923 | /// `floor(log2(x))` for each element. |
8924 | /// |
8925 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_ph) |
8926 | #[inline ] |
8927 | #[target_feature (enable = "avx512fp16" )] |
8928 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8929 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8930 | pub fn _mm512_mask_getexp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { |
8931 | _mm512_mask_getexp_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a) |
8932 | } |
8933 | |
8934 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8935 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask |
8936 | /// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates |
8937 | /// `floor(log2(x))` for each element. |
8938 | /// |
8939 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_ph) |
8940 | #[inline ] |
8941 | #[target_feature (enable = "avx512fp16" )] |
8942 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8943 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8944 | pub fn _mm512_maskz_getexp_ph(k: __mmask32, a: __m512h) -> __m512h { |
    _mm512_mask_getexp_ph(_mm512_setzero_ph(), k, a)
8946 | } |
8947 | |
8948 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8949 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst. |
8950 | /// This intrinsic essentially calculates `floor(log2(x))` for each element. Exceptions can be suppressed |
8951 | /// by passing _MM_FROUND_NO_EXC in the sae parameter |
8952 | /// |
8953 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_round_ph) |
8954 | #[inline ] |
8955 | #[target_feature (enable = "avx512fp16" )] |
8956 | #[cfg_attr (test, assert_instr(vgetexpph, SAE = 8))] |
8957 | #[rustc_legacy_const_generics (1)] |
8958 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8959 | pub fn _mm512_getexp_round_ph<const SAE: i32>(a: __m512h) -> __m512h { |
8960 | static_assert_sae!(SAE); |
    _mm512_mask_getexp_round_ph::<SAE>(_mm512_undefined_ph(), 0xffffffff, a)
8962 | } |
8963 | |
8964 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8965 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k |
8966 | /// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates |
8967 | /// `floor(log2(x))` for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
8968 | /// |
8969 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_round_ph) |
8970 | #[inline ] |
8971 | #[target_feature (enable = "avx512fp16" )] |
8972 | #[cfg_attr (test, assert_instr(vgetexpph, SAE = 8))] |
8973 | #[rustc_legacy_const_generics (3)] |
8974 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8975 | pub fn _mm512_mask_getexp_round_ph<const SAE: i32>( |
8976 | src: __m512h, |
8977 | k: __mmask32, |
8978 | a: __m512h, |
8979 | ) -> __m512h { |
8980 | unsafe { |
8981 | static_assert_sae!(SAE); |
8982 | vgetexpph_512(a, src, k, SAE) |
8983 | } |
8984 | } |
8985 | |
8986 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8987 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask |
8988 | /// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates |
8989 | /// `floor(log2(x))` for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
8990 | /// |
8991 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_round_ph) |
8992 | #[inline ] |
8993 | #[target_feature (enable = "avx512fp16" )] |
8994 | #[cfg_attr (test, assert_instr(vgetexpph, SAE = 8))] |
8995 | #[rustc_legacy_const_generics (2)] |
8996 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8997 | pub fn _mm512_maskz_getexp_round_ph<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512h { |
8998 | static_assert_sae!(SAE); |
    _mm512_mask_getexp_round_ph::<SAE>(_mm512_setzero_ph(), k, a)
9000 | } |
9001 | |
9002 | /// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision |
9003 | /// (16-bit) floating-point number representing the integer exponent, store the result in the lower element |
9004 | /// of dst, and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially |
9005 | /// calculates `floor(log2(x))` for the lower element. |
9006 | /// |
9007 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_sh) |
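///
/// Illustrative sketch (assumes `avx512fp16` is available; values are arbitrary):
///
/// ```ignore
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set_sh(32.0);
/// let r = _mm_getexp_sh(a, b);
/// // lane 0 of r is 5.0 (floor(log2(32.0))); lanes 1..=7 are copied from a
/// ```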
9008 | #[inline ] |
9009 | #[target_feature (enable = "avx512fp16" )] |
9010 | #[cfg_attr (test, assert_instr(vgetexpsh))] |
9011 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9012 | pub fn _mm_getexp_sh(a: __m128h, b: __m128h) -> __m128h { |
    _mm_mask_getexp_sh(_mm_undefined_ph(), 0xff, a, b)
9014 | } |
9015 | |
9016 | /// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision |
9017 | /// (16-bit) floating-point number representing the integer exponent, store the result in the lower element |
9018 | /// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 7 |
9019 | /// packed elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` |
9020 | /// for the lower element. |
9021 | /// |
9022 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_sh) |
9023 | #[inline ] |
9024 | #[target_feature (enable = "avx512fp16" )] |
9025 | #[cfg_attr (test, assert_instr(vgetexpsh))] |
9026 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9027 | pub fn _mm_mask_getexp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
9028 | _mm_mask_getexp_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
9029 | } |
9030 | |
9031 | /// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision |
9032 | /// (16-bit) floating-point number representing the integer exponent, store the result in the lower element |
9033 | /// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed |
9034 | /// elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` for the |
9035 | /// lower element. |
9036 | /// |
9037 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_sh) |
9038 | #[inline ] |
9039 | #[target_feature (enable = "avx512fp16" )] |
9040 | #[cfg_attr (test, assert_instr(vgetexpsh))] |
9041 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9042 | pub fn _mm_maskz_getexp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
    _mm_mask_getexp_sh(_mm_setzero_ph(), k, a, b)
9044 | } |
9045 | |
9046 | /// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision |
9047 | /// (16-bit) floating-point number representing the integer exponent, store the result in the lower element |
9048 | /// of dst, and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially |
9049 | /// calculates `floor(log2(x))` for the lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
9050 | /// in the sae parameter |
9051 | /// |
9052 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_round_sh) |
9053 | #[inline ] |
9054 | #[target_feature (enable = "avx512fp16" )] |
9055 | #[cfg_attr (test, assert_instr(vgetexpsh, SAE = 8))] |
9056 | #[rustc_legacy_const_generics (2)] |
9057 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9058 | pub fn _mm_getexp_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h { |
9059 | static_assert_sae!(SAE); |
    _mm_mask_getexp_round_sh::<SAE>(_mm_undefined_ph(), 0xff, a, b)
9061 | } |
9062 | |
9063 | /// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision |
9064 | /// (16-bit) floating-point number representing the integer exponent, store the result in the lower element |
9065 | /// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 7 |
9066 | /// packed elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` |
9067 | /// for the lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
9068 | /// |
9069 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_round_sh) |
9070 | #[inline ] |
9071 | #[target_feature (enable = "avx512fp16" )] |
9072 | #[cfg_attr (test, assert_instr(vgetexpsh, SAE = 8))] |
9073 | #[rustc_legacy_const_generics (4)] |
9074 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9075 | pub fn _mm_mask_getexp_round_sh<const SAE: i32>( |
9076 | src: __m128h, |
9077 | k: __mmask8, |
9078 | a: __m128h, |
9079 | b: __m128h, |
9080 | ) -> __m128h { |
9081 | unsafe { |
9082 | static_assert_sae!(SAE); |
9083 | vgetexpsh(a, b, src, k, SAE) |
9084 | } |
9085 | } |
9086 | |
9087 | /// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision |
9088 | /// (16-bit) floating-point number representing the integer exponent, store the result in the lower element |
9089 | /// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed |
9090 | /// elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` for the |
9091 | /// lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
9092 | /// |
9093 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_round_sh) |
9094 | #[inline ] |
9095 | #[target_feature (enable = "avx512fp16" )] |
9096 | #[cfg_attr (test, assert_instr(vgetexpsh, SAE = 8))] |
9097 | #[rustc_legacy_const_generics (3)] |
9098 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9099 | pub fn _mm_maskz_getexp_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
9100 | static_assert_sae!(SAE); |
    _mm_mask_getexp_round_sh::<SAE>(_mm_setzero_ph(), k, a, b)
9102 | } |
9103 | |
9104 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9105 | /// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends |
9106 | /// on the interval range defined by norm and the sign depends on sign and the source sign. |
9107 | /// |
9108 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9109 | /// |
9110 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9111 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9112 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9113 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9114 | /// |
9115 | /// The sign is determined by sc which can take the following values: |
9116 | /// |
9117 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9118 | /// _MM_MANT_SIGN_zero // sign = 0 |
9119 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9120 | /// |
9121 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_ph) |
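///
/// Illustrative sketch (assumes `avx512fp16` and `avx512vl` are available; values are
/// arbitrary):
///
/// ```ignore
/// let a = _mm_set1_ph(12.0);
/// let r = _mm_getmant_ph::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src>(a);
/// // every lane of r is 1.5: 12.0 == 1.5 * 2^3, and the mantissa is normalized to [1, 2)
/// ```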
9122 | #[inline ] |
9123 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9124 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9125 | #[rustc_legacy_const_generics (1, 2)] |
9126 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9127 | pub fn _mm_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>( |
9128 | a: __m128h, |
9129 | ) -> __m128h { |
9130 | static_assert_uimm_bits!(NORM, 4); |
9131 | static_assert_uimm_bits!(SIGN, 2); |
    _mm_mask_getmant_ph::<NORM, SIGN>(_mm_undefined_ph(), 0xff, a)
9133 | } |
9134 | |
9135 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9136 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
9137 | /// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined |
9138 | /// by norm and the sign depends on sign and the source sign. |
9139 | /// |
9140 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9141 | /// |
9142 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9143 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9144 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9145 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9146 | /// |
9147 | /// The sign is determined by sc which can take the following values: |
9148 | /// |
9149 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9150 | /// _MM_MANT_SIGN_zero // sign = 0 |
9151 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9152 | /// |
9153 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_ph) |
9154 | #[inline ] |
9155 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9156 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9157 | #[rustc_legacy_const_generics (3, 4)] |
9158 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9159 | pub fn _mm_mask_getmant_ph< |
9160 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9161 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9162 | >( |
9163 | src: __m128h, |
9164 | k: __mmask8, |
9165 | a: __m128h, |
9166 | ) -> __m128h { |
9167 | unsafe { |
9168 | static_assert_uimm_bits!(NORM, 4); |
9169 | static_assert_uimm_bits!(SIGN, 2); |
9170 | vgetmantph_128(a, (SIGN << 2) | NORM, src, k) |
9171 | } |
9172 | } |
9173 | |
9174 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9175 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
9176 | /// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined |
9177 | /// by norm and the sign depends on sign and the source sign. |
9178 | /// |
9179 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9180 | /// |
9181 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9182 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9183 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9184 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9185 | /// |
9186 | /// The sign is determined by sc which can take the following values: |
9187 | /// |
9188 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9189 | /// _MM_MANT_SIGN_zero // sign = 0 |
9190 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9191 | /// |
9192 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_ph) |
9193 | #[inline ] |
9194 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9195 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9196 | #[rustc_legacy_const_generics (2, 3)] |
9197 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9198 | pub fn _mm_maskz_getmant_ph< |
9199 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9200 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9201 | >( |
9202 | k: __mmask8, |
9203 | a: __m128h, |
9204 | ) -> __m128h { |
9205 | static_assert_uimm_bits!(NORM, 4); |
9206 | static_assert_uimm_bits!(SIGN, 2); |
    _mm_mask_getmant_ph::<NORM, SIGN>(_mm_setzero_ph(), k, a)
9208 | } |
9209 | |
9210 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9211 | /// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends |
9212 | /// on the interval range defined by norm and the sign depends on sign and the source sign. |
9213 | /// |
9214 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9215 | /// |
9216 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9217 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9218 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9219 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9220 | /// |
9221 | /// The sign is determined by sc which can take the following values: |
9222 | /// |
9223 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9224 | /// _MM_MANT_SIGN_zero // sign = 0 |
9225 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9226 | /// |
9227 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getmant_ph) |
9228 | #[inline ] |
9229 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9230 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9231 | #[rustc_legacy_const_generics (1, 2)] |
9232 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9233 | pub fn _mm256_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>( |
9234 | a: __m256h, |
9235 | ) -> __m256h { |
9236 | static_assert_uimm_bits!(NORM, 4); |
9237 | static_assert_uimm_bits!(SIGN, 2); |
    _mm256_mask_getmant_ph::<NORM, SIGN>(_mm256_undefined_ph(), 0xffff, a)
9239 | } |
9240 | |
9241 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9242 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
9243 | /// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined |
9244 | /// by norm and the sign depends on sign and the source sign. |
9245 | /// |
9246 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9247 | /// |
9248 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9249 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9250 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9251 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9252 | /// |
9253 | /// The sign is determined by sc which can take the following values: |
9254 | /// |
9255 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9256 | /// _MM_MANT_SIGN_zero // sign = 0 |
9257 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9258 | /// |
9259 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getmant_ph) |
9260 | #[inline ] |
9261 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9262 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9263 | #[rustc_legacy_const_generics (3, 4)] |
9264 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9265 | pub fn _mm256_mask_getmant_ph< |
9266 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9267 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9268 | >( |
9269 | src: __m256h, |
9270 | k: __mmask16, |
9271 | a: __m256h, |
9272 | ) -> __m256h { |
9273 | unsafe { |
9274 | static_assert_uimm_bits!(NORM, 4); |
9275 | static_assert_uimm_bits!(SIGN, 2); |
9276 | vgetmantph_256(a, (SIGN << 2) | NORM, src, k) |
9277 | } |
9278 | } |
9279 | |
9280 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9281 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
9282 | /// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined |
9283 | /// by norm and the sign depends on sign and the source sign. |
9284 | /// |
9285 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9286 | /// |
9287 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9288 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9289 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9290 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9291 | /// |
9292 | /// The sign is determined by sc which can take the following values: |
9293 | /// |
9294 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9295 | /// _MM_MANT_SIGN_zero // sign = 0 |
9296 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9297 | /// |
9298 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getmant_ph) |
9299 | #[inline ] |
9300 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9301 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9302 | #[rustc_legacy_const_generics (2, 3)] |
9303 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9304 | pub fn _mm256_maskz_getmant_ph< |
9305 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9306 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9307 | >( |
9308 | k: __mmask16, |
9309 | a: __m256h, |
9310 | ) -> __m256h { |
9311 | static_assert_uimm_bits!(NORM, 4); |
9312 | static_assert_uimm_bits!(SIGN, 2); |
    _mm256_mask_getmant_ph::<NORM, SIGN>(_mm256_setzero_ph(), k, a)
9314 | } |
9315 | |
9316 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9317 | /// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends |
9318 | /// on the interval range defined by norm and the sign depends on sign and the source sign. |
9319 | /// |
9320 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9321 | /// |
9322 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9323 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9324 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9325 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9326 | /// |
9327 | /// The sign is determined by sc which can take the following values: |
9328 | /// |
9329 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9330 | /// _MM_MANT_SIGN_zero // sign = 0 |
9331 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9332 | /// |
9333 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_ph) |
9334 | #[inline ] |
9335 | #[target_feature (enable = "avx512fp16" )] |
9336 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9337 | #[rustc_legacy_const_generics (1, 2)] |
9338 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9339 | pub fn _mm512_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>( |
9340 | a: __m512h, |
9341 | ) -> __m512h { |
9342 | static_assert_uimm_bits!(NORM, 4); |
9343 | static_assert_uimm_bits!(SIGN, 2); |
    _mm512_mask_getmant_ph::<NORM, SIGN>(_mm512_undefined_ph(), 0xffffffff, a)
9345 | } |
9346 | |
9347 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9348 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
9349 | /// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined |
9350 | /// by norm and the sign depends on sign and the source sign. |
9351 | /// |
9352 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9353 | /// |
9354 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9355 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9356 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9357 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9358 | /// |
9359 | /// The sign is determined by sc which can take the following values: |
9360 | /// |
9361 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9362 | /// _MM_MANT_SIGN_zero // sign = 0 |
9363 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9364 | /// |
9365 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_ph) |
9366 | #[inline ] |
9367 | #[target_feature (enable = "avx512fp16" )] |
9368 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9369 | #[rustc_legacy_const_generics (3, 4)] |
9370 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9371 | pub fn _mm512_mask_getmant_ph< |
9372 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9373 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9374 | >( |
9375 | src: __m512h, |
9376 | k: __mmask32, |
9377 | a: __m512h, |
9378 | ) -> __m512h { |
9379 | static_assert_uimm_bits!(NORM, 4); |
9380 | static_assert_uimm_bits!(SIGN, 2); |
9381 | _mm512_mask_getmant_round_ph::<NORM, SIGN, _MM_FROUND_CUR_DIRECTION>(src, k, a) |
9382 | } |
9383 | |
9384 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9385 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
9386 | /// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined |
9387 | /// by norm and the sign depends on sign and the source sign. |
9388 | /// |
9389 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9390 | /// |
9391 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9392 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9393 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9394 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9395 | /// |
9396 | /// The sign is determined by sc which can take the following values: |
9397 | /// |
9398 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9399 | /// _MM_MANT_SIGN_zero // sign = 0 |
9400 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9401 | /// |
9402 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_ph) |
9403 | #[inline ] |
9404 | #[target_feature (enable = "avx512fp16" )] |
9405 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9406 | #[rustc_legacy_const_generics (2, 3)] |
9407 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9408 | pub fn _mm512_maskz_getmant_ph< |
9409 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9410 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9411 | >( |
9412 | k: __mmask32, |
9413 | a: __m512h, |
9414 | ) -> __m512h { |
9415 | static_assert_uimm_bits!(NORM, 4); |
9416 | static_assert_uimm_bits!(SIGN, 2); |
    _mm512_mask_getmant_ph::<NORM, SIGN>(_mm512_setzero_ph(), k, a)
9418 | } |
9419 | |
9420 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9421 | /// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends |
9422 | /// on the interval range defined by norm and the sign depends on sign and the source sign. Exceptions can |
9423 | /// be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
9424 | /// |
9425 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9426 | /// |
9427 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9428 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9429 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9430 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9431 | /// |
9432 | /// The sign is determined by sc which can take the following values: |
9433 | /// |
9434 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9435 | /// _MM_MANT_SIGN_zero // sign = 0 |
9436 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9437 | /// |
9438 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
9439 | /// |
9440 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_round_ph) |
9441 | #[inline ] |
9442 | #[target_feature (enable = "avx512fp16" )] |
9443 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))] |
9444 | #[rustc_legacy_const_generics (1, 2, 3)] |
9445 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9446 | pub fn _mm512_getmant_round_ph< |
9447 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9448 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9449 | const SAE: i32, |
9450 | >( |
9451 | a: __m512h, |
9452 | ) -> __m512h { |
9453 | static_assert_uimm_bits!(NORM, 4); |
9454 | static_assert_uimm_bits!(SIGN, 2); |
9455 | static_assert_sae!(SAE); |
    _mm512_mask_getmant_round_ph::<NORM, SIGN, SAE>(_mm512_undefined_ph(), 0xffffffff, a)
9457 | } |
9458 | |
9459 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9460 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
9461 | /// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined |
9462 | /// by norm and the sign depends on sign and the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
9463 | /// in the sae parameter |
9464 | /// |
9465 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9466 | /// |
9467 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9468 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9469 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9470 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9471 | /// |
9472 | /// The sign is determined by sc which can take the following values: |
9473 | /// |
9474 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9475 | /// _MM_MANT_SIGN_zero // sign = 0 |
9476 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9477 | /// |
9478 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
9479 | /// |
9480 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_round_ph) |
9481 | #[inline ] |
9482 | #[target_feature (enable = "avx512fp16" )] |
9483 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))] |
9484 | #[rustc_legacy_const_generics (3, 4, 5)] |
9485 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9486 | pub fn _mm512_mask_getmant_round_ph< |
9487 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9488 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9489 | const SAE: i32, |
9490 | >( |
9491 | src: __m512h, |
9492 | k: __mmask32, |
9493 | a: __m512h, |
9494 | ) -> __m512h { |
9495 | unsafe { |
9496 | static_assert_uimm_bits!(NORM, 4); |
9497 | static_assert_uimm_bits!(SIGN, 2); |
9498 | static_assert_sae!(SAE); |
9499 | vgetmantph_512(a, (SIGN << 2) | NORM, src, k, SAE) |
9500 | } |
9501 | } |
9502 | |
9503 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9504 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
9505 | /// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined |
9506 | /// by norm and the sign depends on sign and the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
9507 | /// in the sae parameter |
9508 | /// |
9509 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9510 | /// |
9511 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9512 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9513 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9514 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9515 | /// |
9516 | /// The sign is determined by sc which can take the following values: |
9517 | /// |
9518 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9519 | /// _MM_MANT_SIGN_zero // sign = 0 |
9520 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9521 | /// |
9522 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
9523 | /// |
9524 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_round_ph) |
9525 | #[inline ] |
9526 | #[target_feature (enable = "avx512fp16" )] |
9527 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))] |
9528 | #[rustc_legacy_const_generics (2, 3, 4)] |
9529 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9530 | pub fn _mm512_maskz_getmant_round_ph< |
9531 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9532 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9533 | const SAE: i32, |
9534 | >( |
9535 | k: __mmask32, |
9536 | a: __m512h, |
9537 | ) -> __m512h { |
9538 | static_assert_uimm_bits!(NORM, 4); |
9539 | static_assert_uimm_bits!(SIGN, 2); |
9540 | static_assert_sae!(SAE); |
    _mm512_mask_getmant_round_ph::<NORM, SIGN, SAE>(_mm512_setzero_ph(), k, a)
9542 | } |
9543 | |
9544 | /// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store |
9545 | /// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper |
9546 | /// elements of dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends |
9547 | /// on the interval range defined by norm and the sign depends on sign and the source sign. |
9548 | /// |
9549 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9550 | /// |
9551 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9552 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9553 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9554 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9555 | /// |
9556 | /// The sign is determined by sc which can take the following values: |
9557 | /// |
9558 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9559 | /// _MM_MANT_SIGN_zero // sign = 0 |
9560 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9561 | /// |
9562 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_sh) |
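///
/// Illustrative sketch (assumes `avx512fp16` is available; values are arbitrary):
///
/// ```ignore
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set_sh(12.0);
/// let r = _mm_getmant_sh::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src>(a, b);
/// // lane 0 of r is 1.5 (12.0 == 1.5 * 2^3); lanes 1..=7 are copied from a
/// ```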
9563 | #[inline ] |
9564 | #[target_feature (enable = "avx512fp16" )] |
9565 | #[cfg_attr (test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))] |
9566 | #[rustc_legacy_const_generics (2, 3)] |
9567 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9568 | pub fn _mm_getmant_sh<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>( |
9569 | a: __m128h, |
9570 | b: __m128h, |
9571 | ) -> __m128h { |
9572 | static_assert_uimm_bits!(NORM, 4); |
9573 | static_assert_uimm_bits!(SIGN, 2); |
    _mm_mask_getmant_sh::<NORM, SIGN>(_mm_undefined_ph(), 0xff, a, b)
9575 | } |
9576 | |
9577 | /// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store |
9578 | /// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), |
9579 | /// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates |
9580 | /// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and |
9581 | /// the source sign. |
9582 | /// |
9583 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9584 | /// |
9585 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9586 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9587 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9588 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9589 | /// |
9590 | /// The sign is determined by sc which can take the following values: |
9591 | /// |
9592 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9593 | /// _MM_MANT_SIGN_zero // sign = 0 |
9594 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9595 | /// |
9596 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_sh) |
9597 | #[inline ] |
9598 | #[target_feature (enable = "avx512fp16" )] |
9599 | #[cfg_attr (test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))] |
9600 | #[rustc_legacy_const_generics (4, 5)] |
9601 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9602 | pub fn _mm_mask_getmant_sh< |
9603 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9604 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9605 | >( |
9606 | src: __m128h, |
9607 | k: __mmask8, |
9608 | a: __m128h, |
9609 | b: __m128h, |
9610 | ) -> __m128h { |
9611 | static_assert_uimm_bits!(NORM, 4); |
9612 | static_assert_uimm_bits!(SIGN, 2); |
9613 | _mm_mask_getmant_round_sh::<NORM, SIGN, _MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
9614 | } |
9615 | |
9616 | /// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store |
9617 | /// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), |
9618 | /// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates |
9619 | /// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and |
9620 | /// the source sign. |
9621 | /// |
9622 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9623 | /// |
9624 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9625 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9626 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9627 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9628 | /// |
9629 | /// The sign is determined by sc which can take the following values: |
9630 | /// |
9631 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9632 | /// _MM_MANT_SIGN_zero // sign = 0 |
9633 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9634 | /// |
9635 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_sh) |
9636 | #[inline ] |
9637 | #[target_feature (enable = "avx512fp16" )] |
9638 | #[cfg_attr (test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))] |
9639 | #[rustc_legacy_const_generics (3, 4)] |
9640 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9641 | pub fn _mm_maskz_getmant_sh< |
9642 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9643 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9644 | >( |
9645 | k: __mmask8, |
9646 | a: __m128h, |
9647 | b: __m128h, |
9648 | ) -> __m128h { |
9649 | static_assert_uimm_bits!(NORM, 4); |
9650 | static_assert_uimm_bits!(SIGN, 2); |
    _mm_mask_getmant_sh::<NORM, SIGN>(_mm_setzero_ph(), k, a, b)
9652 | } |
9653 | |
9654 | /// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store |
9655 | /// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper |
9656 | /// elements of dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends |
9657 | /// on the interval range defined by norm and the sign depends on sign and the source sign. Exceptions can |
9658 | /// be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
9659 | /// |
9660 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9661 | /// |
9662 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9663 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9664 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9665 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9666 | /// |
9667 | /// The sign is determined by sc which can take the following values: |
9668 | /// |
9669 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9670 | /// _MM_MANT_SIGN_zero // sign = 0 |
9671 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9672 | /// |
9673 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
9674 | /// |
9675 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_round_sh) |
9676 | #[inline ] |
9677 | #[target_feature (enable = "avx512fp16" )] |
9678 | #[cfg_attr (test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))] |
9679 | #[rustc_legacy_const_generics (2, 3, 4)] |
9680 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9681 | pub fn _mm_getmant_round_sh< |
9682 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9683 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9684 | const SAE: i32, |
9685 | >( |
9686 | a: __m128h, |
9687 | b: __m128h, |
9688 | ) -> __m128h { |
9689 | static_assert_uimm_bits!(NORM, 4); |
9690 | static_assert_uimm_bits!(SIGN, 2); |
9691 | static_assert_sae!(SAE); |
    _mm_mask_getmant_round_sh::<NORM, SIGN, SAE>(_mm_undefined_ph(), 0xff, a, b)
9693 | } |
9694 | |
9695 | /// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store |
9696 | /// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), |
9697 | /// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates |
9698 | /// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and |
9699 | /// the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
9700 | /// |
9701 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9702 | /// |
9703 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9704 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9705 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9706 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9707 | /// |
9708 | /// The sign is determined by sc which can take the following values: |
9709 | /// |
9710 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9711 | /// _MM_MANT_SIGN_zero // sign = 0 |
9712 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9713 | /// |
9714 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
9715 | /// |
9716 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_round_sh) |
9717 | #[inline ] |
9718 | #[target_feature (enable = "avx512fp16" )] |
9719 | #[cfg_attr (test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))] |
9720 | #[rustc_legacy_const_generics (4, 5, 6)] |
9721 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9722 | pub fn _mm_mask_getmant_round_sh< |
9723 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9724 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9725 | const SAE: i32, |
9726 | >( |
9727 | src: __m128h, |
9728 | k: __mmask8, |
9729 | a: __m128h, |
9730 | b: __m128h, |
9731 | ) -> __m128h { |
9732 | unsafe { |
9733 | static_assert_uimm_bits!(NORM, 4); |
9734 | static_assert_uimm_bits!(SIGN, 2); |
9735 | static_assert_sae!(SAE); |
9736 | vgetmantsh(a, b, (SIGN << 2) | NORM, src, k, SAE) |
9737 | } |
9738 | } |
9739 | |
9740 | /// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store |
9741 | /// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), |
9742 | /// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates |
9743 | /// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and |
9744 | /// the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
9745 | /// |
9746 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9747 | /// |
9748 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9749 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9750 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9751 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9752 | /// |
9753 | /// The sign is determined by sc which can take the following values: |
9754 | /// |
9755 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9756 | /// _MM_MANT_SIGN_zero // sign = 0 |
9757 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9758 | /// |
9759 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
9760 | /// |
9761 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_round_sh) |
9762 | #[inline ] |
9763 | #[target_feature (enable = "avx512fp16" )] |
9764 | #[cfg_attr (test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))] |
9765 | #[rustc_legacy_const_generics (3, 4, 5)] |
9766 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9767 | pub fn _mm_maskz_getmant_round_sh< |
9768 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9769 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9770 | const SAE: i32, |
9771 | >( |
9772 | k: __mmask8, |
9773 | a: __m128h, |
9774 | b: __m128h, |
9775 | ) -> __m128h { |
9776 | static_assert_uimm_bits!(NORM, 4); |
9777 | static_assert_uimm_bits!(SIGN, 2); |
9778 | static_assert_sae!(SAE); |
    _mm_mask_getmant_round_sh::<NORM, SIGN, SAE>(_mm_setzero_ph(), k, a, b)
9780 | } |
9781 | |
9782 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9783 | /// specified by imm8, and store the results in dst. |
9784 | /// |
9785 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9786 | /// |
9787 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9788 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9789 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9790 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9791 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9792 | /// |
9793 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_ph) |
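///
/// A minimal usage sketch (not a compiled doc-test), assuming a nightly toolchain with
/// `#![feature(f16, stdarch_x86_avx512_f16)]` and AVX512-FP16 hardware:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn roundscale_demo() -> __m128h {
///     let a = _mm_set1_ph(1.75);
///     // _MM_FROUND_TO_NEAREST_INT keeps zero fraction bits and rounds to nearest,
///     // so every lane of the result should hold 2.0.
///     _mm_roundscale_ph::<_MM_FROUND_TO_NEAREST_INT>(a)
/// }
/// ```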
9794 | #[inline ] |
9795 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9796 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9797 | #[rustc_legacy_const_generics (1)] |
9798 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9799 | pub fn _mm_roundscale_ph<const IMM8: i32>(a: __m128h) -> __m128h { |
9800 | static_assert_uimm_bits!(IMM8, 8); |
    _mm_mask_roundscale_ph::<IMM8>(_mm_undefined_ph(), 0xff, a)
9802 | } |
9803 | |
9804 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9805 | /// specified by imm8, and store the results in dst using writemask k (elements are copied from src when |
9806 | /// the corresponding mask bit is not set). |
9807 | /// |
9808 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9809 | /// |
9810 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9811 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9812 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9813 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9814 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9815 | /// |
9816 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_ph) |
9817 | #[inline ] |
9818 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9819 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9820 | #[rustc_legacy_const_generics (3)] |
9821 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9822 | pub fn _mm_mask_roundscale_ph<const IMM8: i32>(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { |
9823 | unsafe { |
9824 | static_assert_uimm_bits!(IMM8, 8); |
9825 | vrndscaleph_128(a, IMM8, src, k) |
9826 | } |
9827 | } |
9828 | |
9829 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9830 | /// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
9831 | /// mask bit is not set). |
9832 | /// |
9833 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9834 | /// |
9835 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9836 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9837 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9838 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9839 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9840 | /// |
9841 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_ph) |
9842 | #[inline ] |
9843 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9844 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9845 | #[rustc_legacy_const_generics (2)] |
9846 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9847 | pub fn _mm_maskz_roundscale_ph<const IMM8: i32>(k: __mmask8, a: __m128h) -> __m128h { |
9848 | static_assert_uimm_bits!(IMM8, 8); |
    _mm_mask_roundscale_ph::<IMM8>(_mm_setzero_ph(), k, a)
9850 | } |
9851 | |
9852 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9853 | /// specified by imm8, and store the results in dst. |
9854 | /// |
9855 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9856 | /// |
9857 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9858 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9859 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9860 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9861 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9862 | /// |
9863 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_roundscale_ph) |
9864 | #[inline ] |
9865 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9866 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9867 | #[rustc_legacy_const_generics (1)] |
9868 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9869 | pub fn _mm256_roundscale_ph<const IMM8: i32>(a: __m256h) -> __m256h { |
9870 | static_assert_uimm_bits!(IMM8, 8); |
    _mm256_mask_roundscale_ph::<IMM8>(_mm256_undefined_ph(), 0xffff, a)
9872 | } |
9873 | |
9874 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9875 | /// specified by imm8, and store the results in dst using writemask k (elements are copied from src when |
9876 | /// the corresponding mask bit is not set). |
9877 | /// |
9878 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9879 | /// |
9880 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9881 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9882 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9883 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9884 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9885 | /// |
9886 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_roundscale_ph) |
9887 | #[inline ] |
9888 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9889 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9890 | #[rustc_legacy_const_generics (3)] |
9891 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9892 | pub fn _mm256_mask_roundscale_ph<const IMM8: i32>( |
9893 | src: __m256h, |
9894 | k: __mmask16, |
9895 | a: __m256h, |
9896 | ) -> __m256h { |
9897 | unsafe { |
9898 | static_assert_uimm_bits!(IMM8, 8); |
9899 | vrndscaleph_256(a, IMM8, src, k) |
9900 | } |
9901 | } |
9902 | |
9903 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9904 | /// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
9905 | /// mask bit is not set). |
9906 | /// |
9907 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9908 | /// |
9909 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9910 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9911 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9912 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9913 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9914 | /// |
9915 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_roundscale_ph) |
9916 | #[inline ] |
9917 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9918 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9919 | #[rustc_legacy_const_generics (2)] |
9920 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9921 | pub fn _mm256_maskz_roundscale_ph<const IMM8: i32>(k: __mmask16, a: __m256h) -> __m256h { |
9922 | static_assert_uimm_bits!(IMM8, 8); |
    _mm256_mask_roundscale_ph::<IMM8>(_mm256_setzero_ph(), k, a)
9924 | } |
9925 | |
9926 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9927 | /// specified by imm8, and store the results in dst. |
9928 | /// |
9929 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9930 | /// |
9931 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9932 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9933 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9934 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9935 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9936 | /// |
9937 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_ph) |
9938 | #[inline ] |
9939 | #[target_feature (enable = "avx512fp16" )] |
9940 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9941 | #[rustc_legacy_const_generics (1)] |
9942 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9943 | pub fn _mm512_roundscale_ph<const IMM8: i32>(a: __m512h) -> __m512h { |
9944 | static_assert_uimm_bits!(IMM8, 8); |
    _mm512_mask_roundscale_ph::<IMM8>(_mm512_undefined_ph(), 0xffffffff, a)
9946 | } |
9947 | |
9948 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9949 | /// specified by imm8, and store the results in dst using writemask k (elements are copied from src when |
9950 | /// the corresponding mask bit is not set). |
9951 | /// |
9952 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9953 | /// |
9954 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9955 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9956 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9957 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9958 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9959 | /// |
9960 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_ph) |
9961 | #[inline ] |
9962 | #[target_feature (enable = "avx512fp16" )] |
9963 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9964 | #[rustc_legacy_const_generics (3)] |
9965 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9966 | pub fn _mm512_mask_roundscale_ph<const IMM8: i32>( |
9967 | src: __m512h, |
9968 | k: __mmask32, |
9969 | a: __m512h, |
9970 | ) -> __m512h { |
9971 | static_assert_uimm_bits!(IMM8, 8); |
9972 | _mm512_mask_roundscale_round_ph::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a) |
9973 | } |
9974 | |
9975 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9976 | /// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
9977 | /// mask bit is not set). |
9978 | /// |
9979 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9980 | /// |
9981 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9982 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9983 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9984 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9985 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9986 | /// |
9987 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_ph) |
9988 | #[inline ] |
9989 | #[target_feature (enable = "avx512fp16" )] |
9990 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9991 | #[rustc_legacy_const_generics (2)] |
9992 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9993 | pub fn _mm512_maskz_roundscale_ph<const IMM8: i32>(k: __mmask32, a: __m512h) -> __m512h { |
9994 | static_assert_uimm_bits!(IMM8, 8); |
    _mm512_mask_roundscale_ph::<IMM8>(_mm512_setzero_ph(), k, a)
9996 | } |
9997 | |
9998 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9999 | /// specified by imm8, and store the results in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
10000 | /// in the sae parameter |
10001 | /// |
10002 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10003 | /// |
10004 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10005 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10006 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10007 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10008 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10009 | /// |
10010 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_round_ph) |
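///
/// A minimal usage sketch (not a compiled doc-test) under the same nightly/hardware assumptions
/// as the examples above, combining an explicit rounding choice with exception suppression:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn roundscale_round_demo() -> __m512h {
///     let a = _mm512_set1_ph(1.75);
///     // Truncate toward zero while suppressing exceptions via the SAE parameter;
///     // every lane of the result should hold 1.0.
///     _mm512_roundscale_round_ph::<_MM_FROUND_TO_ZERO, _MM_FROUND_NO_EXC>(a)
/// }
/// ```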
10011 | #[inline ] |
10012 | #[target_feature (enable = "avx512fp16" )] |
10013 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))] |
10014 | #[rustc_legacy_const_generics (1, 2)] |
10015 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10016 | pub fn _mm512_roundscale_round_ph<const IMM8: i32, const SAE: i32>(a: __m512h) -> __m512h { |
10017 | static_assert_uimm_bits!(IMM8, 8); |
10018 | static_assert_sae!(SAE); |
    _mm512_mask_roundscale_round_ph::<IMM8, SAE>(_mm512_undefined_ph(), 0xffffffff, a)
10020 | } |
10021 | |
10022 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
10023 | /// specified by imm8, and store the results in dst using writemask k (elements are copied from src when |
10024 | /// the corresponding mask bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
10025 | /// in the sae parameter |
10026 | /// |
10027 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10028 | /// |
10029 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10030 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10031 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10032 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10033 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10034 | /// |
10035 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_round_ph) |
10036 | #[inline ] |
10037 | #[target_feature (enable = "avx512fp16" )] |
10038 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))] |
10039 | #[rustc_legacy_const_generics (3, 4)] |
10040 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10041 | pub fn _mm512_mask_roundscale_round_ph<const IMM8: i32, const SAE: i32>( |
10042 | src: __m512h, |
10043 | k: __mmask32, |
10044 | a: __m512h, |
10045 | ) -> __m512h { |
10046 | unsafe { |
10047 | static_assert_uimm_bits!(IMM8, 8); |
10048 | static_assert_sae!(SAE); |
10049 | vrndscaleph_512(a, IMM8, src, k, SAE) |
10050 | } |
10051 | } |
10052 | |
10053 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
10054 | /// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
10055 | /// mask bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
10056 | /// |
10057 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10058 | /// |
10059 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10060 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10061 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10062 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10063 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10064 | /// |
10065 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_round_ph) |
10066 | #[inline ] |
10067 | #[target_feature (enable = "avx512fp16" )] |
10068 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))] |
10069 | #[rustc_legacy_const_generics (2, 3)] |
10070 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10071 | pub fn _mm512_maskz_roundscale_round_ph<const IMM8: i32, const SAE: i32>( |
10072 | k: __mmask32, |
10073 | a: __m512h, |
10074 | ) -> __m512h { |
10075 | static_assert_uimm_bits!(IMM8, 8); |
10076 | static_assert_sae!(SAE); |
    _mm512_mask_roundscale_round_ph::<IMM8, SAE>(_mm512_setzero_ph(), k, a)
10078 | } |
10079 | |
10080 | /// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits |
10081 | /// specified by imm8, store the result in the lower element of dst, and copy the upper 7 packed elements |
10082 | /// from a to the upper elements of dst. |
10083 | /// |
10084 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10085 | /// |
10086 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10087 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10088 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10089 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10090 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10091 | /// |
10092 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_sh) |
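///
/// A minimal usage sketch (not a compiled doc-test) under the same nightly/hardware assumptions
/// as the examples above:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn roundscale_sh_demo() -> __m128h {
///     let a = _mm_set1_ph(3.0);
///     let b = _mm_set1_ph(1.25);
///     // Lane 0 of the result should be 2.0 (1.25 rounded up); lanes 1..8 are copied from `a`.
///     _mm_roundscale_sh::<_MM_FROUND_TO_POS_INF>(a, b)
/// }
/// ```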
10093 | #[inline ] |
10094 | #[target_feature (enable = "avx512fp16" )] |
10095 | #[cfg_attr (test, assert_instr(vrndscalesh, IMM8 = 0))] |
10096 | #[rustc_legacy_const_generics (2)] |
10097 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10098 | pub fn _mm_roundscale_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> __m128h { |
10099 | static_assert_uimm_bits!(IMM8, 8); |
    _mm_mask_roundscale_sh::<IMM8>(_mm_undefined_ph(), 0xff, a, b)
10101 | } |
10102 | |
10103 | /// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits |
10104 | /// specified by imm8, store the result in the lower element of dst using writemask k (the element is copied |
10105 | /// from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
10106 | /// |
10107 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10108 | /// |
10109 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10110 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10111 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10112 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10113 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10114 | /// |
10115 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_sh) |
10116 | #[inline ] |
10117 | #[target_feature (enable = "avx512fp16" )] |
10118 | #[cfg_attr (test, assert_instr(vrndscalesh, IMM8 = 0))] |
10119 | #[rustc_legacy_const_generics (4)] |
10120 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10121 | pub fn _mm_mask_roundscale_sh<const IMM8: i32>( |
10122 | src: __m128h, |
10123 | k: __mmask8, |
10124 | a: __m128h, |
10125 | b: __m128h, |
10126 | ) -> __m128h { |
10127 | static_assert_uimm_bits!(IMM8, 8); |
10128 | _mm_mask_roundscale_round_sh::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
10129 | } |
10130 | |
10131 | /// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits |
10132 | /// specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed |
10133 | /// out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
10134 | /// |
10135 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10136 | /// |
10137 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10138 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10139 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10140 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10141 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10142 | /// |
10143 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_sh) |
10144 | #[inline ] |
10145 | #[target_feature (enable = "avx512fp16" )] |
10146 | #[cfg_attr (test, assert_instr(vrndscalesh, IMM8 = 0))] |
10147 | #[rustc_legacy_const_generics (3)] |
10148 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10149 | pub fn _mm_maskz_roundscale_sh<const IMM8: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
10150 | static_assert_uimm_bits!(IMM8, 8); |
    _mm_mask_roundscale_sh::<IMM8>(_mm_setzero_ph(), k, a, b)
10152 | } |
10153 | |
10154 | /// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits |
10155 | /// specified by imm8, store the result in the lower element of dst, and copy the upper 7 packed elements |
10156 | /// from a to the upper elements of dst. |
10157 | /// |
10158 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10159 | /// |
10160 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10161 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10162 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10163 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10164 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10165 | /// |
10166 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
10167 | /// |
10168 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_round_sh) |
10169 | #[inline ] |
10170 | #[target_feature (enable = "avx512fp16" )] |
10171 | #[cfg_attr (test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))] |
10172 | #[rustc_legacy_const_generics (2, 3)] |
10173 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10174 | pub fn _mm_roundscale_round_sh<const IMM8: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __m128h { |
10175 | static_assert_uimm_bits!(IMM8, 8); |
10176 | static_assert_sae!(SAE); |
    _mm_mask_roundscale_round_sh::<IMM8, SAE>(_mm_undefined_ph(), 0xff, a, b)
10178 | } |
10179 | |
10180 | /// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits |
10181 | /// specified by imm8, store the result in the lower element of dst using writemask k (the element is copied |
10182 | /// from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
10183 | /// |
10184 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10185 | /// |
10186 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10187 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10188 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10189 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10190 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10191 | /// |
10192 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
10193 | /// |
10194 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_round_sh) |
10195 | #[inline ] |
10196 | #[target_feature (enable = "avx512fp16" )] |
10197 | #[cfg_attr (test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))] |
10198 | #[rustc_legacy_const_generics (4, 5)] |
10199 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10200 | pub fn _mm_mask_roundscale_round_sh<const IMM8: i32, const SAE: i32>( |
10201 | src: __m128h, |
10202 | k: __mmask8, |
10203 | a: __m128h, |
10204 | b: __m128h, |
10205 | ) -> __m128h { |
10206 | unsafe { |
10207 | static_assert_uimm_bits!(IMM8, 8); |
10208 | static_assert_sae!(SAE); |
10209 | vrndscalesh(a, b, src, k, IMM8, SAE) |
10210 | } |
10211 | } |
10212 | |
10213 | /// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits |
10214 | /// specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed |
10215 | /// out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
10216 | /// |
10217 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10218 | /// |
10219 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10220 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10221 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10222 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10223 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10224 | /// |
10225 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
10226 | /// |
10227 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_round_sh) |
10228 | #[inline ] |
10229 | #[target_feature (enable = "avx512fp16" )] |
10230 | #[cfg_attr (test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))] |
10231 | #[rustc_legacy_const_generics (3, 4)] |
10232 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10233 | pub fn _mm_maskz_roundscale_round_sh<const IMM8: i32, const SAE: i32>( |
10234 | k: __mmask8, |
10235 | a: __m128h, |
10236 | b: __m128h, |
10237 | ) -> __m128h { |
10238 | static_assert_uimm_bits!(IMM8, 8); |
10239 | static_assert_sae!(SAE); |
    _mm_mask_roundscale_round_sh::<IMM8, SAE>(_mm_setzero_ph(), k, a, b)
10241 | } |
10242 | |
10243 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10244 | /// the results in dst. |
10245 | /// |
10246 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_ph) |
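///
/// Scalef multiplies each lane of a by 2 raised to the floor of the corresponding lane of b. A
/// minimal usage sketch (not a compiled doc-test) under the same nightly/hardware assumptions as
/// the examples above:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn scalef_demo() -> __m128h {
///     let a = _mm_set1_ph(3.0);
///     let b = _mm_set1_ph(2.0);
///     // Every lane of the result should hold 12.0 (3.0 * 2^2).
///     _mm_scalef_ph(a, b)
/// }
/// ```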
10247 | #[inline ] |
10248 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10249 | #[cfg_attr (test, assert_instr(vscalefph))] |
10250 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10251 | pub fn _mm_scalef_ph(a: __m128h, b: __m128h) -> __m128h { |
    _mm_mask_scalef_ph(_mm_undefined_ph(), 0xff, a, b)
10253 | } |
10254 | |
10255 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10256 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
10257 | /// |
10258 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_ph) |
10259 | #[inline ] |
10260 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10261 | #[cfg_attr (test, assert_instr(vscalefph))] |
10262 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10263 | pub fn _mm_mask_scalef_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
10264 | unsafe { vscalefph_128(a, b, src, k) } |
10265 | } |
10266 | |
10267 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10268 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
10269 | /// |
10270 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_ph) |
10271 | #[inline ] |
10272 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10273 | #[cfg_attr (test, assert_instr(vscalefph))] |
10274 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10275 | pub fn _mm_maskz_scalef_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
    _mm_mask_scalef_ph(_mm_setzero_ph(), k, a, b)
10277 | } |
10278 | |
10279 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10280 | /// the results in dst. |
10281 | /// |
10282 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_scalef_ph) |
10283 | #[inline ] |
10284 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10285 | #[cfg_attr (test, assert_instr(vscalefph))] |
10286 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10287 | pub fn _mm256_scalef_ph(a: __m256h, b: __m256h) -> __m256h { |
    _mm256_mask_scalef_ph(_mm256_undefined_ph(), 0xffff, a, b)
10289 | } |
10290 | |
10291 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10292 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
10293 | /// |
10294 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_scalef_ph) |
10295 | #[inline ] |
10296 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10297 | #[cfg_attr (test, assert_instr(vscalefph))] |
10298 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10299 | pub fn _mm256_mask_scalef_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
10300 | unsafe { vscalefph_256(a, b, src, k) } |
10301 | } |
10302 | |
10303 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10304 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
10305 | /// |
10306 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_scalef_ph) |
10307 | #[inline ] |
10308 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10309 | #[cfg_attr (test, assert_instr(vscalefph))] |
10310 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10311 | pub fn _mm256_maskz_scalef_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
    _mm256_mask_scalef_ph(_mm256_setzero_ph(), k, a, b)
10313 | } |
10314 | |
10315 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10316 | /// the results in dst. |
10317 | /// |
10318 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_ph) |
10319 | #[inline ] |
10320 | #[target_feature (enable = "avx512fp16" )] |
10321 | #[cfg_attr (test, assert_instr(vscalefph))] |
10322 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10323 | pub fn _mm512_scalef_ph(a: __m512h, b: __m512h) -> __m512h { |
    _mm512_mask_scalef_ph(_mm512_undefined_ph(), 0xffffffff, a, b)
10325 | } |
10326 | |
10327 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10328 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
10329 | /// |
10330 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_ph) |
10331 | #[inline ] |
10332 | #[target_feature (enable = "avx512fp16" )] |
10333 | #[cfg_attr (test, assert_instr(vscalefph))] |
10334 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10335 | pub fn _mm512_mask_scalef_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
10336 | _mm512_mask_scalef_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
10337 | } |
10338 | |
10339 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10340 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
10341 | /// |
10342 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_ph) |
10343 | #[inline ] |
10344 | #[target_feature (enable = "avx512fp16" )] |
10345 | #[cfg_attr (test, assert_instr(vscalefph))] |
10346 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10347 | pub fn _mm512_maskz_scalef_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
    _mm512_mask_scalef_ph(_mm512_setzero_ph(), k, a, b)
10349 | } |
10350 | |
10351 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10352 | /// the results in dst. |
10353 | /// |
10354 | /// Rounding is done according to the rounding parameter, which can be one of: |
10355 | /// |
10356 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
10357 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
10358 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
10359 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
10360 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10361 | /// |
10362 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_round_ph) |
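///
/// A minimal usage sketch (not a compiled doc-test) under the same nightly/hardware assumptions
/// as the examples above, with an explicit rounding mode:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn scalef_round_demo() -> __m512h {
///     let a = _mm512_set1_ph(3.0);
///     let b = _mm512_set1_ph(-1.0);
///     // Every lane of the result should hold 1.5 (3.0 * 2^-1).
///     _mm512_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b)
/// }
/// ```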
10363 | #[inline ] |
10364 | #[target_feature (enable = "avx512fp16" )] |
10365 | #[cfg_attr (test, assert_instr(vscalefph, ROUNDING = 8))] |
10366 | #[rustc_legacy_const_generics (2)] |
10367 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10368 | pub fn _mm512_scalef_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
10369 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_scalef_round_ph::<ROUNDING>(_mm512_undefined_ph(), 0xffffffff, a, b)
10371 | } |
10372 | |
10373 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10374 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
10375 | /// |
10376 | /// Rounding is done according to the rounding parameter, which can be one of: |
10377 | /// |
10378 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
10379 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
10380 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
10381 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
10382 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10383 | /// |
10384 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_round_ph) |
10385 | #[inline ] |
10386 | #[target_feature (enable = "avx512fp16" )] |
10387 | #[cfg_attr (test, assert_instr(vscalefph, ROUNDING = 8))] |
10388 | #[rustc_legacy_const_generics (4)] |
10389 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10390 | pub fn _mm512_mask_scalef_round_ph<const ROUNDING: i32>( |
10391 | src: __m512h, |
10392 | k: __mmask32, |
10393 | a: __m512h, |
10394 | b: __m512h, |
10395 | ) -> __m512h { |
10396 | unsafe { |
10397 | static_assert_rounding!(ROUNDING); |
10398 | vscalefph_512(a, b, src, k, ROUNDING) |
10399 | } |
10400 | } |
10401 | |
10402 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10403 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
10404 | /// |
10405 | /// Rounding is done according to the rounding parameter, which can be one of: |
10406 | /// |
10407 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
10408 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
10409 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
10410 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
10411 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10412 | /// |
10413 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_round_ph) |
10414 | #[inline ] |
10415 | #[target_feature (enable = "avx512fp16" )] |
10416 | #[cfg_attr (test, assert_instr(vscalefph, ROUNDING = 8))] |
10417 | #[rustc_legacy_const_generics (3)] |
10418 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10419 | pub fn _mm512_maskz_scalef_round_ph<const ROUNDING: i32>( |
10420 | k: __mmask32, |
10421 | a: __m512h, |
10422 | b: __m512h, |
10423 | ) -> __m512h { |
10424 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_scalef_round_ph::<ROUNDING>(_mm512_setzero_ph(), k, a, b)
10426 | } |
10427 | |
/// Scale the lower half-precision (16-bit) floating-point element in a using the lower element of b, store
10429 | /// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper |
10430 | /// elements of dst. |
10431 | /// |
10432 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_sh) |
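///
/// A minimal usage sketch (not a compiled doc-test) under the same nightly/hardware assumptions
/// as the examples above:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn scalef_sh_demo() -> __m128h {
///     let a = _mm_set1_ph(3.0);
///     let b = _mm_set1_ph(2.0);
///     // Lane 0 of the result should be 12.0 (3.0 * 2^2); lanes 1..8 are copied from `a` (3.0).
///     _mm_scalef_sh(a, b)
/// }
/// ```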
10433 | #[inline ] |
10434 | #[target_feature (enable = "avx512fp16" )] |
10435 | #[cfg_attr (test, assert_instr(vscalefsh))] |
10436 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10437 | pub fn _mm_scalef_sh(a: __m128h, b: __m128h) -> __m128h { |
    _mm_mask_scalef_sh(_mm_undefined_ph(), 0xff, a, b)
10439 | } |
10440 | |
/// Scale the lower half-precision (16-bit) floating-point element in a using the lower element of b, store
10442 | /// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), |
10443 | /// and copy the upper 7 packed elements from a to the upper elements of dst. |
10444 | /// |
10445 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_sh) |
10446 | #[inline ] |
10447 | #[target_feature (enable = "avx512fp16" )] |
10448 | #[cfg_attr (test, assert_instr(vscalefsh))] |
10449 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10450 | pub fn _mm_mask_scalef_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
10451 | _mm_mask_scalef_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
10452 | } |
10453 | |
/// Scale the lower half-precision (16-bit) floating-point element in a using the lower element of b, store
10455 | /// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), |
10456 | /// and copy the upper 7 packed elements from a to the upper elements of dst. |
10457 | /// |
10458 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_sh) |
10459 | #[inline ] |
10460 | #[target_feature (enable = "avx512fp16" )] |
10461 | #[cfg_attr (test, assert_instr(vscalefsh))] |
10462 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10463 | pub fn _mm_maskz_scalef_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
    _mm_mask_scalef_sh(_mm_setzero_ph(), k, a, b)
10465 | } |
10466 | |
/// Scale the lower half-precision (16-bit) floating-point element in a using the lower element of b, store
10468 | /// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper |
10469 | /// elements of dst. |
10470 | /// |
10471 | /// Rounding is done according to the rounding parameter, which can be one of: |
10472 | /// |
10473 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
10474 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
10475 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
10476 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
10477 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10478 | /// |
10479 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_round_sh) |
10480 | #[inline ] |
10481 | #[target_feature (enable = "avx512fp16" )] |
10482 | #[cfg_attr (test, assert_instr(vscalefsh, ROUNDING = 8))] |
10483 | #[rustc_legacy_const_generics (2)] |
10484 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10485 | pub fn _mm_scalef_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
10486 | static_assert_rounding!(ROUNDING); |
    _mm_mask_scalef_round_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
10488 | } |
10489 | |
/// Scale the lower half-precision (16-bit) floating-point element in a using the lower element of b, store
10491 | /// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), |
10492 | /// and copy the upper 7 packed elements from a to the upper elements of dst. |
10493 | /// |
10494 | /// Rounding is done according to the rounding parameter, which can be one of: |
10495 | /// |
10496 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
10497 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
10498 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
10499 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
10500 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10501 | /// |
10502 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_round_sh) |
10503 | #[inline ] |
10504 | #[target_feature (enable = "avx512fp16" )] |
10505 | #[cfg_attr (test, assert_instr(vscalefsh, ROUNDING = 8))] |
10506 | #[rustc_legacy_const_generics (4)] |
10507 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10508 | pub fn _mm_mask_scalef_round_sh<const ROUNDING: i32>( |
10509 | src: __m128h, |
10510 | k: __mmask8, |
10511 | a: __m128h, |
10512 | b: __m128h, |
10513 | ) -> __m128h { |
10514 | unsafe { |
10515 | static_assert_rounding!(ROUNDING); |
10516 | vscalefsh(a, b, src, k, ROUNDING) |
10517 | } |
10518 | } |
10519 | |
/// Scale the lower half-precision (16-bit) floating-point element in a using the lower element of b, store
10521 | /// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), |
10522 | /// and copy the upper 7 packed elements from a to the upper elements of dst. |
10523 | /// |
10524 | /// Rounding is done according to the rounding parameter, which can be one of: |
10525 | /// |
10526 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
10527 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
10528 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
10529 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
10530 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10531 | /// |
10532 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_round_sh) |
10533 | #[inline ] |
10534 | #[target_feature (enable = "avx512fp16" )] |
10535 | #[cfg_attr (test, assert_instr(vscalefsh, ROUNDING = 8))] |
10536 | #[rustc_legacy_const_generics (3)] |
10537 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10538 | pub fn _mm_maskz_scalef_round_sh<const ROUNDING: i32>( |
10539 | k: __mmask8, |
10540 | a: __m128h, |
10541 | b: __m128h, |
10542 | ) -> __m128h { |
10543 | static_assert_rounding!(ROUNDING); |
    _mm_mask_scalef_round_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b)
10545 | } |
10546 | |
10547 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10548 | /// number of bits specified by imm8, and store the results in dst. |
10549 | /// |
10550 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10551 | /// |
10552 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10553 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10554 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10555 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10556 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10557 | /// |
10558 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_ph) |
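///
/// The reduced argument is a minus a rounded to the requested precision. A minimal usage sketch
/// (not a compiled doc-test) under the same nightly/hardware assumptions as the examples above:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn reduce_demo() -> __m128h {
///     let a = _mm_set1_ph(1.25);
///     // With zero fraction bits and round-to-nearest, 1.25 rounds to 1.0,
///     // so every lane of the result should hold 0.25.
///     _mm_reduce_ph::<_MM_FROUND_TO_NEAREST_INT>(a)
/// }
/// ```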
10559 | #[inline ] |
10560 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10561 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10562 | #[rustc_legacy_const_generics (1)] |
10563 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10564 | pub fn _mm_reduce_ph<const IMM8: i32>(a: __m128h) -> __m128h { |
10565 | static_assert_uimm_bits!(IMM8, 8); |
    _mm_mask_reduce_ph::<IMM8>(_mm_undefined_ph(), 0xff, a)
10567 | } |
10568 | |
10569 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10570 | /// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied |
10571 | /// from src when the corresponding mask bit is not set). |
10572 | /// |
10573 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10574 | /// |
10575 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10576 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10577 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10578 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10579 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10580 | /// |
10581 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_ph) |
10582 | #[inline ] |
10583 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10584 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10585 | #[rustc_legacy_const_generics (3)] |
10586 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10587 | pub fn _mm_mask_reduce_ph<const IMM8: i32>(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { |
10588 | unsafe { |
10589 | static_assert_uimm_bits!(IMM8, 8); |
10590 | vreduceph_128(a, IMM8, src, k) |
10591 | } |
10592 | } |
10593 | |
10594 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10595 | /// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed |
10596 | /// out when the corresponding mask bit is not set). |
10597 | /// |
10598 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10599 | /// |
10600 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10601 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10602 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10603 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10604 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10605 | /// |
10606 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_ph) |
10607 | #[inline ] |
10608 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10609 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10610 | #[rustc_legacy_const_generics (2)] |
10611 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10612 | pub fn _mm_maskz_reduce_ph<const IMM8: i32>(k: __mmask8, a: __m128h) -> __m128h { |
10613 | static_assert_uimm_bits!(IMM8, 8); |
    _mm_mask_reduce_ph::<IMM8>(_mm_setzero_ph(), k, a)
10615 | } |
10616 | |
10617 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10618 | /// number of bits specified by imm8, and store the results in dst. |
10619 | /// |
10620 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10621 | /// |
10622 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10623 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10624 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10625 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10626 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10627 | /// |
10628 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_ph) |
10629 | #[inline ] |
10630 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10631 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10632 | #[rustc_legacy_const_generics (1)] |
10633 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10634 | pub fn _mm256_reduce_ph<const IMM8: i32>(a: __m256h) -> __m256h { |
10635 | static_assert_uimm_bits!(IMM8, 8); |
    _mm256_mask_reduce_ph::<IMM8>(_mm256_undefined_ph(), 0xffff, a)
10637 | } |
10638 | |
10639 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10640 | /// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied |
10641 | /// from src when the corresponding mask bit is not set). |
10642 | /// |
10643 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10644 | /// |
10645 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10646 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10647 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10648 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10649 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10650 | /// |
10651 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_reduce_ph) |
10652 | #[inline ] |
10653 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10654 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10655 | #[rustc_legacy_const_generics (3)] |
10656 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10657 | pub fn _mm256_mask_reduce_ph<const IMM8: i32>(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { |
10658 | unsafe { |
10659 | static_assert_uimm_bits!(IMM8, 8); |
10660 | vreduceph_256(a, IMM8, src, k) |
10661 | } |
10662 | } |
10663 | |
10664 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10665 | /// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed |
10666 | /// out when the corresponding mask bit is not set). |
10667 | /// |
10668 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10669 | /// |
10670 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10671 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10672 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10673 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10674 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10675 | /// |
10676 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_reduce_ph) |
10677 | #[inline ] |
10678 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10679 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10680 | #[rustc_legacy_const_generics (2)] |
10681 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10682 | pub fn _mm256_maskz_reduce_ph<const IMM8: i32>(k: __mmask16, a: __m256h) -> __m256h { |
10683 | static_assert_uimm_bits!(IMM8, 8); |
    _mm256_mask_reduce_ph::<IMM8>(_mm256_setzero_ph(), k, a)
10685 | } |
10686 | |
10687 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10688 | /// number of bits specified by imm8, and store the results in dst. |
10689 | /// |
10690 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10691 | /// |
10692 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10693 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10694 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10695 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10696 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10697 | /// |
10698 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_ph) |
10699 | #[inline ] |
10700 | #[target_feature (enable = "avx512fp16" )] |
10701 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10702 | #[rustc_legacy_const_generics (1)] |
10703 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10704 | pub fn _mm512_reduce_ph<const IMM8: i32>(a: __m512h) -> __m512h { |
10705 | static_assert_uimm_bits!(IMM8, 8); |
    _mm512_mask_reduce_ph::<IMM8>(_mm512_undefined_ph(), 0xffffffff, a)
10707 | } |
10708 | |
10709 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10710 | /// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied |
10711 | /// from src when the corresponding mask bit is not set). |
10712 | /// |
10713 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10714 | /// |
10715 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10716 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10717 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10718 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10719 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10720 | /// |
10721 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_ph) |
10722 | #[inline ] |
10723 | #[target_feature (enable = "avx512fp16" )] |
10724 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10725 | #[rustc_legacy_const_generics (3)] |
10726 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10727 | pub fn _mm512_mask_reduce_ph<const IMM8: i32>(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { |
10728 | static_assert_uimm_bits!(IMM8, 8); |
10729 | _mm512_mask_reduce_round_ph::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a) |
10730 | } |
10731 | |
10732 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10733 | /// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed |
10734 | /// out when the corresponding mask bit is not set). |
10735 | /// |
10736 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10737 | /// |
10738 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10739 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10740 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10741 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10742 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10743 | /// |
10744 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_ph) |
10745 | #[inline ] |
10746 | #[target_feature (enable = "avx512fp16" )] |
10747 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10748 | #[rustc_legacy_const_generics (2)] |
10749 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10750 | pub fn _mm512_maskz_reduce_ph<const IMM8: i32>(k: __mmask32, a: __m512h) -> __m512h { |
10751 | static_assert_uimm_bits!(IMM8, 8); |
    _mm512_mask_reduce_ph::<IMM8>(_mm512_setzero_ph(), k, a)
10753 | } |
10754 | |
10755 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10756 | /// number of bits specified by imm8, and store the results in dst. |
10757 | /// |
10758 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10759 | /// |
10760 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10761 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10762 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10763 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10764 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10765 | /// |
10766 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
10767 | /// |
10768 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_round_ph) |
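///
/// A minimal usage sketch (not a compiled doc-test) under the same nightly/hardware assumptions
/// as the examples above, combining the reduction with exception suppression:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn reduce_round_demo() -> __m512h {
///     let a = _mm512_set1_ph(-2.75);
///     // Truncating -2.75 toward zero gives -2.0, so every lane of the result
///     // should hold -0.75.
///     _mm512_reduce_round_ph::<_MM_FROUND_TO_ZERO, _MM_FROUND_NO_EXC>(a)
/// }
/// ```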
10769 | #[inline ] |
10770 | #[target_feature (enable = "avx512fp16" )] |
10771 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))] |
10772 | #[rustc_legacy_const_generics (1, 2)] |
10773 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10774 | pub fn _mm512_reduce_round_ph<const IMM8: i32, const SAE: i32>(a: __m512h) -> __m512h { |
10775 | static_assert_uimm_bits!(IMM8, 8); |
10776 | static_assert_sae!(SAE); |
10777 | _mm512_mask_reduce_round_ph::<IMM8, SAE>(_mm512_undefined_ph(), 0xffffffff, a) |
10778 | } |
10779 | |
10780 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10781 | /// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied |
10782 | /// from src when the corresponding mask bit is not set). |
10783 | /// |
10784 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10785 | /// |
10786 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10787 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10788 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10789 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10790 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10791 | /// |
10792 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
10793 | /// |
10794 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_round_ph) |
10795 | #[inline ] |
10796 | #[target_feature (enable = "avx512fp16" )] |
10797 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))] |
10798 | #[rustc_legacy_const_generics (3, 4)] |
10799 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10800 | pub fn _mm512_mask_reduce_round_ph<const IMM8: i32, const SAE: i32>( |
10801 | src: __m512h, |
10802 | k: __mmask32, |
10803 | a: __m512h, |
10804 | ) -> __m512h { |
10805 | unsafe { |
10806 | static_assert_uimm_bits!(IMM8, 8); |
10807 | static_assert_sae!(SAE); |
10808 | vreduceph_512(a, IMM8, src, k, SAE) |
10809 | } |
10810 | } |
10811 | |
10812 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10813 | /// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed |
10814 | /// out when the corresponding mask bit is not set). |
10815 | /// |
10816 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10817 | /// |
10818 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10819 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10820 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10821 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10822 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10823 | /// |
10824 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
10825 | /// |
10826 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_round_ph) |
10827 | #[inline ] |
10828 | #[target_feature (enable = "avx512fp16" )] |
10829 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))] |
10830 | #[rustc_legacy_const_generics (2, 3)] |
10831 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10832 | pub fn _mm512_maskz_reduce_round_ph<const IMM8: i32, const SAE: i32>( |
10833 | k: __mmask32, |
10834 | a: __m512h, |
10835 | ) -> __m512h { |
10836 | static_assert_uimm_bits!(IMM8, 8); |
10837 | static_assert_sae!(SAE); |
10838 | _mm512_mask_reduce_round_ph::<IMM8, SAE>(_mm512_setzero_ph(), k, a) |
10839 | } |
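
// Illustrative sketch (not part of the crate): in the reduce family, IMM8 packs both
// the number of fixed-point fraction bits to keep (imm8[7:4]) and the rounding mode
// (imm8[1:0]), while the separate SAE const only controls exception suppression. The
// helper name and constant choices below are assumptions made for the example.
//
// #[target_feature(enable = "avx512fp16")]
// fn fractional_parts(a: __m512h) -> __m512h {
//     // M = 0 fraction bits, round to nearest, exceptions suppressed:
//     // every lane becomes `a - round_to_nearest(a)`.
//     _mm512_reduce_round_ph::<{ _MM_FROUND_TO_NEAREST_INT }, _MM_FROUND_NO_EXC>(a)
// }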
10840 | |
10841 | /// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by |
10842 | /// the number of bits specified by imm8, store the result in the lower element of dst, and copy the |
10843 | /// upper 7 packed elements from a to the upper elements of dst. |
10844 | /// |
10845 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10846 | /// |
10847 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10848 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10849 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10850 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10851 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10852 | /// |
10853 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_sh) |
10854 | #[inline ] |
10855 | #[target_feature (enable = "avx512fp16" )] |
10856 | #[cfg_attr (test, assert_instr(vreducesh, IMM8 = 0))] |
10857 | #[rustc_legacy_const_generics (2)] |
10858 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10859 | pub fn _mm_reduce_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> __m128h { |
10860 | static_assert_uimm_bits!(IMM8, 8); |
10861 | _mm_mask_reduce_sh::<IMM8>(_mm_undefined_ph(), 0xff, a, b) |
10862 | } |
10863 | |
10864 | /// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by |
10865 | /// the number of bits specified by imm8, store the result in the lower element of dst using writemask k |
10866 | /// (the element is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from |
10867 | /// a to the upper elements of dst. |
10868 | /// |
10869 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10870 | /// |
10871 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10872 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10873 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10874 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10875 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10876 | /// |
10877 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_sh) |
10878 | #[inline ] |
10879 | #[target_feature (enable = "avx512fp16" )] |
10880 | #[cfg_attr (test, assert_instr(vreducesh, IMM8 = 0))] |
10881 | #[rustc_legacy_const_generics (4)] |
10882 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10883 | pub fn _mm_mask_reduce_sh<const IMM8: i32>( |
10884 | src: __m128h, |
10885 | k: __mmask8, |
10886 | a: __m128h, |
10887 | b: __m128h, |
10888 | ) -> __m128h { |
10889 | static_assert_uimm_bits!(IMM8, 8); |
10890 | _mm_mask_reduce_round_sh::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
10891 | } |
10892 | |
10893 | /// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by |
10894 | /// the number of bits specified by imm8, store the result in the lower element of dst using zeromask k |
10895 | /// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a |
10896 | /// to the upper elements of dst. |
10897 | /// |
10898 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10899 | /// |
10900 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10901 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10902 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10903 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10904 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10905 | /// |
10906 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_sh) |
10907 | #[inline ] |
10908 | #[target_feature (enable = "avx512fp16" )] |
10909 | #[cfg_attr (test, assert_instr(vreducesh, IMM8 = 0))] |
10910 | #[rustc_legacy_const_generics (3)] |
10911 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10912 | pub fn _mm_maskz_reduce_sh<const IMM8: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
10913 | static_assert_uimm_bits!(IMM8, 8); |
10914 | _mm_mask_reduce_sh::<IMM8>(_mm_setzero_ph(), k, a, b) |
10915 | } |
10916 | |
10917 | /// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by |
10918 | /// the number of bits specified by imm8, store the result in the lower element of dst, and copy the upper |
10919 | /// 7 packed elements from a to the upper elements of dst. |
10920 | /// |
10921 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10922 | /// |
10923 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10924 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10925 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10926 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10927 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10928 | /// |
10929 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
10930 | /// |
10931 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_round_sh) |
10932 | #[inline ] |
10933 | #[target_feature (enable = "avx512fp16" )] |
10934 | #[cfg_attr (test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))] |
10935 | #[rustc_legacy_const_generics (2, 3)] |
10936 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10937 | pub fn _mm_reduce_round_sh<const IMM8: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __m128h { |
10938 | static_assert_uimm_bits!(IMM8, 8); |
10939 | static_assert_sae!(SAE); |
10940 | _mm_mask_reduce_round_sh::<IMM8, SAE>(_mm_undefined_ph(), 0xff, a, b) |
10941 | } |
10942 | |
10943 | /// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by |
10944 | /// the number of bits specified by imm8, store the result in the lower element of dst using writemask k |
10945 | /// (the element is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a |
10946 | /// to the upper elements of dst. |
10947 | /// |
10948 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10949 | /// |
10950 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10951 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10952 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10953 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10954 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10955 | /// |
10956 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
10957 | /// |
10958 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_round_sh) |
10959 | #[inline ] |
10960 | #[target_feature (enable = "avx512fp16" )] |
10961 | #[cfg_attr (test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))] |
10962 | #[rustc_legacy_const_generics (4, 5)] |
10963 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10964 | pub fn _mm_mask_reduce_round_sh<const IMM8: i32, const SAE: i32>( |
10965 | src: __m128h, |
10966 | k: __mmask8, |
10967 | a: __m128h, |
10968 | b: __m128h, |
10969 | ) -> __m128h { |
10970 | unsafe { |
10971 | static_assert_uimm_bits!(IMM8, 8); |
10972 | static_assert_sae!(SAE); |
10973 | vreducesh(a, b, src, k, IMM8, SAE) |
10974 | } |
10975 | } |
10976 | |
10977 | /// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by |
10978 | /// the number of bits specified by imm8, store the result in the lower element of dst using zeromask k |
10979 | /// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a |
10980 | /// to the upper elements of dst. |
10981 | /// |
10982 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10983 | /// |
10984 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10985 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10986 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10987 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10988 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10989 | /// |
10990 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
10991 | /// |
10992 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_round_sh) |
10993 | #[inline ] |
10994 | #[target_feature (enable = "avx512fp16" )] |
10995 | #[cfg_attr (test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))] |
10996 | #[rustc_legacy_const_generics (3, 4)] |
10997 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10998 | pub fn _mm_maskz_reduce_round_sh<const IMM8: i32, const SAE: i32>( |
10999 | k: __mmask8, |
11000 | a: __m128h, |
11001 | b: __m128h, |
11002 | ) -> __m128h { |
11003 | static_assert_uimm_bits!(IMM8, 8); |
11004 | static_assert_sae!(SAE); |
11005 | _mm_mask_reduce_round_sh::<IMM8, SAE>(_mm_setzero_ph(), k, a, b) |
11006 | } |
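
// Illustrative sketch (not part of the crate): the `_sh` forms only compute lane 0;
// lanes 1..=7 are always copied from `a`, and with a writemask lane 0 falls back to
// `src` when bit 0 of `k` is clear. The helper name is an assumption for the example.
//
// #[target_feature(enable = "avx512fp16")]
// fn reduce_lane0_or_keep(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
//     _mm_mask_reduce_round_sh::<0, _MM_FROUND_NO_EXC>(src, k, a, b)
// }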
11007 | |
11008 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the |
11009 | /// sum of all elements in a. |
11010 | /// |
11011 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_add_ph) |
11012 | #[inline ] |
11013 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11014 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11015 | pub fn _mm_reduce_add_ph(a: __m128h) -> f16 { |
11016 | unsafe { |
11017 | let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]); |
11018 | let a: __m128h = _mm_add_ph(a, b); |
11019 | let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]); |
11020 | let a: __m128h = _mm_add_ph(a, b); |
11021 | simd_extract::<_, f16>(a, 0) + simd_extract::<_, f16>(a, 1) |
11022 | } |
11023 | } |
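
// Illustrative sketch (not part of the crate): the horizontal reductions below are
// shuffle/add trees rather than sequential loops, so the result may differ slightly
// from a strict left-to-right f16 summation. The helper name is an assumption for
// the example.
//
// #[target_feature(enable = "avx512fp16,avx512vl")]
// fn horizontal_sum(v: __m128h) -> f16 {
//     _mm_reduce_add_ph(v)
// }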
11024 | |
11025 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the |
11026 | /// sum of all elements in a. |
11027 | /// |
11028 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_add_ph) |
11029 | #[inline ] |
11030 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11031 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11032 | pub fn _mm256_reduce_add_ph(a: __m256h) -> f16 { |
11033 | unsafe { |
11034 | let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); |
11035 | let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); |
11036 | _mm_reduce_add_ph(_mm_add_ph(p, q)) |
11037 | } |
11038 | } |
11039 | |
11040 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the |
11041 | /// sum of all elements in a. |
11042 | /// |
11043 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_add_ph) |
11044 | #[inline ] |
11045 | #[target_feature (enable = "avx512fp16" )] |
11046 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11047 | pub fn _mm512_reduce_add_ph(a: __m512h) -> f16 { |
11048 | unsafe { |
11049 | let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); |
11050 | let q: __m256h = simd_shuffle!( |
11051 | a, |
11052 | a, |
11053 | [ |
11054 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 |
11055 | ] |
11056 | ); |
11057 | _mm256_reduce_add_ph(_mm256_add_ph(p, q)) |
11058 | } |
11059 | } |
11060 | |
11061 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns |
11062 | /// the product of all elements in a. |
11063 | /// |
11064 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_mul_ph) |
11065 | #[inline ] |
11066 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11067 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11068 | pub fn _mm_reduce_mul_ph(a: __m128h) -> f16 { |
11069 | unsafe { |
11070 | let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]); |
11071 | let a: __m128h = _mm_mul_ph(a, b); |
11072 | let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]); |
11073 | let a: __m128h = _mm_mul_ph(a, b); |
11074 | simd_extract::<_, f16>(a, 0) * simd_extract::<_, f16>(a, 1) |
11075 | } |
11076 | } |
11077 | |
11078 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns |
11079 | /// the product of all elements in a. |
11080 | /// |
11081 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_mul_ph) |
11082 | #[inline ] |
11083 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11084 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11085 | pub fn _mm256_reduce_mul_ph(a: __m256h) -> f16 { |
11086 | unsafe { |
11087 | let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); |
11088 | let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); |
11089 | _mm_reduce_mul_ph(_mm_mul_ph(p, q)) |
11090 | } |
11091 | } |
11092 | |
11093 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns |
11094 | /// the product of all elements in a. |
11095 | /// |
11096 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_mul_ph) |
11097 | #[inline ] |
11098 | #[target_feature (enable = "avx512fp16" )] |
11099 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11100 | pub fn _mm512_reduce_mul_ph(a: __m512h) -> f16 { |
11101 | unsafe { |
11102 | let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); |
11103 | let q: __m256h = simd_shuffle!( |
11104 | a, |
11105 | a, |
11106 | [ |
11107 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 |
11108 | ] |
11109 | ); |
11110 | _mm256_reduce_mul_ph(_mm256_mul_ph(p, q)) |
11111 | } |
11112 | } |
11113 | |
11114 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the |
11115 | /// minimum of all elements in a. |
11116 | /// |
11117 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_min_ph) |
11118 | #[inline ] |
11119 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11120 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11121 | pub fn _mm_reduce_min_ph(a: __m128h) -> f16 { |
11122 | unsafe { |
11123 | let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]); |
11124 | let a: __m128h = _mm_min_ph(a, b); |
11125 | let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]); |
11126 | let a: __m128h = _mm_min_ph(a, b); |
11127 | let b: __m128h = simd_shuffle!(a, a, [1, 0, 2, 3, 4, 5, 6, 7]); |
11128 | simd_extract!(_mm_min_sh(a, b), 0) |
11129 | } |
11130 | } |
11131 | |
11132 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the |
11133 | /// minimum of all elements in a. |
11134 | /// |
11135 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_min_ph) |
11136 | #[inline ] |
11137 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11138 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11139 | pub fn _mm256_reduce_min_ph(a: __m256h) -> f16 { |
11140 | unsafe { |
11141 | let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); |
11142 | let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); |
11143 | _mm_reduce_min_ph(_mm_min_ph(p, q)) |
11144 | } |
11145 | } |
11146 | |
11147 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the |
11148 | /// minimum of all elements in a. |
11149 | /// |
11150 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_ph) |
11151 | #[inline ] |
11152 | #[target_feature (enable = "avx512fp16" )] |
11153 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11154 | pub fn _mm512_reduce_min_ph(a: __m512h) -> f16 { |
11155 | unsafe { |
11156 | let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); |
11157 | let q: __m256h = simd_shuffle!( |
11158 | a, |
11159 | a, |
11160 | [ |
11161 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 |
11162 | ] |
11163 | ); |
11164 | _mm256_reduce_min_ph(_mm256_min_ph(p, q)) |
11165 | } |
11166 | } |
11167 | |
11168 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the |
11169 | /// maximum of all elements in a. |
11170 | /// |
11171 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_max_ph) |
11172 | #[inline ] |
11173 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11174 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11175 | pub fn _mm_reduce_max_ph(a: __m128h) -> f16 { |
11176 | unsafe { |
11177 | let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]); |
11178 | let a: __m128h = _mm_max_ph(a, b); |
11179 | let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]); |
11180 | let a: __m128h = _mm_max_ph(a, b); |
11181 | let b: __m128h = simd_shuffle!(a, a, [1, 0, 2, 3, 4, 5, 6, 7]); |
11182 | simd_extract!(_mm_max_sh(a, b), 0) |
11183 | } |
11184 | } |
11185 | |
11186 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the |
11187 | /// maximum of all elements in a. |
11188 | /// |
11189 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_max_ph) |
11190 | #[inline ] |
11191 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11192 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11193 | pub fn _mm256_reduce_max_ph(a: __m256h) -> f16 { |
11194 | unsafe { |
11195 | let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); |
11196 | let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); |
11197 | _mm_reduce_max_ph(_mm_max_ph(p, q)) |
11198 | } |
11199 | } |
11200 | |
11201 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the |
11202 | /// maximum of all elements in a. |
11203 | /// |
11204 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_ph) |
11205 | #[inline ] |
11206 | #[target_feature (enable = "avx512fp16" )] |
11207 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11208 | pub fn _mm512_reduce_max_ph(a: __m512h) -> f16 { |
11209 | unsafe { |
11210 | let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); |
11211 | let q: __m256h = simd_shuffle!( |
11212 | a, |
11213 | a, |
11214 | [ |
11215 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 |
11216 | ] |
11217 | ); |
11218 | _mm256_reduce_max_ph(_mm256_max_ph(p, q)) |
11219 | } |
11220 | } |
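
// Illustrative sketch (not part of the crate): a full horizontal maximum over all 32
// lanes of a 512-bit vector. The helper name is an assumption for the example.
//
// #[target_feature(enable = "avx512fp16,avx512vl")]
// fn max_of_32(v: __m512h) -> f16 {
//     _mm512_reduce_max_ph(v)
// }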
11221 | |
11222 | macro_rules! fpclass_asm { // FIXME: use LLVM intrinsics |
11223 | ($mask_type: ty, $reg: ident, $a: expr) => {{ |
11224 | let dst: $mask_type; |
11225 | asm!( |
11226 | "vfpclassph {k}, {src}, {imm8}" , |
11227 | k = lateout(kreg) dst, |
11228 | src = in($reg) $a, |
11229 | imm8 = const IMM8, |
11230 | options(pure, nomem, nostack) |
11231 | ); |
11232 | dst |
11233 | }}; |
11234 | ($mask_type: ty, $mask: expr, $reg: ident, $a: expr) => {{ |
11235 | let dst: $mask_type; |
11236 | asm!( |
11237 | "vfpclassph {k} {{ {mask} }}, {src}, {imm8}" , |
11238 | k = lateout(kreg) dst, |
11239 | mask = in(kreg) $mask, |
11240 | src = in($reg) $a, |
11241 | imm8 = const IMM8, |
11242 | options(pure, nomem, nostack) |
11243 | ); |
11244 | dst |
11245 | }}; |
11246 | } |
11247 | |
11248 | /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified |
11249 | /// by imm8, and store the results in mask vector k. |
11250 | /// imm can be a combination of: |
11251 | /// |
11252 | /// 0x01 // QNaN |
11253 | /// 0x02 // Positive Zero |
11254 | /// 0x04 // Negative Zero |
11255 | /// 0x08 // Positive Infinity |
11256 | /// 0x10 // Negative Infinity |
11257 | /// 0x20 // Denormal |
11258 | /// 0x40 // Negative |
11259 | /// 0x80 // SNaN |
11260 | /// |
11261 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_ph_mask) |
11262 | #[inline ] |
11263 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11264 | #[cfg_attr (test, assert_instr(vfpclassph, IMM8 = 0))] |
11265 | #[rustc_legacy_const_generics (1)] |
11266 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11267 | pub fn _mm_fpclass_ph_mask<const IMM8: i32>(a: __m128h) -> __mmask8 { |
11268 | unsafe { |
11269 | static_assert_uimm_bits!(IMM8, 8); |
11270 | fpclass_asm!(__mmask8, xmm_reg, a) |
11271 | } |
11272 | } |
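
// Illustrative sketch (not part of the crate): because the category bits can be OR'ed
// together, `0x01 | 0x80` tests each lane for any NaN, quiet or signaling. The helper
// name is an assumption for the example.
//
// #[target_feature(enable = "avx512fp16,avx512vl")]
// fn nan_lanes(a: __m128h) -> __mmask8 {
//     _mm_fpclass_ph_mask::<{ 0x01 | 0x80 }>(a)
// }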
11273 | |
11274 | /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified |
11275 | /// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the |
11276 | /// corresponding mask bit is not set). |
11277 | /// imm can be a combination of: |
11278 | /// |
11279 | /// 0x01 // QNaN |
11280 | /// 0x02 // Positive Zero |
11281 | /// 0x04 // Negative Zero |
11282 | /// 0x08 // Positive Infinity |
11283 | /// 0x10 // Negative Infinity |
11284 | /// 0x20 // Denormal |
11285 | /// 0x40 // Negative |
11286 | /// 0x80 // SNaN |
11287 | /// |
11288 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_ph_mask) |
11289 | #[inline ] |
11290 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11291 | #[cfg_attr (test, assert_instr(vfpclassph, IMM8 = 0))] |
11292 | #[rustc_legacy_const_generics (2)] |
11293 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11294 | pub fn _mm_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask8, a: __m128h) -> __mmask8 { |
11295 | unsafe { |
11296 | static_assert_uimm_bits!(IMM8, 8); |
11297 | fpclass_asm!(__mmask8, k1, xmm_reg, a) |
11298 | } |
11299 | } |
11300 | |
11301 | /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified |
11302 | /// by imm8, and store the results in mask vector k. |
11303 | /// imm can be a combination of: |
11304 | /// |
11305 | /// 0x01 // QNaN |
11306 | /// 0x02 // Positive Zero |
11307 | /// 0x04 // Negative Zero |
11308 | /// 0x08 // Positive Infinity |
11309 | /// 0x10 // Negative Infinity |
11310 | /// 0x20 // Denormal |
11311 | /// 0x40 // Negative |
11312 | /// 0x80 // SNaN |
11313 | /// |
11314 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fpclass_ph_mask) |
11315 | #[inline ] |
11316 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11317 | #[cfg_attr (test, assert_instr(vfpclassph, IMM8 = 0))] |
11318 | #[rustc_legacy_const_generics (1)] |
11319 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11320 | pub fn _mm256_fpclass_ph_mask<const IMM8: i32>(a: __m256h) -> __mmask16 { |
11321 | unsafe { |
11322 | static_assert_uimm_bits!(IMM8, 8); |
11323 | fpclass_asm!(__mmask16, ymm_reg, a) |
11324 | } |
11325 | } |
11326 | |
11327 | /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified |
11328 | /// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the |
11329 | /// corresponding mask bit is not set). |
11330 | /// imm can be a combination of: |
11331 | /// |
11332 | /// 0x01 // QNaN |
11333 | /// 0x02 // Positive Zero |
11334 | /// 0x04 // Negative Zero |
11335 | /// 0x08 // Positive Infinity |
11336 | /// 0x10 // Negative Infinity |
11337 | /// 0x20 // Denormal |
11338 | /// 0x40 // Negative |
11339 | /// 0x80 // SNaN |
11340 | /// |
11341 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fpclass_ph_mask) |
11342 | #[inline ] |
11343 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11344 | #[cfg_attr (test, assert_instr(vfpclassph, IMM8 = 0))] |
11345 | #[rustc_legacy_const_generics (2)] |
11346 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11347 | pub fn _mm256_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask16, a: __m256h) -> __mmask16 { |
11348 | unsafe { |
11349 | static_assert_uimm_bits!(IMM8, 8); |
11350 | fpclass_asm!(__mmask16, k1, ymm_reg, a) |
11351 | } |
11352 | } |
11353 | |
11354 | /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified |
11355 | /// by imm8, and store the results in mask vector k. |
11356 | /// imm can be a combination of: |
11357 | /// |
11358 | /// 0x01 // QNaN |
11359 | /// 0x02 // Positive Zero |
11360 | /// 0x04 // Negative Zero |
11361 | /// 0x08 // Positive Infinity |
11362 | /// 0x10 // Negative Infinity |
11363 | /// 0x20 // Denormal |
11364 | /// 0x40 // Negative |
11365 | /// 0x80 // SNaN |
11366 | /// |
11367 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fpclass_ph_mask) |
11368 | #[inline ] |
11369 | #[target_feature (enable = "avx512fp16" )] |
11370 | #[cfg_attr (test, assert_instr(vfpclassph, IMM8 = 0))] |
11371 | #[rustc_legacy_const_generics (1)] |
11372 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11373 | pub fn _mm512_fpclass_ph_mask<const IMM8: i32>(a: __m512h) -> __mmask32 { |
11374 | unsafe { |
11375 | static_assert_uimm_bits!(IMM8, 8); |
11376 | fpclass_asm!(__mmask32, zmm_reg, a) |
11377 | } |
11378 | } |
11379 | |
11380 | /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified |
11381 | /// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the |
11382 | /// corresponding mask bit is not set). |
11383 | /// imm can be a combination of: |
11384 | /// |
11385 | /// 0x01 // QNaN |
11386 | /// 0x02 // Positive Zero |
11387 | /// 0x04 // Negative Zero |
11388 | /// 0x08 // Positive Infinity |
11389 | /// 0x10 // Negative Infinity |
11390 | /// 0x20 // Denormal |
11391 | /// 0x40 // Negative |
11392 | /// 0x80 // SNaN |
11393 | /// |
11394 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fpclass_ph_mask) |
11395 | #[inline ] |
11396 | #[target_feature (enable = "avx512fp16" )] |
11397 | #[cfg_attr (test, assert_instr(vfpclassph, IMM8 = 0))] |
11398 | #[rustc_legacy_const_generics (2)] |
11399 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11400 | pub fn _mm512_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask32, a: __m512h) -> __mmask32 { |
11401 | unsafe { |
11402 | static_assert_uimm_bits!(IMM8, 8); |
11403 | fpclass_asm!(__mmask32, k1, zmm_reg, a) |
11404 | } |
11405 | } |
11406 | |
11407 | /// Test the lower half-precision (16-bit) floating-point element in a for special categories specified |
11408 | /// by imm8, and store the result in mask vector k. |
11409 | /// imm can be a combination of: |
11410 | /// |
11411 | /// 0x01 // QNaN |
11412 | /// 0x02 // Positive Zero |
11413 | /// 0x04 // Negative Zero |
11414 | /// 0x08 // Positive Infinity |
11415 | /// 0x10 // Negative Infinity |
11416 | /// 0x20 // Denormal |
11417 | /// 0x40 // Negative |
11418 | /// 0x80 // SNaN |
11419 | /// |
11420 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_sh_mask) |
11421 | #[inline ] |
11422 | #[target_feature (enable = "avx512fp16" )] |
11423 | #[cfg_attr (test, assert_instr(vfpclasssh, IMM8 = 0))] |
11424 | #[rustc_legacy_const_generics (1)] |
11425 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11426 | pub fn _mm_fpclass_sh_mask<const IMM8: i32>(a: __m128h) -> __mmask8 { |
11427 | _mm_mask_fpclass_sh_mask::<IMM8>(0xff, a) |
11428 | } |
11429 | |
11430 | /// Test the lower half-precision (16-bit) floating-point element in a for special categories specified |
11431 | /// by imm8, and store the result in mask vector k using zeromask k (elements are zeroed out when the |
11432 | /// corresponding mask bit is not set). |
11433 | /// imm can be a combination of: |
11434 | /// |
11435 | /// 0x01 // QNaN |
11436 | /// 0x02 // Positive Zero |
11437 | /// 0x04 // Negative Zero |
11438 | /// 0x08 // Positive Infinity |
11439 | /// 0x10 // Negative Infinity |
11440 | /// 0x20 // Denormal |
11441 | /// 0x40 // Negative |
11442 | /// 0x80 // SNaN |
11443 | /// |
11444 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_sh_mask) |
11445 | #[inline ] |
11446 | #[target_feature (enable = "avx512fp16" )] |
11447 | #[cfg_attr (test, assert_instr(vfpclasssh, IMM8 = 0))] |
11448 | #[rustc_legacy_const_generics (2)] |
11449 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11450 | pub fn _mm_mask_fpclass_sh_mask<const IMM8: i32>(k1: __mmask8, a: __m128h) -> __mmask8 { |
11451 | unsafe { |
11452 | static_assert_uimm_bits!(IMM8, 8); |
11453 | vfpclasssh(a, IMM8, k1) |
11454 | } |
11455 | } |
11456 | |
11457 | /// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k, |
11458 | /// and store the results in dst. |
11459 | /// |
11460 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_blend_ph) |
11461 | #[inline ] |
11462 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11463 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11464 | pub fn _mm_mask_blend_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
11465 | unsafe { simd_select_bitmask(k, b, a) } |
11466 | } |
11467 | |
11468 | /// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k, |
11469 | /// and store the results in dst. |
11470 | /// |
11471 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_blend_ph) |
11472 | #[inline ] |
11473 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11474 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11475 | pub fn _mm256_mask_blend_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
11476 | unsafe { simd_select_bitmask(k, b, a) } |
11477 | } |
11478 | |
11479 | /// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k, |
11480 | /// and store the results in dst. |
11481 | /// |
11482 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_ph) |
11483 | #[inline ] |
11484 | #[target_feature (enable = "avx512fp16" )] |
11485 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11486 | pub fn _mm512_mask_blend_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
11487 | unsafe { simd_select_bitmask(k, b, a) } |
11488 | } |
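
// Illustrative sketch (not part of the crate): bit i of `k` selects lane i of `b` when
// set and lane i of `a` when clear. The mask value and helper name are assumptions for
// the example.
//
// #[target_feature(enable = "avx512fp16,avx512vl")]
// fn low_half_from_b(a: __m128h, b: __m128h) -> __m128h {
//     _mm_mask_blend_ph(0b0000_1111, a, b)
// }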
11489 | |
11490 | /// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector |
11491 | /// and index in idx, and store the results in dst. |
11492 | /// |
11493 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutex2var_ph) |
11494 | #[inline ] |
11495 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11496 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11497 | pub fn _mm_permutex2var_ph(a: __m128h, idx: __m128i, b: __m128h) -> __m128h { |
11498 | _mm_castsi128_ph(_mm_permutex2var_epi16( |
11499 | _mm_castph_si128(a), |
11500 | idx, |
11501 | _mm_castph_si128(b), |
11502 | )) |
11503 | } |
11504 | |
11505 | /// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector |
11506 | /// and index in idx, and store the results in dst. |
11507 | /// |
11508 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutex2var_ph) |
11509 | #[inline ] |
11510 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11511 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11512 | pub fn _mm256_permutex2var_ph(a: __m256h, idx: __m256i, b: __m256h) -> __m256h { |
11513 | _mm256_castsi256_ph(_mm256_permutex2var_epi16( |
11514 | _mm256_castph_si256(a), |
11515 | idx, |
11516 | _mm256_castph_si256(b), |
11517 | )) |
11518 | } |
11519 | |
11520 | /// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector |
11521 | /// and index in idx, and store the results in dst. |
11522 | /// |
11523 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_ph) |
11524 | #[inline ] |
11525 | #[target_feature (enable = "avx512fp16" )] |
11526 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11527 | pub fn _mm512_permutex2var_ph(a: __m512h, idx: __m512i, b: __m512h) -> __m512h { |
11528 | _mm512_castsi512_ph(_mm512_permutex2var_epi16( |
11529 | _mm512_castph_si512(a), |
11530 | idx, |
11531 | _mm512_castph_si512(b), |
11532 | )) |
11533 | } |
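
// Illustrative sketch (not part of the crate): each 16-bit index selects a lane from
// the concatenation of `a` (indices 0..=7) and `b` (indices 8..=15) in the 128-bit
// form. The interleaving pattern and helper name are assumptions for the example.
//
// #[target_feature(enable = "avx512fp16,avx512vl")]
// fn interleave_low(a: __m128h, b: __m128h) -> __m128h {
//     let idx = _mm_set_epi16(11, 3, 10, 2, 9, 1, 8, 0);
//     _mm_permutex2var_ph(a, idx, b) // [a0, b0, a1, b1, a2, b2, a3, b3]
// }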
11534 | |
11535 | /// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx, |
11536 | /// and store the results in dst. |
11537 | /// |
11538 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutexvar_ph) |
11539 | #[inline ] |
11540 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11541 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11542 | pub fn _mm_permutexvar_ph(idx: __m128i, a: __m128h) -> __m128h { |
11543 | _mm_castsi128_ph(_mm_permutexvar_epi16(idx, _mm_castph_si128(a))) |
11544 | } |
11545 | |
11546 | /// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx, |
11547 | /// and store the results in dst. |
11548 | /// |
11549 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutexvar_ph) |
11550 | #[inline ] |
11551 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11552 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11553 | pub fn _mm256_permutexvar_ph(idx: __m256i, a: __m256h) -> __m256h { |
11554 | _mm256_castsi256_ph(_mm256_permutexvar_epi16(idx, _mm256_castph_si256(a))) |
11555 | } |
11556 | |
11557 | /// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx, |
11558 | /// and store the results in dst. |
11559 | /// |
11560 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_ph) |
11561 | #[inline ] |
11562 | #[target_feature (enable = "avx512fp16" )] |
11563 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11564 | pub fn _mm512_permutexvar_ph(idx: __m512i, a: __m512h) -> __m512h { |
11565 | _mm512_castsi512_ph(_mm512_permutexvar_epi16(idx, _mm512_castph_si512(a))) |
11566 | } |
11567 | |
11568 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11569 | /// and store the results in dst. |
11570 | /// |
11571 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_ph) |
11572 | #[inline ] |
11573 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11574 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11575 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11576 | pub fn _mm_cvtepi16_ph(a: __m128i) -> __m128h { |
11577 | unsafe { vcvtw2ph_128(a.as_i16x8(), _MM_FROUND_CUR_DIRECTION) } |
11578 | } |
11579 | |
11580 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11581 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11582 | /// mask bit is not set). |
11583 | /// |
11584 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi16_ph) |
11585 | #[inline ] |
11586 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11587 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11588 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11589 | pub fn _mm_mask_cvtepi16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { |
11590 | unsafe { simd_select_bitmask(k, _mm_cvtepi16_ph(a), src) } |
11591 | } |
11592 | |
11593 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11594 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11595 | /// |
11596 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi16_ph) |
11597 | #[inline ] |
11598 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11599 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11600 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11601 | pub fn _mm_maskz_cvtepi16_ph(k: __mmask8, a: __m128i) -> __m128h { |
11602 | _mm_mask_cvtepi16_ph(_mm_setzero_ph(), k, a) |
11603 | } |
11604 | |
11605 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11606 | /// and store the results in dst. |
11607 | /// |
11608 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_ph) |
11609 | #[inline ] |
11610 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11611 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11612 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11613 | pub fn _mm256_cvtepi16_ph(a: __m256i) -> __m256h { |
11614 | unsafe { vcvtw2ph_256(a.as_i16x16(), _MM_FROUND_CUR_DIRECTION) } |
11615 | } |
11616 | |
11617 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11618 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11619 | /// mask bit is not set). |
11620 | /// |
11621 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi16_ph) |
11622 | #[inline ] |
11623 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11624 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11625 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11626 | pub fn _mm256_mask_cvtepi16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h { |
11627 | unsafe { simd_select_bitmask(k, _mm256_cvtepi16_ph(a), src) } |
11628 | } |
11629 | |
11630 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11631 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11632 | /// |
11633 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi16_ph) |
11634 | #[inline ] |
11635 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11636 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11637 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11638 | pub fn _mm256_maskz_cvtepi16_ph(k: __mmask16, a: __m256i) -> __m256h { |
11639 | _mm256_mask_cvtepi16_ph(_mm256_setzero_ph(), k, a) |
11640 | } |
11641 | |
11642 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11643 | /// and store the results in dst. |
11644 | /// |
11645 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi16_ph) |
11646 | #[inline ] |
11647 | #[target_feature (enable = "avx512fp16" )] |
11648 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11649 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11650 | pub fn _mm512_cvtepi16_ph(a: __m512i) -> __m512h { |
11651 | unsafe { vcvtw2ph_512(a.as_i16x32(), _MM_FROUND_CUR_DIRECTION) } |
11652 | } |
11653 | |
11654 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11655 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11656 | /// mask bit is not set). |
11657 | /// |
11658 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi16_ph) |
11659 | #[inline ] |
11660 | #[target_feature (enable = "avx512fp16" )] |
11661 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11662 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11663 | pub fn _mm512_mask_cvtepi16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h { |
11664 | unsafe { simd_select_bitmask(k, _mm512_cvtepi16_ph(a), src) } |
11665 | } |
11666 | |
11667 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11668 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11669 | /// |
11670 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi16_ph) |
11671 | #[inline ] |
11672 | #[target_feature (enable = "avx512fp16" )] |
11673 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11674 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11675 | pub fn _mm512_maskz_cvtepi16_ph(k: __mmask32, a: __m512i) -> __m512h { |
11676 | _mm512_mask_cvtepi16_ph(_mm512_setzero_ph(), k, a) |
11677 | } |
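
// Illustrative sketch (not part of the crate): convert 32 signed 16-bit integers to
// f16 lanes, zeroing every odd lane. Values with magnitude above 2048 may round,
// since f16 carries an 11-bit significand. The mask and helper name are assumptions
// for the example.
//
// #[target_feature(enable = "avx512fp16")]
// fn even_lanes_to_f16(a: __m512i) -> __m512h {
//     _mm512_maskz_cvtepi16_ph(0x5555_5555, a)
// }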
11678 | |
11679 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11680 | /// and store the results in dst. |
11681 | /// |
11682 | /// Rounding is done according to the rounding parameter, which can be one of: |
11683 | /// |
11684 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
11685 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
11686 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
11687 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
11688 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
11689 | /// |
11690 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi16_ph) |
11691 | #[inline ] |
11692 | #[target_feature (enable = "avx512fp16" )] |
11693 | #[cfg_attr (test, assert_instr(vcvtw2ph, ROUNDING = 8))] |
11694 | #[rustc_legacy_const_generics (1)] |
11695 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11696 | pub fn _mm512_cvt_roundepi16_ph<const ROUNDING: i32>(a: __m512i) -> __m512h { |
11697 | unsafe { |
11698 | static_assert_rounding!(ROUNDING); |
11699 | vcvtw2ph_512(a.as_i16x32(), ROUNDING) |
11700 | } |
11701 | } |
11702 | |
11703 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11704 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11705 | /// mask bit is not set). |
11706 | /// |
11707 | /// Rounding is done according to the rounding parameter, which can be one of: |
11708 | /// |
11709 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
11710 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
11711 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
11712 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
11713 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
11714 | /// |
11715 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi16_ph) |
11716 | #[inline ] |
11717 | #[target_feature (enable = "avx512fp16" )] |
11718 | #[cfg_attr (test, assert_instr(vcvtw2ph, ROUNDING = 8))] |
11719 | #[rustc_legacy_const_generics (3)] |
11720 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11721 | pub fn _mm512_mask_cvt_roundepi16_ph<const ROUNDING: i32>( |
11722 | src: __m512h, |
11723 | k: __mmask32, |
11724 | a: __m512i, |
11725 | ) -> __m512h { |
11726 | unsafe { |
11727 | static_assert_rounding!(ROUNDING); |
11728 | simd_select_bitmask(k, _mm512_cvt_roundepi16_ph::<ROUNDING>(a), src) |
11729 | } |
11730 | } |
11731 | |
11732 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11733 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11734 | /// |
11735 | /// Rounding is done according to the rounding parameter, which can be one of: |
11736 | /// |
11737 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
11738 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
11739 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
11740 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
11741 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
11742 | /// |
11743 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi16_ph) |
11744 | #[inline ] |
11745 | #[target_feature (enable = "avx512fp16" )] |
11746 | #[cfg_attr (test, assert_instr(vcvtw2ph, ROUNDING = 8))] |
11747 | #[rustc_legacy_const_generics (2)] |
11748 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11749 | pub fn _mm512_maskz_cvt_roundepi16_ph<const ROUNDING: i32>(k: __mmask32, a: __m512i) -> __m512h { |
11750 | static_assert_rounding!(ROUNDING); |
11751 | _mm512_mask_cvt_roundepi16_ph::<ROUNDING>(_mm512_setzero_ph(), k, a) |
11752 | } |
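
// Illustrative sketch (not part of the crate): override MXCSR and truncate toward zero
// during the int16 -> f16 conversion, with exceptions suppressed. The helper name is an
// assumption for the example.
//
// #[target_feature(enable = "avx512fp16")]
// fn to_f16_truncating(a: __m512i) -> __m512h {
//     _mm512_cvt_roundepi16_ph::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a)
// }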
11753 | |
11754 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11755 | /// and store the results in dst. |
11756 | /// |
11757 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_ph) |
11758 | #[inline ] |
11759 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11760 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11761 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11762 | pub fn _mm_cvtepu16_ph(a: __m128i) -> __m128h { |
11763 | unsafe { vcvtuw2ph_128(a.as_u16x8(), _MM_FROUND_CUR_DIRECTION) } |
11764 | } |
11765 | |
11766 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11767 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11768 | /// mask bit is not set). |
11769 | /// |
11770 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu16_ph) |
11771 | #[inline ] |
11772 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11773 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11774 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11775 | pub fn _mm_mask_cvtepu16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { |
11776 | unsafe { simd_select_bitmask(k, _mm_cvtepu16_ph(a), src) } |
11777 | } |
11778 | |
11779 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11780 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11781 | /// |
11782 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu16_ph) |
11783 | #[inline ] |
11784 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11785 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11786 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11787 | pub fn _mm_maskz_cvtepu16_ph(k: __mmask8, a: __m128i) -> __m128h { |
11788 | _mm_mask_cvtepu16_ph(_mm_setzero_ph(), k, a) |
11789 | } |
11790 | |
11791 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11792 | /// and store the results in dst. |
11793 | /// |
11794 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu16_ph) |
11795 | #[inline ] |
11796 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11797 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11798 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11799 | pub fn _mm256_cvtepu16_ph(a: __m256i) -> __m256h { |
11800 | unsafe { vcvtuw2ph_256(a.as_u16x16(), _MM_FROUND_CUR_DIRECTION) } |
11801 | } |
11802 | |
11803 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11804 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11805 | /// mask bit is not set). |
11806 | /// |
11807 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu16_ph) |
11808 | #[inline ] |
11809 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11810 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11811 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11812 | pub fn _mm256_mask_cvtepu16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h { |
11813 | unsafe { simd_select_bitmask(k, _mm256_cvtepu16_ph(a), src) } |
11814 | } |
11815 | |
11816 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11817 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11818 | /// |
11819 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu16_ph) |
11820 | #[inline ] |
11821 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11822 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11823 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11824 | pub fn _mm256_maskz_cvtepu16_ph(k: __mmask16, a: __m256i) -> __m256h { |
11825 | _mm256_mask_cvtepu16_ph(_mm256_setzero_ph(), k, a) |
11826 | } |
11827 | |
11828 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11829 | /// and store the results in dst. |
11830 | /// |
11831 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu16_ph) |
11832 | #[inline ] |
11833 | #[target_feature (enable = "avx512fp16" )] |
11834 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11835 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11836 | pub fn _mm512_cvtepu16_ph(a: __m512i) -> __m512h { |
11837 | unsafe { vcvtuw2ph_512(a.as_u16x32(), _MM_FROUND_CUR_DIRECTION) } |
11838 | } |
11839 | |
11840 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11841 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11842 | /// mask bit is not set). |
11843 | /// |
11844 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu16_ph) |
11845 | #[inline ] |
11846 | #[target_feature (enable = "avx512fp16" )] |
11847 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11848 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11849 | pub fn _mm512_mask_cvtepu16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h { |
11850 | unsafe { simd_select_bitmask(k, _mm512_cvtepu16_ph(a), src) } |
11851 | } |
11852 | |
11853 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11854 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11855 | /// |
11856 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu16_ph) |
11857 | #[inline ] |
11858 | #[target_feature (enable = "avx512fp16" )] |
11859 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11860 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11861 | pub fn _mm512_maskz_cvtepu16_ph(k: __mmask32, a: __m512i) -> __m512h { |
    _mm512_mask_cvtepu16_ph(_mm512_setzero_ph(), k, a)
11863 | } |
11864 | |
11865 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11866 | /// and store the results in dst. |
11867 | /// |
11868 | /// Rounding is done according to the rounding parameter, which can be one of: |
11869 | /// |
11870 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
11871 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
11872 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
11873 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
11874 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
11875 | /// |
11876 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu16_ph) |
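///
/// A hypothetical sketch of how the rounding mode is supplied as a const generic (illustrative
/// only; assumes nightly `stdarch_x86_avx512_f16` plus AVX512-FP16 hardware, hence `ignore`).
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn cvt_round_up(a: __m512i) -> __m512h {
///     // A rounding direction OR'ed with `_MM_FROUND_NO_EXC` selects the behaviour at compile time.
///     _mm512_cvt_roundepu16_ph::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a)
/// }
/// ```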
11877 | #[inline ] |
11878 | #[target_feature (enable = "avx512fp16" )] |
11879 | #[cfg_attr (test, assert_instr(vcvtuw2ph, ROUNDING = 8))] |
11880 | #[rustc_legacy_const_generics (1)] |
11881 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11882 | pub fn _mm512_cvt_roundepu16_ph<const ROUNDING: i32>(a: __m512i) -> __m512h { |
11883 | unsafe { |
11884 | static_assert_rounding!(ROUNDING); |
11885 | vcvtuw2ph_512(a.as_u16x32(), ROUNDING) |
11886 | } |
11887 | } |
11888 | |
11889 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11890 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11891 | /// mask bit is not set). |
11892 | /// |
11893 | /// Rounding is done according to the rounding parameter, which can be one of: |
11894 | /// |
11895 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
11896 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
11897 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
11898 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
11899 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
11900 | /// |
11901 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu16_ph) |
11902 | #[inline ] |
11903 | #[target_feature (enable = "avx512fp16" )] |
11904 | #[cfg_attr (test, assert_instr(vcvtuw2ph, ROUNDING = 8))] |
11905 | #[rustc_legacy_const_generics (3)] |
11906 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11907 | pub fn _mm512_mask_cvt_roundepu16_ph<const ROUNDING: i32>( |
11908 | src: __m512h, |
11909 | k: __mmask32, |
11910 | a: __m512i, |
11911 | ) -> __m512h { |
11912 | unsafe { |
11913 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_cvt_roundepu16_ph::<ROUNDING>(a), src)
11915 | } |
11916 | } |
11917 | |
11918 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11919 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11920 | /// |
11921 | /// Rounding is done according to the rounding parameter, which can be one of: |
11922 | /// |
11923 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
11924 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
11925 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
11926 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
11927 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
11928 | /// |
11929 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu16_ph) |
11930 | #[inline ] |
11931 | #[target_feature (enable = "avx512fp16" )] |
11932 | #[cfg_attr (test, assert_instr(vcvtuw2ph, ROUNDING = 8))] |
11933 | #[rustc_legacy_const_generics (2)] |
11934 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11935 | pub fn _mm512_maskz_cvt_roundepu16_ph<const ROUNDING: i32>(k: __mmask32, a: __m512i) -> __m512h { |
11936 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_cvt_roundepu16_ph::<ROUNDING>(_mm512_setzero_ph(), k, a)
11938 | } |
11939 | |
11940 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11941 | /// and store the results in dst. The upper 64 bits of dst are zeroed out. |
11942 | /// |
11943 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ph) |
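///
/// A hypothetical sketch of the lane layout (illustrative only; assumes nightly
/// `stdarch_x86_avx512_f16` plus AVX512-FP16/AVX512-VL hardware, hence `ignore`).
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn four_low_lanes(a: __m128i) -> __m128h {
///     // The four i32 lanes land in f16 lanes 0..4; lanes 4..8 of the result are 0.0.
///     _mm_cvtepi32_ph(a)
/// }
/// ```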
11944 | #[inline ] |
11945 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11946 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
11947 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11948 | pub fn _mm_cvtepi32_ph(a: __m128i) -> __m128h { |
    _mm_mask_cvtepi32_ph(_mm_setzero_ph(), 0xff, a)
11950 | } |
11951 | |
11952 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11953 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11954 | /// mask bit is not set). The upper 64 bits of dst are zeroed out. |
11955 | /// |
11956 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi32_ph) |
11957 | #[inline ] |
11958 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11959 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
11960 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11961 | pub fn _mm_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { |
11962 | unsafe { vcvtdq2ph_128(a.as_i32x4(), src, k) } |
11963 | } |
11964 | |
11965 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11966 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11967 | /// The upper 64 bits of dst are zeroed out. |
11968 | /// |
11969 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi32_ph) |
11970 | #[inline ] |
11971 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11972 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
11973 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11974 | pub fn _mm_maskz_cvtepi32_ph(k: __mmask8, a: __m128i) -> __m128h { |
    _mm_mask_cvtepi32_ph(_mm_setzero_ph(), k, a)
11976 | } |
11977 | |
11978 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11979 | /// and store the results in dst. |
11980 | /// |
11981 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_ph) |
11982 | #[inline ] |
11983 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11984 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
11985 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11986 | pub fn _mm256_cvtepi32_ph(a: __m256i) -> __m128h { |
11987 | unsafe { vcvtdq2ph_256(a.as_i32x8(), _MM_FROUND_CUR_DIRECTION) } |
11988 | } |
11989 | |
11990 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11991 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11992 | /// mask bit is not set). |
11993 | /// |
11994 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi32_ph) |
11995 | #[inline ] |
11996 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11997 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
11998 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11999 | pub fn _mm256_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm256_cvtepi32_ph(a), src) }
12001 | } |
12002 | |
12003 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12004 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12005 | /// |
12006 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi32_ph) |
12007 | #[inline ] |
12008 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12009 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
12010 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12011 | pub fn _mm256_maskz_cvtepi32_ph(k: __mmask8, a: __m256i) -> __m128h { |
    _mm256_mask_cvtepi32_ph(_mm_setzero_ph(), k, a)
12013 | } |
12014 | |
12015 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12016 | /// and store the results in dst. |
12017 | /// |
12018 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi32_ph) |
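///
/// A hypothetical sketch of the width narrowing (illustrative only; assumes nightly
/// `stdarch_x86_avx512_f16` plus AVX512-FP16 hardware, hence `ignore`).
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn narrow_to_256(a: __m512i) -> __m256h {
///     // Sixteen i32 lanes become sixteen f16 lanes, so the result is a 256-bit vector.
///     _mm512_cvtepi32_ph(a)
/// }
/// ```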
12019 | #[inline ] |
12020 | #[target_feature (enable = "avx512fp16" )] |
12021 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
12022 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12023 | pub fn _mm512_cvtepi32_ph(a: __m512i) -> __m256h { |
12024 | unsafe { vcvtdq2ph_512(a.as_i32x16(), _MM_FROUND_CUR_DIRECTION) } |
12025 | } |
12026 | |
12027 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12028 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12029 | /// mask bit is not set). |
12030 | /// |
12031 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi32_ph) |
12032 | #[inline ] |
12033 | #[target_feature (enable = "avx512fp16" )] |
12034 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
12035 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12036 | pub fn _mm512_mask_cvtepi32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm512_cvtepi32_ph(a), src) }
12038 | } |
12039 | |
12040 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12041 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12042 | /// |
12043 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi32_ph) |
12044 | #[inline ] |
12045 | #[target_feature (enable = "avx512fp16" )] |
12046 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
12047 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12048 | pub fn _mm512_maskz_cvtepi32_ph(k: __mmask16, a: __m512i) -> __m256h { |
    _mm512_mask_cvtepi32_ph(_mm256_setzero_ph(), k, a)
12050 | } |
12051 | |
12052 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12053 | /// and store the results in dst. |
12054 | /// |
12055 | /// Rounding is done according to the rounding parameter, which can be one of: |
12056 | /// |
12057 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12058 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12059 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12060 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12061 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12062 | /// |
12063 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi32_ph) |
12064 | #[inline ] |
12065 | #[target_feature (enable = "avx512fp16" )] |
12066 | #[cfg_attr (test, assert_instr(vcvtdq2ph, ROUNDING = 8))] |
12067 | #[rustc_legacy_const_generics (1)] |
12068 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12069 | pub fn _mm512_cvt_roundepi32_ph<const ROUNDING: i32>(a: __m512i) -> __m256h { |
12070 | unsafe { |
12071 | static_assert_rounding!(ROUNDING); |
12072 | vcvtdq2ph_512(a.as_i32x16(), ROUNDING) |
12073 | } |
12074 | } |
12075 | |
12076 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12077 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12078 | /// mask bit is not set). |
12079 | /// |
12080 | /// Rounding is done according to the rounding parameter, which can be one of: |
12081 | /// |
12082 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12083 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12084 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12085 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12086 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12087 | /// |
12088 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi32_ph) |
12089 | #[inline ] |
12090 | #[target_feature (enable = "avx512fp16" )] |
12091 | #[cfg_attr (test, assert_instr(vcvtdq2ph, ROUNDING = 8))] |
12092 | #[rustc_legacy_const_generics (3)] |
12093 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12094 | pub fn _mm512_mask_cvt_roundepi32_ph<const ROUNDING: i32>( |
12095 | src: __m256h, |
12096 | k: __mmask16, |
12097 | a: __m512i, |
12098 | ) -> __m256h { |
12099 | unsafe { |
12100 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_cvt_roundepi32_ph::<ROUNDING>(a), src)
12102 | } |
12103 | } |
12104 | |
12105 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12106 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12107 | /// |
12108 | /// Rounding is done according to the rounding parameter, which can be one of: |
12109 | /// |
12110 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12111 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12112 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12113 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12114 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12115 | /// |
12116 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi32_ph) |
12117 | #[inline ] |
12118 | #[target_feature (enable = "avx512fp16" )] |
12119 | #[cfg_attr (test, assert_instr(vcvtdq2ph, ROUNDING = 8))] |
12120 | #[rustc_legacy_const_generics (2)] |
12121 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12122 | pub fn _mm512_maskz_cvt_roundepi32_ph<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m256h { |
12123 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_cvt_roundepi32_ph::<ROUNDING>(_mm256_setzero_ph(), k, a)
12125 | } |
12126 | |
12127 | /// Convert the signed 32-bit integer b to a half-precision (16-bit) floating-point element, store the |
12128 | /// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements |
12129 | /// of dst. |
12130 | /// |
12131 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti32_sh) |
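///
/// A hypothetical sketch of the scalar insert (illustrative only; assumes nightly
/// `stdarch_x86_avx512_f16` plus AVX512-FP16 hardware, hence `ignore`).
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn set_low_lane(a: __m128h) -> __m128h {
///     // Lane 0 becomes 42.0 (converted from the i32); lanes 1..8 are copied from `a`.
///     _mm_cvti32_sh(a, 42)
/// }
/// ```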
12132 | #[inline ] |
12133 | #[target_feature (enable = "avx512fp16" )] |
12134 | #[cfg_attr (test, assert_instr(vcvtsi2sh))] |
12135 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12136 | pub fn _mm_cvti32_sh(a: __m128h, b: i32) -> __m128h { |
12137 | unsafe { vcvtsi2sh(a, b, _MM_FROUND_CUR_DIRECTION) } |
12138 | } |
12139 | |
12140 | /// Convert the signed 32-bit integer b to a half-precision (16-bit) floating-point element, store the |
12141 | /// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements |
12142 | /// of dst. |
12143 | /// |
12144 | /// Rounding is done according to the rounding parameter, which can be one of: |
12145 | /// |
12146 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12147 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12148 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12149 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12150 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12151 | /// |
12152 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi32_sh) |
12153 | #[inline ] |
12154 | #[target_feature (enable = "avx512fp16" )] |
12155 | #[cfg_attr (test, assert_instr(vcvtsi2sh, ROUNDING = 8))] |
12156 | #[rustc_legacy_const_generics (2)] |
12157 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12158 | pub fn _mm_cvt_roundi32_sh<const ROUNDING: i32>(a: __m128h, b: i32) -> __m128h { |
12159 | unsafe { |
12160 | static_assert_rounding!(ROUNDING); |
12161 | vcvtsi2sh(a, b, ROUNDING) |
12162 | } |
12163 | } |
12164 | |
12165 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12166 | /// and store the results in dst. The upper 64 bits of dst are zeroed out. |
12167 | /// |
12168 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu32_ph) |
12169 | #[inline ] |
12170 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12171 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12172 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12173 | pub fn _mm_cvtepu32_ph(a: __m128i) -> __m128h { |
    _mm_mask_cvtepu32_ph(_mm_setzero_ph(), 0xff, a)
12175 | } |
12176 | |
12177 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12178 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12179 | /// mask bit is not set). The upper 64 bits of dst are zeroed out. |
12180 | /// |
12181 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu32_ph) |
12182 | #[inline ] |
12183 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12184 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12185 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12186 | pub fn _mm_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { |
12187 | unsafe { vcvtudq2ph_128(a.as_u32x4(), src, k) } |
12188 | } |
12189 | |
12190 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12191 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12192 | /// The upper 64 bits of dst are zeroed out. |
12193 | /// |
12194 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu32_ph) |
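///
/// A hypothetical sketch of the zeromask behaviour (illustrative only; assumes nightly
/// `stdarch_x86_avx512_f16` plus AVX512-FP16/AVX512-VL hardware, hence `ignore`).
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn only_lane_zero(a: __m128i) -> __m128h {
///     // Only mask bit 0 is set, so lane 0 holds the converted value and every other lane is 0.0.
///     _mm_maskz_cvtepu32_ph(0b0000_0001, a)
/// }
/// ```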
12195 | #[inline ] |
12196 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12197 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12198 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12199 | pub fn _mm_maskz_cvtepu32_ph(k: __mmask8, a: __m128i) -> __m128h { |
    _mm_mask_cvtepu32_ph(_mm_setzero_ph(), k, a)
12201 | } |
12202 | |
12203 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12204 | /// and store the results in dst. |
12205 | /// |
12206 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu32_ph) |
12207 | #[inline ] |
12208 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12209 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12210 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12211 | pub fn _mm256_cvtepu32_ph(a: __m256i) -> __m128h { |
12212 | unsafe { vcvtudq2ph_256(a.as_u32x8(), _MM_FROUND_CUR_DIRECTION) } |
12213 | } |
12214 | |
12215 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12216 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12217 | /// mask bit is not set). |
12218 | /// |
12219 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu32_ph) |
12220 | #[inline ] |
12221 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12222 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12223 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12224 | pub fn _mm256_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm256_cvtepu32_ph(a), src) }
12226 | } |
12227 | |
12228 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12229 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12230 | /// |
12231 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu32_ph) |
12232 | #[inline ] |
12233 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12234 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12235 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12236 | pub fn _mm256_maskz_cvtepu32_ph(k: __mmask8, a: __m256i) -> __m128h { |
    _mm256_mask_cvtepu32_ph(_mm_setzero_ph(), k, a)
12238 | } |
12239 | |
12240 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12241 | /// and store the results in dst. |
12242 | /// |
12243 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu32_ph) |
12244 | #[inline ] |
12245 | #[target_feature (enable = "avx512fp16" )] |
12246 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12247 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12248 | pub fn _mm512_cvtepu32_ph(a: __m512i) -> __m256h { |
12249 | unsafe { vcvtudq2ph_512(a.as_u32x16(), _MM_FROUND_CUR_DIRECTION) } |
12250 | } |
12251 | |
12252 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12253 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12254 | /// mask bit is not set). |
12255 | /// |
12256 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu32_ph) |
12257 | #[inline ] |
12258 | #[target_feature (enable = "avx512fp16" )] |
12259 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12260 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12261 | pub fn _mm512_mask_cvtepu32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm512_cvtepu32_ph(a), src) }
12263 | } |
12264 | |
12265 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12266 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12267 | /// |
12268 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu32_ph) |
12269 | #[inline ] |
12270 | #[target_feature (enable = "avx512fp16" )] |
12271 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12272 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12273 | pub fn _mm512_maskz_cvtepu32_ph(k: __mmask16, a: __m512i) -> __m256h { |
    _mm512_mask_cvtepu32_ph(_mm256_setzero_ph(), k, a)
12275 | } |
12276 | |
12277 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12278 | /// and store the results in dst. |
12279 | /// |
12280 | /// Rounding is done according to the rounding parameter, which can be one of: |
12281 | /// |
12282 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12283 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12284 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12285 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12286 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12287 | /// |
12288 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu32_ph) |
12289 | #[inline ] |
12290 | #[target_feature (enable = "avx512fp16" )] |
12291 | #[cfg_attr (test, assert_instr(vcvtudq2ph, ROUNDING = 8))] |
12292 | #[rustc_legacy_const_generics (1)] |
12293 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12294 | pub fn _mm512_cvt_roundepu32_ph<const ROUNDING: i32>(a: __m512i) -> __m256h { |
12295 | unsafe { |
12296 | static_assert_rounding!(ROUNDING); |
12297 | vcvtudq2ph_512(a.as_u32x16(), ROUNDING) |
12298 | } |
12299 | } |
12300 | |
12301 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12302 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12303 | /// mask bit is not set). |
12304 | /// |
12305 | /// Rounding is done according to the rounding parameter, which can be one of: |
12306 | /// |
12307 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12308 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12309 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12310 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12311 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12312 | /// |
12313 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu32_ph) |
12314 | #[inline ] |
12315 | #[target_feature (enable = "avx512fp16" )] |
12316 | #[cfg_attr (test, assert_instr(vcvtudq2ph, ROUNDING = 8))] |
12317 | #[rustc_legacy_const_generics (3)] |
12318 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12319 | pub fn _mm512_mask_cvt_roundepu32_ph<const ROUNDING: i32>( |
12320 | src: __m256h, |
12321 | k: __mmask16, |
12322 | a: __m512i, |
12323 | ) -> __m256h { |
12324 | unsafe { |
12325 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_cvt_roundepu32_ph::<ROUNDING>(a), src)
12327 | } |
12328 | } |
12329 | |
12330 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12331 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12332 | /// |
12333 | /// Rounding is done according to the rounding parameter, which can be one of: |
12334 | /// |
12335 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12336 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12337 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12338 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12339 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12340 | /// |
12341 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu32_ph) |
12342 | #[inline ] |
12343 | #[target_feature (enable = "avx512fp16" )] |
12344 | #[cfg_attr (test, assert_instr(vcvtudq2ph, ROUNDING = 8))] |
12345 | #[rustc_legacy_const_generics (2)] |
12346 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12347 | pub fn _mm512_maskz_cvt_roundepu32_ph<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m256h { |
12348 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_cvt_roundepu32_ph::<ROUNDING>(_mm256_setzero_ph(), k, a)
12350 | } |
12351 | |
12352 | /// Convert the unsigned 32-bit integer b to a half-precision (16-bit) floating-point element, store the |
12353 | /// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements |
12354 | /// of dst. |
12355 | /// |
12356 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu32_sh) |
12357 | #[inline ] |
12358 | #[target_feature (enable = "avx512fp16" )] |
12359 | #[cfg_attr (test, assert_instr(vcvtusi2sh))] |
12360 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12361 | pub fn _mm_cvtu32_sh(a: __m128h, b: u32) -> __m128h { |
12362 | unsafe { vcvtusi2sh(a, b, _MM_FROUND_CUR_DIRECTION) } |
12363 | } |
12364 | |
12365 | /// Convert the unsigned 32-bit integer b to a half-precision (16-bit) floating-point element, store the |
12366 | /// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements |
12367 | /// of dst. |
12368 | /// |
12369 | /// Rounding is done according to the rounding parameter, which can be one of: |
12370 | /// |
12371 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12372 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12373 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12374 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12375 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12376 | /// |
12377 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu32_sh) |
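///
/// A hypothetical sketch combining the scalar convert with an explicit rounding mode
/// (illustrative only; assumes nightly `stdarch_x86_avx512_f16` plus AVX512-FP16 hardware,
/// hence `ignore`).
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn truncate_into_low(a: __m128h, b: u32) -> __m128h {
///     // Convert `b` with truncation and suppressed exceptions; the upper lanes come from `a`.
///     _mm_cvt_roundu32_sh::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b)
/// }
/// ```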
12378 | #[inline ] |
12379 | #[target_feature (enable = "avx512fp16" )] |
12380 | #[cfg_attr (test, assert_instr(vcvtusi2sh, ROUNDING = 8))] |
12381 | #[rustc_legacy_const_generics (2)] |
12382 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12383 | pub fn _mm_cvt_roundu32_sh<const ROUNDING: i32>(a: __m128h, b: u32) -> __m128h { |
12384 | unsafe { |
12385 | static_assert_rounding!(ROUNDING); |
12386 | vcvtusi2sh(a, b, ROUNDING) |
12387 | } |
12388 | } |
12389 | |
12390 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12391 | /// and store the results in dst. The upper 96 bits of dst are zeroed out. |
12392 | /// |
12393 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi64_ph) |
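///
/// A hypothetical sketch of the lane layout (illustrative only; assumes nightly
/// `stdarch_x86_avx512_f16` plus AVX512-FP16/AVX512-VL hardware, hence `ignore`).
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn two_low_lanes(a: __m128i) -> __m128h {
///     // The two i64 lanes land in f16 lanes 0 and 1; lanes 2..8 of the result are 0.0.
///     _mm_cvtepi64_ph(a)
/// }
/// ```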
12394 | #[inline ] |
12395 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12396 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12397 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12398 | pub fn _mm_cvtepi64_ph(a: __m128i) -> __m128h { |
    _mm_mask_cvtepi64_ph(_mm_setzero_ph(), 0xff, a)
12400 | } |
12401 | |
12402 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12403 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12404 | /// mask bit is not set). The upper 96 bits of dst are zeroed out. |
12405 | /// |
12406 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi64_ph) |
12407 | #[inline ] |
12408 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12409 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12410 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12411 | pub fn _mm_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { |
12412 | unsafe { vcvtqq2ph_128(a.as_i64x2(), src, k) } |
12413 | } |
12414 | |
12415 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12416 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12417 | /// The upper 96 bits of dst are zeroed out. |
12418 | /// |
12419 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi64_ph) |
12420 | #[inline ] |
12421 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12422 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12423 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12424 | pub fn _mm_maskz_cvtepi64_ph(k: __mmask8, a: __m128i) -> __m128h { |
    _mm_mask_cvtepi64_ph(_mm_setzero_ph(), k, a)
12426 | } |
12427 | |
12428 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12429 | /// and store the results in dst. The upper 64 bits of dst are zeroed out. |
12430 | /// |
12431 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi64_ph) |
12432 | #[inline ] |
12433 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12434 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12435 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12436 | pub fn _mm256_cvtepi64_ph(a: __m256i) -> __m128h { |
    _mm256_mask_cvtepi64_ph(_mm_setzero_ph(), 0xff, a)
12438 | } |
12439 | |
12440 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12441 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12442 | /// mask bit is not set). The upper 64 bits of dst are zeroed out. |
12443 | /// |
12444 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi64_ph) |
12445 | #[inline ] |
12446 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12447 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12448 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12449 | pub fn _mm256_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h { |
12450 | unsafe { vcvtqq2ph_256(a.as_i64x4(), src, k) } |
12451 | } |
12452 | |
12453 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12454 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12455 | /// The upper 64 bits of dst are zeroed out. |
12456 | /// |
12457 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi64_ph) |
12458 | #[inline ] |
12459 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12460 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12461 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12462 | pub fn _mm256_maskz_cvtepi64_ph(k: __mmask8, a: __m256i) -> __m128h { |
    _mm256_mask_cvtepi64_ph(_mm_setzero_ph(), k, a)
12464 | } |
12465 | |
12466 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12467 | /// and store the results in dst. |
12468 | /// |
12469 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi64_ph) |
12470 | #[inline ] |
12471 | #[target_feature (enable = "avx512fp16" )] |
12472 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12473 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12474 | pub fn _mm512_cvtepi64_ph(a: __m512i) -> __m128h { |
12475 | unsafe { vcvtqq2ph_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION) } |
12476 | } |
12477 | |
12478 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12479 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12480 | /// mask bit is not set). |
12481 | /// |
12482 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi64_ph) |
12483 | #[inline ] |
12484 | #[target_feature (enable = "avx512fp16" )] |
12485 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12486 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12487 | pub fn _mm512_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm512_cvtepi64_ph(a), src) }
12489 | } |
12490 | |
12491 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12492 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12493 | /// |
12494 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi64_ph) |
12495 | #[inline ] |
12496 | #[target_feature (enable = "avx512fp16" )] |
12497 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12498 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12499 | pub fn _mm512_maskz_cvtepi64_ph(k: __mmask8, a: __m512i) -> __m128h { |
    _mm512_mask_cvtepi64_ph(_mm_setzero_ph(), k, a)
12501 | } |
12502 | |
12503 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12504 | /// and store the results in dst. |
12505 | /// |
12506 | /// Rounding is done according to the rounding parameter, which can be one of: |
12507 | /// |
12508 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12509 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12510 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12511 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12512 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12513 | /// |
12514 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi64_ph) |
12515 | #[inline ] |
12516 | #[target_feature (enable = "avx512fp16" )] |
12517 | #[cfg_attr (test, assert_instr(vcvtqq2ph, ROUNDING = 8))] |
12518 | #[rustc_legacy_const_generics (1)] |
12519 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12520 | pub fn _mm512_cvt_roundepi64_ph<const ROUNDING: i32>(a: __m512i) -> __m128h { |
12521 | unsafe { |
12522 | static_assert_rounding!(ROUNDING); |
12523 | vcvtqq2ph_512(a.as_i64x8(), ROUNDING) |
12524 | } |
12525 | } |
12526 | |
12527 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12528 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12529 | /// mask bit is not set). |
12530 | /// |
12531 | /// Rounding is done according to the rounding parameter, which can be one of: |
12532 | /// |
12533 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12534 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12535 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12536 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12537 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12538 | /// |
12539 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi64_ph) |
12540 | #[inline ] |
12541 | #[target_feature (enable = "avx512fp16" )] |
12542 | #[cfg_attr (test, assert_instr(vcvtqq2ph, ROUNDING = 8))] |
12543 | #[rustc_legacy_const_generics (3)] |
12544 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12545 | pub fn _mm512_mask_cvt_roundepi64_ph<const ROUNDING: i32>( |
12546 | src: __m128h, |
12547 | k: __mmask8, |
12548 | a: __m512i, |
12549 | ) -> __m128h { |
12550 | unsafe { |
12551 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_cvt_roundepi64_ph::<ROUNDING>(a), src)
12553 | } |
12554 | } |
12555 | |
12556 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12557 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12558 | /// |
12559 | /// Rounding is done according to the rounding parameter, which can be one of: |
12560 | /// |
12561 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12562 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12563 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12564 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12565 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12566 | /// |
12567 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi64_ph) |
12568 | #[inline ] |
12569 | #[target_feature (enable = "avx512fp16" )] |
12570 | #[cfg_attr (test, assert_instr(vcvtqq2ph, ROUNDING = 8))] |
12571 | #[rustc_legacy_const_generics (2)] |
12572 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12573 | pub fn _mm512_maskz_cvt_roundepi64_ph<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m128h { |
12574 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_cvt_roundepi64_ph::<ROUNDING>(_mm_setzero_ph(), k, a)
12576 | } |
12577 | |
12578 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12579 | /// and store the results in dst. The upper 96 bits of dst are zeroed out. |
12580 | /// |
12581 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu64_ph) |
12582 | #[inline ] |
12583 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12584 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12585 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12586 | pub fn _mm_cvtepu64_ph(a: __m128i) -> __m128h { |
    _mm_mask_cvtepu64_ph(_mm_setzero_ph(), 0xff, a)
12588 | } |
12589 | |
12590 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12591 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12592 | /// mask bit is not set). The upper 96 bits of dst are zeroed out. |
12593 | /// |
12594 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu64_ph) |
12595 | #[inline ] |
12596 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12597 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12598 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12599 | pub fn _mm_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { |
12600 | unsafe { vcvtuqq2ph_128(a.as_u64x2(), src, k) } |
12601 | } |
12602 | |
12603 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12604 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12605 | /// The upper 96 bits of dst are zeroed out. |
12606 | /// |
12607 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu64_ph) |
12608 | #[inline ] |
12609 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12610 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12611 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12612 | pub fn _mm_maskz_cvtepu64_ph(k: __mmask8, a: __m128i) -> __m128h { |
    _mm_mask_cvtepu64_ph(_mm_setzero_ph(), k, a)
12614 | } |
12615 | |
12616 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12617 | /// and store the results in dst. The upper 64 bits of dst are zeroed out. |
12618 | /// |
12619 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu64_ph) |
12620 | #[inline ] |
12621 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12622 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12623 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12624 | pub fn _mm256_cvtepu64_ph(a: __m256i) -> __m128h { |
    _mm256_mask_cvtepu64_ph(_mm_setzero_ph(), 0xff, a)
12626 | } |
12627 | |
12628 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12629 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12630 | /// mask bit is not set). The upper 64 bits of dst are zeroed out. |
12631 | /// |
12632 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu64_ph) |
12633 | #[inline ] |
12634 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12635 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12636 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12637 | pub fn _mm256_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h { |
12638 | unsafe { vcvtuqq2ph_256(a.as_u64x4(), src, k) } |
12639 | } |
12640 | |
12641 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12642 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12643 | /// The upper 64 bits of dst are zeroed out. |
12644 | /// |
12645 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu64_ph) |
12646 | #[inline ] |
12647 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12648 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12649 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12650 | pub fn _mm256_maskz_cvtepu64_ph(k: __mmask8, a: __m256i) -> __m128h { |
    _mm256_mask_cvtepu64_ph(_mm_setzero_ph(), k, a)
12652 | } |
12653 | |
12654 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12655 | /// and store the results in dst. |
12656 | /// |
12657 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu64_ph) |
12658 | #[inline ] |
12659 | #[target_feature (enable = "avx512fp16" )] |
12660 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12661 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12662 | pub fn _mm512_cvtepu64_ph(a: __m512i) -> __m128h { |
12663 | unsafe { vcvtuqq2ph_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION) } |
12664 | } |
12665 | |
12666 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12667 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12668 | /// mask bit is not set). |
12669 | /// |
12670 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu64_ph) |
12671 | #[inline ] |
12672 | #[target_feature (enable = "avx512fp16" )] |
12673 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12674 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12675 | pub fn _mm512_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm512_cvtepu64_ph(a), src) }
12677 | } |
12678 | |
12679 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12680 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12681 | /// |
12682 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu64_ph) |
12683 | #[inline ] |
12684 | #[target_feature (enable = "avx512fp16" )] |
12685 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12686 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12687 | pub fn _mm512_maskz_cvtepu64_ph(k: __mmask8, a: __m512i) -> __m128h { |
    _mm512_mask_cvtepu64_ph(_mm_setzero_ph(), k, a)
12689 | } |
12690 | |
12691 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12692 | /// and store the results in dst. |
12693 | /// |
12694 | /// Rounding is done according to the rounding parameter, which can be one of: |
12695 | /// |
12696 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12697 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12698 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12699 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12700 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12701 | /// |
12702 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu64_ph) |
12703 | #[inline ] |
12704 | #[target_feature (enable = "avx512fp16" )] |
12705 | #[cfg_attr (test, assert_instr(vcvtuqq2ph, ROUNDING = 8))] |
12706 | #[rustc_legacy_const_generics (1)] |
12707 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12708 | pub fn _mm512_cvt_roundepu64_ph<const ROUNDING: i32>(a: __m512i) -> __m128h { |
12709 | unsafe { |
12710 | static_assert_rounding!(ROUNDING); |
12711 | vcvtuqq2ph_512(a.as_u64x8(), ROUNDING) |
12712 | } |
12713 | } |
12714 | |
12715 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12716 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12717 | /// mask bit is not set). |
12718 | /// |
12719 | /// Rounding is done according to the rounding parameter, which can be one of: |
12720 | /// |
12721 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12722 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12723 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12724 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12725 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12726 | /// |
12727 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu64_ph) |
12728 | #[inline ] |
12729 | #[target_feature (enable = "avx512fp16" )] |
12730 | #[cfg_attr (test, assert_instr(vcvtuqq2ph, ROUNDING = 8))] |
12731 | #[rustc_legacy_const_generics (3)] |
12732 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12733 | pub fn _mm512_mask_cvt_roundepu64_ph<const ROUNDING: i32>( |
12734 | src: __m128h, |
12735 | k: __mmask8, |
12736 | a: __m512i, |
12737 | ) -> __m128h { |
12738 | unsafe { |
12739 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_cvt_roundepu64_ph::<ROUNDING>(a), src)
12741 | } |
12742 | } |
12743 | |
12744 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12745 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12746 | /// |
12747 | /// Rounding is done according to the rounding parameter, which can be one of: |
12748 | /// |
12749 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12750 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12751 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12752 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12753 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12754 | /// |
12755 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu64_ph) |
12756 | #[inline ] |
12757 | #[target_feature (enable = "avx512fp16" )] |
12758 | #[cfg_attr (test, assert_instr(vcvtuqq2ph, ROUNDING = 8))] |
12759 | #[rustc_legacy_const_generics (2)] |
12760 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12761 | pub fn _mm512_maskz_cvt_roundepu64_ph<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m128h { |
12762 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_cvt_roundepu64_ph::<ROUNDING>(_mm_setzero_ph(), k, a)
12764 | } |
12765 | |
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
/// floating-point elements, and store the results in dst. The upper 64 bits of dst are zeroed out.
12768 | /// |
12769 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxps_ph) |
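///
/// A hypothetical sketch of the f32-to-f16 narrowing (illustrative only; assumes nightly
/// `stdarch_x86_avx512_f16` plus AVX512-FP16/AVX512-VL hardware, hence `ignore`).
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn narrow_ps(a: __m128) -> __m128h {
///     // The four f32 lanes are rounded to f16 in lanes 0..4; lanes 4..8 of the result are 0.0.
///     _mm_cvtxps_ph(a)
/// }
/// ```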
12770 | #[inline ] |
12771 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12772 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12773 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12774 | pub fn _mm_cvtxps_ph(a: __m128) -> __m128h { |
    _mm_mask_cvtxps_ph(_mm_setzero_ph(), 0xff, a)
12776 | } |
12777 | |
12778 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12779 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst |
12780 | /// when the corresponding mask bit is not set). The upper 64 bits of dst are zeroed out. |
12781 | /// |
12782 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxps_ph) |
12783 | #[inline ] |
12784 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12785 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12786 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12787 | pub fn _mm_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m128) -> __m128h { |
12788 | unsafe { vcvtps2phx_128(a, src, k) } |
12789 | } |
12790 | |
12791 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12792 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
12793 | /// corresponding mask bit is not set). The upper 64 bits of dst are zeroed out. |
12794 | /// |
12795 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxps_ph) |
12796 | #[inline ] |
12797 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12798 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12799 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12800 | pub fn _mm_maskz_cvtxps_ph(k: __mmask8, a: __m128) -> __m128h { |
12801 | _mm_mask_cvtxps_ph(_mm_setzero_ph(), k, a) |
12802 | } |
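
// Illustrative sketch (not part of the original source): masked f32 -> f16
// conversion with the 128-bit variant. The helper name and lane values are
// invented; a real caller would first check the required features at runtime,
// e.g. with `is_x86_feature_detected!`.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn _example_mask_cvtxps_ph() -> __m128h {
    let src = _mm_set1_ph(-1.0);
    let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    // Mask 0b0101: lanes 0 and 2 receive the converted values 1.0 and 3.0,
    // lanes 1 and 3 keep -1.0 from `src`; lanes 4..7 of dst are zeroed.
    _mm_mask_cvtxps_ph(src, 0b0101, a)
}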
12803 | |
12804 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12805 | /// floating-point elements, and store the results in dst. |
12806 | /// |
12807 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxps_ph) |
12808 | #[inline ] |
12809 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12810 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12811 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12812 | pub fn _mm256_cvtxps_ph(a: __m256) -> __m128h { |
12813 | _mm256_mask_cvtxps_ph(_mm_setzero_ph(), 0xff, a) |
12814 | } |
12815 | |
12816 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12817 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst |
12818 | /// when the corresponding mask bit is not set). |
12819 | /// |
12820 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxps_ph) |
12821 | #[inline ] |
12822 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12823 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12824 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12825 | pub fn _mm256_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m256) -> __m128h { |
12826 | unsafe { vcvtps2phx_256(a, src, k) } |
12827 | } |
12828 | |
12829 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12830 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
12831 | /// corresponding mask bit is not set). |
12832 | /// |
12833 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxps_ph) |
12834 | #[inline ] |
12835 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12836 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12837 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12838 | pub fn _mm256_maskz_cvtxps_ph(k: __mmask8, a: __m256) -> __m128h { |
12839 | _mm256_mask_cvtxps_ph(_mm_setzero_ph(), k, a) |
12840 | } |
12841 | |
12842 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12843 | /// floating-point elements, and store the results in dst. |
12844 | /// |
12845 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxps_ph) |
12846 | #[inline ] |
12847 | #[target_feature (enable = "avx512fp16" )] |
12848 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12849 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12850 | pub fn _mm512_cvtxps_ph(a: __m512) -> __m256h { |
12851 | _mm512_mask_cvtxps_ph(_mm256_setzero_ph(), 0xffff, a) |
12852 | } |
12853 | |
12854 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12855 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst |
12856 | /// when the corresponding mask bit is not set). |
12857 | /// |
12858 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxps_ph) |
12859 | #[inline ] |
12860 | #[target_feature (enable = "avx512fp16" )] |
12861 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12862 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12863 | pub fn _mm512_mask_cvtxps_ph(src: __m256h, k: __mmask16, a: __m512) -> __m256h { |
12864 | unsafe { vcvtps2phx_512(a, src, k, _MM_FROUND_CUR_DIRECTION) } |
12865 | } |
12866 | |
12867 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12868 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
12869 | /// corresponding mask bit is not set). |
12870 | /// |
12871 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxps_ph) |
12872 | #[inline ] |
12873 | #[target_feature (enable = "avx512fp16" )] |
12874 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12875 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12876 | pub fn _mm512_maskz_cvtxps_ph(k: __mmask16, a: __m512) -> __m256h { |
12877 | _mm512_mask_cvtxps_ph(_mm256_setzero_ph(), k, a) |
12878 | } |
12879 | |
12880 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12881 | /// floating-point elements, and store the results in dst. |
12882 | /// |
12883 | /// Rounding is done according to the rounding parameter, which can be one of: |
12884 | /// |
12885 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12886 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12887 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12888 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12889 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12890 | /// |
12891 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundps_ph) |
12892 | #[inline ] |
12893 | #[target_feature (enable = "avx512fp16" )] |
12894 | #[cfg_attr (test, assert_instr(vcvtps2phx, ROUNDING = 8))] |
12895 | #[rustc_legacy_const_generics (1)] |
12896 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12897 | pub fn _mm512_cvtx_roundps_ph<const ROUNDING: i32>(a: __m512) -> __m256h { |
12898 | static_assert_rounding!(ROUNDING); |
12899 | _mm512_mask_cvtx_roundps_ph::<ROUNDING>(_mm256_setzero_ph(), 0xffff, a) |
12900 | } |
12901 | |
12902 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12903 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst |
12904 | /// when the corresponding mask bit is not set). |
12905 | /// |
12906 | /// Rounding is done according to the rounding parameter, which can be one of: |
12907 | /// |
12908 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12909 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12910 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12911 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12912 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12913 | /// |
12914 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundps_ph) |
12915 | #[inline ] |
12916 | #[target_feature (enable = "avx512fp16" )] |
12917 | #[cfg_attr (test, assert_instr(vcvtps2phx, ROUNDING = 8))] |
12918 | #[rustc_legacy_const_generics (3)] |
12919 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12920 | pub fn _mm512_mask_cvtx_roundps_ph<const ROUNDING: i32>( |
12921 | src: __m256h, |
12922 | k: __mmask16, |
12923 | a: __m512, |
12924 | ) -> __m256h { |
12925 | unsafe { |
12926 | static_assert_rounding!(ROUNDING); |
12927 | vcvtps2phx_512(a, src, k, ROUNDING) |
12928 | } |
12929 | } |
12930 | |
12931 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12932 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
12933 | /// corresponding mask bit is not set). |
12934 | /// |
12935 | /// Rounding is done according to the rounding parameter, which can be one of: |
12936 | /// |
12937 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12938 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12939 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12940 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12941 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12942 | /// |
12943 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundps_ph) |
12944 | #[inline ] |
12945 | #[target_feature (enable = "avx512fp16" )] |
12946 | #[cfg_attr (test, assert_instr(vcvtps2phx, ROUNDING = 8))] |
12947 | #[rustc_legacy_const_generics (2)] |
12948 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12949 | pub fn _mm512_maskz_cvtx_roundps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256h { |
12950 | static_assert_rounding!(ROUNDING); |
12951 | _mm512_mask_cvtx_roundps_ph::<ROUNDING>(_mm256_setzero_ph(), k, a) |
12952 | } |
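
// Illustrative sketch (not part of the original source): the 512-bit ps -> ph
// conversion with an explicit rounding mode. The helper name is invented; the
// rounding/SAE flags must be compile-time constants, hence the const block.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn _example_cvtx_roundps_ph(a: __m512) -> __m256h {
    // Round towards negative infinity and suppress floating-point exceptions.
    _mm512_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a)
}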
12953 | |
12954 | /// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit) |
12955 | /// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed |
12956 | /// elements from a to the upper elements of dst. |
12957 | /// |
12958 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sh) |
12959 | #[inline ] |
12960 | #[target_feature (enable = "avx512fp16" )] |
12961 | #[cfg_attr (test, assert_instr(vcvtss2sh))] |
12962 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12963 | pub fn _mm_cvtss_sh(a: __m128h, b: __m128) -> __m128h { |
12964 | _mm_mask_cvtss_sh(_mm_undefined_ph(), 0xff, a, b) |
12965 | } |
12966 | |
12967 | /// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit) |
12968 | /// floating-point element, store the result in the lower element of dst using writemask k (the element |
12969 | /// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the |
12970 | /// upper elements of dst. |
12971 | /// |
12972 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtss_sh) |
12973 | #[inline ] |
12974 | #[target_feature (enable = "avx512fp16" )] |
12975 | #[cfg_attr (test, assert_instr(vcvtss2sh))] |
12976 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12977 | pub fn _mm_mask_cvtss_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128) -> __m128h { |
12978 | unsafe { vcvtss2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION) } |
12979 | } |
12980 | |
12981 | /// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit) |
12982 | /// floating-point element, store the result in the lower element of dst using zeromask k (the element |
12983 | /// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
12984 | /// elements of dst. |
12985 | /// |
12986 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtss_sh) |
12987 | #[inline ] |
12988 | #[target_feature (enable = "avx512fp16" )] |
12989 | #[cfg_attr (test, assert_instr(vcvtss2sh))] |
12990 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12991 | pub fn _mm_maskz_cvtss_sh(k: __mmask8, a: __m128h, b: __m128) -> __m128h { |
12992 | _mm_mask_cvtss_sh(_mm_setzero_ph(), k, a, b) |
12993 | } |
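
// Illustrative sketch (not part of the original source): converting the low f32
// lane of `b` into the low f16 lane while keeping the upper seven f16 lanes of
// `a`. Names and values are made up for the example.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn _example_cvtss_sh() -> __m128h {
    let a = _mm_set1_ph(2.0);
    let b = _mm_set_ss(0.5);
    // dst = [0.5, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0] as f16 lanes
    _mm_cvtss_sh(a, b)
}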
12994 | |
12995 | /// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit) |
12996 | /// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed |
12997 | /// elements from a to the upper elements of dst. |
12998 | /// |
12999 | /// Rounding is done according to the rounding parameter, which can be one of: |
13000 | /// |
13001 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13002 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13003 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13004 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13005 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13006 | /// |
13007 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_sh) |
13008 | #[inline ] |
13009 | #[target_feature (enable = "avx512fp16" )] |
13010 | #[cfg_attr (test, assert_instr(vcvtss2sh, ROUNDING = 8))] |
13011 | #[rustc_legacy_const_generics (2)] |
13012 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13013 | pub fn _mm_cvt_roundss_sh<const ROUNDING: i32>(a: __m128h, b: __m128) -> __m128h { |
13014 | static_assert_rounding!(ROUNDING); |
13015 | _mm_mask_cvt_roundss_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b) |
13016 | } |
13017 | |
13018 | /// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit) |
13019 | /// floating-point element, store the result in the lower element of dst using writemask k (the element |
13020 | /// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the |
13021 | /// upper elements of dst. |
13022 | /// |
13023 | /// Rounding is done according to the rounding parameter, which can be one of: |
13024 | /// |
13025 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13026 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13027 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13028 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13029 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13030 | /// |
13031 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundss_sh) |
13032 | #[inline ] |
13033 | #[target_feature (enable = "avx512fp16" )] |
13034 | #[cfg_attr (test, assert_instr(vcvtss2sh, ROUNDING = 8))] |
13035 | #[rustc_legacy_const_generics (4)] |
13036 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13037 | pub fn _mm_mask_cvt_roundss_sh<const ROUNDING: i32>( |
13038 | src: __m128h, |
13039 | k: __mmask8, |
13040 | a: __m128h, |
13041 | b: __m128, |
13042 | ) -> __m128h { |
13043 | unsafe { |
13044 | static_assert_rounding!(ROUNDING); |
13045 | vcvtss2sh(a, b, src, k, ROUNDING) |
13046 | } |
13047 | } |
13048 | |
13049 | /// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit) |
13050 | /// floating-point element, store the result in the lower element of dst using zeromask k (the element |
13051 | /// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
13052 | /// elements of dst. |
13053 | /// |
13054 | /// Rounding is done according to the rounding parameter, which can be one of: |
13055 | /// |
13056 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13057 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13058 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13059 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13060 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13061 | /// |
13062 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundss_sh) |
13063 | #[inline ] |
13064 | #[target_feature (enable = "avx512fp16" )] |
13065 | #[cfg_attr (test, assert_instr(vcvtss2sh, ROUNDING = 8))] |
13066 | #[rustc_legacy_const_generics (3)] |
13067 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13068 | pub fn _mm_maskz_cvt_roundss_sh<const ROUNDING: i32>( |
13069 | k: __mmask8, |
13070 | a: __m128h, |
13071 | b: __m128, |
13072 | ) -> __m128h { |
13073 | static_assert_rounding!(ROUNDING); |
13074 | _mm_mask_cvt_roundss_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b) |
13075 | } |
13076 | |
13077 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13078 | /// floating-point elements, and store the results in dst. The upper 96 bits of dst are zeroed out. |
13079 | /// |
13080 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ph) |
13081 | #[inline ] |
13082 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13083 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13084 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13085 | pub fn _mm_cvtpd_ph(a: __m128d) -> __m128h { |
13086 | _mm_mask_cvtpd_ph(_mm_setzero_ph(), 0xff, a) |
13087 | } |
13088 | |
13089 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13090 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst |
13091 | /// when the corresponding mask bit is not set). The upper 96 bits of dst are zeroed out. |
13092 | /// |
13093 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtpd_ph) |
13094 | #[inline ] |
13095 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13096 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13097 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13098 | pub fn _mm_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m128d) -> __m128h { |
13099 | unsafe { vcvtpd2ph_128(a, src, k) } |
13100 | } |
13101 | |
13102 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13103 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
13104 | /// corresponding mask bit is not set). The upper 96 bits of dst are zeroed out. |
13105 | /// |
13106 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtpd_ph) |
13107 | #[inline ] |
13108 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13109 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13110 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13111 | pub fn _mm_maskz_cvtpd_ph(k: __mmask8, a: __m128d) -> __m128h { |
13112 | _mm_mask_cvtpd_ph(_mm_setzero_ph(), k, a) |
13113 | } |
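
// Illustrative sketch (not part of the original source): the 128-bit pd -> ph
// conversion only produces two f16 lanes, so most of dst is zero. The helper
// name and input values are invented for the example.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn _example_cvtpd_ph() -> __m128h {
    let a = _mm_setr_pd(1.5, -2.25);
    // dst = [1.5, -2.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] as f16 lanes
    _mm_cvtpd_ph(a)
}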
13114 | |
13115 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13116 | /// floating-point elements, and store the results in dst. The upper 64 bits of dst are zeroed out. |
13117 | /// |
13118 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtpd_ph) |
13119 | #[inline ] |
13120 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13121 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13122 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13123 | pub fn _mm256_cvtpd_ph(a: __m256d) -> __m128h { |
13124 | _mm256_mask_cvtpd_ph(_mm_setzero_ph(), 0xff, a) |
13125 | } |
13126 | |
13127 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13128 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst |
13129 | /// when the corresponding mask bit is not set). The upper 64 bits of dst are zeroed out. |
13130 | /// |
13131 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtpd_ph) |
13132 | #[inline ] |
13133 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13134 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13135 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13136 | pub fn _mm256_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m256d) -> __m128h { |
13137 | unsafe { vcvtpd2ph_256(a, src, k) } |
13138 | } |
13139 | |
13140 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13141 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
13142 | /// corresponding mask bit is not set). The upper 64 bits of dst are zeroed out. |
13143 | /// |
13144 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtpd_ph) |
13145 | #[inline ] |
13146 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13147 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13148 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13149 | pub fn _mm256_maskz_cvtpd_ph(k: __mmask8, a: __m256d) -> __m128h { |
13150 | _mm256_mask_cvtpd_ph(_mm_setzero_ph(), k, a) |
13151 | } |
13152 | |
13153 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13154 | /// floating-point elements, and store the results in dst. |
13155 | /// |
13156 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpd_ph) |
13157 | #[inline ] |
13158 | #[target_feature (enable = "avx512fp16" )] |
13159 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13160 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13161 | pub fn _mm512_cvtpd_ph(a: __m512d) -> __m128h { |
13162 | _mm512_mask_cvtpd_ph(_mm_setzero_ph(), 0xff, a) |
13163 | } |
13164 | |
13165 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13166 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst |
13167 | /// when the corresponding mask bit is not set). |
13168 | /// |
13169 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtpd_ph) |
13170 | #[inline ] |
13171 | #[target_feature (enable = "avx512fp16" )] |
13172 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13173 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13174 | pub fn _mm512_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m512d) -> __m128h { |
13175 | unsafe { vcvtpd2ph_512(a, src, k, _MM_FROUND_CUR_DIRECTION) } |
13176 | } |
13177 | |
13178 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13179 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
13180 | /// corresponding mask bit is not set). |
13181 | /// |
13182 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtpd_ph) |
13183 | #[inline ] |
13184 | #[target_feature (enable = "avx512fp16" )] |
13185 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13186 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13187 | pub fn _mm512_maskz_cvtpd_ph(k: __mmask8, a: __m512d) -> __m128h { |
13188 | _mm512_mask_cvtpd_ph(_mm_setzero_ph(), k, a) |
13189 | } |
13190 | |
13191 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13192 | /// floating-point elements, and store the results in dst. |
13193 | /// |
13194 | /// Rounding is done according to the rounding parameter, which can be one of: |
13195 | /// |
13196 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13197 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13198 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13199 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13200 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13201 | /// |
13202 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundpd_ph) |
13203 | #[inline ] |
13204 | #[target_feature (enable = "avx512fp16" )] |
13205 | #[cfg_attr (test, assert_instr(vcvtpd2ph, ROUNDING = 8))] |
13206 | #[rustc_legacy_const_generics (1)] |
13207 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13208 | pub fn _mm512_cvt_roundpd_ph<const ROUNDING: i32>(a: __m512d) -> __m128h { |
13209 | static_assert_rounding!(ROUNDING); |
13210 | _mm512_mask_cvt_roundpd_ph::<ROUNDING>(_mm_setzero_ph(), 0xff, a) |
13211 | } |
13212 | |
13213 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13214 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst |
13215 | /// when the corresponding mask bit is not set). |
13216 | /// |
13217 | /// Rounding is done according to the rounding parameter, which can be one of: |
13218 | /// |
13219 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13220 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13221 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13222 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13223 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13224 | /// |
13225 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundpd_ph) |
13226 | #[inline ] |
13227 | #[target_feature (enable = "avx512fp16" )] |
13228 | #[cfg_attr (test, assert_instr(vcvtpd2ph, ROUNDING = 8))] |
13229 | #[rustc_legacy_const_generics (3)] |
13230 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13231 | pub fn _mm512_mask_cvt_roundpd_ph<const ROUNDING: i32>( |
13232 | src: __m128h, |
13233 | k: __mmask8, |
13234 | a: __m512d, |
13235 | ) -> __m128h { |
13236 | unsafe { |
13237 | static_assert_rounding!(ROUNDING); |
13238 | vcvtpd2ph_512(a, src, k, ROUNDING) |
13239 | } |
13240 | } |
13241 | |
13242 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13243 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
13244 | /// corresponding mask bit is not set). |
13245 | /// |
13246 | /// Rounding is done according to the rounding parameter, which can be one of: |
13247 | /// |
13248 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13249 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13250 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13251 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13252 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13253 | /// |
13254 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundpd_ph) |
13255 | #[inline ] |
13256 | #[target_feature (enable = "avx512fp16" )] |
13257 | #[cfg_attr (test, assert_instr(vcvtpd2ph, ROUNDING = 8))] |
13258 | #[rustc_legacy_const_generics (2)] |
13259 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13260 | pub fn _mm512_maskz_cvt_roundpd_ph<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m128h { |
13261 | static_assert_rounding!(ROUNDING); |
13262 | _mm512_mask_cvt_roundpd_ph::<ROUNDING>(_mm_setzero_ph(), k, a) |
13263 | } |
13264 | |
13265 | /// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit) |
13266 | /// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed |
13267 | /// elements from a to the upper elements of dst. |
13268 | /// |
13269 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_sh) |
13270 | #[inline ] |
13271 | #[target_feature (enable = "avx512fp16" )] |
13272 | #[cfg_attr (test, assert_instr(vcvtsd2sh))] |
13273 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13274 | pub fn _mm_cvtsd_sh(a: __m128h, b: __m128d) -> __m128h { |
13275 | _mm_mask_cvtsd_sh(_mm_undefined_ph(), 0xff, a, b) |
13276 | } |
13277 | |
13278 | /// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit) |
13279 | /// floating-point element, store the result in the lower element of dst using writemask k (the element |
13280 | /// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the |
13281 | /// upper elements of dst. |
13282 | /// |
13283 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsd_sh) |
13284 | #[inline ] |
13285 | #[target_feature (enable = "avx512fp16" )] |
13286 | #[cfg_attr (test, assert_instr(vcvtsd2sh))] |
13287 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13288 | pub fn _mm_mask_cvtsd_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128d) -> __m128h { |
13289 | unsafe { vcvtsd2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION) } |
13290 | } |
13291 | |
13292 | /// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit) |
13293 | /// floating-point element, store the result in the lower element of dst using zeromask k (the element |
13294 | /// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
13295 | /// elements of dst. |
13296 | /// |
13297 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsd_sh) |
13298 | #[inline ] |
13299 | #[target_feature (enable = "avx512fp16" )] |
13300 | #[cfg_attr (test, assert_instr(vcvtsd2sh))] |
13301 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13302 | pub fn _mm_maskz_cvtsd_sh(k: __mmask8, a: __m128h, b: __m128d) -> __m128h { |
13303 | _mm_mask_cvtsd_sh(_mm_setzero_ph(), k, a, b) |
13304 | } |
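
// Illustrative sketch (not part of the original source): zero-masked scalar
// f64 -> f16 conversion. With mask bit 0 clear the low lane is forced to zero
// and only the upper lanes of `a` survive. Names and values are invented.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn _example_maskz_cvtsd_sh() -> __m128h {
    let a = _mm_set1_ph(4.0);
    let b = _mm_set_sd(1.25);
    // Mask 0b0: lane 0 of dst becomes 0.0 instead of 1.25; lanes 1..7 stay 4.0.
    _mm_maskz_cvtsd_sh(0b0, a, b)
}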
13305 | |
13306 | /// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit) |
13307 | /// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed |
13308 | /// elements from a to the upper elements of dst. |
13309 | /// |
13310 | /// Rounding is done according to the rounding parameter, which can be one of: |
13311 | /// |
13312 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13313 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13314 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13315 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13316 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13317 | /// |
13318 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_sh) |
13319 | #[inline ] |
13320 | #[target_feature (enable = "avx512fp16" )] |
13321 | #[cfg_attr (test, assert_instr(vcvtsd2sh, ROUNDING = 8))] |
13322 | #[rustc_legacy_const_generics (2)] |
13323 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13324 | pub fn _mm_cvt_roundsd_sh<const ROUNDING: i32>(a: __m128h, b: __m128d) -> __m128h { |
13325 | static_assert_rounding!(ROUNDING); |
13326 | _mm_mask_cvt_roundsd_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b) |
13327 | } |
13328 | |
13329 | /// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit) |
13330 | /// floating-point element, store the result in the lower element of dst using writemask k (the element |
13331 | /// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the |
13332 | /// upper elements of dst. |
13333 | /// |
13334 | /// Rounding is done according to the rounding parameter, which can be one of: |
13335 | /// |
13336 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13337 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13338 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13339 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13340 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13341 | /// |
13342 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsd_sh) |
13343 | #[inline ] |
13344 | #[target_feature (enable = "avx512fp16" )] |
13345 | #[cfg_attr (test, assert_instr(vcvtsd2sh, ROUNDING = 8))] |
13346 | #[rustc_legacy_const_generics (4)] |
13347 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13348 | pub fn _mm_mask_cvt_roundsd_sh<const ROUNDING: i32>( |
13349 | src: __m128h, |
13350 | k: __mmask8, |
13351 | a: __m128h, |
13352 | b: __m128d, |
13353 | ) -> __m128h { |
13354 | unsafe { |
13355 | static_assert_rounding!(ROUNDING); |
13356 | vcvtsd2sh(a, b, src, k, ROUNDING) |
13357 | } |
13358 | } |
13359 | |
13360 | /// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit) |
13361 | /// floating-point element, store the result in the lower element of dst using zeromask k (the element |
13362 | /// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
13363 | /// elements of dst. |
13364 | /// |
13365 | /// Rounding is done according to the rounding parameter, which can be one of: |
13366 | /// |
13367 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13368 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13369 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13370 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13371 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13372 | /// |
13373 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsd_sh) |
13374 | #[inline ] |
13375 | #[target_feature (enable = "avx512fp16" )] |
13376 | #[cfg_attr (test, assert_instr(vcvtsd2sh, ROUNDING = 8))] |
13377 | #[rustc_legacy_const_generics (3)] |
13378 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13379 | pub fn _mm_maskz_cvt_roundsd_sh<const ROUNDING: i32>( |
13380 | k: __mmask8, |
13381 | a: __m128h, |
13382 | b: __m128d, |
13383 | ) -> __m128h { |
13384 | static_assert_rounding!(ROUNDING); |
13385 | _mm_mask_cvt_roundsd_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b) |
13386 | } |
13387 | |
13388 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13389 | /// store the results in dst. |
13390 | /// |
13391 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi16) |
13392 | #[inline ] |
13393 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13394 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13395 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13396 | pub fn _mm_cvtph_epi16(a: __m128h) -> __m128i { |
13397 | _mm_mask_cvtph_epi16(_mm_undefined_si128(), 0xff, a) |
13398 | } |
13399 | |
13400 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13401 | /// store the results in dst using writemask k (elements are copied from src when the corresponding |
13402 | /// mask bit is not set). |
13403 | /// |
13404 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi16) |
13405 | #[inline ] |
13406 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13407 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13408 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13409 | pub fn _mm_mask_cvtph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
13410 | unsafe { transmute(vcvtph2w_128(a, src.as_i16x8(), k)) } |
13411 | } |
13412 | |
13413 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13414 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
13415 | /// |
13416 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi16) |
13417 | #[inline ] |
13418 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13419 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13420 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13421 | pub fn _mm_maskz_cvtph_epi16(k: __mmask8, a: __m128h) -> __m128i { |
13422 | _mm_mask_cvtph_epi16(_mm_setzero_si128(), k, a) |
13423 | } |
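
// Illustrative sketch (not part of the original source): f16 -> i16 conversion
// honouring the current MXCSR rounding mode (round-to-nearest-even by default).
// The helper name and input values are invented for the example.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn _example_cvtph_epi16() -> __m128i {
    let a = _mm_set1_ph(2.5);
    // Under round-to-nearest-even, 2.5 rounds to 2 in each of the eight
    // resulting 16-bit integer lanes.
    _mm_cvtph_epi16(a)
}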
13424 | |
13425 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13426 | /// store the results in dst. |
13427 | /// |
13428 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi16) |
13429 | #[inline ] |
13430 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13431 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13432 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13433 | pub fn _mm256_cvtph_epi16(a: __m256h) -> __m256i { |
13434 | _mm256_mask_cvtph_epi16(_mm256_undefined_si256(), 0xffff, a) |
13435 | } |
13436 | |
13437 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13438 | /// store the results in dst using writemask k (elements are copied from src when the corresponding |
13439 | /// mask bit is not set). |
13440 | /// |
13441 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi16) |
13442 | #[inline ] |
13443 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13444 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13445 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13446 | pub fn _mm256_mask_cvtph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i { |
13447 | unsafe { transmute(vcvtph2w_256(a, src.as_i16x16(), k)) } |
13448 | } |
13449 | |
13450 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13451 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
13452 | /// |
13453 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi16) |
13454 | #[inline ] |
13455 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13456 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13457 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13458 | pub fn _mm256_maskz_cvtph_epi16(k: __mmask16, a: __m256h) -> __m256i { |
13459 | _mm256_mask_cvtph_epi16(_mm256_setzero_si256(), k, a) |
13460 | } |
13461 | |
13462 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13463 | /// store the results in dst. |
13464 | /// |
13465 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi16) |
13466 | #[inline ] |
13467 | #[target_feature (enable = "avx512fp16" )] |
13468 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13469 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13470 | pub fn _mm512_cvtph_epi16(a: __m512h) -> __m512i { |
13471 | _mm512_mask_cvtph_epi16(_mm512_undefined_epi32(), 0xffffffff, a) |
13472 | } |
13473 | |
13474 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13475 | /// store the results in dst using writemask k (elements are copied from src when the corresponding |
13476 | /// mask bit is not set). |
13477 | /// |
13478 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi16) |
13479 | #[inline ] |
13480 | #[target_feature (enable = "avx512fp16" )] |
13481 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13482 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13483 | pub fn _mm512_mask_cvtph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i { |
13484 | unsafe { |
13485 | transmute(vcvtph2w_512( |
13486 | a, |
13487 | src.as_i16x32(), |
13488 | k, |
13489 | _MM_FROUND_CUR_DIRECTION, |
13490 | )) |
13491 | } |
13492 | } |
13493 | |
13494 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13495 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
13496 | /// |
13497 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi16) |
13498 | #[inline ] |
13499 | #[target_feature (enable = "avx512fp16" )] |
13500 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13501 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13502 | pub fn _mm512_maskz_cvtph_epi16(k: __mmask32, a: __m512h) -> __m512i { |
13503 | _mm512_mask_cvtph_epi16(_mm512_setzero_si512(), k, a) |
13504 | } |
13505 | |
13506 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13507 | /// store the results in dst. |
13508 | /// |
13509 | /// Rounding is done according to the rounding parameter, which can be one of: |
13510 | /// |
13511 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13512 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13513 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13514 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13515 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13516 | /// |
13517 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi16) |
13518 | #[inline ] |
13519 | #[target_feature (enable = "avx512fp16" )] |
13520 | #[cfg_attr (test, assert_instr(vcvtph2w, ROUNDING = 8))] |
13521 | #[rustc_legacy_const_generics (1)] |
13522 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13523 | pub fn _mm512_cvt_roundph_epi16<const ROUNDING: i32>(a: __m512h) -> __m512i { |
13524 | static_assert_rounding!(ROUNDING); |
13525 | _mm512_mask_cvt_roundph_epi16::<ROUNDING>(_mm512_undefined_epi32(), 0xffffffff, a) |
13526 | } |
13527 | |
13528 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13529 | /// store the results in dst using writemask k (elements are copied from src when the corresponding |
13530 | /// mask bit is not set). |
13531 | /// |
13532 | /// Rounding is done according to the rounding parameter, which can be one of: |
13533 | /// |
13534 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13535 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13536 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13537 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13538 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13539 | /// |
13540 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi16) |
13541 | #[inline ] |
13542 | #[target_feature (enable = "avx512fp16" )] |
13543 | #[cfg_attr (test, assert_instr(vcvtph2w, ROUNDING = 8))] |
13544 | #[rustc_legacy_const_generics (3)] |
13545 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13546 | pub fn _mm512_mask_cvt_roundph_epi16<const ROUNDING: i32>( |
13547 | src: __m512i, |
13548 | k: __mmask32, |
13549 | a: __m512h, |
13550 | ) -> __m512i { |
13551 | unsafe { |
13552 | static_assert_rounding!(ROUNDING); |
13553 | transmute(vcvtph2w_512(a, src.as_i16x32(), k, ROUNDING)) |
13554 | } |
13555 | } |
13556 | |
13557 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13558 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
13559 | /// |
13560 | /// Rounding is done according to the rounding parameter, which can be one of: |
13561 | /// |
13562 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13563 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13564 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13565 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13566 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13567 | /// |
13568 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi16) |
13569 | #[inline ] |
13570 | #[target_feature (enable = "avx512fp16" )] |
13571 | #[cfg_attr (test, assert_instr(vcvtph2w, ROUNDING = 8))] |
13572 | #[rustc_legacy_const_generics (2)] |
13573 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13574 | pub fn _mm512_maskz_cvt_roundph_epi16<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512i { |
13575 | static_assert_rounding!(ROUNDING); |
13576 | _mm512_mask_cvt_roundph_epi16::<ROUNDING>(_mm512_setzero_si512(), k, a) |
13577 | } |
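
// Illustrative sketch (not part of the original source): 512-bit f16 -> i16
// conversion with an explicit rounding mode instead of MXCSR. The helper name
// is invented; the input is assumed to hold values representable as i16.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn _example_cvt_roundph_epi16(a: __m512h) -> __m512i {
    // Round each of the 32 half-precision lanes up (towards +inf) and suppress
    // exceptions while converting to signed 16-bit integers.
    _mm512_cvt_roundph_epi16::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a)
}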
13578 | |
13579 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13580 | /// and store the results in dst. |
13581 | /// |
13582 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu16) |
13583 | #[inline ] |
13584 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13585 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13586 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13587 | pub fn _mm_cvtph_epu16(a: __m128h) -> __m128i { |
13588 | _mm_mask_cvtph_epu16(_mm_undefined_si128(), 0xff, a) |
13589 | } |
13590 | |
13591 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13592 | /// and store the results in dst using writemask k (elements are copied from src when the corresponding |
13593 | /// mask bit is not set). |
13594 | /// |
13595 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu16) |
13596 | #[inline ] |
13597 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13598 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13599 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13600 | pub fn _mm_mask_cvtph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
13601 | unsafe { transmute(vcvtph2uw_128(a, src.as_u16x8(), k)) } |
13602 | } |
13603 | |
13604 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13605 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
13606 | /// |
13607 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu16) |
13608 | #[inline ] |
13609 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13610 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13611 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13612 | pub fn _mm_maskz_cvtph_epu16(k: __mmask8, a: __m128h) -> __m128i { |
13613 | _mm_mask_cvtph_epu16(_mm_setzero_si128(), k, a) |
13614 | } |
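
// Illustrative sketch (not part of the original source): unsigned f16 -> u16
// conversion with a zeromask. Mask bits select which lanes are converted; the
// rest become zero. Names and values are invented for the example.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn _example_maskz_cvtph_epu16() -> __m128i {
    let a = _mm_set1_ph(7.0);
    // Only the low four lanes are converted to 7u16; the high four are zeroed.
    _mm_maskz_cvtph_epu16(0b0000_1111, a)
}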
13615 | |
13616 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13617 | /// and store the results in dst. |
13618 | /// |
13619 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu16) |
13620 | #[inline ] |
13621 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13622 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13623 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13624 | pub fn _mm256_cvtph_epu16(a: __m256h) -> __m256i { |
13625 | _mm256_mask_cvtph_epu16(_mm256_undefined_si256(), 0xffff, a) |
13626 | } |
13627 | |
13628 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13629 | /// and store the results in dst using writemask k (elements are copied from src when the corresponding |
13630 | /// mask bit is not set). |
13631 | /// |
13632 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu16) |
13633 | #[inline ] |
13634 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13635 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13636 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13637 | pub fn _mm256_mask_cvtph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i { |
13638 | unsafe { transmute(vcvtph2uw_256(a, src.as_u16x16(), k)) } |
13639 | } |
13640 | |
13641 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13642 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
13643 | /// |
13644 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu16) |
13645 | #[inline ] |
13646 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13647 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13648 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13649 | pub fn _mm256_maskz_cvtph_epu16(k: __mmask16, a: __m256h) -> __m256i { |
13650 | _mm256_mask_cvtph_epu16(_mm256_setzero_si256(), k, a) |
13651 | } |
13652 | |
13653 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13654 | /// and store the results in dst. |
13655 | /// |
13656 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu16) |
13657 | #[inline ] |
13658 | #[target_feature (enable = "avx512fp16" )] |
13659 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13660 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13661 | pub fn _mm512_cvtph_epu16(a: __m512h) -> __m512i { |
13662 | _mm512_mask_cvtph_epu16(_mm512_undefined_epi32(), 0xffffffff, a) |
13663 | } |
13664 | |
13665 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13666 | /// and store the results in dst using writemask k (elements are copied from src when the corresponding |
13667 | /// mask bit is not set). |
13668 | /// |
13669 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu16) |
13670 | #[inline ] |
13671 | #[target_feature (enable = "avx512fp16" )] |
13672 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13673 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13674 | pub fn _mm512_mask_cvtph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i { |
13675 | unsafe { |
13676 | transmute(vcvtph2uw_512( |
13677 | a, |
13678 | src.as_u16x32(), |
13679 | k, |
13680 | _MM_FROUND_CUR_DIRECTION, |
13681 | )) |
13682 | } |
13683 | } |
13684 | |
13685 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13686 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
13687 | /// |
13688 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu16) |
13689 | #[inline ] |
13690 | #[target_feature (enable = "avx512fp16" )] |
13691 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13692 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13693 | pub fn _mm512_maskz_cvtph_epu16(k: __mmask32, a: __m512h) -> __m512i { |
13694 | _mm512_mask_cvtph_epu16(_mm512_setzero_si512(), k, a) |
13695 | } |
13696 | |
13697 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13698 | /// and store the results in dst. |
13699 | /// |
13700 | /// Rounding is done according to the rounding parameter, which can be one of: |
13701 | /// |
13702 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13703 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13704 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13705 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13706 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13707 | /// |
13708 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu16) |
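///
/// A minimal usage sketch (not from Intel's documentation), assuming nightly Rust with the
/// unstable `stdarch_x86_avx512_f16` and `f16` features and an AVX512-FP16 CPU (marked `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16, f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() {
///     let a = _mm512_set1_ph(2.5);
///     // Round up instead of the default round-to-nearest-even.
///     let r = _mm512_cvt_roundph_epu16::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
///     let lanes: [u16; 32] = unsafe { core::mem::transmute(r) };
///     assert!(lanes.iter().all(|&x| x == 3)); // 2.5 rounded toward +inf
/// }
/// ```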
13709 | #[inline ] |
13710 | #[target_feature (enable = "avx512fp16" )] |
13711 | #[cfg_attr (test, assert_instr(vcvtph2uw, ROUNDING = 8))] |
13712 | #[rustc_legacy_const_generics (1)] |
13713 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13714 | pub fn _mm512_cvt_roundph_epu16<const ROUNDING: i32>(a: __m512h) -> __m512i { |
13715 | static_assert_rounding!(ROUNDING); |
13716 | _mm512_mask_cvt_roundph_epu16::<ROUNDING>(_mm512_undefined_epi32(), 0xffffffff, a) |
13717 | } |
13718 | |
13719 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13720 | /// and store the results in dst using writemask k (elements are copied from src when the corresponding |
13721 | /// mask bit is not set). |
13722 | /// |
13723 | /// Rounding is done according to the rounding parameter, which can be one of: |
13724 | /// |
13725 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13726 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13727 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13728 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13729 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13730 | /// |
13731 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu16) |
13732 | #[inline ] |
13733 | #[target_feature (enable = "avx512fp16" )] |
13734 | #[cfg_attr (test, assert_instr(vcvtph2uw, ROUNDING = 8))] |
13735 | #[rustc_legacy_const_generics (3)] |
13736 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13737 | pub fn _mm512_mask_cvt_roundph_epu16<const ROUNDING: i32>( |
13738 | src: __m512i, |
13739 | k: __mmask32, |
13740 | a: __m512h, |
13741 | ) -> __m512i { |
13742 | unsafe { |
13743 | static_assert_rounding!(ROUNDING); |
13744 | transmute(vcvtph2uw_512(a, src.as_u16x32(), k, ROUNDING)) |
13745 | } |
13746 | } |
13747 | |
13748 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13749 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
13750 | /// |
13751 | /// Rounding is done according to the rounding parameter, which can be one of: |
13752 | /// |
13753 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13754 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13755 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13756 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13757 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13758 | /// |
13759 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu16) |
13760 | #[inline ] |
13761 | #[target_feature (enable = "avx512fp16" )] |
13762 | #[cfg_attr (test, assert_instr(vcvtph2uw, ROUNDING = 8))] |
13763 | #[rustc_legacy_const_generics (2)] |
13764 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13765 | pub fn _mm512_maskz_cvt_roundph_epu16<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512i { |
13766 | static_assert_rounding!(ROUNDING); |
13767 | _mm512_mask_cvt_roundph_epu16::<ROUNDING>(_mm512_setzero_si512(), k, a) |
13768 | } |
13769 | |
13770 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13771 | /// truncation, and store the results in dst. |
13772 | /// |
13773 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi16) |
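///
/// A minimal usage sketch (not from Intel's documentation), assuming nightly Rust with the
/// unstable `stdarch_x86_avx512_f16` and `f16` features and a CPU with AVX512-FP16 and
/// AVX512VL (marked `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16, f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() {
///     // Arguments run from the highest lane (e7) down to lane 0.
///     let a = _mm_set_ph(-2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 3.5, 7.5);
///     // Truncation always rounds toward zero, regardless of MXCSR.RC.
///     let r = _mm_cvttph_epi16(a);
///     let lanes: [i16; 8] = unsafe { core::mem::transmute(r) };
///     assert_eq!(lanes, [7, 3, 2, 1, 0, 0, -1, -2]);
/// }
/// ```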
13774 | #[inline ] |
13775 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13776 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13777 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13778 | pub fn _mm_cvttph_epi16(a: __m128h) -> __m128i { |
13779 | _mm_mask_cvttph_epi16(_mm_undefined_si128(), 0xff, a) |
13780 | } |
13781 | |
13782 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13783 | /// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding |
13784 | /// mask bit is not set). |
13785 | /// |
13786 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi16) |
13787 | #[inline ] |
13788 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13789 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13790 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13791 | pub fn _mm_mask_cvttph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
13792 | unsafe { transmute(vcvttph2w_128(a, src.as_i16x8(), k)) } |
13793 | } |
13794 | |
13795 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13796 | /// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
13797 | /// mask bit is not set). |
13798 | /// |
13799 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi16) |
13800 | #[inline ] |
13801 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13802 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13803 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13804 | pub fn _mm_maskz_cvttph_epi16(k: __mmask8, a: __m128h) -> __m128i { |
13805 | _mm_mask_cvttph_epi16(_mm_setzero_si128(), k, a) |
13806 | } |
13807 | |
13808 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13809 | /// truncation, and store the results in dst. |
13810 | /// |
13811 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi16) |
13812 | #[inline ] |
13813 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13814 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13815 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13816 | pub fn _mm256_cvttph_epi16(a: __m256h) -> __m256i { |
13817 | _mm256_mask_cvttph_epi16(_mm256_undefined_si256(), 0xffff, a) |
13818 | } |
13819 | |
13820 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13821 | /// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding |
13822 | /// mask bit is not set). |
13823 | /// |
13824 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi16) |
13825 | #[inline ] |
13826 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13827 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13828 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13829 | pub fn _mm256_mask_cvttph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i { |
13830 | unsafe { transmute(vcvttph2w_256(a, src.as_i16x16(), k)) } |
13831 | } |
13832 | |
13833 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13834 | /// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
13835 | /// mask bit is not set). |
13836 | /// |
13837 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi16) |
13838 | #[inline ] |
13839 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13840 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13841 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13842 | pub fn _mm256_maskz_cvttph_epi16(k: __mmask16, a: __m256h) -> __m256i { |
13843 | _mm256_mask_cvttph_epi16(_mm256_setzero_si256(), k, a) |
13844 | } |
13845 | |
13846 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13847 | /// truncation, and store the results in dst. |
13848 | /// |
13849 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi16) |
13850 | #[inline ] |
13851 | #[target_feature (enable = "avx512fp16" )] |
13852 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13853 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13854 | pub fn _mm512_cvttph_epi16(a: __m512h) -> __m512i { |
13855 | _mm512_mask_cvttph_epi16(_mm512_undefined_epi32(), 0xffffffff, a) |
13856 | } |
13857 | |
13858 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13859 | /// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding |
13860 | /// mask bit is not set). |
13861 | /// |
13862 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi16) |
13863 | #[inline ] |
13864 | #[target_feature (enable = "avx512fp16" )] |
13865 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13866 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13867 | pub fn _mm512_mask_cvttph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i { |
13868 | unsafe { |
13869 | transmute(vcvttph2w_512( |
13870 | a, |
13871 | src.as_i16x32(), |
13872 | k, |
13873 | _MM_FROUND_CUR_DIRECTION, |
13874 | )) |
13875 | } |
13876 | } |
13877 | |
13878 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13879 | /// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
13880 | /// mask bit is not set). |
13881 | /// |
13882 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi16) |
13883 | #[inline ] |
13884 | #[target_feature (enable = "avx512fp16" )] |
13885 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13886 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13887 | pub fn _mm512_maskz_cvttph_epi16(k: __mmask32, a: __m512h) -> __m512i { |
13888 | _mm512_mask_cvttph_epi16(_mm512_setzero_si512(), k, a) |
13889 | } |
13890 | |
13891 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13892 | /// truncation, and store the results in dst. |
13893 | /// |
13894 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
13895 | /// |
13896 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi16) |
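///
/// A minimal usage sketch (not from Intel's documentation), assuming nightly Rust with the
/// unstable `stdarch_x86_avx512_f16` and `f16` features and an AVX512-FP16 CPU (marked `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16, f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() {
///     let a = _mm512_set1_ph(-1.75);
///     // Truncate toward zero while suppressing floating-point exceptions.
///     let r = _mm512_cvtt_roundph_epi16::<{ _MM_FROUND_NO_EXC }>(a);
///     let lanes: [i16; 32] = unsafe { core::mem::transmute(r) };
///     assert!(lanes.iter().all(|&x| x == -1));
/// }
/// ```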
13897 | #[inline ] |
13898 | #[target_feature (enable = "avx512fp16" )] |
13899 | #[cfg_attr (test, assert_instr(vcvttph2w, SAE = 8))] |
13900 | #[rustc_legacy_const_generics (1)] |
13901 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13902 | pub fn _mm512_cvtt_roundph_epi16<const SAE: i32>(a: __m512h) -> __m512i { |
13903 | static_assert_sae!(SAE); |
13904 | _mm512_mask_cvtt_roundph_epi16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a) |
13905 | } |
13906 | |
13907 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13908 | /// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding |
13909 | /// mask bit is not set). |
13910 | /// |
13911 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
13912 | /// |
13913 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi16) |
13914 | #[inline ] |
13915 | #[target_feature (enable = "avx512fp16" )] |
13916 | #[cfg_attr (test, assert_instr(vcvttph2w, SAE = 8))] |
13917 | #[rustc_legacy_const_generics (3)] |
13918 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13919 | pub fn _mm512_mask_cvtt_roundph_epi16<const SAE: i32>( |
13920 | src: __m512i, |
13921 | k: __mmask32, |
13922 | a: __m512h, |
13923 | ) -> __m512i { |
13924 | unsafe { |
13925 | static_assert_sae!(SAE); |
13926 | transmute(vcvttph2w_512(a, src.as_i16x32(), k, SAE)) |
13927 | } |
13928 | } |
13929 | |
13930 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13931 | /// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
13932 | /// mask bit is not set). |
13933 | /// |
13934 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
13935 | /// |
13936 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi16) |
13937 | #[inline ] |
13938 | #[target_feature (enable = "avx512fp16" )] |
13939 | #[cfg_attr (test, assert_instr(vcvttph2w, SAE = 8))] |
13940 | #[rustc_legacy_const_generics (2)] |
13941 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13942 | pub fn _mm512_maskz_cvtt_roundph_epi16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i { |
13943 | static_assert_sae!(SAE); |
13944 | _mm512_mask_cvtt_roundph_epi16::<SAE>(_mm512_setzero_si512(), k, a) |
13945 | } |
13946 | |
13947 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
13948 | /// truncation, and store the results in dst. |
13949 | /// |
13950 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu16) |
13951 | #[inline ] |
13952 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13953 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
13954 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13955 | pub fn _mm_cvttph_epu16(a: __m128h) -> __m128i { |
13956 | _mm_mask_cvttph_epu16(_mm_undefined_si128(), 0xff, a) |
13957 | } |
13958 | |
13959 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
13960 | /// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding |
13961 | /// mask bit is not set). |
13962 | /// |
13963 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu16) |
13964 | #[inline ] |
13965 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13966 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
13967 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13968 | pub fn _mm_mask_cvttph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
13969 | unsafe { transmute(vcvttph2uw_128(a, src.as_u16x8(), k)) } |
13970 | } |
13971 | |
13972 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
13973 | /// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
13974 | /// mask bit is not set). |
13975 | /// |
13976 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu16) |
13977 | #[inline ] |
13978 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13979 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
13980 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13981 | pub fn _mm_maskz_cvttph_epu16(k: __mmask8, a: __m128h) -> __m128i { |
13982 | _mm_mask_cvttph_epu16(_mm_setzero_si128(), k, a) |
13983 | } |
13984 | |
13985 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
13986 | /// truncation, and store the results in dst. |
13987 | /// |
13988 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu16) |
13989 | #[inline ] |
13990 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13991 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
13992 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13993 | pub fn _mm256_cvttph_epu16(a: __m256h) -> __m256i { |
13994 | _mm256_mask_cvttph_epu16(_mm256_undefined_si256(), 0xffff, a) |
13995 | } |
13996 | |
13997 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
13998 | /// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding |
13999 | /// mask bit is not set). |
14000 | /// |
14001 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu16) |
14002 | #[inline ] |
14003 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14004 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
14005 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14006 | pub fn _mm256_mask_cvttph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i { |
14007 | unsafe { transmute(vcvttph2uw_256(a, src.as_u16x16(), k)) } |
14008 | } |
14009 | |
14010 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
14011 | /// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
14012 | /// mask bit is not set). |
14013 | /// |
14014 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu16) |
14015 | #[inline ] |
14016 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14017 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
14018 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14019 | pub fn _mm256_maskz_cvttph_epu16(k: __mmask16, a: __m256h) -> __m256i { |
14020 | _mm256_mask_cvttph_epu16(_mm256_setzero_si256(), k, a) |
14021 | } |
14022 | |
14023 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
14024 | /// truncation, and store the results in dst. |
14025 | /// |
14026 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu16) |
14027 | #[inline ] |
14028 | #[target_feature (enable = "avx512fp16" )] |
14029 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
14030 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14031 | pub fn _mm512_cvttph_epu16(a: __m512h) -> __m512i { |
14032 | _mm512_mask_cvttph_epu16(_mm512_undefined_epi32(), 0xffffffff, a) |
14033 | } |
14034 | |
14035 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
14036 | /// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding |
14037 | /// mask bit is not set). |
14038 | /// |
14039 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu16) |
14040 | #[inline ] |
14041 | #[target_feature (enable = "avx512fp16" )] |
14042 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
14043 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14044 | pub fn _mm512_mask_cvttph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i { |
14045 | unsafe { |
14046 | transmute(vcvttph2uw_512( |
14047 | a, |
14048 | src.as_u16x32(), |
14049 | k, |
14050 | _MM_FROUND_CUR_DIRECTION, |
14051 | )) |
14052 | } |
14053 | } |
14054 | |
14055 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
14056 | /// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
14057 | /// mask bit is not set). |
14058 | /// |
14059 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu16) |
14060 | #[inline ] |
14061 | #[target_feature (enable = "avx512fp16" )] |
14062 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
14063 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14064 | pub fn _mm512_maskz_cvttph_epu16(k: __mmask32, a: __m512h) -> __m512i { |
14065 | _mm512_mask_cvttph_epu16(_mm512_setzero_si512(), k, a) |
14066 | } |
14067 | |
14068 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
14069 | /// truncation, and store the results in dst. |
14070 | /// |
14071 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14072 | /// |
14073 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu16) |
14074 | #[inline ] |
14075 | #[target_feature (enable = "avx512fp16" )] |
14076 | #[cfg_attr (test, assert_instr(vcvttph2uw, SAE = 8))] |
14077 | #[rustc_legacy_const_generics (1)] |
14078 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14079 | pub fn _mm512_cvtt_roundph_epu16<const SAE: i32>(a: __m512h) -> __m512i { |
14080 | static_assert_sae!(SAE); |
14081 | _mm512_mask_cvtt_roundph_epu16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a) |
14082 | } |
14083 | |
14084 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
14085 | /// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding |
14086 | /// mask bit is not set). |
14087 | /// |
14088 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14089 | /// |
14090 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu16) |
14091 | #[inline ] |
14092 | #[target_feature (enable = "avx512fp16" )] |
14093 | #[cfg_attr (test, assert_instr(vcvttph2uw, SAE = 8))] |
14094 | #[rustc_legacy_const_generics (3)] |
14095 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14096 | pub fn _mm512_mask_cvtt_roundph_epu16<const SAE: i32>( |
14097 | src: __m512i, |
14098 | k: __mmask32, |
14099 | a: __m512h, |
14100 | ) -> __m512i { |
14101 | unsafe { |
14102 | static_assert_sae!(SAE); |
14103 | transmute(vcvttph2uw_512(a, src.as_u16x32(), k, SAE)) |
14104 | } |
14105 | } |
14106 | |
14107 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
14108 | /// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
14109 | /// mask bit is not set). |
14110 | /// |
14111 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14112 | /// |
14113 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu16) |
14114 | #[inline ] |
14115 | #[target_feature (enable = "avx512fp16" )] |
14116 | #[cfg_attr (test, assert_instr(vcvttph2uw, SAE = 8))] |
14117 | #[rustc_legacy_const_generics (2)] |
14118 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14119 | pub fn _mm512_maskz_cvtt_roundph_epu16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i { |
14120 | static_assert_sae!(SAE); |
14121 | _mm512_mask_cvtt_roundph_epu16::<SAE>(_mm512_setzero_si512(), k, a) |
14122 | } |
14123 | |
14124 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14125 | /// results in dst. |
14126 | /// |
14127 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi32) |
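///
/// Because the result holds only four 32-bit lanes, just the lower four half-precision elements
/// of a take part in the conversion. A minimal sketch (not from Intel's documentation), assuming
/// nightly Rust, the unstable `stdarch_x86_avx512_f16` and `f16` features, and a CPU with
/// AVX512-FP16 and AVX512VL (marked `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16, f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() {
///     let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 4.5, 3.5, 2.5, 1.5);
///     let r = _mm_cvtph_epi32(a);
///     let lanes: [i32; 4] = unsafe { core::mem::transmute(r) };
///     // Round-to-nearest-even: 1.5 -> 2, 2.5 -> 2, 3.5 -> 4, 4.5 -> 4.
///     assert_eq!(lanes, [2, 2, 4, 4]);
/// }
/// ```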
14128 | #[inline ] |
14129 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14130 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14131 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14132 | pub fn _mm_cvtph_epi32(a: __m128h) -> __m128i { |
14133 | _mm_mask_cvtph_epi32(_mm_undefined_si128(), 0xff, a) |
14134 | } |
14135 | |
14136 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14137 | /// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14138 | /// |
14139 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi32) |
14140 | #[inline ] |
14141 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14142 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14143 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14144 | pub fn _mm_mask_cvtph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
14145 | unsafe { transmute(vcvtph2dq_128(a, src.as_i32x4(), k)) } |
14146 | } |
14147 | |
14148 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14149 | /// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14150 | /// |
14151 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi32) |
14152 | #[inline ] |
14153 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14154 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14155 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14156 | pub fn _mm_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m128i { |
14157 | _mm_mask_cvtph_epi32(_mm_setzero_si128(), k, a) |
14158 | } |
14159 | |
14160 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14161 | /// results in dst. |
14162 | /// |
14163 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi32) |
14164 | #[inline ] |
14165 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14166 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14167 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14168 | pub fn _mm256_cvtph_epi32(a: __m128h) -> __m256i { |
14169 | _mm256_mask_cvtph_epi32(_mm256_undefined_si256(), 0xff, a) |
14170 | } |
14171 | |
14172 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14173 | /// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14174 | /// |
14175 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi32) |
14176 | #[inline ] |
14177 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14178 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14179 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14180 | pub fn _mm256_mask_cvtph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { |
14181 | unsafe { transmute(vcvtph2dq_256(a, src.as_i32x8(), k)) } |
14182 | } |
14183 | |
14184 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14185 | /// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14186 | /// |
14187 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi32) |
14188 | #[inline ] |
14189 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14190 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14191 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14192 | pub fn _mm256_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m256i { |
14193 | _mm256_mask_cvtph_epi32(_mm256_setzero_si256(), k, a) |
14194 | } |
14195 | |
14196 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14197 | /// results in dst. |
14198 | /// |
14199 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi32) |
14200 | #[inline ] |
14201 | #[target_feature (enable = "avx512fp16" )] |
14202 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14203 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14204 | pub fn _mm512_cvtph_epi32(a: __m256h) -> __m512i { |
14205 | _mm512_mask_cvtph_epi32(_mm512_undefined_epi32(), 0xffff, a) |
14206 | } |
14207 | |
14208 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14209 | /// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14210 | /// |
14211 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi32) |
14212 | #[inline ] |
14213 | #[target_feature (enable = "avx512fp16" )] |
14214 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14215 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14216 | pub fn _mm512_mask_cvtph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i { |
14217 | unsafe { |
14218 | transmute(vcvtph2dq_512( |
14219 | a, |
14220 | src.as_i32x16(), |
14221 | k, |
14222 | _MM_FROUND_CUR_DIRECTION, |
14223 | )) |
14224 | } |
14225 | } |
14226 | |
14227 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14228 | /// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14229 | /// |
14230 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi32) |
14231 | #[inline ] |
14232 | #[target_feature (enable = "avx512fp16" )] |
14233 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14234 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14235 | pub fn _mm512_maskz_cvtph_epi32(k: __mmask16, a: __m256h) -> __m512i { |
14236 | _mm512_mask_cvtph_epi32(_mm512_setzero_si512(), k, a) |
14237 | } |
14238 | |
14239 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14240 | /// results in dst. |
14241 | /// |
14242 | /// Rounding is done according to the rounding parameter, which can be one of: |
14243 | /// |
14244 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
14245 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
14246 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
14247 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
14248 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
14249 | /// |
14250 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi32) |
14251 | #[inline ] |
14252 | #[target_feature (enable = "avx512fp16" )] |
14253 | #[cfg_attr (test, assert_instr(vcvtph2dq, ROUNDING = 8))] |
14254 | #[rustc_legacy_const_generics (1)] |
14255 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14256 | pub fn _mm512_cvt_roundph_epi32<const ROUNDING: i32>(a: __m256h) -> __m512i { |
14257 | static_assert_rounding!(ROUNDING); |
14258 | _mm512_mask_cvt_roundph_epi32::<ROUNDING>(_mm512_undefined_epi32(), 0xffff, a) |
14259 | } |
14260 | |
14261 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14262 | /// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14263 | /// |
14264 | /// Rounding is done according to the rounding parameter, which can be one of: |
14265 | /// |
14266 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
14267 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
14268 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
14269 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
14270 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
14271 | /// |
14272 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi32) |
14273 | #[inline ] |
14274 | #[target_feature (enable = "avx512fp16" )] |
14275 | #[cfg_attr (test, assert_instr(vcvtph2dq, ROUNDING = 8))] |
14276 | #[rustc_legacy_const_generics (3)] |
14277 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14278 | pub fn _mm512_mask_cvt_roundph_epi32<const ROUNDING: i32>( |
14279 | src: __m512i, |
14280 | k: __mmask16, |
14281 | a: __m256h, |
14282 | ) -> __m512i { |
14283 | unsafe { |
14284 | static_assert_rounding!(ROUNDING); |
14285 | transmute(vcvtph2dq_512(a, src.as_i32x16(), k, ROUNDING)) |
14286 | } |
14287 | } |
14288 | |
14289 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14290 | /// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14291 | /// |
14292 | /// Rounding is done according to the rounding parameter, which can be one of: |
14293 | /// |
14294 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
14295 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
14296 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
14297 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
14298 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
14299 | /// |
14300 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi32) |
14301 | #[inline ] |
14302 | #[target_feature (enable = "avx512fp16" )] |
14303 | #[cfg_attr (test, assert_instr(vcvtph2dq, ROUNDING = 8))] |
14304 | #[rustc_legacy_const_generics (2)] |
14305 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14306 | pub fn _mm512_maskz_cvt_roundph_epi32<const ROUNDING: i32>(k: __mmask16, a: __m256h) -> __m512i { |
14307 | static_assert_rounding!(ROUNDING); |
14308 | _mm512_mask_cvt_roundph_epi32::<ROUNDING>(_mm512_setzero_si512(), k, a) |
14309 | } |
14310 | |
14311 | /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer, and store |
14312 | /// the result in dst. |
14313 | /// |
14314 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_i32) |
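///
/// A minimal usage sketch (not from Intel's documentation), assuming nightly Rust with the
/// unstable `stdarch_x86_avx512_f16` and `f16` features and an AVX512-FP16 CPU (marked `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16, f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() {
///     let a = _mm_set_sh(6.5);
///     // Uses the current MXCSR rounding mode; the default rounds to nearest even.
///     assert_eq!(_mm_cvtsh_i32(a), 6);
/// }
/// ```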
14315 | #[inline ] |
14316 | #[target_feature (enable = "avx512fp16" )] |
14317 | #[cfg_attr (test, assert_instr(vcvtsh2si))] |
14318 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14319 | pub fn _mm_cvtsh_i32(a: __m128h) -> i32 { |
14320 | unsafe { vcvtsh2si32(a, _MM_FROUND_CUR_DIRECTION) } |
14321 | } |
14322 | |
14323 | /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer, and store |
14324 | /// the result in dst. |
14325 | /// |
14326 | /// Rounding is done according to the rounding parameter, which can be one of: |
14327 | /// |
14328 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
14329 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
14330 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
14331 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
14332 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
14333 | /// |
14334 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_i32) |
14335 | #[inline ] |
14336 | #[target_feature (enable = "avx512fp16" )] |
14337 | #[cfg_attr (test, assert_instr(vcvtsh2si, ROUNDING = 8))] |
14338 | #[rustc_legacy_const_generics (1)] |
14339 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14340 | pub fn _mm_cvt_roundsh_i32<const ROUNDING: i32>(a: __m128h) -> i32 { |
14341 | unsafe { |
14342 | static_assert_rounding!(ROUNDING); |
14343 | vcvtsh2si32(a, ROUNDING) |
14344 | } |
14345 | } |
14346 | |
14347 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store the |
14348 | /// results in dst. |
14349 | /// |
14350 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu32) |
14351 | #[inline ] |
14352 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14353 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14354 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14355 | pub fn _mm_cvtph_epu32(a: __m128h) -> __m128i { |
14356 | _mm_mask_cvtph_epu32(_mm_undefined_si128(), 0xff, a) |
14357 | } |
14358 | |
14359 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14360 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14361 | /// |
14362 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu32) |
14363 | #[inline ] |
14364 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14365 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14366 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14367 | pub fn _mm_mask_cvtph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
14368 | unsafe { transmute(vcvtph2udq_128(a, src.as_u32x4(), k)) } |
14369 | } |
14370 | |
14371 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14372 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14373 | /// |
14374 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu32) |
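///
/// A minimal usage sketch (not from Intel's documentation), assuming nightly Rust with the
/// unstable `stdarch_x86_avx512_f16` and `f16` features and a CPU with AVX512-FP16 and
/// AVX512VL (marked `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16, f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() {
///     let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 8.0, 6.0, 4.0, 2.0);
///     // Only lane 0 is converted; the other three 32-bit lanes are zeroed.
///     let r = _mm_maskz_cvtph_epu32(0b0001, a);
///     let lanes: [u32; 4] = unsafe { core::mem::transmute(r) };
///     assert_eq!(lanes, [2, 0, 0, 0]);
/// }
/// ```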
14375 | #[inline ] |
14376 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14377 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14378 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14379 | pub fn _mm_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m128i { |
14380 | _mm_mask_cvtph_epu32(_mm_setzero_si128(), k, a) |
14381 | } |
14382 | |
14383 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14384 | /// the results in dst. |
14385 | /// |
14386 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu32) |
14387 | #[inline ] |
14388 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14389 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14390 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14391 | pub fn _mm256_cvtph_epu32(a: __m128h) -> __m256i { |
14392 | _mm256_mask_cvtph_epu32(_mm256_undefined_si256(), 0xff, a) |
14393 | } |
14394 | |
14395 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14396 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14397 | /// |
14398 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu32) |
14399 | #[inline ] |
14400 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14401 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14402 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14403 | pub fn _mm256_mask_cvtph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { |
14404 | unsafe { transmute(vcvtph2udq_256(a, src.as_u32x8(), k)) } |
14405 | } |
14406 | |
14407 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14408 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14409 | /// |
14410 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu32) |
14411 | #[inline ] |
14412 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14413 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14414 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14415 | pub fn _mm256_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m256i { |
14416 | _mm256_mask_cvtph_epu32(_mm256_setzero_si256(), k, a) |
14417 | } |
14418 | |
14419 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14420 | /// the results in dst. |
14421 | /// |
14422 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu32) |
14423 | #[inline ] |
14424 | #[target_feature (enable = "avx512fp16" )] |
14425 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14426 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14427 | pub fn _mm512_cvtph_epu32(a: __m256h) -> __m512i { |
14428 | _mm512_mask_cvtph_epu32(_mm512_undefined_epi32(), 0xffff, a) |
14429 | } |
14430 | |
14431 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14432 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14433 | /// |
14434 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu32) |
14435 | #[inline ] |
14436 | #[target_feature (enable = "avx512fp16" )] |
14437 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14438 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14439 | pub fn _mm512_mask_cvtph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i { |
14440 | unsafe { |
14441 | transmute(vcvtph2udq_512( |
14442 | a, |
14443 | src.as_u32x16(), |
14444 | k, |
14445 | _MM_FROUND_CUR_DIRECTION, |
14446 | )) |
14447 | } |
14448 | } |
14449 | |
14450 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14451 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14452 | /// |
14453 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu32) |
14454 | #[inline ] |
14455 | #[target_feature (enable = "avx512fp16" )] |
14456 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14457 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14458 | pub fn _mm512_maskz_cvtph_epu32(k: __mmask16, a: __m256h) -> __m512i { |
14459 | _mm512_mask_cvtph_epu32(_mm512_setzero_si512(), k, a) |
14460 | } |
14461 | |
14462 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14463 | /// the results in dst. |
14464 | /// |
14465 | /// Rounding is done according to the rounding parameter, which can be one of: |
14466 | /// |
14467 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
14468 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
14469 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
14470 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
14471 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
14472 | /// |
14473 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu32) |
14474 | #[inline ] |
14475 | #[target_feature (enable = "avx512fp16" )] |
14476 | #[cfg_attr (test, assert_instr(vcvtph2udq, ROUNDING = 8))] |
14477 | #[rustc_legacy_const_generics (1)] |
14478 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14479 | pub fn _mm512_cvt_roundph_epu32<const ROUNDING: i32>(a: __m256h) -> __m512i { |
14480 | static_assert_rounding!(ROUNDING); |
14481 | _mm512_mask_cvt_roundph_epu32::<ROUNDING>(_mm512_undefined_epi32(), 0xffff, a) |
14482 | } |
14483 | |
14484 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14485 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14486 | /// |
14487 | /// Rounding is done according to the rounding parameter, which can be one of: |
14488 | /// |
14489 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
14490 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
14491 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
14492 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
14493 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
14494 | /// |
14495 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu32) |
14496 | #[inline ] |
14497 | #[target_feature (enable = "avx512fp16" )] |
14498 | #[cfg_attr (test, assert_instr(vcvtph2udq, ROUNDING = 8))] |
14499 | #[rustc_legacy_const_generics (3)] |
14500 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14501 | pub fn _mm512_mask_cvt_roundph_epu32<const ROUNDING: i32>( |
14502 | src: __m512i, |
14503 | k: __mmask16, |
14504 | a: __m256h, |
14505 | ) -> __m512i { |
14506 | unsafe { |
14507 | static_assert_rounding!(ROUNDING); |
14508 | transmute(vcvtph2udq_512(a, src.as_u32x16(), k, ROUNDING)) |
14509 | } |
14510 | } |
14511 | |
14512 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14513 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14514 | /// |
14515 | /// Rounding is done according to the rounding parameter, which can be one of: |
14516 | /// |
14517 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
14518 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
14519 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
14520 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
14521 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
14522 | /// |
14523 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu32) |
14524 | #[inline ] |
14525 | #[target_feature (enable = "avx512fp16" )] |
14526 | #[cfg_attr (test, assert_instr(vcvtph2udq, ROUNDING = 8))] |
14527 | #[rustc_legacy_const_generics (2)] |
14528 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14529 | pub fn _mm512_maskz_cvt_roundph_epu32<const ROUNDING: i32>(k: __mmask16, a: __m256h) -> __m512i { |
14530 | static_assert_rounding!(ROUNDING); |
14531 | _mm512_mask_cvt_roundph_epu32::<ROUNDING>(_mm512_setzero_si512(), k, a) |
14532 | } |
14533 | |
14534 | /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer, and store |
14535 | /// the result in dst. |
14536 | /// |
14537 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_u32) |
14538 | #[inline ] |
14539 | #[target_feature (enable = "avx512fp16" )] |
14540 | #[cfg_attr (test, assert_instr(vcvtsh2usi))] |
14541 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14542 | pub fn _mm_cvtsh_u32(a: __m128h) -> u32 { |
14543 | unsafe { vcvtsh2usi32(a, _MM_FROUND_CUR_DIRECTION) } |
14544 | } |
14545 | |
14546 | /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer, and store |
14547 | /// the result in dst. |
14548 | /// |
14549 | /// Rounding is done according to the rounding parameter, which can be one of: |
14550 | /// |
14551 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
14552 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
14553 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
14554 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
14555 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
14556 | /// |
14557 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_u32) |
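///
/// A minimal usage sketch (not from Intel's documentation), assuming nightly Rust with the
/// unstable `stdarch_x86_avx512_f16` and `f16` features and an AVX512-FP16 CPU (marked `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16, f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() {
///     let a = _mm_set_sh(7.75);
///     // Force round-down; the default nearest-even mode would give 8.
///     let r = _mm_cvt_roundsh_u32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
///     assert_eq!(r, 7);
/// }
/// ```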
14558 | #[inline ] |
14559 | #[target_feature (enable = "avx512fp16" )] |
14560 | #[cfg_attr (test, assert_instr(vcvtsh2usi, ROUNDING = 8))] |
14561 | #[rustc_legacy_const_generics (1)] |
14562 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14563 | pub fn _mm_cvt_roundsh_u32<const ROUNDING: i32>(a: __m128h) -> u32 { |
14564 | unsafe { |
14565 | static_assert_rounding!(ROUNDING); |
14566 | vcvtsh2usi32(a, ROUNDING) |
14567 | } |
14568 | } |
14569 | |
14570 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14571 | /// store the results in dst. |
14572 | /// |
14573 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi32) |
14574 | #[inline ] |
14575 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14576 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14577 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14578 | pub fn _mm_cvttph_epi32(a: __m128h) -> __m128i { |
14579 | _mm_mask_cvttph_epi32(_mm_undefined_si128(), 0xff, a) |
14580 | } |
14581 | |
14582 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14583 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14584 | /// |
14585 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi32) |
14586 | #[inline ] |
14587 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14588 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14589 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14590 | pub fn _mm_mask_cvttph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
14591 | unsafe { transmute(vcvttph2dq_128(a, src.as_i32x4(), k)) } |
14592 | } |
14593 | |
14594 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14595 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14596 | /// |
14597 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi32) |
14598 | #[inline ] |
14599 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14600 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14601 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14602 | pub fn _mm_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m128i { |
14603 | _mm_mask_cvttph_epi32(_mm_setzero_si128(), k, a) |
14604 | } |
14605 | |
14606 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14607 | /// store the results in dst. |
14608 | /// |
14609 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi32) |
14610 | #[inline ] |
14611 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14612 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14613 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14614 | pub fn _mm256_cvttph_epi32(a: __m128h) -> __m256i { |
14615 | _mm256_mask_cvttph_epi32(_mm256_undefined_si256(), 0xff, a) |
14616 | } |
14617 | |
14618 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14619 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14620 | /// |
14621 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi32) |
14622 | #[inline ] |
14623 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14624 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14625 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14626 | pub fn _mm256_mask_cvttph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { |
14627 | unsafe { transmute(vcvttph2dq_256(a, src.as_i32x8(), k)) } |
14628 | } |
14629 | |
14630 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14631 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14632 | /// |
14633 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi32) |
14634 | #[inline ] |
14635 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14636 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14637 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14638 | pub fn _mm256_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m256i { |
14639 | _mm256_mask_cvttph_epi32(_mm256_setzero_si256(), k, a) |
14640 | } |
14641 | |
14642 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14643 | /// store the results in dst. |
14644 | /// |
14645 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi32) |
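///
/// A minimal usage sketch (not from Intel's documentation), assuming nightly Rust with the
/// unstable `stdarch_x86_avx512_f16` and `f16` features and an AVX512-FP16 CPU (marked `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16, f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() {
///     let a = _mm256_set1_ph(-3.75);
///     // 16 half-precision inputs widen to 16 truncated 32-bit integers.
///     let r = _mm512_cvttph_epi32(a);
///     let lanes: [i32; 16] = unsafe { core::mem::transmute(r) };
///     assert!(lanes.iter().all(|&x| x == -3));
/// }
/// ```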
14646 | #[inline ] |
14647 | #[target_feature (enable = "avx512fp16" )] |
14648 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14649 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14650 | pub fn _mm512_cvttph_epi32(a: __m256h) -> __m512i { |
14651 | _mm512_mask_cvttph_epi32(_mm512_undefined_epi32(), 0xffff, a) |
14652 | } |
14653 | |
14654 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14655 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14656 | /// |
14657 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi32) |
14658 | #[inline ] |
14659 | #[target_feature (enable = "avx512fp16" )] |
14660 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14661 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14662 | pub fn _mm512_mask_cvttph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i { |
14663 | unsafe { |
transmute(vcvttph2dq_512(
14665 | a, |
14666 | src.as_i32x16(), |
14667 | k, |
14668 | _MM_FROUND_CUR_DIRECTION, |
14669 | )) |
14670 | } |
14671 | } |
14672 | |
14673 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14674 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14675 | /// |
14676 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi32) |
14677 | #[inline ] |
14678 | #[target_feature (enable = "avx512fp16" )] |
14679 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14680 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14681 | pub fn _mm512_maskz_cvttph_epi32(k: __mmask16, a: __m256h) -> __m512i { |
_mm512_mask_cvttph_epi32(_mm512_setzero_si512(), k, a)
14683 | } |
14684 | |
14685 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14686 | /// store the results in dst. |
14687 | /// |
14688 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14689 | /// |
14690 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi32) |
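///
/// A minimal sketch of passing the `SAE` parameter (illustrative only; `demo` is a hypothetical
/// helper):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() {
///     let a = _mm256_set1_ph(3.99);
///     // Truncation still applies; _MM_FROUND_NO_EXC only suppresses exception reporting.
///     let r = _mm512_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(a);
/// }
/// ```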
14691 | #[inline ] |
14692 | #[target_feature (enable = "avx512fp16" )] |
14693 | #[cfg_attr (test, assert_instr(vcvttph2dq, SAE = 8))] |
14694 | #[rustc_legacy_const_generics (1)] |
14695 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14696 | pub fn _mm512_cvtt_roundph_epi32<const SAE: i32>(a: __m256h) -> __m512i { |
14697 | static_assert_sae!(SAE); |
_mm512_mask_cvtt_roundph_epi32::<SAE>(_mm512_undefined_epi32(), 0xffff, a)
14699 | } |
14700 | |
14701 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14702 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14703 | /// |
14704 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14705 | /// |
14706 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi32) |
14707 | #[inline ] |
14708 | #[target_feature (enable = "avx512fp16" )] |
14709 | #[cfg_attr (test, assert_instr(vcvttph2dq, SAE = 8))] |
14710 | #[rustc_legacy_const_generics (3)] |
14711 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14712 | pub fn _mm512_mask_cvtt_roundph_epi32<const SAE: i32>( |
14713 | src: __m512i, |
14714 | k: __mmask16, |
14715 | a: __m256h, |
14716 | ) -> __m512i { |
14717 | unsafe { |
14718 | static_assert_sae!(SAE); |
transmute(vcvttph2dq_512(a, src.as_i32x16(), k, SAE))
14720 | } |
14721 | } |
14722 | |
14723 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14724 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14725 | /// |
14726 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14727 | /// |
14728 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi32) |
14729 | #[inline ] |
14730 | #[target_feature (enable = "avx512fp16" )] |
14731 | #[cfg_attr (test, assert_instr(vcvttph2dq, SAE = 8))] |
14732 | #[rustc_legacy_const_generics (2)] |
14733 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14734 | pub fn _mm512_maskz_cvtt_roundph_epi32<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512i { |
14735 | static_assert_sae!(SAE); |
_mm512_mask_cvtt_roundph_epi32::<SAE>(_mm512_setzero_si512(), k, a)
14737 | } |
14738 | |
14739 | /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer with truncation, and store |
14740 | /// the result in dst. |
14741 | /// |
14742 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_i32) |
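///
/// A minimal sketch (illustrative only; `demo` is a hypothetical helper):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() {
///     let a = _mm_set_sh(-7.9);
///     // Truncation rounds toward zero, so the result is -7.
///     let r: i32 = _mm_cvttsh_i32(a);
/// }
/// ```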
14743 | #[inline ] |
14744 | #[target_feature (enable = "avx512fp16" )] |
14745 | #[cfg_attr (test, assert_instr(vcvttsh2si))] |
14746 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14747 | pub fn _mm_cvttsh_i32(a: __m128h) -> i32 { |
14748 | unsafe { vcvttsh2si32(a, _MM_FROUND_CUR_DIRECTION) } |
14749 | } |
14750 | |
14751 | /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer with truncation, and store |
14752 | /// the result in dst. |
14753 | /// |
14754 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14755 | /// |
14756 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_i32) |
14757 | #[inline ] |
14758 | #[target_feature (enable = "avx512fp16" )] |
14759 | #[cfg_attr (test, assert_instr(vcvttsh2si, SAE = 8))] |
14760 | #[rustc_legacy_const_generics (1)] |
14761 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14762 | pub fn _mm_cvtt_roundsh_i32<const SAE: i32>(a: __m128h) -> i32 { |
14763 | unsafe { |
14764 | static_assert_sae!(SAE); |
14765 | vcvttsh2si32(a, SAE) |
14766 | } |
14767 | } |
14768 | |
14769 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14770 | /// store the results in dst. |
14771 | /// |
14772 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu32) |
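///
/// A minimal sketch (illustrative only; `demo` is a hypothetical helper). Only the lower four
/// `f16` lanes of `a` participate, since the destination holds four 32-bit lanes:
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() {
///     let a = _mm_set1_ph(3.5);
///     // Each of the four unsigned 32-bit result lanes holds 3.
///     let r = _mm_cvttph_epu32(a);
/// }
/// ```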
14773 | #[inline ] |
14774 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14775 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14776 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14777 | pub fn _mm_cvttph_epu32(a: __m128h) -> __m128i { |
_mm_mask_cvttph_epu32(_mm_undefined_si128(), 0xff, a)
14779 | } |
14780 | |
14781 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14782 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14783 | /// |
14784 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu32) |
14785 | #[inline ] |
14786 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14787 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14788 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14789 | pub fn _mm_mask_cvttph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
unsafe { transmute(vcvttph2udq_128(a, src.as_u32x4(), k)) }
14791 | } |
14792 | |
14793 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14794 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14795 | /// |
14796 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu32) |
14797 | #[inline ] |
14798 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14799 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14800 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14801 | pub fn _mm_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m128i { |
_mm_mask_cvttph_epu32(_mm_setzero_si128(), k, a)
14803 | } |
14804 | |
14805 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14806 | /// store the results in dst. |
14807 | /// |
14808 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu32) |
14809 | #[inline ] |
14810 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14811 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14812 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14813 | pub fn _mm256_cvttph_epu32(a: __m128h) -> __m256i { |
_mm256_mask_cvttph_epu32(_mm256_undefined_si256(), 0xff, a)
14815 | } |
14816 | |
14817 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14818 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14819 | /// |
14820 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu32) |
14821 | #[inline ] |
14822 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14823 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14824 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14825 | pub fn _mm256_mask_cvttph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { |
unsafe { transmute(vcvttph2udq_256(a, src.as_u32x8(), k)) }
14827 | } |
14828 | |
14829 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14830 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14831 | /// |
14832 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu32) |
14833 | #[inline ] |
14834 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14835 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14836 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14837 | pub fn _mm256_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m256i { |
_mm256_mask_cvttph_epu32(_mm256_setzero_si256(), k, a)
14839 | } |
14840 | |
14841 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14842 | /// store the results in dst. |
14843 | /// |
14844 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu32) |
14845 | #[inline ] |
14846 | #[target_feature (enable = "avx512fp16" )] |
14847 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14848 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14849 | pub fn _mm512_cvttph_epu32(a: __m256h) -> __m512i { |
_mm512_mask_cvttph_epu32(_mm512_undefined_epi32(), 0xffff, a)
14851 | } |
14852 | |
14853 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14854 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14855 | /// |
14856 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu32) |
14857 | #[inline ] |
14858 | #[target_feature (enable = "avx512fp16" )] |
14859 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14860 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14861 | pub fn _mm512_mask_cvttph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i { |
14862 | unsafe { |
transmute(vcvttph2udq_512(
14864 | a, |
14865 | src.as_u32x16(), |
14866 | k, |
14867 | _MM_FROUND_CUR_DIRECTION, |
14868 | )) |
14869 | } |
14870 | } |
14871 | |
14872 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14873 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14874 | /// |
14875 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu32) |
14876 | #[inline ] |
14877 | #[target_feature (enable = "avx512fp16" )] |
14878 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14879 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14880 | pub fn _mm512_maskz_cvttph_epu32(k: __mmask16, a: __m256h) -> __m512i { |
_mm512_mask_cvttph_epu32(_mm512_setzero_si512(), k, a)
14882 | } |
14883 | |
14884 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14885 | /// store the results in dst. |
14886 | /// |
14887 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14888 | /// |
14889 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu32) |
14890 | #[inline ] |
14891 | #[target_feature (enable = "avx512fp16" )] |
14892 | #[cfg_attr (test, assert_instr(vcvttph2udq, SAE = 8))] |
14893 | #[rustc_legacy_const_generics (1)] |
14894 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14895 | pub fn _mm512_cvtt_roundph_epu32<const SAE: i32>(a: __m256h) -> __m512i { |
14896 | static_assert_sae!(SAE); |
_mm512_mask_cvtt_roundph_epu32::<SAE>(_mm512_undefined_epi32(), 0xffff, a)
14898 | } |
14899 | |
14900 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14901 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14902 | /// |
14903 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14904 | /// |
14905 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu32) |
14906 | #[inline ] |
14907 | #[target_feature (enable = "avx512fp16" )] |
14908 | #[cfg_attr (test, assert_instr(vcvttph2udq, SAE = 8))] |
14909 | #[rustc_legacy_const_generics (3)] |
14910 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14911 | pub fn _mm512_mask_cvtt_roundph_epu32<const SAE: i32>( |
14912 | src: __m512i, |
14913 | k: __mmask16, |
14914 | a: __m256h, |
14915 | ) -> __m512i { |
14916 | unsafe { |
14917 | static_assert_sae!(SAE); |
transmute(vcvttph2udq_512(a, src.as_u32x16(), k, SAE))
14919 | } |
14920 | } |
14921 | |
14922 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14923 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14924 | /// |
14925 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14926 | /// |
14927 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu32) |
14928 | #[inline ] |
14929 | #[target_feature (enable = "avx512fp16" )] |
14930 | #[cfg_attr (test, assert_instr(vcvttph2udq, SAE = 8))] |
14931 | #[rustc_legacy_const_generics (2)] |
14932 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14933 | pub fn _mm512_maskz_cvtt_roundph_epu32<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512i { |
14934 | static_assert_sae!(SAE); |
_mm512_mask_cvtt_roundph_epu32::<SAE>(_mm512_setzero_si512(), k, a)
14936 | } |
14937 | |
14938 | /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer with truncation, and store |
14939 | /// the result in dst. |
14940 | /// |
14941 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_u32) |
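///
/// A minimal sketch (illustrative only; `demo` is a hypothetical helper):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() {
///     let a = _mm_set_sh(65504.0); // largest finite f16 value
///     // Truncating an exactly representable integer is lossless: the result is 65504.
///     let r: u32 = _mm_cvttsh_u32(a);
/// }
/// ```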
14942 | #[inline ] |
14943 | #[target_feature (enable = "avx512fp16" )] |
14944 | #[cfg_attr (test, assert_instr(vcvttsh2usi))] |
14945 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14946 | pub fn _mm_cvttsh_u32(a: __m128h) -> u32 { |
14947 | unsafe { vcvttsh2usi32(a, _MM_FROUND_CUR_DIRECTION) } |
14948 | } |
14949 | |
14950 | /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer with truncation, and store |
14951 | /// the result in dst. |
14952 | /// |
14953 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14954 | /// |
14955 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_u32) |
14956 | #[inline ] |
14957 | #[target_feature (enable = "avx512fp16" )] |
14958 | #[cfg_attr (test, assert_instr(vcvttsh2usi, SAE = 8))] |
14959 | #[rustc_legacy_const_generics (1)] |
14960 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14961 | pub fn _mm_cvtt_roundsh_u32<const SAE: i32>(a: __m128h) -> u32 { |
14962 | unsafe { |
14963 | static_assert_sae!(SAE); |
14964 | vcvttsh2usi32(a, SAE) |
14965 | } |
14966 | } |
14967 | |
14968 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
14969 | /// store the results in dst. |
14970 | /// |
14971 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi64) |
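///
/// A minimal sketch (illustrative only; `demo` is a hypothetical helper). Rounding follows the
/// current MXCSR rounding mode, which defaults to round-to-nearest-even:
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() {
///     let a = _mm_set1_ph(1.5);
///     // Only the lower two f16 lanes are used; 1.5 rounds to 2 in both 64-bit lanes.
///     let r = _mm_cvtph_epi64(a);
/// }
/// ```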
14972 | #[inline ] |
14973 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14974 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
14975 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14976 | pub fn _mm_cvtph_epi64(a: __m128h) -> __m128i { |
_mm_mask_cvtph_epi64(_mm_undefined_si128(), 0xff, a)
14978 | } |
14979 | |
14980 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
14981 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14982 | /// |
14983 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi64) |
14984 | #[inline ] |
14985 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14986 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
14987 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14988 | pub fn _mm_mask_cvtph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
unsafe { transmute(vcvtph2qq_128(a, src.as_i64x2(), k)) }
14990 | } |
14991 | |
14992 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
14993 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14994 | /// |
14995 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi64) |
14996 | #[inline ] |
14997 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14998 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
14999 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15000 | pub fn _mm_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m128i { |
_mm_mask_cvtph_epi64(_mm_setzero_si128(), k, a)
15002 | } |
15003 | |
15004 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15005 | /// store the results in dst. |
15006 | /// |
15007 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi64) |
15008 | #[inline ] |
15009 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15010 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
15011 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15012 | pub fn _mm256_cvtph_epi64(a: __m128h) -> __m256i { |
_mm256_mask_cvtph_epi64(_mm256_undefined_si256(), 0xff, a)
15014 | } |
15015 | |
15016 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15017 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15018 | /// |
15019 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi64) |
15020 | #[inline ] |
15021 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15022 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
15023 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15024 | pub fn _mm256_mask_cvtph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { |
unsafe { transmute(vcvtph2qq_256(a, src.as_i64x4(), k)) }
15026 | } |
15027 | |
15028 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15029 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15030 | /// |
15031 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi64) |
15032 | #[inline ] |
15033 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15034 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
15035 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15036 | pub fn _mm256_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m256i { |
_mm256_mask_cvtph_epi64(_mm256_setzero_si256(), k, a)
15038 | } |
15039 | |
15040 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15041 | /// store the results in dst. |
15042 | /// |
15043 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi64) |
15044 | #[inline ] |
15045 | #[target_feature (enable = "avx512fp16" )] |
15046 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
15047 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15048 | pub fn _mm512_cvtph_epi64(a: __m128h) -> __m512i { |
_mm512_mask_cvtph_epi64(_mm512_undefined_epi32(), 0xff, a)
15050 | } |
15051 | |
15052 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15053 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15054 | /// |
15055 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi64) |
15056 | #[inline ] |
15057 | #[target_feature (enable = "avx512fp16" )] |
15058 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
15059 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15060 | pub fn _mm512_mask_cvtph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i { |
15061 | unsafe { |
transmute(vcvtph2qq_512(
15063 | a, |
15064 | src.as_i64x8(), |
15065 | k, |
15066 | _MM_FROUND_CUR_DIRECTION, |
15067 | )) |
15068 | } |
15069 | } |
15070 | |
15071 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15072 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15073 | /// |
15074 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi64) |
15075 | #[inline ] |
15076 | #[target_feature (enable = "avx512fp16" )] |
15077 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
15078 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15079 | pub fn _mm512_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m512i { |
_mm512_mask_cvtph_epi64(_mm512_setzero_si512(), k, a)
15081 | } |
15082 | |
15083 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15084 | /// store the results in dst. |
15085 | /// |
15086 | /// Rounding is done according to the rounding parameter, which can be one of: |
15087 | /// |
15088 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
15089 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
15090 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
15091 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
15092 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
15093 | /// |
15094 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi64) |
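///
/// A minimal sketch of combining a rounding mode with exception suppression (illustrative only;
/// `demo` is a hypothetical helper):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() {
///     let a = _mm_set1_ph(2.5);
///     // Round toward negative infinity: every 64-bit lane holds 2.
///     let r = _mm512_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
/// }
/// ```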
15095 | #[inline ] |
15096 | #[target_feature (enable = "avx512fp16" )] |
15097 | #[cfg_attr (test, assert_instr(vcvtph2qq, ROUNDING = 8))] |
15098 | #[rustc_legacy_const_generics (1)] |
15099 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15100 | pub fn _mm512_cvt_roundph_epi64<const ROUNDING: i32>(a: __m128h) -> __m512i { |
15101 | static_assert_rounding!(ROUNDING); |
_mm512_mask_cvt_roundph_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
15103 | } |
15104 | |
15105 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15106 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15107 | /// |
15108 | /// Rounding is done according to the rounding parameter, which can be one of: |
15109 | /// |
15110 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
15111 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
15112 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
15113 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
15114 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
15115 | /// |
15116 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi64) |
15117 | #[inline ] |
15118 | #[target_feature (enable = "avx512fp16" )] |
15119 | #[cfg_attr (test, assert_instr(vcvtph2qq, ROUNDING = 8))] |
15120 | #[rustc_legacy_const_generics (3)] |
15121 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15122 | pub fn _mm512_mask_cvt_roundph_epi64<const ROUNDING: i32>( |
15123 | src: __m512i, |
15124 | k: __mmask8, |
15125 | a: __m128h, |
15126 | ) -> __m512i { |
15127 | unsafe { |
15128 | static_assert_rounding!(ROUNDING); |
transmute(vcvtph2qq_512(a, src.as_i64x8(), k, ROUNDING))
15130 | } |
15131 | } |
15132 | |
15133 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15134 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15135 | /// |
15136 | /// Rounding is done according to the rounding parameter, which can be one of: |
15137 | /// |
15138 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
15139 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
15140 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
15141 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
15142 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
15143 | /// |
15144 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi64) |
15145 | #[inline ] |
15146 | #[target_feature (enable = "avx512fp16" )] |
15147 | #[cfg_attr (test, assert_instr(vcvtph2qq, ROUNDING = 8))] |
15148 | #[rustc_legacy_const_generics (2)] |
15149 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15150 | pub fn _mm512_maskz_cvt_roundph_epi64<const ROUNDING: i32>(k: __mmask8, a: __m128h) -> __m512i { |
15151 | static_assert_rounding!(ROUNDING); |
_mm512_mask_cvt_roundph_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a)
15153 | } |
15154 | |
15155 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15156 | /// store the results in dst. |
15157 | /// |
15158 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu64) |
15159 | #[inline ] |
15160 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15161 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15162 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15163 | pub fn _mm_cvtph_epu64(a: __m128h) -> __m128i { |
_mm_mask_cvtph_epu64(_mm_undefined_si128(), 0xff, a)
15165 | } |
15166 | |
15167 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15168 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15169 | /// |
15170 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu64) |
15171 | #[inline ] |
15172 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15173 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15174 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15175 | pub fn _mm_mask_cvtph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
unsafe { transmute(vcvtph2uqq_128(a, src.as_u64x2(), k)) }
15177 | } |
15178 | |
15179 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15180 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15181 | /// |
15182 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu64) |
15183 | #[inline ] |
15184 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15185 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15186 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15187 | pub fn _mm_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m128i { |
_mm_mask_cvtph_epu64(_mm_setzero_si128(), k, a)
15189 | } |
15190 | |
15191 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15192 | /// store the results in dst. |
15193 | /// |
15194 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu64) |
15195 | #[inline ] |
15196 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15197 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15198 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15199 | pub fn _mm256_cvtph_epu64(a: __m128h) -> __m256i { |
_mm256_mask_cvtph_epu64(_mm256_undefined_si256(), 0xff, a)
15201 | } |
15202 | |
15203 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15204 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15205 | /// |
15206 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu64) |
15207 | #[inline ] |
15208 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15209 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15210 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15211 | pub fn _mm256_mask_cvtph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { |
unsafe { transmute(vcvtph2uqq_256(a, src.as_u64x4(), k)) }
15213 | } |
15214 | |
15215 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15216 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15217 | /// |
15218 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu64) |
15219 | #[inline ] |
15220 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15221 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15222 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15223 | pub fn _mm256_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m256i { |
_mm256_mask_cvtph_epu64(_mm256_setzero_si256(), k, a)
15225 | } |
15226 | |
15227 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15228 | /// store the results in dst. |
15229 | /// |
15230 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu64) |
15231 | #[inline ] |
15232 | #[target_feature (enable = "avx512fp16" )] |
15233 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15234 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15235 | pub fn _mm512_cvtph_epu64(a: __m128h) -> __m512i { |
_mm512_mask_cvtph_epu64(_mm512_undefined_epi32(), 0xff, a)
15237 | } |
15238 | |
15239 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15240 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15241 | /// |
15242 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu64) |
15243 | #[inline ] |
15244 | #[target_feature (enable = "avx512fp16" )] |
15245 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15246 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15247 | pub fn _mm512_mask_cvtph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i { |
15248 | unsafe { |
transmute(vcvtph2uqq_512(
15250 | a, |
15251 | src.as_u64x8(), |
15252 | k, |
15253 | _MM_FROUND_CUR_DIRECTION, |
15254 | )) |
15255 | } |
15256 | } |
15257 | |
15258 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15259 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15260 | /// |
15261 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu64) |
15262 | #[inline ] |
15263 | #[target_feature (enable = "avx512fp16" )] |
15264 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15265 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15266 | pub fn _mm512_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m512i { |
_mm512_mask_cvtph_epu64(_mm512_setzero_si512(), k, a)
15268 | } |
15269 | |
15270 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15271 | /// store the results in dst. |
15272 | /// |
15273 | /// Rounding is done according to the rounding parameter, which can be one of: |
15274 | /// |
15275 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
15276 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
15277 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
15278 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
15279 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
15280 | /// |
15281 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu64) |
15282 | #[inline ] |
15283 | #[target_feature (enable = "avx512fp16" )] |
15284 | #[cfg_attr (test, assert_instr(vcvtph2uqq, ROUNDING = 8))] |
15285 | #[rustc_legacy_const_generics (1)] |
15286 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15287 | pub fn _mm512_cvt_roundph_epu64<const ROUNDING: i32>(a: __m128h) -> __m512i { |
15288 | static_assert_rounding!(ROUNDING); |
_mm512_mask_cvt_roundph_epu64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
15290 | } |
15291 | |
15292 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15293 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15294 | /// |
15295 | /// Rounding is done according to the rounding parameter, which can be one of: |
15296 | /// |
15297 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
15298 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
15299 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
15300 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
15301 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
15302 | /// |
15303 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu64) |
15304 | #[inline ] |
15305 | #[target_feature (enable = "avx512fp16" )] |
15306 | #[cfg_attr (test, assert_instr(vcvtph2uqq, ROUNDING = 8))] |
15307 | #[rustc_legacy_const_generics (3)] |
15308 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15309 | pub fn _mm512_mask_cvt_roundph_epu64<const ROUNDING: i32>( |
15310 | src: __m512i, |
15311 | k: __mmask8, |
15312 | a: __m128h, |
15313 | ) -> __m512i { |
15314 | unsafe { |
15315 | static_assert_rounding!(ROUNDING); |
transmute(vcvtph2uqq_512(a, src.as_u64x8(), k, ROUNDING))
15317 | } |
15318 | } |
15319 | |
15320 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15321 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15322 | /// |
15323 | /// Rounding is done according to the rounding parameter, which can be one of: |
15324 | /// |
15325 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
15326 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
15327 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
15328 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
15329 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
15330 | /// |
15331 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu64) |
15332 | #[inline ] |
15333 | #[target_feature (enable = "avx512fp16" )] |
15334 | #[cfg_attr (test, assert_instr(vcvtph2uqq, ROUNDING = 8))] |
15335 | #[rustc_legacy_const_generics (2)] |
15336 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15337 | pub fn _mm512_maskz_cvt_roundph_epu64<const ROUNDING: i32>(k: __mmask8, a: __m128h) -> __m512i { |
15338 | static_assert_rounding!(ROUNDING); |
_mm512_mask_cvt_roundph_epu64::<ROUNDING>(_mm512_setzero_si512(), k, a)
15340 | } |
15341 | |
15342 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15343 | /// store the results in dst. |
15344 | /// |
15345 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi64) |
15346 | #[inline ] |
15347 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15348 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15349 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15350 | pub fn _mm_cvttph_epi64(a: __m128h) -> __m128i { |
_mm_mask_cvttph_epi64(_mm_undefined_si128(), 0xff, a)
15352 | } |
15353 | |
15354 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15355 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15356 | /// |
15357 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi64) |
15358 | #[inline ] |
15359 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15360 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15361 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15362 | pub fn _mm_mask_cvttph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
unsafe { transmute(vcvttph2qq_128(a, src.as_i64x2(), k)) }
15364 | } |
15365 | |
15366 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15367 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15368 | /// |
15369 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi64) |
15370 | #[inline ] |
15371 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15372 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15373 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15374 | pub fn _mm_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m128i { |
_mm_mask_cvttph_epi64(_mm_setzero_si128(), k, a)
15376 | } |
15377 | |
15378 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15379 | /// store the results in dst. |
15380 | /// |
15381 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi64) |
15382 | #[inline ] |
15383 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15384 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15385 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15386 | pub fn _mm256_cvttph_epi64(a: __m128h) -> __m256i { |
_mm256_mask_cvttph_epi64(_mm256_undefined_si256(), 0xff, a)
15388 | } |
15389 | |
15390 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15391 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15392 | /// |
15393 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi64) |
15394 | #[inline ] |
15395 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15396 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15397 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15398 | pub fn _mm256_mask_cvttph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { |
unsafe { transmute(vcvttph2qq_256(a, src.as_i64x4(), k)) }
15400 | } |
15401 | |
15402 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15403 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15404 | /// |
15405 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi64) |
15406 | #[inline ] |
15407 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15408 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15409 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15410 | pub fn _mm256_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m256i { |
_mm256_mask_cvttph_epi64(_mm256_setzero_si256(), k, a)
15412 | } |
15413 | |
15414 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15415 | /// store the results in dst. |
15416 | /// |
15417 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi64) |
15418 | #[inline ] |
15419 | #[target_feature (enable = "avx512fp16" )] |
15420 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15421 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15422 | pub fn _mm512_cvttph_epi64(a: __m128h) -> __m512i { |
_mm512_mask_cvttph_epi64(_mm512_undefined_epi32(), 0xff, a)
15424 | } |
15425 | |
15426 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15427 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15428 | /// |
15429 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi64) |
15430 | #[inline ] |
15431 | #[target_feature (enable = "avx512fp16" )] |
15432 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15433 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15434 | pub fn _mm512_mask_cvttph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i { |
15435 | unsafe { |
transmute(vcvttph2qq_512(
15437 | a, |
15438 | src.as_i64x8(), |
15439 | k, |
15440 | _MM_FROUND_CUR_DIRECTION, |
15441 | )) |
15442 | } |
15443 | } |
15444 | |
15445 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15446 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15447 | /// |
15448 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi64) |
15449 | #[inline ] |
15450 | #[target_feature (enable = "avx512fp16" )] |
15451 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15452 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15453 | pub fn _mm512_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m512i { |
_mm512_mask_cvttph_epi64(_mm512_setzero_si512(), k, a)
15455 | } |
15456 | |
15457 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15458 | /// store the results in dst. |
15459 | /// |
/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
15461 | /// |
15462 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi64) |
15463 | #[inline ] |
15464 | #[target_feature (enable = "avx512fp16" )] |
15465 | #[cfg_attr (test, assert_instr(vcvttph2qq, SAE = 8))] |
15466 | #[rustc_legacy_const_generics (1)] |
15467 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15468 | pub fn _mm512_cvtt_roundph_epi64<const SAE: i32>(a: __m128h) -> __m512i { |
15469 | static_assert_sae!(SAE); |
_mm512_mask_cvtt_roundph_epi64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
15471 | } |
15472 | |
15473 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15474 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15475 | /// |
/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
15477 | /// |
15478 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi64) |
15479 | #[inline ] |
15480 | #[target_feature (enable = "avx512fp16" )] |
15481 | #[cfg_attr (test, assert_instr(vcvttph2qq, SAE = 8))] |
15482 | #[rustc_legacy_const_generics (3)] |
15483 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15484 | pub fn _mm512_mask_cvtt_roundph_epi64<const SAE: i32>( |
15485 | src: __m512i, |
15486 | k: __mmask8, |
15487 | a: __m128h, |
15488 | ) -> __m512i { |
15489 | unsafe { |
15490 | static_assert_sae!(SAE); |
transmute(vcvttph2qq_512(a, src.as_i64x8(), k, SAE))
15492 | } |
15493 | } |
15494 | |
15495 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15496 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15497 | /// |
/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
15499 | /// |
15500 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi64) |
15501 | #[inline ] |
15502 | #[target_feature (enable = "avx512fp16" )] |
15503 | #[cfg_attr (test, assert_instr(vcvttph2qq, SAE = 8))] |
15504 | #[rustc_legacy_const_generics (2)] |
15505 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15506 | pub fn _mm512_maskz_cvtt_roundph_epi64<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512i { |
15507 | static_assert_sae!(SAE); |
_mm512_mask_cvtt_roundph_epi64::<SAE>(_mm512_setzero_si512(), k, a)
15509 | } |
15510 | |
15511 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15512 | /// store the results in dst. |
15513 | /// |
15514 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu64) |
15515 | #[inline ] |
15516 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15517 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15518 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15519 | pub fn _mm_cvttph_epu64(a: __m128h) -> __m128i { |
_mm_mask_cvttph_epu64(_mm_undefined_si128(), 0xff, a)
15521 | } |
15522 | |
15523 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15524 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15525 | /// |
15526 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu64) |
15527 | #[inline ] |
15528 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15529 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15530 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15531 | pub fn _mm_mask_cvttph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
unsafe { transmute(vcvttph2uqq_128(a, src.as_u64x2(), k)) }
15533 | } |
15534 | |
15535 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15536 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15537 | /// |
15538 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu64) |
15539 | #[inline ] |
15540 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15541 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15542 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15543 | pub fn _mm_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m128i { |
_mm_mask_cvttph_epu64(_mm_setzero_si128(), k, a)
15545 | } |
15546 | |
15547 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15548 | /// store the results in dst. |
15549 | /// |
15550 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu64) |
15551 | #[inline ] |
15552 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15553 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15554 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15555 | pub fn _mm256_cvttph_epu64(a: __m128h) -> __m256i { |
_mm256_mask_cvttph_epu64(_mm256_undefined_si256(), 0xff, a)
15557 | } |
15558 | |
15559 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15560 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15561 | /// |
15562 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu64) |
15563 | #[inline ] |
15564 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15565 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15566 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15567 | pub fn _mm256_mask_cvttph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { |
unsafe { transmute(vcvttph2uqq_256(a, src.as_u64x4(), k)) }
15569 | } |
15570 | |
15571 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15572 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15573 | /// |
15574 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu64) |
15575 | #[inline ] |
15576 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15577 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15578 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15579 | pub fn _mm256_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m256i { |
_mm256_mask_cvttph_epu64(_mm256_setzero_si256(), k, a)
15581 | } |
15582 | |
15583 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15584 | /// store the results in dst. |
15585 | /// |
15586 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu64) |
15587 | #[inline ] |
15588 | #[target_feature (enable = "avx512fp16" )] |
15589 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15590 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15591 | pub fn _mm512_cvttph_epu64(a: __m128h) -> __m512i { |
_mm512_mask_cvttph_epu64(_mm512_undefined_epi32(), 0xff, a)
15593 | } |
15594 | |
15595 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15596 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15597 | /// |
15598 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu64) |
15599 | #[inline ] |
15600 | #[target_feature (enable = "avx512fp16" )] |
15601 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15602 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15603 | pub fn _mm512_mask_cvttph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i { |
15604 | unsafe { |
transmute(vcvttph2uqq_512(
15606 | a, |
15607 | src.as_u64x8(), |
15608 | k, |
15609 | _MM_FROUND_CUR_DIRECTION, |
15610 | )) |
15611 | } |
15612 | } |
15613 | |
15614 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15615 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15616 | /// |
15617 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu64) |
15618 | #[inline ] |
15619 | #[target_feature (enable = "avx512fp16" )] |
15620 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15621 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15622 | pub fn _mm512_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m512i { |
_mm512_mask_cvttph_epu64(_mm512_setzero_si512(), k, a)
15624 | } |
15625 | |
15626 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15627 | /// store the results in dst. |
15628 | /// |
15629 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
15630 | /// |
15631 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu64) |
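///
/// A hedged sketch of passing the SAE constant (illustrative values only):
///
/// ```ignore
/// let a = _mm_set1_ph(9.9);
/// let r = _mm512_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(a);
/// // every u64 lane holds 9 (truncated), and floating-point exceptions are suppressed
/// ```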
15632 | #[inline ] |
15633 | #[target_feature (enable = "avx512fp16" )] |
15634 | #[cfg_attr (test, assert_instr(vcvttph2uqq, SAE = 8))] |
15635 | #[rustc_legacy_const_generics (1)] |
15636 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15637 | pub fn _mm512_cvtt_roundph_epu64<const SAE: i32>(a: __m128h) -> __m512i { |
15638 | static_assert_sae!(SAE); |
_mm512_mask_cvtt_roundph_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
15640 | } |
15641 | |
15642 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15643 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15644 | /// |
15645 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
15646 | /// |
15647 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu64) |
15648 | #[inline ] |
15649 | #[target_feature (enable = "avx512fp16" )] |
15650 | #[cfg_attr (test, assert_instr(vcvttph2uqq, SAE = 8))] |
15651 | #[rustc_legacy_const_generics (3)] |
15652 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15653 | pub fn _mm512_mask_cvtt_roundph_epu64<const SAE: i32>( |
15654 | src: __m512i, |
15655 | k: __mmask8, |
15656 | a: __m128h, |
15657 | ) -> __m512i { |
15658 | unsafe { |
15659 | static_assert_sae!(SAE); |
transmute(vcvttph2uqq_512(a, src.as_u64x8(), k, SAE))
15661 | } |
15662 | } |
15663 | |
15664 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15665 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15666 | /// |
15667 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
15668 | /// |
15669 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu64) |
15670 | #[inline ] |
15671 | #[target_feature (enable = "avx512fp16" )] |
15672 | #[cfg_attr (test, assert_instr(vcvttph2uqq, SAE = 8))] |
15673 | #[rustc_legacy_const_generics (2)] |
15674 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15675 | pub fn _mm512_maskz_cvtt_roundph_epu64<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512i { |
15676 | static_assert_sae!(SAE); |
_mm512_mask_cvtt_roundph_epu64::<SAE>(_mm512_setzero_si512(), k, a)
15678 | } |
15679 | |
15680 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15681 | /// floating-point elements, and store the results in dst. |
15682 | /// |
15683 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxph_ps) |
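///
/// A minimal usage sketch (illustrative values only):
///
/// ```ignore
/// let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 4.0, 3.0, 2.0, 1.0);
/// let r = _mm_cvtxph_ps(a);
/// // r = [1.0f32, 2.0, 3.0, 4.0], converted from the lower four half-precision lanes of a
/// ```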
15684 | #[inline ] |
15685 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15686 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15687 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15688 | pub fn _mm_cvtxph_ps(a: __m128h) -> __m128 { |
_mm_mask_cvtxph_ps(_mm_setzero_ps(), 0xff, a)
15690 | } |
15691 | |
15692 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15693 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to |
15694 | /// dst when the corresponding mask bit is not set). |
15695 | /// |
15696 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxph_ps) |
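///
/// A sketch of the writemask behavior (illustrative values only):
///
/// ```ignore
/// let src = _mm_set1_ps(-1.0);
/// let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 4.0, 3.0, 2.0, 1.0);
/// let r = _mm_mask_cvtxph_ps(src, 0b0101, a);
/// // lanes 0 and 2 are converted (1.0 and 3.0); lanes 1 and 3 keep -1.0 from src
/// ```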
15697 | #[inline ] |
15698 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15699 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15700 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15701 | pub fn _mm_mask_cvtxph_ps(src: __m128, k: __mmask8, a: __m128h) -> __m128 { |
15702 | unsafe { vcvtph2psx_128(a, src, k) } |
15703 | } |
15704 | |
15705 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15706 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
15707 | /// corresponding mask bit is not set). |
15708 | /// |
15709 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxph_ps) |
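///
/// A sketch of the zeromask behavior (illustrative values only):
///
/// ```ignore
/// let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 4.0, 3.0, 2.0, 1.0);
/// let r = _mm_maskz_cvtxph_ps(0b0011, a);
/// // r = [1.0f32, 2.0, 0.0, 0.0]; lanes with a cleared mask bit are zeroed
/// ```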
15710 | #[inline ] |
15711 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15712 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15713 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15714 | pub fn _mm_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m128 { |
_mm_mask_cvtxph_ps(_mm_setzero_ps(), k, a)
15716 | } |
15717 | |
15718 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15719 | /// floating-point elements, and store the results in dst. |
15720 | /// |
15721 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxph_ps) |
15722 | #[inline ] |
15723 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15724 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15725 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15726 | pub fn _mm256_cvtxph_ps(a: __m128h) -> __m256 { |
_mm256_mask_cvtxph_ps(_mm256_setzero_ps(), 0xff, a)
15728 | } |
15729 | |
15730 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15731 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to |
15732 | /// dst when the corresponding mask bit is not set). |
15733 | /// |
15734 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxph_ps) |
15735 | #[inline ] |
15736 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15737 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15738 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15739 | pub fn _mm256_mask_cvtxph_ps(src: __m256, k: __mmask8, a: __m128h) -> __m256 { |
15740 | unsafe { vcvtph2psx_256(a, src, k) } |
15741 | } |
15742 | |
15743 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15744 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
15745 | /// corresponding mask bit is not set). |
15746 | /// |
15747 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxph_ps) |
15748 | #[inline ] |
15749 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15750 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15751 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15752 | pub fn _mm256_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m256 { |
_mm256_mask_cvtxph_ps(_mm256_setzero_ps(), k, a)
15754 | } |
15755 | |
15756 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15757 | /// floating-point elements, and store the results in dst. |
15758 | /// |
15759 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxph_ps) |
15760 | #[inline ] |
15761 | #[target_feature (enable = "avx512fp16" )] |
15762 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15763 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15764 | pub fn _mm512_cvtxph_ps(a: __m256h) -> __m512 { |
_mm512_mask_cvtxph_ps(_mm512_setzero_ps(), 0xffff, a)
15766 | } |
15767 | |
15768 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15769 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to |
15770 | /// dst when the corresponding mask bit is not set). |
15771 | /// |
15772 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxph_ps) |
15773 | #[inline ] |
15774 | #[target_feature (enable = "avx512fp16" )] |
15775 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15776 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15777 | pub fn _mm512_mask_cvtxph_ps(src: __m512, k: __mmask16, a: __m256h) -> __m512 { |
15778 | unsafe { vcvtph2psx_512(a, src, k, _MM_FROUND_CUR_DIRECTION) } |
15779 | } |
15780 | |
15781 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15782 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
15783 | /// corresponding mask bit is not set). |
15784 | /// |
15785 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxph_ps) |
15786 | #[inline ] |
15787 | #[target_feature (enable = "avx512fp16" )] |
15788 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15789 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15790 | pub fn _mm512_maskz_cvtxph_ps(k: __mmask16, a: __m256h) -> __m512 { |
_mm512_mask_cvtxph_ps(_mm512_setzero_ps(), k, a)
15792 | } |
15793 | |
15794 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15795 | /// floating-point elements, and store the results in dst. |
15796 | /// |
15797 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
15798 | /// |
15799 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundph_ps) |
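///
/// A hedged sketch of suppressing exceptions via the SAE constant (illustrative values only):
///
/// ```ignore
/// let a = _mm256_set1_ph(3.5);
/// let r = _mm512_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(a);
/// // all sixteen single-precision lanes hold 3.5
/// ```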
15800 | #[inline ] |
15801 | #[target_feature (enable = "avx512fp16" )] |
15802 | #[cfg_attr (test, assert_instr(vcvtph2psx, SAE = 8))] |
15803 | #[rustc_legacy_const_generics (1)] |
15804 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15805 | pub fn _mm512_cvtx_roundph_ps<const SAE: i32>(a: __m256h) -> __m512 { |
15806 | static_assert_sae!(SAE); |
_mm512_mask_cvtx_roundph_ps::<SAE>(_mm512_setzero_ps(), 0xffff, a)
15808 | } |
15809 | |
15810 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15811 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to |
15812 | /// dst when the corresponding mask bit is not set). |
15813 | /// |
15814 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
15815 | /// |
15816 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundph_ps) |
15817 | #[inline ] |
15818 | #[target_feature (enable = "avx512fp16" )] |
15819 | #[cfg_attr (test, assert_instr(vcvtph2psx, SAE = 8))] |
15820 | #[rustc_legacy_const_generics (3)] |
15821 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15822 | pub fn _mm512_mask_cvtx_roundph_ps<const SAE: i32>( |
15823 | src: __m512, |
15824 | k: __mmask16, |
15825 | a: __m256h, |
15826 | ) -> __m512 { |
15827 | unsafe { |
15828 | static_assert_sae!(SAE); |
15829 | vcvtph2psx_512(a, src, k, SAE) |
15830 | } |
15831 | } |
15832 | |
15833 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15834 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
15835 | /// corresponding mask bit is not set). |
15836 | /// |
15837 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
15838 | /// |
15839 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundph_ps) |
15840 | #[inline ] |
15841 | #[target_feature (enable = "avx512fp16" )] |
15842 | #[cfg_attr (test, assert_instr(vcvtph2psx, SAE = 8))] |
15843 | #[rustc_legacy_const_generics (2)] |
15844 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15845 | pub fn _mm512_maskz_cvtx_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512 { |
15846 | static_assert_sae!(SAE); |
_mm512_mask_cvtx_roundph_ps::<SAE>(_mm512_setzero_ps(), k, a)
15848 | } |
15849 | |
15850 | /// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) |
15851 | /// floating-point element, store the result in the lower element of dst, and copy the upper 3 packed |
15852 | /// elements from a to the upper elements of dst. |
15853 | /// |
15854 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_ss) |
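///
/// A minimal usage sketch (illustrative values only):
///
/// ```ignore
/// let a = _mm_setr_ps(10.0, 20.0, 30.0, 40.0);
/// let b = _mm_set_sh(1.5);
/// let r = _mm_cvtsh_ss(a, b);
/// // r = [1.5f32, 20.0, 30.0, 40.0]: the low lane comes from b, the rest from a
/// ```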
15855 | #[inline ] |
15856 | #[target_feature (enable = "avx512fp16" )] |
15857 | #[cfg_attr (test, assert_instr(vcvtsh2ss))] |
15858 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15859 | pub fn _mm_cvtsh_ss(a: __m128, b: __m128h) -> __m128 { |
_mm_mask_cvtsh_ss(a, 0xff, a, b)
15861 | } |
15862 | |
15863 | /// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) |
15864 | /// floating-point element, store the result in the lower element of dst using writemask k (the element is |
15865 | /// copied from src to dst when mask bit 0 is not set), and copy the upper 3 packed elements from a to the |
15866 | /// upper elements of dst. |
15867 | /// |
15868 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_ss) |
15869 | #[inline ] |
15870 | #[target_feature (enable = "avx512fp16" )] |
15871 | #[cfg_attr (test, assert_instr(vcvtsh2ss))] |
15872 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15873 | pub fn _mm_mask_cvtsh_ss(src: __m128, k: __mmask8, a: __m128, b: __m128h) -> __m128 { |
15874 | unsafe { vcvtsh2ss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) } |
15875 | } |
15876 | |
15877 | /// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) |
15878 | /// floating-point element, store the result in the lower element of dst using zeromask k (the element is |
15879 | /// zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements |
15880 | /// of dst. |
15881 | /// |
15882 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_ss) |
15883 | #[inline ] |
15884 | #[target_feature (enable = "avx512fp16" )] |
15885 | #[cfg_attr (test, assert_instr(vcvtsh2ss))] |
15886 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15887 | pub fn _mm_maskz_cvtsh_ss(k: __mmask8, a: __m128, b: __m128h) -> __m128 { |
_mm_mask_cvtsh_ss(_mm_setzero_ps(), k, a, b)
15889 | } |
15890 | |
15891 | /// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) |
15892 | /// floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements |
15893 | /// from a to the upper elements of dst. |
15894 | /// |
15895 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
15896 | /// |
15897 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_ss) |
15898 | #[inline ] |
15899 | #[target_feature (enable = "avx512fp16" )] |
15900 | #[cfg_attr (test, assert_instr(vcvtsh2ss, SAE = 8))] |
15901 | #[rustc_legacy_const_generics (2)] |
15902 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15903 | pub fn _mm_cvt_roundsh_ss<const SAE: i32>(a: __m128, b: __m128h) -> __m128 { |
15904 | static_assert_sae!(SAE); |
_mm_mask_cvt_roundsh_ss::<SAE>(_mm_undefined_ps(), 0xff, a, b)
15906 | } |
15907 | |
15908 | /// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) |
15909 | /// floating-point element, store the result in the lower element of dst using writemask k (the element is |
15910 | /// copied from src to dst when mask bit 0 is not set), and copy the upper 3 packed elements from a to the |
15911 | /// upper elements of dst. |
15912 | /// |
15913 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
15914 | /// |
15915 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_ss) |
15916 | #[inline ] |
15917 | #[target_feature (enable = "avx512fp16" )] |
15918 | #[cfg_attr (test, assert_instr(vcvtsh2ss, SAE = 8))] |
15919 | #[rustc_legacy_const_generics (4)] |
15920 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15921 | pub fn _mm_mask_cvt_roundsh_ss<const SAE: i32>( |
15922 | src: __m128, |
15923 | k: __mmask8, |
15924 | a: __m128, |
15925 | b: __m128h, |
15926 | ) -> __m128 { |
15927 | unsafe { |
15928 | static_assert_sae!(SAE); |
15929 | vcvtsh2ss(a, b, src, k, SAE) |
15930 | } |
15931 | } |
15932 | |
15933 | /// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) |
15934 | /// floating-point element, store the result in the lower element of dst using zeromask k (the element is |
15935 | /// zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements |
15936 | /// of dst. |
15937 | /// |
15938 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
15939 | /// |
15940 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_ss) |
15941 | #[inline ] |
15942 | #[target_feature (enable = "avx512fp16" )] |
15943 | #[cfg_attr (test, assert_instr(vcvtsh2ss, SAE = 8))] |
15944 | #[rustc_legacy_const_generics (3)] |
15945 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15946 | pub fn _mm_maskz_cvt_roundsh_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128h) -> __m128 { |
15947 | static_assert_sae!(SAE); |
_mm_mask_cvt_roundsh_ss::<SAE>(_mm_setzero_ps(), k, a, b)
15949 | } |
15950 | |
15951 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
15952 | /// floating-point elements, and store the results in dst. |
15953 | /// |
15954 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_pd) |
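///
/// A minimal usage sketch (illustrative values only):
///
/// ```ignore
/// let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.5, 1.25);
/// let r = _mm_cvtph_pd(a);
/// // r = [1.25f64, 2.5f64], converted from the two lowest half-precision lanes of a
/// ```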
15955 | #[inline ] |
15956 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15957 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
15958 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15959 | pub fn _mm_cvtph_pd(a: __m128h) -> __m128d { |
_mm_mask_cvtph_pd(_mm_setzero_pd(), 0xff, a)
15961 | } |
15962 | |
15963 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
15964 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to |
15965 | /// dst when the corresponding mask bit is not set). |
15966 | /// |
15967 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_pd) |
15968 | #[inline ] |
15969 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15970 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
15971 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15972 | pub fn _mm_mask_cvtph_pd(src: __m128d, k: __mmask8, a: __m128h) -> __m128d { |
15973 | unsafe { vcvtph2pd_128(a, src, k) } |
15974 | } |
15975 | |
15976 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
15977 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
15978 | /// corresponding mask bit is not set). |
15979 | /// |
15980 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_pd) |
15981 | #[inline ] |
15982 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15983 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
15984 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15985 | pub fn _mm_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m128d { |
_mm_mask_cvtph_pd(_mm_setzero_pd(), k, a)
15987 | } |
15988 | |
15989 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
15990 | /// floating-point elements, and store the results in dst. |
15991 | /// |
15992 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_pd) |
15993 | #[inline ] |
15994 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15995 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
15996 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15997 | pub fn _mm256_cvtph_pd(a: __m128h) -> __m256d { |
_mm256_mask_cvtph_pd(_mm256_setzero_pd(), 0xff, a)
15999 | } |
16000 | |
16001 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
16002 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to |
16003 | /// dst when the corresponding mask bit is not set). |
16004 | /// |
16005 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_pd) |
16006 | #[inline ] |
16007 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
16008 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
16009 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16010 | pub fn _mm256_mask_cvtph_pd(src: __m256d, k: __mmask8, a: __m128h) -> __m256d { |
16011 | unsafe { vcvtph2pd_256(a, src, k) } |
16012 | } |
16013 | |
16014 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
16015 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
16016 | /// corresponding mask bit is not set). |
16017 | /// |
16018 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_pd) |
16019 | #[inline ] |
16020 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
16021 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
16022 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16023 | pub fn _mm256_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m256d { |
_mm256_mask_cvtph_pd(_mm256_setzero_pd(), k, a)
16025 | } |
16026 | |
16027 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
16028 | /// floating-point elements, and store the results in dst. |
16029 | /// |
16030 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_pd) |
16031 | #[inline ] |
16032 | #[target_feature (enable = "avx512fp16" )] |
16033 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
16034 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16035 | pub fn _mm512_cvtph_pd(a: __m128h) -> __m512d { |
_mm512_mask_cvtph_pd(_mm512_setzero_pd(), 0xff, a)
16037 | } |
16038 | |
16039 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
16040 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to |
16041 | /// dst when the corresponding mask bit is not set). |
16042 | /// |
16043 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_pd) |
16044 | #[inline ] |
16045 | #[target_feature (enable = "avx512fp16" )] |
16046 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
16047 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16048 | pub fn _mm512_mask_cvtph_pd(src: __m512d, k: __mmask8, a: __m128h) -> __m512d { |
16049 | unsafe { vcvtph2pd_512(a, src, k, _MM_FROUND_CUR_DIRECTION) } |
16050 | } |
16051 | |
16052 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
16053 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
16054 | /// corresponding mask bit is not set). |
16055 | /// |
16056 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_pd) |
16057 | #[inline ] |
16058 | #[target_feature (enable = "avx512fp16" )] |
16059 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
16060 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16061 | pub fn _mm512_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m512d { |
_mm512_mask_cvtph_pd(_mm512_setzero_pd(), k, a)
16063 | } |
16064 | |
16065 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
16066 | /// floating-point elements, and store the results in dst. |
16067 | /// |
16068 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
16069 | /// |
16070 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_pd) |
16071 | #[inline ] |
16072 | #[target_feature (enable = "avx512fp16" )] |
16073 | #[cfg_attr (test, assert_instr(vcvtph2pd, SAE = 8))] |
16074 | #[rustc_legacy_const_generics (1)] |
16075 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16076 | pub fn _mm512_cvt_roundph_pd<const SAE: i32>(a: __m128h) -> __m512d { |
16077 | static_assert_sae!(SAE); |
_mm512_mask_cvt_roundph_pd::<SAE>(_mm512_setzero_pd(), 0xff, a)
16079 | } |
16080 | |
16081 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
16082 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to |
16083 | /// dst when the corresponding mask bit is not set). |
16084 | /// |
16085 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
16086 | /// |
16087 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_pd) |
16088 | #[inline ] |
16089 | #[target_feature (enable = "avx512fp16" )] |
16090 | #[cfg_attr (test, assert_instr(vcvtph2pd, SAE = 8))] |
16091 | #[rustc_legacy_const_generics (3)] |
16092 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16093 | pub fn _mm512_mask_cvt_roundph_pd<const SAE: i32>( |
16094 | src: __m512d, |
16095 | k: __mmask8, |
16096 | a: __m128h, |
16097 | ) -> __m512d { |
16098 | unsafe { |
16099 | static_assert_sae!(SAE); |
16100 | vcvtph2pd_512(a, src, k, SAE) |
16101 | } |
16102 | } |
16103 | |
16104 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
16105 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
16106 | /// corresponding mask bit is not set). |
16107 | /// |
16108 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
16109 | /// |
16110 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_pd) |
16111 | #[inline ] |
16112 | #[target_feature (enable = "avx512fp16" )] |
16113 | #[cfg_attr (test, assert_instr(vcvtph2pd, SAE = 8))] |
16114 | #[rustc_legacy_const_generics (2)] |
16115 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16116 | pub fn _mm512_maskz_cvt_roundph_pd<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512d { |
16117 | static_assert_sae!(SAE); |
_mm512_mask_cvt_roundph_pd::<SAE>(_mm512_setzero_pd(), k, a)
16119 | } |
16120 | |
16121 | /// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) |
16122 | /// floating-point element, store the result in the lower element of dst, and copy the upper element |
16123 | /// from a to the upper element of dst. |
16124 | /// |
16125 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_sd) |
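///
/// A minimal usage sketch (illustrative values only):
///
/// ```ignore
/// let a = _mm_setr_pd(10.0, 20.0);
/// let b = _mm_set_sh(0.5);
/// let r = _mm_cvtsh_sd(a, b);
/// // r = [0.5f64, 20.0]: the low lane comes from b, the high lane from a
/// ```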
16126 | #[inline ] |
16127 | #[target_feature (enable = "avx512fp16" )] |
16128 | #[cfg_attr (test, assert_instr(vcvtsh2sd))] |
16129 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16130 | pub fn _mm_cvtsh_sd(a: __m128d, b: __m128h) -> __m128d { |
_mm_mask_cvtsh_sd(a, 0xff, a, b)
16132 | } |
16133 | |
16134 | /// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) |
16135 | /// floating-point element, store the result in the lower element of dst using writemask k (the element is |
16136 | /// copied from src to dst when mask bit 0 is not set), and copy the upper element from a to the upper element |
16137 | /// of dst. |
16138 | /// |
16139 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_sd) |
16140 | #[inline ] |
16141 | #[target_feature (enable = "avx512fp16" )] |
16142 | #[cfg_attr (test, assert_instr(vcvtsh2sd))] |
16143 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16144 | pub fn _mm_mask_cvtsh_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128h) -> __m128d { |
16145 | unsafe { vcvtsh2sd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) } |
16146 | } |
16147 | |
16148 | /// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) |
16149 | /// floating-point element, store the result in the lower element of dst using zeromask k (the element is |
16150 | /// zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. |
16151 | /// |
16152 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_sd) |
16153 | #[inline ] |
16154 | #[target_feature (enable = "avx512fp16" )] |
16155 | #[cfg_attr (test, assert_instr(vcvtsh2sd))] |
16156 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16157 | pub fn _mm_maskz_cvtsh_sd(k: __mmask8, a: __m128d, b: __m128h) -> __m128d { |
_mm_mask_cvtsh_sd(_mm_setzero_pd(), k, a, b)
16159 | } |
16160 | |
16161 | /// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) |
16162 | /// floating-point element, store the result in the lower element of dst, and copy the upper element from a |
16163 | /// to the upper element of dst. |
16164 | /// |
16165 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
16166 | /// |
16167 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_sd) |
16168 | #[inline ] |
16169 | #[target_feature (enable = "avx512fp16" )] |
16170 | #[cfg_attr (test, assert_instr(vcvtsh2sd, SAE = 8))] |
16171 | #[rustc_legacy_const_generics (2)] |
16172 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16173 | pub fn _mm_cvt_roundsh_sd<const SAE: i32>(a: __m128d, b: __m128h) -> __m128d { |
16174 | static_assert_sae!(SAE); |
_mm_mask_cvt_roundsh_sd::<SAE>(a, 0xff, a, b)
16176 | } |
16177 | |
16178 | /// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) |
16179 | /// floating-point element, store the result in the lower element of dst using writemask k (the element is |
16180 | /// copied from src to dst when mask bit 0 is not set), and copy the upper element from a to the upper element |
16181 | /// of dst. |
16182 | /// |
16183 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
16184 | /// |
16185 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_sd) |
16186 | #[inline ] |
16187 | #[target_feature (enable = "avx512fp16" )] |
16188 | #[cfg_attr (test, assert_instr(vcvtsh2sd, SAE = 8))] |
16189 | #[rustc_legacy_const_generics (4)] |
16190 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16191 | pub fn _mm_mask_cvt_roundsh_sd<const SAE: i32>( |
16192 | src: __m128d, |
16193 | k: __mmask8, |
16194 | a: __m128d, |
16195 | b: __m128h, |
16196 | ) -> __m128d { |
16197 | unsafe { |
16198 | static_assert_sae!(SAE); |
16199 | vcvtsh2sd(a, b, src, k, SAE) |
16200 | } |
16201 | } |
16202 | |
16203 | /// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) |
16204 | /// floating-point element, store the result in the lower element of dst using zeromask k (the element is |
16205 | /// zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. |
16206 | /// |
16207 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
16208 | /// |
16209 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_sd) |
16210 | #[inline ] |
16211 | #[target_feature (enable = "avx512fp16" )] |
16212 | #[cfg_attr (test, assert_instr(vcvtsh2sd, SAE = 8))] |
16213 | #[rustc_legacy_const_generics (3)] |
16214 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16215 | pub fn _mm_maskz_cvt_roundsh_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128h) -> __m128d { |
16216 | static_assert_sae!(SAE); |
_mm_mask_cvt_roundsh_sd::<SAE>(_mm_setzero_pd(), k, a, b)
16218 | } |
16219 | |
16220 | /// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`. |
16221 | /// |
16222 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_h) |
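///
/// A minimal usage sketch (illustrative values only):
///
/// ```ignore
/// let a = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
/// let x: f16 = _mm_cvtsh_h(a);
/// // x == 1.0, the value of the lowest lane
/// ```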
16223 | #[inline ] |
16224 | #[target_feature (enable = "avx512fp16" )] |
16225 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16226 | pub fn _mm_cvtsh_h(a: __m128h) -> f16 { |
16227 | unsafe { simd_extract!(a, 0) } |
16228 | } |
16229 | |
16230 | /// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`. |
16231 | /// |
16232 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsh_h) |
16233 | #[inline ] |
16234 | #[target_feature (enable = "avx512fp16" )] |
16235 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16236 | pub fn _mm256_cvtsh_h(a: __m256h) -> f16 { |
16237 | unsafe { simd_extract!(a, 0) } |
16238 | } |
16239 | |
16240 | /// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`. |
16241 | /// |
16242 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsh_h) |
16243 | #[inline ] |
16244 | #[target_feature (enable = "avx512fp16" )] |
16245 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16246 | pub fn _mm512_cvtsh_h(a: __m512h) -> f16 { |
16247 | unsafe { simd_extract!(a, 0) } |
16248 | } |
16249 | |
16250 | /// Copy the lower 16-bit integer in a to dst. |
16251 | /// |
16252 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si16) |
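///
/// A minimal usage sketch (illustrative values only):
///
/// ```ignore
/// let a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
/// let x = _mm_cvtsi128_si16(a);
/// // x == 1, the 16-bit integer in the lowest lane
/// ```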
16253 | #[inline ] |
16254 | #[target_feature (enable = "avx512fp16" )] |
16255 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16256 | pub fn _mm_cvtsi128_si16(a: __m128i) -> i16 { |
16257 | unsafe { simd_extract!(a.as_i16x8(), 0) } |
16258 | } |
16259 | |
16260 | /// Copy 16-bit integer a to the lower elements of dst, and zero the upper elements of dst. |
16261 | /// |
16262 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi16_si128) |
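///
/// A minimal usage sketch (illustrative values only):
///
/// ```ignore
/// let v = _mm_cvtsi16_si128(7);
/// // lane 0 of v holds 7; lanes 1 through 7 are zero
/// ```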
16263 | #[inline ] |
16264 | #[target_feature (enable = "avx512fp16" )] |
16265 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16266 | pub fn _mm_cvtsi16_si128(a: i16) -> __m128i { |
unsafe { transmute(simd_insert!(i16x8::ZERO, 0, a)) }
16268 | } |
16269 | |
16270 | #[allow (improper_ctypes)] |
16271 | unsafe extern "C" { |
16272 | #[link_name = "llvm.x86.avx512fp16.mask.cmp.sh" ] |
16273 | unsafefn vcmpsh(a: __m128h, b: __m128h, imm8: i32, mask: __mmask8, sae: i32) -> __mmask8; |
16274 | #[link_name = "llvm.x86.avx512fp16.vcomi.sh" ] |
16275 | unsafefn vcomish(a: __m128h, b: __m128h, imm8: i32, sae: i32) -> i32; |
16276 | |
16277 | #[link_name = "llvm.x86.avx512fp16.add.ph.512" ] |
16278 | unsafefn vaddph(a: __m512h, b: __m512h, rounding: i32) -> __m512h; |
16279 | #[link_name = "llvm.x86.avx512fp16.sub.ph.512" ] |
16280 | unsafefn vsubph(a: __m512h, b: __m512h, rounding: i32) -> __m512h; |
16281 | #[link_name = "llvm.x86.avx512fp16.mul.ph.512" ] |
16282 | unsafefn vmulph(a: __m512h, b: __m512h, rounding: i32) -> __m512h; |
16283 | #[link_name = "llvm.x86.avx512fp16.div.ph.512" ] |
16284 | unsafefn vdivph(a: __m512h, b: __m512h, rounding: i32) -> __m512h; |
16285 | |
16286 | #[link_name = "llvm.x86.avx512fp16.mask.add.sh.round" ] |
16287 | unsafefn vaddsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; |
16288 | #[link_name = "llvm.x86.avx512fp16.mask.sub.sh.round" ] |
16289 | unsafefn vsubsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; |
16290 | #[link_name = "llvm.x86.avx512fp16.mask.mul.sh.round" ] |
16291 | unsafefn vmulsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; |
16292 | #[link_name = "llvm.x86.avx512fp16.mask.div.sh.round" ] |
16293 | unsafefn vdivsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; |
16294 | |
16295 | #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.128" ] |
16296 | unsafefn vfmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128; |
16297 | #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.256" ] |
16298 | unsafefn vfmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256; |
16299 | #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.512" ] |
16300 | unsafefn vfmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512; |
16301 | #[link_name = "llvm.x86.avx512fp16.mask.vfmul.csh" ] |
16302 | unsafefn vfmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128; |
16303 | |
16304 | #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.128" ] |
16305 | unsafefn vfcmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128; |
16306 | #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.256" ] |
16307 | unsafefn vfcmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256; |
16308 | #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.512" ] |
16309 | unsafefn vfcmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512; |
16310 | #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.csh" ] |
16311 | unsafefn vfcmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128; |
16312 | |
16313 | #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.128" ] |
16314 | unsafefn vfmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128; |
16315 | #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.128" ] |
16316 | unsafefn vfmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128; |
16317 | #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.256" ] |
16318 | unsafefn vfmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256; |
16319 | #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.256" ] |
16320 | unsafefn vfmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256; |
16321 | #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.512" ] |
16322 | unsafefn vfmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512; |
16323 | #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.512" ] |
16324 | unsafefn vfmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512; |
16325 | #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.csh" ] |
16326 | unsafefn vfmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128; |
16327 | #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.csh" ] |
16328 | unsafefn vfmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128; |
16329 | |
16330 | #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.128" ] |
16331 | unsafefn vfcmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128; |
16332 | #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.128" ] |
16333 | unsafefn vfcmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128; |
16334 | #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.256" ] |
16335 | unsafefn vfcmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256; |
16336 | #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.256" ] |
16337 | unsafefn vfcmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256; |
16338 | #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.512" ] |
16339 | unsafefn vfcmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) |
16340 | -> __m512; |
16341 | #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.512" ] |
16342 | unsafefn vfcmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) |
16343 | -> __m512; |
16344 | #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.csh" ] |
16345 | unsafefn vfcmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128; |
16346 | #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.csh" ] |
16347 | unsafefn vfcmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128; |
16348 | |
16349 | #[link_name = "llvm.x86.avx512fp16.vfmadd.ph.512" ] |
16350 | unsafefn vfmaddph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h; |
16351 | #[link_name = "llvm.x86.avx512fp16.vfmadd.f16" ] |
16352 | unsafefn vfmaddsh(a: f16, b: f16, c: f16, rounding: i32) -> f16; |
16353 | |
16354 | #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.128" ] |
16355 | unsafefn vfmaddsubph_128(a: __m128h, b: __m128h, c: __m128h) -> __m128h; |
16356 | #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.256" ] |
16357 | unsafefn vfmaddsubph_256(a: __m256h, b: __m256h, c: __m256h) -> __m256h; |
16358 | #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.512" ] |
16359 | unsafefn vfmaddsubph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h; |
16360 | |
16361 | #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.128" ] |
16362 | unsafefn vrcpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h; |
16363 | #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.256" ] |
16364 | unsafefn vrcpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h; |
16365 | #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.512" ] |
16366 | unsafefn vrcpph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h; |
16367 | #[link_name = "llvm.x86.avx512fp16.mask.rcp.sh" ] |
16368 | unsafefn vrcpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h; |
16369 | |
16370 | #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.128" ] |
16371 | unsafefn vrsqrtph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h; |
16372 | #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.256" ] |
16373 | unsafefn vrsqrtph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h; |
16374 | #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.512" ] |
16375 | unsafefn vrsqrtph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h; |
16376 | #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.sh" ] |
16377 | unsafefn vrsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h; |
16378 | |
16379 | #[link_name = "llvm.x86.avx512fp16.sqrt.ph.512" ] |
16380 | unsafefn vsqrtph_512(a: __m512h, rounding: i32) -> __m512h; |
16381 | #[link_name = "llvm.x86.avx512fp16.mask.sqrt.sh" ] |
16382 | unsafefn vsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; |
16383 | |
16384 | #[link_name = "llvm.x86.avx512fp16.max.ph.128" ] |
16385 | unsafefn vmaxph_128(a: __m128h, b: __m128h) -> __m128h; |
16386 | #[link_name = "llvm.x86.avx512fp16.max.ph.256" ] |
16387 | unsafefn vmaxph_256(a: __m256h, b: __m256h) -> __m256h; |
16388 | #[link_name = "llvm.x86.avx512fp16.max.ph.512" ] |
16389 | unsafefn vmaxph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h; |
16390 | #[link_name = "llvm.x86.avx512fp16.mask.max.sh.round" ] |
16391 | unsafefn vmaxsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h; |
16392 | |
16393 | #[link_name = "llvm.x86.avx512fp16.min.ph.128" ] |
16394 | unsafefn vminph_128(a: __m128h, b: __m128h) -> __m128h; |
16395 | #[link_name = "llvm.x86.avx512fp16.min.ph.256" ] |
16396 | unsafefn vminph_256(a: __m256h, b: __m256h) -> __m256h; |
16397 | #[link_name = "llvm.x86.avx512fp16.min.ph.512" ] |
16398 | unsafefn vminph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h; |
16399 | #[link_name = "llvm.x86.avx512fp16.mask.min.sh.round" ] |
16400 | unsafefn vminsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h; |
16401 | |
16402 | #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.128" ] |
16403 | unsafefn vgetexpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h; |
16404 | #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.256" ] |
16405 | unsafefn vgetexpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h; |
16406 | #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.512" ] |
16407 | unsafefn vgetexpph_512(a: __m512h, src: __m512h, k: __mmask32, sae: i32) -> __m512h; |
16408 | #[link_name = "llvm.x86.avx512fp16.mask.getexp.sh" ] |
16409 | unsafefn vgetexpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h; |
16410 | |
16411 | #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.128" ] |
16412 | unsafefn vgetmantph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h; |
16413 | #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.256" ] |
16414 | unsafefn vgetmantph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h; |
16415 | #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.512" ] |
16416 | unsafefn vgetmantph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h; |
16417 | #[link_name = "llvm.x86.avx512fp16.mask.getmant.sh" ] |
16418 | unsafefn vgetmantsh( |
16419 | a: __m128h, |
16420 | b: __m128h, |
16421 | imm8: i32, |
16422 | src: __m128h, |
16423 | k: __mmask8, |
16424 | sae: i32, |
16425 | ) -> __m128h; |
16426 | |
16427 | #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.128" ] |
16428 | unsafefn vrndscaleph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h; |
16429 | #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.256" ] |
16430 | unsafefn vrndscaleph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h; |
16431 | #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.512" ] |
16432 | unsafefn vrndscaleph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h; |
16433 | #[link_name = "llvm.x86.avx512fp16.mask.rndscale.sh" ] |
16434 | unsafefn vrndscalesh( |
16435 | a: __m128h, |
16436 | b: __m128h, |
16437 | src: __m128h, |
16438 | k: __mmask8, |
16439 | imm8: i32, |
16440 | sae: i32, |
16441 | ) -> __m128h; |
16442 | |
16443 | #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.128" ] |
16444 | unsafefn vscalefph_128(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h; |
16445 | #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.256" ] |
16446 | unsafefn vscalefph_256(a: __m256h, b: __m256h, src: __m256h, k: __mmask16) -> __m256h; |
16447 | #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.512" ] |
16448 | unsafefn vscalefph_512(a: __m512h, b: __m512h, src: __m512h, k: __mmask32, rounding: i32) -> __m512h; |
16449 | #[link_name = "llvm.x86.avx512fp16.mask.scalef.sh" ] |
16450 | unsafefn vscalefsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; |
16451 | |
16452 | #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.128" ] |
16453 | unsafefn vreduceph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h; |
16454 | #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.256" ] |
16455 | unsafefn vreduceph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h; |
16456 | #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.512" ] |
16457 | unsafefn vreduceph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h; |
16458 | #[link_name = "llvm.x86.avx512fp16.mask.reduce.sh" ] |
16459 | unsafefn vreducesh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, imm8: i32, sae: i32) |
16460 | -> __m128h; |
16461 | |
16462 | #[link_name = "llvm.x86.avx512fp16.mask.fpclass.sh" ] |
16463 | unsafefn vfpclasssh(a: __m128h, imm8: i32, k: __mmask8) -> __mmask8; |
16464 | |
16465 | #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i16" ] |
16466 | unsafefn vcvtw2ph_128(a: i16x8, rounding: i32) -> __m128h; |
16467 | #[link_name = "llvm.x86.avx512.sitofp.round.v16f16.v16i16" ] |
16468 | unsafefn vcvtw2ph_256(a: i16x16, rounding: i32) -> __m256h; |
16469 | #[link_name = "llvm.x86.avx512.sitofp.round.v32f16.v32i16" ] |
16470 | unsafefn vcvtw2ph_512(a: i16x32, rounding: i32) -> __m512h; |
16471 | #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8u16" ] |
16472 | unsafefn vcvtuw2ph_128(a: u16x8, rounding: i32) -> __m128h; |
16473 | #[link_name = "llvm.x86.avx512.uitofp.round.v16f16.v16u16" ] |
16474 | unsafefn vcvtuw2ph_256(a: u16x16, rounding: i32) -> __m256h; |
16475 | #[link_name = "llvm.x86.avx512.uitofp.round.v32f16.v32u16" ] |
16476 | unsafefn vcvtuw2ph_512(a: u16x32, rounding: i32) -> __m512h; |
16477 | |
16478 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtdq2ph.128" ] |
16479 | unsafefn vcvtdq2ph_128(a: i32x4, src: __m128h, k: __mmask8) -> __m128h; |
16480 | #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i32" ] |
16481 | unsafefn vcvtdq2ph_256(a: i32x8, rounding: i32) -> __m128h; |
16482 | #[link_name = "llvm.x86.avx512.sitofp.round.v16f16.v16i32" ] |
16483 | unsafefn vcvtdq2ph_512(a: i32x16, rounding: i32) -> __m256h; |
16484 | #[link_name = "llvm.x86.avx512fp16.vcvtsi2sh" ] |
16485 | unsafefn vcvtsi2sh(a: __m128h, b: i32, rounding: i32) -> __m128h; |
16486 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtudq2ph.128" ] |
16487 | unsafefn vcvtudq2ph_128(a: u32x4, src: __m128h, k: __mmask8) -> __m128h; |
16488 | #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8u32" ] |
16489 | unsafefn vcvtudq2ph_256(a: u32x8, rounding: i32) -> __m128h; |
16490 | #[link_name = "llvm.x86.avx512.uitofp.round.v16f16.v16u32" ] |
16491 | unsafefn vcvtudq2ph_512(a: u32x16, rounding: i32) -> __m256h; |
16492 | #[link_name = "llvm.x86.avx512fp16.vcvtusi2sh" ] |
16493 | unsafefn vcvtusi2sh(a: __m128h, b: u32, rounding: i32) -> __m128h; |
16494 | |
16495 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtqq2ph.128" ] |
16496 | unsafefn vcvtqq2ph_128(a: i64x2, src: __m128h, k: __mmask8) -> __m128h; |
16497 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtqq2ph.256" ] |
16498 | unsafefn vcvtqq2ph_256(a: i64x4, src: __m128h, k: __mmask8) -> __m128h; |
16499 | #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i64" ] |
16500 | unsafefn vcvtqq2ph_512(a: i64x8, rounding: i32) -> __m128h; |
16501 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtuqq2ph.128" ] |
16502 | unsafefn vcvtuqq2ph_128(a: u64x2, src: __m128h, k: __mmask8) -> __m128h; |
16503 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtuqq2ph.256" ] |
16504 | unsafefn vcvtuqq2ph_256(a: u64x4, src: __m128h, k: __mmask8) -> __m128h; |
16505 | #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8u64" ] |
16506 | unsafefn vcvtuqq2ph_512(a: u64x8, rounding: i32) -> __m128h; |
16507 | |
16508 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.128" ] |
16509 | unsafefn vcvtps2phx_128(a: __m128, src: __m128h, k: __mmask8) -> __m128h; |
16510 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.256" ] |
16511 | unsafefn vcvtps2phx_256(a: __m256, src: __m128h, k: __mmask8) -> __m128h; |
16512 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.512" ] |
16513 | unsafefn vcvtps2phx_512(a: __m512, src: __m256h, k: __mmask16, rounding: i32) -> __m256h; |
16514 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtss2sh.round" ] |
16515 | unsafefn vcvtss2sh(a: __m128h, b: __m128, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; |
16516 | |
16517 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.128" ] |
16518 | unsafefn vcvtpd2ph_128(a: __m128d, src: __m128h, k: __mmask8) -> __m128h; |
16519 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.256" ] |
16520 | unsafefn vcvtpd2ph_256(a: __m256d, src: __m128h, k: __mmask8) -> __m128h; |
16521 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.512" ] |
16522 | unsafefn vcvtpd2ph_512(a: __m512d, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; |
16523 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtsd2sh.round" ] |
16524 | unsafefn vcvtsd2sh(a: __m128h, b: __m128d, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; |
16525 | |
16526 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.128" ] |
16527 | unsafefn vcvtph2w_128(a: __m128h, src: i16x8, k: __mmask8) -> i16x8; |
16528 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.256" ] |
16529 | unsafefn vcvtph2w_256(a: __m256h, src: i16x16, k: __mmask16) -> i16x16; |
16530 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.512" ] |
16531 | unsafefn vcvtph2w_512(a: __m512h, src: i16x32, k: __mmask32, rounding: i32) -> i16x32; |
16532 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.128" ] |
16533 | unsafefn vcvtph2uw_128(a: __m128h, src: u16x8, k: __mmask8) -> u16x8; |
16534 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.256" ] |
16535 | unsafefn vcvtph2uw_256(a: __m256h, src: u16x16, k: __mmask16) -> u16x16; |
16536 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.512" ] |
16537 | unsafefn vcvtph2uw_512(a: __m512h, src: u16x32, k: __mmask32, rounding: i32) -> u16x32; |
16538 | |
16539 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.128" ] |
16540 | unsafefn vcvttph2w_128(a: __m128h, src: i16x8, k: __mmask8) -> i16x8; |
16541 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.256" ] |
16542 | unsafefn vcvttph2w_256(a: __m256h, src: i16x16, k: __mmask16) -> i16x16; |
16543 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.512" ] |
16544 | unsafefn vcvttph2w_512(a: __m512h, src: i16x32, k: __mmask32, sae: i32) -> i16x32; |
16545 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.128" ] |
16546 | unsafefn vcvttph2uw_128(a: __m128h, src: u16x8, k: __mmask8) -> u16x8; |
16547 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.256" ] |
16548 | unsafefn vcvttph2uw_256(a: __m256h, src: u16x16, k: __mmask16) -> u16x16; |
16549 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.512" ] |
16550 | unsafefn vcvttph2uw_512(a: __m512h, src: u16x32, k: __mmask32, sae: i32) -> u16x32; |
16551 | |
16552 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.128" ] |
16553 | unsafefn vcvtph2dq_128(a: __m128h, src: i32x4, k: __mmask8) -> i32x4; |
16554 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.256" ] |
16555 | unsafefn vcvtph2dq_256(a: __m128h, src: i32x8, k: __mmask8) -> i32x8; |
16556 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.512" ] |
16557 | unsafefn vcvtph2dq_512(a: __m256h, src: i32x16, k: __mmask16, rounding: i32) -> i32x16; |
16558 | #[link_name = "llvm.x86.avx512fp16.vcvtsh2si32" ] |
16559 | unsafefn vcvtsh2si32(a: __m128h, rounding: i32) -> i32; |
16560 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.128" ] |
16561 | unsafefn vcvtph2udq_128(a: __m128h, src: u32x4, k: __mmask8) -> u32x4; |
16562 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.256" ] |
16563 | unsafefn vcvtph2udq_256(a: __m128h, src: u32x8, k: __mmask8) -> u32x8; |
16564 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.512" ] |
16565 | unsafefn vcvtph2udq_512(a: __m256h, src: u32x16, k: __mmask16, rounding: i32) -> u32x16; |
16566 | #[link_name = "llvm.x86.avx512fp16.vcvtsh2usi32" ] |
16567 | unsafefn vcvtsh2usi32(a: __m128h, sae: i32) -> u32; |
16568 | |
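// Truncating f16 -> 32-bit integer conversions, plus the truncating scalar low-element conversions.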
16569 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.128" ] |
16570 | unsafefn vcvttph2dq_128(a: __m128h, src: i32x4, k: __mmask8) -> i32x4; |
16571 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.256" ] |
16572 | unsafefn vcvttph2dq_256(a: __m128h, src: i32x8, k: __mmask8) -> i32x8; |
16573 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.512" ] |
16574 | unsafefn vcvttph2dq_512(a: __m256h, src: i32x16, k: __mmask16, sae: i32) -> i32x16; |
16575 | #[link_name = "llvm.x86.avx512fp16.vcvttsh2si32" ] |
16576 | unsafefn vcvttsh2si32(a: __m128h, sae: i32) -> i32; |
16577 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.128" ] |
16578 | unsafefn vcvttph2udq_128(a: __m128h, src: u32x4, k: __mmask8) -> u32x4; |
16579 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.256" ] |
16580 | unsafefn vcvttph2udq_256(a: __m128h, src: u32x8, k: __mmask8) -> u32x8; |
16581 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.512" ] |
16582 | unsafefn vcvttph2udq_512(a: __m256h, src: u32x16, k: __mmask16, sae: i32) -> u32x16; |
16583 | #[link_name = "llvm.x86.avx512fp16.vcvttsh2usi32" ] |
16584 | unsafefn vcvttsh2usi32(a: __m128h, sae: i32) -> u32; |
16585 | |
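// Masked f16 -> signed/unsigned 64-bit integer conversions.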
16586 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.128" ] |
16587 | unsafefn vcvtph2qq_128(a: __m128h, src: i64x2, k: __mmask8) -> i64x2; |
16588 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.256" ] |
16589 | unsafefn vcvtph2qq_256(a: __m128h, src: i64x4, k: __mmask8) -> i64x4; |
16590 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.512" ] |
16591 | unsafefn vcvtph2qq_512(a: __m128h, src: i64x8, k: __mmask8, rounding: i32) -> i64x8; |
16592 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.128" ] |
16593 | unsafefn vcvtph2uqq_128(a: __m128h, src: u64x2, k: __mmask8) -> u64x2; |
16594 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.256" ] |
16595 | unsafefn vcvtph2uqq_256(a: __m128h, src: u64x4, k: __mmask8) -> u64x4; |
16596 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.512" ] |
16597 | unsafefn vcvtph2uqq_512(a: __m128h, src: u64x8, k: __mmask8, rounding: i32) -> u64x8; |
16598 | |
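// Truncating f16 -> signed/unsigned 64-bit integer conversions.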
16599 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.128" ] |
16600 | unsafefn vcvttph2qq_128(a: __m128h, src: i64x2, k: __mmask8) -> i64x2; |
16601 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.256" ] |
16602 | unsafefn vcvttph2qq_256(a: __m128h, src: i64x4, k: __mmask8) -> i64x4; |
16603 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.512" ] |
16604 | unsafefn vcvttph2qq_512(a: __m128h, src: i64x8, k: __mmask8, sae: i32) -> i64x8; |
16605 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.128" ] |
16606 | unsafefn vcvttph2uqq_128(a: __m128h, src: u64x2, k: __mmask8) -> u64x2; |
16607 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.256" ] |
16608 | unsafefn vcvttph2uqq_256(a: __m128h, src: u64x4, k: __mmask8) -> u64x4; |
16609 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.512" ] |
16610 | unsafefn vcvttph2uqq_512(a: __m128h, src: u64x8, k: __mmask8, sae: i32) -> u64x8; |
16611 | |
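// Masked f16 -> f32 conversions (vcvtph2psx) and the scalar f16 -> f32 conversion of the low element (vcvtsh2ss).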
16612 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.128" ] |
16613 | unsafefn vcvtph2psx_128(a: __m128h, src: __m128, k: __mmask8) -> __m128; |
16614 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.256" ] |
16615 | unsafefn vcvtph2psx_256(a: __m128h, src: __m256, k: __mmask8) -> __m256; |
16616 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.512" ] |
16617 | unsafefn vcvtph2psx_512(a: __m256h, src: __m512, k: __mmask16, sae: i32) -> __m512; |
16618 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtsh2ss.round" ] |
16619 | unsafefn vcvtsh2ss(a: __m128, b: __m128h, src: __m128, k: __mmask8, sae: i32) -> __m128; |
16620 | |
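// Masked f16 -> f64 conversions (vcvtph2pd) and the scalar f16 -> f64 conversion of the low element (vcvtsh2sd).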
16621 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.128" ] |
16622 | unsafefn vcvtph2pd_128(a: __m128h, src: __m128d, k: __mmask8) -> __m128d; |
16623 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.256" ] |
16624 | unsafefn vcvtph2pd_256(a: __m128h, src: __m256d, k: __mmask8) -> __m256d; |
16625 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.512" ] |
16626 | unsafefn vcvtph2pd_512(a: __m128h, src: __m512d, k: __mmask8, sae: i32) -> __m512d; |
16627 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtsh2sd.round" ] |
16628 | unsafefn vcvtsh2sd(a: __m128d, b: __m128h, src: __m128d, k: __mmask8, sae: i32) -> __m128d; |
16629 | |
16630 | } |
16631 | |
16632 | #[cfg (test)] |
16633 | mod tests { |
16634 | use crate::core_arch::x86::*; |
16635 | use crate::mem::transmute; |
16636 | use crate::ptr::{addr_of, addr_of_mut}; |
16637 | use stdarch_test::simd_test; |
16638 | |
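// Test-local helpers that broadcast an interleaved (re, im) half-precision pair across a
// vector; they emulate the `_set1_pch` operations for the complex-arithmetic (`*_pch`) tests.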
16639 | #[target_feature (enable = "avx512fp16" )] |
16640 | unsafe fn _mm_set1_pch(re: f16, im: f16) -> __m128h { |
16641 | _mm_setr_ph(re, im, re, im, re, im, re, im) |
16642 | } |
16643 | |
16644 | #[target_feature (enable = "avx512fp16" )] |
16645 | unsafe fn _mm256_set1_pch(re: f16, im: f16) -> __m256h { |
16646 | _mm256_setr_ph( |
16647 | re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, |
16648 | ) |
16649 | } |
16650 | |
16651 | #[target_feature (enable = "avx512fp16" )] |
16652 | unsafe fn _mm512_set1_pch(re: f16, im: f16) -> __m512h { |
16653 | _mm512_setr_ph( |
16654 | re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, |
16655 | re, im, re, im, re, im, re, im, re, im, |
16656 | ) |
16657 | } |
16658 | |
16659 | #[simd_test(enable = "avx512fp16" )] |
16660 | unsafe fn test_mm_set_ph() { |
16661 | let r = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
16662 | let e = _mm_setr_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
16663 | assert_eq_m128h(r, e); |
16664 | } |
16665 | |
16666 | #[simd_test(enable = "avx512fp16" )] |
16667 | unsafe fn test_mm256_set_ph() { |
16668 | let r = _mm256_set_ph( |
16669 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
16670 | ); |
16671 | let e = _mm256_setr_ph( |
16672 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
16673 | ); |
16674 | assert_eq_m256h(r, e); |
16675 | } |
16676 | |
16677 | #[simd_test(enable = "avx512fp16" )] |
16678 | unsafe fn test_mm512_set_ph() { |
16679 | let r = _mm512_set_ph( |
16680 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
16681 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
16682 | 31.0, 32.0, |
16683 | ); |
16684 | let e = _mm512_setr_ph( |
16685 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
16686 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
16687 | 3.0, 2.0, 1.0, |
16688 | ); |
16689 | assert_eq_m512h(r, e); |
16690 | } |
16691 | |
16692 | #[simd_test(enable = "avx512fp16" )] |
16693 | unsafe fn test_mm_set_sh() { |
16694 | let r = _mm_set_sh(1.0); |
16695 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0); |
16696 | assert_eq_m128h(r, e); |
16697 | } |
16698 | |
16699 | #[simd_test(enable = "avx512fp16" )] |
16700 | unsafe fn test_mm_set1_ph() { |
16701 | let r = _mm_set1_ph(1.0); |
16702 | let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0); |
16703 | assert_eq_m128h(r, e); |
16704 | } |
16705 | |
16706 | #[simd_test(enable = "avx512fp16" )] |
16707 | unsafe fn test_mm256_set1_ph() { |
16708 | let r = _mm256_set1_ph(1.0); |
16709 | let e = _mm256_set_ph( |
16710 | 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, |
16711 | ); |
16712 | assert_eq_m256h(r, e); |
16713 | } |
16714 | |
16715 | #[simd_test(enable = "avx512fp16" )] |
16716 | unsafe fn test_mm512_set1_ph() { |
16717 | let r = _mm512_set1_ph(1.0); |
16718 | let e = _mm512_set_ph( |
16719 | 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, |
16720 | 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, |
16721 | ); |
16722 | assert_eq_m512h(r, e); |
16723 | } |
16724 | |
16725 | #[simd_test(enable = "avx512fp16" )] |
16726 | unsafe fn test_mm_setr_ph() { |
16727 | let r = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
16728 | let e = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
16729 | assert_eq_m128h(r, e); |
16730 | } |
16731 | |
16732 | #[simd_test(enable = "avx512fp16" )] |
16733 | unsafe fn test_mm256_setr_ph() { |
16734 | let r = _mm256_setr_ph( |
16735 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
16736 | ); |
16737 | let e = _mm256_set_ph( |
16738 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
16739 | ); |
16740 | assert_eq_m256h(r, e); |
16741 | } |
16742 | |
16743 | #[simd_test(enable = "avx512fp16" )] |
16744 | unsafe fn test_mm512_setr_ph() { |
16745 | let r = _mm512_setr_ph( |
16746 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
16747 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
16748 | 31.0, 32.0, |
16749 | ); |
16750 | let e = _mm512_set_ph( |
16751 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
16752 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
16753 | 3.0, 2.0, 1.0, |
16754 | ); |
16755 | assert_eq_m512h(r, e); |
16756 | } |
16757 | |
16758 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16759 | unsafe fn test_mm_setzero_ph() { |
16760 | let r = _mm_setzero_ph(); |
16761 | let e = _mm_set1_ph(0.0); |
16762 | assert_eq_m128h(r, e); |
16763 | } |
16764 | |
16765 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16766 | unsafe fn test_mm256_setzero_ph() { |
16767 | let r = _mm256_setzero_ph(); |
16768 | let e = _mm256_set1_ph(0.0); |
16769 | assert_eq_m256h(r, e); |
16770 | } |
16771 | |
16772 | #[simd_test(enable = "avx512fp16" )] |
16773 | unsafe fn test_mm512_setzero_ph() { |
16774 | let r = _mm512_setzero_ph(); |
16775 | let e = _mm512_set1_ph(0.0); |
16776 | assert_eq_m512h(r, e); |
16777 | } |
16778 | |
16779 | #[simd_test(enable = "avx512fp16" )] |
16780 | unsafe fn test_mm_castsi128_ph() { |
16781 | let a = _mm_set1_epi16(0x3c00); |
16782 | let r = _mm_castsi128_ph(a); |
16783 | let e = _mm_set1_ph(1.0); |
16784 | assert_eq_m128h(r, e); |
16785 | } |
16786 | |
16787 | #[simd_test(enable = "avx512fp16" )] |
16788 | unsafe fn test_mm256_castsi256_ph() { |
16789 | let a = _mm256_set1_epi16(0x3c00); |
16790 | let r = _mm256_castsi256_ph(a); |
16791 | let e = _mm256_set1_ph(1.0); |
16792 | assert_eq_m256h(r, e); |
16793 | } |
16794 | |
16795 | #[simd_test(enable = "avx512fp16" )] |
16796 | unsafe fn test_mm512_castsi512_ph() { |
16797 | let a = _mm512_set1_epi16(0x3c00); |
16798 | let r = _mm512_castsi512_ph(a); |
16799 | let e = _mm512_set1_ph(1.0); |
16800 | assert_eq_m512h(r, e); |
16801 | } |
16802 | |
16803 | #[simd_test(enable = "avx512fp16" )] |
16804 | unsafe fn test_mm_castph_si128() { |
16805 | let a = _mm_set1_ph(1.0); |
16806 | let r = _mm_castph_si128(a); |
16807 | let e = _mm_set1_epi16(0x3c00); |
16808 | assert_eq_m128i(r, e); |
16809 | } |
16810 | |
16811 | #[simd_test(enable = "avx512fp16" )] |
16812 | unsafe fn test_mm256_castph_si256() { |
16813 | let a = _mm256_set1_ph(1.0); |
16814 | let r = _mm256_castph_si256(a); |
16815 | let e = _mm256_set1_epi16(0x3c00); |
16816 | assert_eq_m256i(r, e); |
16817 | } |
16818 | |
16819 | #[simd_test(enable = "avx512fp16" )] |
16820 | unsafe fn test_mm512_castph_si512() { |
16821 | let a = _mm512_set1_ph(1.0); |
16822 | let r = _mm512_castph_si512(a); |
16823 | let e = _mm512_set1_epi16(0x3c00); |
16824 | assert_eq_m512i(r, e); |
16825 | } |
16826 | |
16827 | #[simd_test(enable = "avx512fp16" )] |
16828 | unsafe fn test_mm_castps_ph() { |
16829 | let a = _mm_castsi128_ps(_mm_set1_epi16(0x3c00)); |
16830 | let r = _mm_castps_ph(a); |
16831 | let e = _mm_set1_ph(1.0); |
16832 | assert_eq_m128h(r, e); |
16833 | } |
16834 | |
16835 | #[simd_test(enable = "avx512fp16" )] |
16836 | unsafe fn test_mm256_castps_ph() { |
16837 | let a = _mm256_castsi256_ps(_mm256_set1_epi16(0x3c00)); |
16838 | let r = _mm256_castps_ph(a); |
16839 | let e = _mm256_set1_ph(1.0); |
16840 | assert_eq_m256h(r, e); |
16841 | } |
16842 | |
16843 | #[simd_test(enable = "avx512fp16" )] |
16844 | unsafe fn test_mm512_castps_ph() { |
16845 | let a = _mm512_castsi512_ps(_mm512_set1_epi16(0x3c00)); |
16846 | let r = _mm512_castps_ph(a); |
16847 | let e = _mm512_set1_ph(1.0); |
16848 | assert_eq_m512h(r, e); |
16849 | } |
16850 | |
16851 | #[simd_test(enable = "avx512fp16" )] |
16852 | unsafe fn test_mm_castph_ps() { |
16853 | let a = _mm_castsi128_ph(_mm_set1_epi32(0x3f800000)); |
16854 | let r = _mm_castph_ps(a); |
16855 | let e = _mm_set1_ps(1.0); |
16856 | assert_eq_m128(r, e); |
16857 | } |
16858 | |
16859 | #[simd_test(enable = "avx512fp16" )] |
16860 | unsafe fn test_mm256_castph_ps() { |
16861 | let a = _mm256_castsi256_ph(_mm256_set1_epi32(0x3f800000)); |
16862 | let r = _mm256_castph_ps(a); |
16863 | let e = _mm256_set1_ps(1.0); |
16864 | assert_eq_m256(r, e); |
16865 | } |
16866 | |
16867 | #[simd_test(enable = "avx512fp16" )] |
16868 | unsafe fn test_mm512_castph_ps() { |
16869 | let a = _mm512_castsi512_ph(_mm512_set1_epi32(0x3f800000)); |
16870 | let r = _mm512_castph_ps(a); |
16871 | let e = _mm512_set1_ps(1.0); |
16872 | assert_eq_m512(r, e); |
16873 | } |
16874 | |
16875 | #[simd_test(enable = "avx512fp16" )] |
16876 | unsafe fn test_mm_castpd_ph() { |
16877 | let a = _mm_castsi128_pd(_mm_set1_epi16(0x3c00)); |
16878 | let r = _mm_castpd_ph(a); |
16879 | let e = _mm_set1_ph(1.0); |
16880 | assert_eq_m128h(r, e); |
16881 | } |
16882 | |
16883 | #[simd_test(enable = "avx512fp16" )] |
16884 | unsafe fn test_mm256_castpd_ph() { |
16885 | let a = _mm256_castsi256_pd(_mm256_set1_epi16(0x3c00)); |
16886 | let r = _mm256_castpd_ph(a); |
16887 | let e = _mm256_set1_ph(1.0); |
16888 | assert_eq_m256h(r, e); |
16889 | } |
16890 | |
16891 | #[simd_test(enable = "avx512fp16" )] |
16892 | unsafe fn test_mm512_castpd_ph() { |
16893 | let a = _mm512_castsi512_pd(_mm512_set1_epi16(0x3c00)); |
16894 | let r = _mm512_castpd_ph(a); |
16895 | let e = _mm512_set1_ph(1.0); |
16896 | assert_eq_m512h(r, e); |
16897 | } |
16898 | |
16899 | #[simd_test(enable = "avx512fp16" )] |
16900 | unsafe fn test_mm_castph_pd() { |
16901 | let a = _mm_castsi128_ph(_mm_set1_epi64x(0x3ff0000000000000)); |
16902 | let r = _mm_castph_pd(a); |
16903 | let e = _mm_set1_pd(1.0); |
16904 | assert_eq_m128d(r, e); |
16905 | } |
16906 | |
16907 | #[simd_test(enable = "avx512fp16" )] |
16908 | unsafe fn test_mm256_castph_pd() { |
16909 | let a = _mm256_castsi256_ph(_mm256_set1_epi64x(0x3ff0000000000000)); |
16910 | let r = _mm256_castph_pd(a); |
16911 | let e = _mm256_set1_pd(1.0); |
16912 | assert_eq_m256d(r, e); |
16913 | } |
16914 | |
16915 | #[simd_test(enable = "avx512fp16" )] |
16916 | unsafe fn test_mm512_castph_pd() { |
16917 | let a = _mm512_castsi512_ph(_mm512_set1_epi64(0x3ff0000000000000)); |
16918 | let r = _mm512_castph_pd(a); |
16919 | let e = _mm512_set1_pd(1.0); |
16920 | assert_eq_m512d(r, e); |
16921 | } |
16922 | |
16923 | #[simd_test(enable = "avx512fp16" )] |
16924 | unsafe fn test_mm256_castph256_ph128() { |
16925 | let a = _mm256_setr_ph( |
16926 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
16927 | ); |
16928 | let r = _mm256_castph256_ph128(a); |
16929 | let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.); |
16930 | assert_eq_m128h(r, e); |
16931 | } |
16932 | |
16933 | #[simd_test(enable = "avx512fp16" )] |
16934 | unsafe fn test_mm512_castph512_ph128() { |
16935 | let a = _mm512_setr_ph( |
16936 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., |
16937 | 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., |
16938 | ); |
16939 | let r = _mm512_castph512_ph128(a); |
16940 | let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.); |
16941 | assert_eq_m128h(r, e); |
16942 | } |
16943 | |
16944 | #[simd_test(enable = "avx512fp16" )] |
16945 | unsafe fn test_mm512_castph512_ph256() { |
16946 | let a = _mm512_setr_ph( |
16947 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., |
16948 | 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., |
16949 | ); |
16950 | let r = _mm512_castph512_ph256(a); |
16951 | let e = _mm256_setr_ph( |
16952 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
16953 | ); |
16954 | assert_eq_m256h(r, e); |
16955 | } |
16956 | |
16957 | #[simd_test(enable = "avx512fp16" )] |
16958 | unsafe fn test_mm256_castph128_ph256() { |
16959 | let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.); |
16960 | let r = _mm256_castph128_ph256(a); |
16961 | assert_eq_m128h(_mm256_castph256_ph128(r), a); |
16962 | } |
16963 | |
16964 | #[simd_test(enable = "avx512fp16" )] |
16965 | unsafe fn test_mm512_castph128_ph512() { |
16966 | let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.); |
16967 | let r = _mm512_castph128_ph512(a); |
16968 | assert_eq_m128h(_mm512_castph512_ph128(r), a); |
16969 | } |
16970 | |
16971 | #[simd_test(enable = "avx512fp16" )] |
16972 | unsafe fn test_mm512_castph256_ph512() { |
16973 | let a = _mm256_setr_ph( |
16974 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
16975 | ); |
16976 | let r = _mm512_castph256_ph512(a); |
16977 | assert_eq_m256h(_mm512_castph512_ph256(r), a); |
16978 | } |
16979 | |
16980 | #[simd_test(enable = "avx512fp16" )] |
16981 | unsafe fn test_mm256_zextph128_ph256() { |
16982 | let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.); |
16983 | let r = _mm256_zextph128_ph256(a); |
16984 | let e = _mm256_setr_ph( |
16985 | 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., |
16986 | ); |
16987 | assert_eq_m256h(r, e); |
16988 | } |
16989 | |
16990 | #[simd_test(enable = "avx512fp16" )] |
16991 | unsafe fn test_mm512_zextph128_ph512() { |
16992 | let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.); |
16993 | let r = _mm512_zextph128_ph512(a); |
16994 | let e = _mm512_setr_ph( |
16995 | 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., |
16996 | 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., |
16997 | ); |
16998 | assert_eq_m512h(r, e); |
16999 | } |
17000 | |
17001 | #[simd_test(enable = "avx512fp16" )] |
17002 | unsafe fn test_mm512_zextph256_ph512() { |
17003 | let a = _mm256_setr_ph( |
17004 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
17005 | ); |
17006 | let r = _mm512_zextph256_ph512(a); |
17007 | let e = _mm512_setr_ph( |
17008 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 0., 0., 0., 0., |
17009 | 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., |
17010 | ); |
17011 | assert_eq_m512h(r, e); |
17012 | } |
17013 | |
17014 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17015 | unsafe fn test_mm_cmp_ph_mask() { |
17016 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17017 | let b = _mm_set_ph(1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0); |
17018 | let r = _mm_cmp_ph_mask::<_CMP_EQ_OQ>(a, b); |
17019 | assert_eq!(r, 0b11110000); |
17020 | } |
17021 | |
17022 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17023 | unsafe fn test_mm_mask_cmp_ph_mask() { |
17024 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17025 | let b = _mm_set_ph(1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0); |
17026 | let r = _mm_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b01010101, a, b); |
17027 | assert_eq!(r, 0b01010000); |
17028 | } |
17029 | |
17030 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17031 | unsafe fn test_mm256_cmp_ph_mask() { |
17032 | let a = _mm256_set_ph( |
17033 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17034 | ); |
17035 | let b = _mm256_set_ph( |
17036 | 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0, |
17037 | -16.0, |
17038 | ); |
17039 | let r = _mm256_cmp_ph_mask::<_CMP_EQ_OQ>(a, b); |
17040 | assert_eq!(r, 0b1111000011110000); |
17041 | } |
17042 | |
17043 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17044 | unsafe fn test_mm256_mask_cmp_ph_mask() { |
17045 | let a = _mm256_set_ph( |
17046 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17047 | ); |
17048 | let b = _mm256_set_ph( |
17049 | 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0, |
17050 | -16.0, |
17051 | ); |
17052 | let r = _mm256_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b0101010101010101, a, b); |
17053 | assert_eq!(r, 0b0101000001010000); |
17054 | } |
17055 | |
17056 | #[simd_test(enable = "avx512fp16" )] |
17057 | unsafe fn test_mm512_cmp_ph_mask() { |
17058 | let a = _mm512_set_ph( |
17059 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17060 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17061 | 31.0, 32.0, |
17062 | ); |
17063 | let b = _mm512_set_ph( |
17064 | 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0, |
17065 | -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0, |
17066 | -29.0, -30.0, -31.0, -32.0, |
17067 | ); |
17068 | let r = _mm512_cmp_ph_mask::<_CMP_EQ_OQ>(a, b); |
17069 | assert_eq!(r, 0b11110000111100001111000011110000); |
17070 | } |
17071 | |
17072 | #[simd_test(enable = "avx512fp16" )] |
17073 | unsafe fn test_mm512_mask_cmp_ph_mask() { |
17074 | let a = _mm512_set_ph( |
17075 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17076 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17077 | 31.0, 32.0, |
17078 | ); |
17079 | let b = _mm512_set_ph( |
17080 | 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0, |
17081 | -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0, |
17082 | -29.0, -30.0, -31.0, -32.0, |
17083 | ); |
17084 | let r = _mm512_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b01010101010101010101010101010101, a, b); |
17085 | assert_eq!(r, 0b01010000010100000101000001010000); |
17086 | } |
17087 | |
17088 | #[simd_test(enable = "avx512fp16" )] |
17089 | unsafe fn test_mm512_cmp_round_ph_mask() { |
17090 | let a = _mm512_set_ph( |
17091 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17092 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17093 | 31.0, 32.0, |
17094 | ); |
17095 | let b = _mm512_set_ph( |
17096 | 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0, |
17097 | -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0, |
17098 | -29.0, -30.0, -31.0, -32.0, |
17099 | ); |
17100 | let r = _mm512_cmp_round_ph_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b); |
17101 | assert_eq!(r, 0b11110000111100001111000011110000); |
17102 | } |
17103 | |
17104 | #[simd_test(enable = "avx512fp16" )] |
17105 | unsafe fn test_mm512_mask_cmp_round_ph_mask() { |
17106 | let a = _mm512_set_ph( |
17107 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17108 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17109 | 31.0, 32.0, |
17110 | ); |
17111 | let b = _mm512_set_ph( |
17112 | 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0, |
17113 | -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0, |
17114 | -29.0, -30.0, -31.0, -32.0, |
17115 | ); |
17116 | let r = _mm512_mask_cmp_round_ph_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>( |
17117 | 0b01010101010101010101010101010101, |
17118 | a, |
17119 | b, |
17120 | ); |
17121 | assert_eq!(r, 0b01010000010100000101000001010000); |
17122 | } |
17123 | |
17124 | #[simd_test(enable = "avx512fp16" )] |
17125 | unsafe fn test_mm_cmp_round_sh_mask() { |
17126 | let a = _mm_set_sh(1.0); |
17127 | let b = _mm_set_sh(1.0); |
17128 | let r = _mm_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b); |
17129 | assert_eq!(r, 1); |
17130 | } |
17131 | |
17132 | #[simd_test(enable = "avx512fp16" )] |
17133 | unsafe fn test_mm_mask_cmp_round_sh_mask() { |
17134 | let a = _mm_set_sh(1.0); |
17135 | let b = _mm_set_sh(1.0); |
17136 | let r = _mm_mask_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(0, a, b); |
17137 | assert_eq!(r, 0); |
17138 | } |
17139 | |
17140 | #[simd_test(enable = "avx512fp16" )] |
17141 | unsafe fn test_mm_cmp_sh_mask() { |
17142 | let a = _mm_set_sh(1.0); |
17143 | let b = _mm_set_sh(1.0); |
17144 | let r = _mm_cmp_sh_mask::<_CMP_EQ_OQ>(a, b); |
17145 | assert_eq!(r, 1); |
17146 | } |
17147 | |
17148 | #[simd_test(enable = "avx512fp16" )] |
17149 | unsafe fn test_mm_mask_cmp_sh_mask() { |
17150 | let a = _mm_set_sh(1.0); |
17151 | let b = _mm_set_sh(1.0); |
17152 | let r = _mm_mask_cmp_sh_mask::<_CMP_EQ_OQ>(0, a, b); |
17153 | assert_eq!(r, 0); |
17154 | } |
17155 | |
17156 | #[simd_test(enable = "avx512fp16" )] |
17157 | unsafe fn test_mm_comi_round_sh() { |
17158 | let a = _mm_set_sh(1.0); |
17159 | let b = _mm_set_sh(1.0); |
17160 | let r = _mm_comi_round_sh::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b); |
17161 | assert_eq!(r, 1); |
17162 | } |
17163 | |
17164 | #[simd_test(enable = "avx512fp16" )] |
17165 | unsafe fn test_mm_comi_sh() { |
17166 | let a = _mm_set_sh(1.0); |
17167 | let b = _mm_set_sh(1.0); |
17168 | let r = _mm_comi_sh::<_CMP_EQ_OQ>(a, b); |
17169 | assert_eq!(r, 1); |
17170 | } |
17171 | |
17172 | #[simd_test(enable = "avx512fp16" )] |
17173 | unsafe fn test_mm_comieq_sh() { |
17174 | let a = _mm_set_sh(1.0); |
17175 | let b = _mm_set_sh(1.0); |
17176 | let r = _mm_comieq_sh(a, b); |
17177 | assert_eq!(r, 1); |
17178 | } |
17179 | |
17180 | #[simd_test(enable = "avx512fp16" )] |
17181 | unsafe fn test_mm_comige_sh() { |
17182 | let a = _mm_set_sh(2.0); |
17183 | let b = _mm_set_sh(1.0); |
17184 | let r = _mm_comige_sh(a, b); |
17185 | assert_eq!(r, 1); |
17186 | } |
17187 | |
17188 | #[simd_test(enable = "avx512fp16" )] |
17189 | unsafe fn test_mm_comigt_sh() { |
17190 | let a = _mm_set_sh(2.0); |
17191 | let b = _mm_set_sh(1.0); |
17192 | let r = _mm_comigt_sh(a, b); |
17193 | assert_eq!(r, 1); |
17194 | } |
17195 | |
17196 | #[simd_test(enable = "avx512fp16" )] |
17197 | unsafe fn test_mm_comile_sh() { |
17198 | let a = _mm_set_sh(1.0); |
17199 | let b = _mm_set_sh(2.0); |
17200 | let r = _mm_comile_sh(a, b); |
17201 | assert_eq!(r, 1); |
17202 | } |
17203 | |
17204 | #[simd_test(enable = "avx512fp16" )] |
17205 | unsafe fn test_mm_comilt_sh() { |
17206 | let a = _mm_set_sh(1.0); |
17207 | let b = _mm_set_sh(2.0); |
17208 | let r = _mm_comilt_sh(a, b); |
17209 | assert_eq!(r, 1); |
17210 | } |
17211 | |
17212 | #[simd_test(enable = "avx512fp16" )] |
17213 | unsafe fn test_mm_comineq_sh() { |
17214 | let a = _mm_set_sh(1.0); |
17215 | let b = _mm_set_sh(2.0); |
17216 | let r = _mm_comineq_sh(a, b); |
17217 | assert_eq!(r, 1); |
17218 | } |
17219 | |
17220 | #[simd_test(enable = "avx512fp16" )] |
17221 | unsafe fn test_mm_ucomieq_sh() { |
17222 | let a = _mm_set_sh(1.0); |
17223 | let b = _mm_set_sh(1.0); |
17224 | let r = _mm_ucomieq_sh(a, b); |
17225 | assert_eq!(r, 1); |
17226 | } |
17227 | |
17228 | #[simd_test(enable = "avx512fp16" )] |
17229 | unsafe fn test_mm_ucomige_sh() { |
17230 | let a = _mm_set_sh(2.0); |
17231 | let b = _mm_set_sh(1.0); |
17232 | let r = _mm_ucomige_sh(a, b); |
17233 | assert_eq!(r, 1); |
17234 | } |
17235 | |
17236 | #[simd_test(enable = "avx512fp16" )] |
17237 | unsafe fn test_mm_ucomigt_sh() { |
17238 | let a = _mm_set_sh(2.0); |
17239 | let b = _mm_set_sh(1.0); |
17240 | let r = _mm_ucomigt_sh(a, b); |
17241 | assert_eq!(r, 1); |
17242 | } |
17243 | |
17244 | #[simd_test(enable = "avx512fp16" )] |
17245 | unsafe fn test_mm_ucomile_sh() { |
17246 | let a = _mm_set_sh(1.0); |
17247 | let b = _mm_set_sh(2.0); |
17248 | let r = _mm_ucomile_sh(a, b); |
17249 | assert_eq!(r, 1); |
17250 | } |
17251 | |
17252 | #[simd_test(enable = "avx512fp16" )] |
17253 | unsafe fn test_mm_ucomilt_sh() { |
17254 | let a = _mm_set_sh(1.0); |
17255 | let b = _mm_set_sh(2.0); |
17256 | let r = _mm_ucomilt_sh(a, b); |
17257 | assert_eq!(r, 1); |
17258 | } |
17259 | |
17260 | #[simd_test(enable = "avx512fp16" )] |
17261 | unsafe fn test_mm_ucomineq_sh() { |
17262 | let a = _mm_set_sh(1.0); |
17263 | let b = _mm_set_sh(2.0); |
17264 | let r = _mm_ucomineq_sh(a, b); |
17265 | assert_eq!(r, 1); |
17266 | } |
17267 | |
17268 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17269 | unsafe fn test_mm_load_ph() { |
17270 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17271 | let b = _mm_load_ph(addr_of!(a).cast()); |
17272 | assert_eq_m128h(a, b); |
17273 | } |
17274 | |
17275 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17276 | unsafe fn test_mm256_load_ph() { |
17277 | let a = _mm256_set_ph( |
17278 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17279 | ); |
17280 | let b = _mm256_load_ph(addr_of!(a).cast()); |
17281 | assert_eq_m256h(a, b); |
17282 | } |
17283 | |
17284 | #[simd_test(enable = "avx512fp16" )] |
17285 | unsafe fn test_mm512_load_ph() { |
17286 | let a = _mm512_set_ph( |
17287 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17288 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17289 | 31.0, 32.0, |
17290 | ); |
17291 | let b = _mm512_load_ph(addr_of!(a).cast()); |
17292 | assert_eq_m512h(a, b); |
17293 | } |
17294 | |
17295 | #[simd_test(enable = "avx512fp16" )] |
17296 | unsafe fn test_mm_load_sh() { |
17297 | let a = _mm_set_sh(1.0); |
17298 | let b = _mm_load_sh(addr_of!(a).cast()); |
17299 | assert_eq_m128h(a, b); |
17300 | } |
17301 | |
17302 | #[simd_test(enable = "avx512fp16" )] |
17303 | unsafe fn test_mm_mask_load_sh() { |
17304 | let a = _mm_set_sh(1.0); |
17305 | let src = _mm_set_sh(2.); |
17306 | let b = _mm_mask_load_sh(src, 1, addr_of!(a).cast()); |
17307 | assert_eq_m128h(a, b); |
17308 | let b = _mm_mask_load_sh(src, 0, addr_of!(a).cast()); |
17309 | assert_eq_m128h(src, b); |
17310 | } |
17311 | |
17312 | #[simd_test(enable = "avx512fp16" )] |
17313 | unsafe fn test_mm_maskz_load_sh() { |
17314 | let a = _mm_set_sh(1.0); |
17315 | let b = _mm_maskz_load_sh(1, addr_of!(a).cast()); |
17316 | assert_eq_m128h(a, b); |
17317 | let b = _mm_maskz_load_sh(0, addr_of!(a).cast()); |
17318 | assert_eq_m128h(_mm_setzero_ph(), b); |
17319 | } |
17320 | |
17321 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17322 | unsafe fn test_mm_loadu_ph() { |
17323 | let array = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]; |
17324 | let r = _mm_loadu_ph(array.as_ptr()); |
17325 | let e = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17326 | assert_eq_m128h(r, e); |
17327 | } |
17328 | |
17329 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17330 | unsafe fn test_mm256_loadu_ph() { |
17331 | let array = [ |
17332 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17333 | ]; |
17334 | let r = _mm256_loadu_ph(array.as_ptr()); |
17335 | let e = _mm256_setr_ph( |
17336 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17337 | ); |
17338 | assert_eq_m256h(r, e); |
17339 | } |
17340 | |
17341 | #[simd_test(enable = "avx512fp16" )] |
17342 | unsafe fn test_mm512_loadu_ph() { |
17343 | let array = [ |
17344 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17345 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17346 | 31.0, 32.0, |
17347 | ]; |
17348 | let r = _mm512_loadu_ph(array.as_ptr()); |
17349 | let e = _mm512_setr_ph( |
17350 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17351 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17352 | 31.0, 32.0, |
17353 | ); |
17354 | assert_eq_m512h(r, e); |
17355 | } |
17356 | |
17357 | #[simd_test(enable = "avx512fp16" )] |
17358 | unsafe fn test_mm_move_sh() { |
17359 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17360 | let b = _mm_set_sh(9.0); |
17361 | let r = _mm_move_sh(a, b); |
17362 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0); |
17363 | assert_eq_m128h(r, e); |
17364 | } |
17365 | |
17366 | #[simd_test(enable = "avx512fp16" )] |
17367 | unsafe fn test_mm_mask_move_sh() { |
17368 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17369 | let b = _mm_set_sh(9.0); |
17370 | let src = _mm_set_sh(10.0); |
17371 | let r = _mm_mask_move_sh(src, 0, a, b); |
17372 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 10.0); |
17373 | assert_eq_m128h(r, e); |
17374 | } |
17375 | |
17376 | #[simd_test(enable = "avx512fp16" )] |
17377 | unsafe fn test_mm_maskz_move_sh() { |
17378 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17379 | let b = _mm_set_sh(9.0); |
17380 | let r = _mm_maskz_move_sh(0, a, b); |
17381 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 0.0); |
17382 | assert_eq_m128h(r, e); |
17383 | } |
17384 | |
17385 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17386 | unsafe fn test_mm_store_ph() { |
17387 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17388 | let mut b = _mm_setzero_ph(); |
17389 | _mm_store_ph(addr_of_mut!(b).cast(), a); |
17390 | assert_eq_m128h(a, b); |
17391 | } |
17392 | |
17393 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17394 | unsafe fn test_mm256_store_ph() { |
17395 | let a = _mm256_set_ph( |
17396 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17397 | ); |
17398 | let mut b = _mm256_setzero_ph(); |
17399 | _mm256_store_ph(addr_of_mut!(b).cast(), a); |
17400 | assert_eq_m256h(a, b); |
17401 | } |
17402 | |
17403 | #[simd_test(enable = "avx512fp16" )] |
17404 | unsafe fn test_mm512_store_ph() { |
17405 | let a = _mm512_set_ph( |
17406 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17407 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17408 | 31.0, 32.0, |
17409 | ); |
17410 | let mut b = _mm512_setzero_ph(); |
17411 | _mm512_store_ph(addr_of_mut!(b).cast(), a); |
17412 | assert_eq_m512h(a, b); |
17413 | } |
17414 | |
17415 | #[simd_test(enable = "avx512fp16" )] |
17416 | unsafe fn test_mm_store_sh() { |
17417 | let a = _mm_set_sh(1.0); |
17418 | let mut b = _mm_setzero_ph(); |
17419 | _mm_store_sh(addr_of_mut!(b).cast(), a); |
17420 | assert_eq_m128h(a, b); |
17421 | } |
17422 | |
17423 | #[simd_test(enable = "avx512fp16" )] |
17424 | unsafe fn test_mm_mask_store_sh() { |
17425 | let a = _mm_set_sh(1.0); |
17426 | let mut b = _mm_setzero_ph(); |
17427 | _mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a); |
17428 | assert_eq_m128h(_mm_setzero_ph(), b); |
17429 | _mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a); |
17430 | assert_eq_m128h(a, b); |
17431 | } |
17432 | |
17433 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17434 | unsafe fn test_mm_storeu_ph() { |
17435 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17436 | let mut array = [0.0; 8]; |
17437 | _mm_storeu_ph(array.as_mut_ptr(), a); |
17438 | assert_eq_m128h(a, _mm_loadu_ph(array.as_ptr())); |
17439 | } |
17440 | |
17441 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17442 | unsafe fn test_mm256_storeu_ph() { |
17443 | let a = _mm256_set_ph( |
17444 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17445 | ); |
17446 | let mut array = [0.0; 16]; |
17447 | _mm256_storeu_ph(array.as_mut_ptr(), a); |
17448 | assert_eq_m256h(a, _mm256_loadu_ph(array.as_ptr())); |
17449 | } |
17450 | |
17451 | #[simd_test(enable = "avx512fp16" )] |
17452 | unsafe fn test_mm512_storeu_ph() { |
17453 | let a = _mm512_set_ph( |
17454 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17455 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17456 | 31.0, 32.0, |
17457 | ); |
17458 | let mut array = [0.0; 32]; |
17459 | _mm512_storeu_ph(array.as_mut_ptr(), a); |
17460 | assert_eq_m512h(a, _mm512_loadu_ph(array.as_ptr())); |
17461 | } |
17462 | |
17463 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17464 | unsafe fn test_mm_add_ph() { |
17465 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17466 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
17467 | let r = _mm_add_ph(a, b); |
17468 | let e = _mm_set1_ph(9.0); |
17469 | assert_eq_m128h(r, e); |
17470 | } |
17471 | |
17472 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17473 | unsafe fn test_mm_mask_add_ph() { |
17474 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17475 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
17476 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
17477 | let r = _mm_mask_add_ph(src, 0b01010101, a, b); |
17478 | let e = _mm_set_ph(10., 9., 12., 9., 14., 9., 16., 9.); |
17479 | assert_eq_m128h(r, e); |
17480 | } |
17481 | |
17482 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17483 | unsafe fn test_mm_maskz_add_ph() { |
17484 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17485 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
17486 | let r = _mm_maskz_add_ph(0b01010101, a, b); |
17487 | let e = _mm_set_ph(0., 9., 0., 9., 0., 9., 0., 9.); |
17488 | assert_eq_m128h(r, e); |
17489 | } |
17490 | |
17491 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17492 | unsafe fn test_mm256_add_ph() { |
17493 | let a = _mm256_set_ph( |
17494 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17495 | ); |
17496 | let b = _mm256_set_ph( |
17497 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
17498 | ); |
17499 | let r = _mm256_add_ph(a, b); |
17500 | let e = _mm256_set1_ph(17.0); |
17501 | assert_eq_m256h(r, e); |
17502 | } |
17503 | |
17504 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17505 | unsafe fn test_mm256_mask_add_ph() { |
17506 | let a = _mm256_set_ph( |
17507 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17508 | ); |
17509 | let b = _mm256_set_ph( |
17510 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
17511 | ); |
17512 | let src = _mm256_set_ph( |
17513 | 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33., |
17514 | ); |
17515 | let r = _mm256_mask_add_ph(src, 0b0101010101010101, a, b); |
17516 | let e = _mm256_set_ph( |
17517 | 18., 17., 20., 17., 22., 17., 24., 17., 26., 17., 28., 17., 30., 17., 32., 17., |
17518 | ); |
17519 | assert_eq_m256h(r, e); |
17520 | } |
17521 | |
17522 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17523 | unsafe fn test_mm256_maskz_add_ph() { |
17524 | let a = _mm256_set_ph( |
17525 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17526 | ); |
17527 | let b = _mm256_set_ph( |
17528 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
17529 | ); |
17530 | let r = _mm256_maskz_add_ph(0b0101010101010101, a, b); |
17531 | let e = _mm256_set_ph( |
17532 | 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., |
17533 | ); |
17534 | assert_eq_m256h(r, e); |
17535 | } |
17536 | |
17537 | #[simd_test(enable = "avx512fp16" )] |
17538 | unsafe fn test_mm512_add_ph() { |
17539 | let a = _mm512_set_ph( |
17540 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17541 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17542 | 31.0, 32.0, |
17543 | ); |
17544 | let b = _mm512_set_ph( |
17545 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17546 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17547 | 3.0, 2.0, 1.0, |
17548 | ); |
17549 | let r = _mm512_add_ph(a, b); |
17550 | let e = _mm512_set1_ph(33.0); |
17551 | assert_eq_m512h(r, e); |
17552 | } |
17553 | |
17554 | #[simd_test(enable = "avx512fp16" )] |
17555 | unsafe fn test_mm512_mask_add_ph() { |
17556 | let a = _mm512_set_ph( |
17557 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17558 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17559 | 31.0, 32.0, |
17560 | ); |
17561 | let b = _mm512_set_ph( |
17562 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17563 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17564 | 3.0, 2.0, 1.0, |
17565 | ); |
17566 | let src = _mm512_set_ph( |
17567 | 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., |
17568 | 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., |
17569 | ); |
17570 | let r = _mm512_mask_add_ph(src, 0b01010101010101010101010101010101, a, b); |
17571 | let e = _mm512_set_ph( |
17572 | 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50., |
17573 | 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33., |
17574 | ); |
17575 | assert_eq_m512h(r, e); |
17576 | } |
17577 | |
17578 | #[simd_test(enable = "avx512fp16" )] |
17579 | unsafe fn test_mm512_maskz_add_ph() { |
17580 | let a = _mm512_set_ph( |
17581 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17582 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17583 | 31.0, 32.0, |
17584 | ); |
17585 | let b = _mm512_set_ph( |
17586 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17587 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17588 | 3.0, 2.0, 1.0, |
17589 | ); |
17590 | let r = _mm512_maskz_add_ph(0b01010101010101010101010101010101, a, b); |
17591 | let e = _mm512_set_ph( |
17592 | 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., |
17593 | 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., |
17594 | ); |
17595 | assert_eq_m512h(r, e); |
17596 | } |
17597 | |
17598 | #[simd_test(enable = "avx512fp16" )] |
17599 | unsafe fn test_mm512_add_round_ph() { |
17600 | let a = _mm512_set_ph( |
17601 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17602 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17603 | 31.0, 32.0, |
17604 | ); |
17605 | let b = _mm512_set_ph( |
17606 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17607 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17608 | 3.0, 2.0, 1.0, |
17609 | ); |
17610 | let r = _mm512_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
17611 | let e = _mm512_set1_ph(33.0); |
17612 | assert_eq_m512h(r, e); |
17613 | } |
17614 | |
17615 | #[simd_test(enable = "avx512fp16" )] |
17616 | unsafe fn test_mm512_mask_add_round_ph() { |
17617 | let a = _mm512_set_ph( |
17618 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17619 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17620 | 31.0, 32.0, |
17621 | ); |
17622 | let b = _mm512_set_ph( |
17623 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17624 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17625 | 3.0, 2.0, 1.0, |
17626 | ); |
17627 | let src = _mm512_set_ph( |
17628 | 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., |
17629 | 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., |
17630 | ); |
17631 | let r = _mm512_mask_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
17632 | src, |
17633 | 0b01010101010101010101010101010101, |
17634 | a, |
17635 | b, |
17636 | ); |
17637 | let e = _mm512_set_ph( |
17638 | 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50., |
17639 | 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33., |
17640 | ); |
17641 | assert_eq_m512h(r, e); |
17642 | } |
17643 | |
17644 | #[simd_test(enable = "avx512fp16" )] |
17645 | unsafe fn test_mm512_maskz_add_round_ph() { |
17646 | let a = _mm512_set_ph( |
17647 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17648 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17649 | 31.0, 32.0, |
17650 | ); |
17651 | let b = _mm512_set_ph( |
17652 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17653 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17654 | 3.0, 2.0, 1.0, |
17655 | ); |
17656 | let r = _mm512_maskz_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
17657 | 0b01010101010101010101010101010101, |
17658 | a, |
17659 | b, |
17660 | ); |
17661 | let e = _mm512_set_ph( |
17662 | 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., |
17663 | 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., |
17664 | ); |
17665 | assert_eq_m512h(r, e); |
17666 | } |
17667 | |
17668 | #[simd_test(enable = "avx512fp16" )] |
17669 | unsafe fn test_mm_add_round_sh() { |
17670 | let a = _mm_set_sh(1.0); |
17671 | let b = _mm_set_sh(2.0); |
17672 | let r = _mm_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
17673 | let e = _mm_set_sh(3.0); |
17674 | assert_eq_m128h(r, e); |
17675 | } |
17676 | |
17677 | #[simd_test(enable = "avx512fp16" )] |
17678 | unsafe fn test_mm_mask_add_round_sh() { |
17679 | let a = _mm_set_sh(1.0); |
17680 | let b = _mm_set_sh(2.0); |
17681 | let src = _mm_set_sh(4.0); |
17682 | let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
17683 | src, 0, a, b, |
17684 | ); |
17685 | let e = _mm_set_sh(4.0); |
17686 | assert_eq_m128h(r, e); |
17687 | let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
17688 | src, 1, a, b, |
17689 | ); |
17690 | let e = _mm_set_sh(3.0); |
17691 | assert_eq_m128h(r, e); |
17692 | } |
17693 | |
17694 | #[simd_test(enable = "avx512fp16" )] |
17695 | unsafe fn test_mm_maskz_add_round_sh() { |
17696 | let a = _mm_set_sh(1.0); |
17697 | let b = _mm_set_sh(2.0); |
17698 | let r = |
17699 | _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
17700 | let e = _mm_set_sh(0.0); |
17701 | assert_eq_m128h(r, e); |
17702 | let r = |
17703 | _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
17704 | let e = _mm_set_sh(3.0); |
17705 | assert_eq_m128h(r, e); |
17706 | } |
17707 | |
17708 | #[simd_test(enable = "avx512fp16" )] |
17709 | unsafe fn test_mm_add_sh() { |
17710 | let a = _mm_set_sh(1.0); |
17711 | let b = _mm_set_sh(2.0); |
17712 | let r = _mm_add_sh(a, b); |
17713 | let e = _mm_set_sh(3.0); |
17714 | assert_eq_m128h(r, e); |
17715 | } |
17716 | |
17717 | #[simd_test(enable = "avx512fp16" )] |
17718 | unsafe fn test_mm_mask_add_sh() { |
17719 | let a = _mm_set_sh(1.0); |
17720 | let b = _mm_set_sh(2.0); |
17721 | let src = _mm_set_sh(4.0); |
17722 | let r = _mm_mask_add_sh(src, 0, a, b); |
17723 | let e = _mm_set_sh(4.0); |
17724 | assert_eq_m128h(r, e); |
17725 | let r = _mm_mask_add_sh(src, 1, a, b); |
17726 | let e = _mm_set_sh(3.0); |
17727 | assert_eq_m128h(r, e); |
17728 | } |
17729 | |
17730 | #[simd_test(enable = "avx512fp16" )] |
17731 | unsafe fn test_mm_maskz_add_sh() { |
17732 | let a = _mm_set_sh(1.0); |
17733 | let b = _mm_set_sh(2.0); |
17734 | let r = _mm_maskz_add_sh(0, a, b); |
17735 | let e = _mm_set_sh(0.0); |
17736 | assert_eq_m128h(r, e); |
17737 | let r = _mm_maskz_add_sh(1, a, b); |
17738 | let e = _mm_set_sh(3.0); |
17739 | assert_eq_m128h(r, e); |
17740 | } |
17741 | |
17742 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17743 | unsafe fn test_mm_sub_ph() { |
17744 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17745 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
17746 | let r = _mm_sub_ph(a, b); |
17747 | let e = _mm_set_ph(-7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0); |
17748 | assert_eq_m128h(r, e); |
17749 | } |
17750 | |
17751 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17752 | unsafe fn test_mm_mask_sub_ph() { |
17753 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17754 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
17755 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
17756 | let r = _mm_mask_sub_ph(src, 0b01010101, a, b); |
17757 | let e = _mm_set_ph(10., -5., 12., -1., 14., 3., 16., 7.); |
17758 | assert_eq_m128h(r, e); |
17759 | } |
17760 | |
17761 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17762 | unsafe fn test_mm_maskz_sub_ph() { |
17763 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17764 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
17765 | let r = _mm_maskz_sub_ph(0b01010101, a, b); |
17766 | let e = _mm_set_ph(0., -5., 0., -1., 0., 3., 0., 7.); |
17767 | assert_eq_m128h(r, e); |
17768 | } |
17769 | |
17770 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17771 | unsafe fn test_mm256_sub_ph() { |
17772 | let a = _mm256_set_ph( |
17773 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17774 | ); |
17775 | let b = _mm256_set_ph( |
17776 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
17777 | ); |
17778 | let r = _mm256_sub_ph(a, b); |
17779 | let e = _mm256_set_ph( |
17780 | -15.0, -13.0, -11.0, -9.0, -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, |
17781 | 15.0, |
17782 | ); |
17783 | assert_eq_m256h(r, e); |
17784 | } |
17785 | |
17786 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17787 | unsafe fn test_mm256_mask_sub_ph() { |
17788 | let a = _mm256_set_ph( |
17789 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17790 | ); |
17791 | let b = _mm256_set_ph( |
17792 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
17793 | ); |
17794 | let src = _mm256_set_ph( |
17795 | 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33., |
17796 | ); |
17797 | let r = _mm256_mask_sub_ph(src, 0b0101010101010101, a, b); |
17798 | let e = _mm256_set_ph( |
17799 | 18., -13., 20., -9., 22., -5., 24., -1., 26., 3., 28., 7., 30., 11., 32., 15., |
17800 | ); |
17801 | assert_eq_m256h(r, e); |
17802 | } |
17803 | |
17804 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17805 | unsafe fn test_mm256_maskz_sub_ph() { |
17806 | let a = _mm256_set_ph( |
17807 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17808 | ); |
17809 | let b = _mm256_set_ph( |
17810 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
17811 | ); |
17812 | let r = _mm256_maskz_sub_ph(0b0101010101010101, a, b); |
17813 | let e = _mm256_set_ph( |
17814 | 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., 0., 7., 0., 11., 0., 15., |
17815 | ); |
17816 | assert_eq_m256h(r, e); |
17817 | } |
17818 | |
17819 | #[simd_test(enable = "avx512fp16" )] |
17820 | unsafe fn test_mm512_sub_ph() { |
17821 | let a = _mm512_set_ph( |
17822 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17823 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17824 | 31.0, 32.0, |
17825 | ); |
17826 | let b = _mm512_set_ph( |
17827 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17828 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17829 | 3.0, 2.0, 1.0, |
17830 | ); |
17831 | let r = _mm512_sub_ph(a, b); |
17832 | let e = _mm512_set_ph( |
17833 | -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0, |
17834 | -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, |
17835 | 23.0, 25.0, 27.0, 29.0, 31.0, |
17836 | ); |
17837 | assert_eq_m512h(r, e); |
17838 | } |
17839 | |
17840 | #[simd_test(enable = "avx512fp16" )] |
17841 | unsafe fn test_mm512_mask_sub_ph() { |
17842 | let a = _mm512_set_ph( |
17843 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17844 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17845 | 31.0, 32.0, |
17846 | ); |
17847 | let b = _mm512_set_ph( |
17848 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17849 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17850 | 3.0, 2.0, 1.0, |
17851 | ); |
17852 | let src = _mm512_set_ph( |
17853 | 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., |
17854 | 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., |
17855 | ); |
17856 | let r = _mm512_mask_sub_ph(src, 0b01010101010101010101010101010101, a, b); |
17857 | let e = _mm512_set_ph( |
17858 | 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1., |
17859 | 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31., |
17860 | ); |
17861 | assert_eq_m512h(r, e); |
17862 | } |
17863 | |
17864 | #[simd_test(enable = "avx512fp16" )] |
17865 | unsafe fn test_mm512_maskz_sub_ph() { |
17866 | let a = _mm512_set_ph( |
17867 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17868 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17869 | 31.0, 32.0, |
17870 | ); |
17871 | let b = _mm512_set_ph( |
17872 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17873 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17874 | 3.0, 2.0, 1.0, |
17875 | ); |
17876 | let r = _mm512_maskz_sub_ph(0b01010101010101010101010101010101, a, b); |
17877 | let e = _mm512_set_ph( |
17878 | 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., |
17879 | 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31., |
17880 | ); |
17881 | assert_eq_m512h(r, e); |
17882 | } |
17883 | |
17884 | #[simd_test(enable = "avx512fp16" )] |
17885 | unsafe fn test_mm512_sub_round_ph() { |
17886 | let a = _mm512_set_ph( |
17887 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17888 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17889 | 31.0, 32.0, |
17890 | ); |
17891 | let b = _mm512_set_ph( |
17892 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17893 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17894 | 3.0, 2.0, 1.0, |
17895 | ); |
17896 | let r = _mm512_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
17897 | let e = _mm512_set_ph( |
17898 | -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0, |
17899 | -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, |
17900 | 23.0, 25.0, 27.0, 29.0, 31.0, |
17901 | ); |
17902 | assert_eq_m512h(r, e); |
17903 | } |
17904 | |
17905 | #[simd_test(enable = "avx512fp16" )] |
17906 | unsafe fn test_mm512_mask_sub_round_ph() { |
17907 | let a = _mm512_set_ph( |
17908 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17909 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17910 | 31.0, 32.0, |
17911 | ); |
17912 | let b = _mm512_set_ph( |
17913 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17914 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17915 | 3.0, 2.0, 1.0, |
17916 | ); |
17917 | let src = _mm512_set_ph( |
17918 | 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., |
17919 | 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., |
17920 | ); |
17921 | let r = _mm512_mask_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
17922 | src, |
17923 | 0b01010101010101010101010101010101, |
17924 | a, |
17925 | b, |
17926 | ); |
17927 | let e = _mm512_set_ph( |
17928 | 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1., |
17929 | 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31., |
17930 | ); |
17931 | assert_eq_m512h(r, e); |
17932 | } |
17933 | |
17934 | #[simd_test(enable = "avx512fp16" )] |
17935 | unsafe fn test_mm512_maskz_sub_round_ph() { |
17936 | let a = _mm512_set_ph( |
17937 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17938 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17939 | 31.0, 32.0, |
17940 | ); |
17941 | let b = _mm512_set_ph( |
17942 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17943 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17944 | 3.0, 2.0, 1.0, |
17945 | ); |
17946 | let r = _mm512_maskz_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
17947 | 0b01010101010101010101010101010101, |
17948 | a, |
17949 | b, |
17950 | ); |
17951 | let e = _mm512_set_ph( |
17952 | 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., |
17953 | 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31., |
17954 | ); |
17955 | assert_eq_m512h(r, e); |
17956 | } |
17957 | |
17958 | #[simd_test(enable = "avx512fp16" )] |
17959 | unsafe fn test_mm_sub_round_sh() { |
17960 | let a = _mm_set_sh(1.0); |
17961 | let b = _mm_set_sh(2.0); |
17962 | let r = _mm_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
17963 | let e = _mm_set_sh(-1.0); |
17964 | assert_eq_m128h(r, e); |
17965 | } |
17966 | |
17967 | #[simd_test(enable = "avx512fp16" )] |
17968 | unsafe fn test_mm_mask_sub_round_sh() { |
17969 | let a = _mm_set_sh(1.0); |
17970 | let b = _mm_set_sh(2.0); |
17971 | let src = _mm_set_sh(4.0); |
17972 | let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
17973 | src, 0, a, b, |
17974 | ); |
17975 | let e = _mm_set_sh(4.0); |
17976 | assert_eq_m128h(r, e); |
17977 | let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
17978 | src, 1, a, b, |
17979 | ); |
17980 | let e = _mm_set_sh(-1.0); |
17981 | assert_eq_m128h(r, e); |
17982 | } |
17983 | |
17984 | #[simd_test(enable = "avx512fp16" )] |
17985 | unsafe fn test_mm_maskz_sub_round_sh() { |
17986 | let a = _mm_set_sh(1.0); |
17987 | let b = _mm_set_sh(2.0); |
17988 | let r = |
17989 | _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
17990 | let e = _mm_set_sh(0.0); |
17991 | assert_eq_m128h(r, e); |
17992 | let r = |
17993 | _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
17994 | let e = _mm_set_sh(-1.0); |
17995 | assert_eq_m128h(r, e); |
17996 | } |
17997 | |
17998 | #[simd_test(enable = "avx512fp16" )] |
17999 | unsafe fn test_mm_sub_sh() { |
18000 | let a = _mm_set_sh(1.0); |
18001 | let b = _mm_set_sh(2.0); |
18002 | let r = _mm_sub_sh(a, b); |
18003 | let e = _mm_set_sh(-1.0); |
18004 | assert_eq_m128h(r, e); |
18005 | } |
18006 | |
18007 | #[simd_test(enable = "avx512fp16" )] |
18008 | unsafe fn test_mm_mask_sub_sh() { |
18009 | let a = _mm_set_sh(1.0); |
18010 | let b = _mm_set_sh(2.0); |
18011 | let src = _mm_set_sh(4.0); |
18012 | let r = _mm_mask_sub_sh(src, 0, a, b); |
18013 | let e = _mm_set_sh(4.0); |
18014 | assert_eq_m128h(r, e); |
18015 | let r = _mm_mask_sub_sh(src, 1, a, b); |
18016 | let e = _mm_set_sh(-1.0); |
18017 | assert_eq_m128h(r, e); |
18018 | } |
18019 | |
18020 | #[simd_test(enable = "avx512fp16" )] |
18021 | unsafe fn test_mm_maskz_sub_sh() { |
18022 | let a = _mm_set_sh(1.0); |
18023 | let b = _mm_set_sh(2.0); |
18024 | let r = _mm_maskz_sub_sh(0, a, b); |
18025 | let e = _mm_set_sh(0.0); |
18026 | assert_eq_m128h(r, e); |
18027 | let r = _mm_maskz_sub_sh(1, a, b); |
18028 | let e = _mm_set_sh(-1.0); |
18029 | assert_eq_m128h(r, e); |
18030 | } |
18031 | |
18032 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18033 | unsafe fn test_mm_mul_ph() { |
18034 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
18035 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
18036 | let r = _mm_mul_ph(a, b); |
18037 | let e = _mm_set_ph(8.0, 14.0, 18.0, 20.0, 20.0, 18.0, 14.0, 8.0); |
18038 | assert_eq_m128h(r, e); |
18039 | } |
18040 | |
18041 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18042 | unsafe fn test_mm_mask_mul_ph() { |
18043 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
18044 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
18045 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
18046 | let r = _mm_mask_mul_ph(src, 0b01010101, a, b); |
18047 | let e = _mm_set_ph(10., 14., 12., 20., 14., 18., 16., 8.); |
18048 | assert_eq_m128h(r, e); |
18049 | } |
18050 | |
18051 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18052 | unsafe fn test_mm_maskz_mul_ph() { |
18053 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
18054 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
18055 | let r = _mm_maskz_mul_ph(0b01010101, a, b); |
18056 | let e = _mm_set_ph(0., 14., 0., 20., 0., 18., 0., 8.); |
18057 | assert_eq_m128h(r, e); |
18058 | } |
18059 | |
18060 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18061 | unsafe fn test_mm256_mul_ph() { |
18062 | let a = _mm256_set_ph( |
18063 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18064 | ); |
18065 | let b = _mm256_set_ph( |
18066 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
18067 | ); |
18068 | let r = _mm256_mul_ph(a, b); |
18069 | let e = _mm256_set_ph( |
18070 | 16.0, 30.0, 42.0, 52.0, 60.0, 66.0, 70.0, 72.0, 72.0, 70.0, 66.0, 60.0, 52.0, 42.0, |
18071 | 30.0, 16.0, |
18072 | ); |
18073 | assert_eq_m256h(r, e); |
18074 | } |
18075 | |
18076 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18077 | unsafe fn test_mm256_mask_mul_ph() { |
18078 | let a = _mm256_set_ph( |
18079 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18080 | ); |
18081 | let b = _mm256_set_ph( |
18082 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
18083 | ); |
18084 | let src = _mm256_set_ph( |
18085 | 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33., |
18086 | ); |
18087 | let r = _mm256_mask_mul_ph(src, 0b0101010101010101, a, b); |
18088 | let e = _mm256_set_ph( |
18089 | 18., 30., 20., 52., 22., 66., 24., 72., 26., 70., 28., 60., 30., 42., 32., 16., |
18090 | ); |
18091 | assert_eq_m256h(r, e); |
18092 | } |
18093 | |
18094 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18095 | unsafe fn test_mm256_maskz_mul_ph() { |
18096 | let a = _mm256_set_ph( |
18097 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18098 | ); |
18099 | let b = _mm256_set_ph( |
18100 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
18101 | ); |
18102 | let r = _mm256_maskz_mul_ph(0b0101010101010101, a, b); |
18103 | let e = _mm256_set_ph( |
18104 | 0., 30., 0., 52., 0., 66., 0., 72., 0., 70., 0., 60., 0., 42., 0., 16., |
18105 | ); |
18106 | assert_eq_m256h(r, e); |
18107 | } |
18108 | |
18109 | #[simd_test(enable = "avx512fp16" )] |
18110 | unsafe fn test_mm512_mul_ph() { |
18111 | let a = _mm512_set_ph( |
18112 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18113 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
18114 | 31.0, 32.0, |
18115 | ); |
18116 | let b = _mm512_set_ph( |
18117 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
18118 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
18119 | 3.0, 2.0, 1.0, |
18120 | ); |
18121 | let r = _mm512_mul_ph(a, b); |
18122 | let e = _mm512_set_ph( |
18123 | 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0, |
18124 | 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0, |
18125 | 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0, |
18126 | ); |
18127 | assert_eq_m512h(r, e); |
18128 | } |
18129 | |
18130 | #[simd_test(enable = "avx512fp16" )] |
18131 | unsafe fn test_mm512_mask_mul_ph() { |
18132 | let a = _mm512_set_ph( |
18133 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18134 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
18135 | 31.0, 32.0, |
18136 | ); |
18137 | let b = _mm512_set_ph( |
18138 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
18139 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
18140 | 3.0, 2.0, 1.0, |
18141 | ); |
18142 | let src = _mm512_set_ph( |
18143 | 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., |
18144 | 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., |
18145 | ); |
18146 | let r = _mm512_mask_mul_ph(src, 0b01010101010101010101010101010101, a, b); |
18147 | let e = _mm512_set_ph( |
18148 | 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272., |
18149 | 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32., |
18150 | ); |
18151 | assert_eq_m512h(r, e); |
18152 | } |
18153 | |
18154 | #[simd_test(enable = "avx512fp16" )] |
18155 | unsafe fn test_mm512_maskz_mul_ph() { |
18156 | let a = _mm512_set_ph( |
18157 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18158 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
18159 | 31.0, 32.0, |
18160 | ); |
18161 | let b = _mm512_set_ph( |
18162 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
18163 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
18164 | 3.0, 2.0, 1.0, |
18165 | ); |
18166 | let r = _mm512_maskz_mul_ph(0b01010101010101010101010101010101, a, b); |
18167 | let e = _mm512_set_ph( |
18168 | 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0., |
18169 | 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32., |
18170 | ); |
18171 | assert_eq_m512h(r, e); |
18172 | } |
18173 | |
18174 | #[simd_test(enable = "avx512fp16" )] |
18175 | unsafe fn test_mm512_mul_round_ph() { |
18176 | let a = _mm512_set_ph( |
18177 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18178 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
18179 | 31.0, 32.0, |
18180 | ); |
18181 | let b = _mm512_set_ph( |
18182 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
18183 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
18184 | 3.0, 2.0, 1.0, |
18185 | ); |
18186 | let r = _mm512_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
18187 | let e = _mm512_set_ph( |
18188 | 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0, |
18189 | 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0, |
18190 | 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0, |
18191 | ); |
18192 | assert_eq_m512h(r, e); |
18193 | } |
18194 | |
18195 | #[simd_test(enable = "avx512fp16" )] |
18196 | unsafe fn test_mm512_mask_mul_round_ph() { |
18197 | let a = _mm512_set_ph( |
18198 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18199 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
18200 | 31.0, 32.0, |
18201 | ); |
18202 | let b = _mm512_set_ph( |
18203 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
18204 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
18205 | 3.0, 2.0, 1.0, |
18206 | ); |
18207 | let src = _mm512_set_ph( |
18208 | 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., |
18209 | 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., |
18210 | ); |
18211 | let r = _mm512_mask_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18212 | src, |
18213 | 0b01010101010101010101010101010101, |
18214 | a, |
18215 | b, |
18216 | ); |
18217 | let e = _mm512_set_ph( |
18218 | 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272., |
18219 | 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32., |
18220 | ); |
18221 | assert_eq_m512h(r, e); |
18222 | } |
18223 | |
18224 | #[simd_test(enable = "avx512fp16" )] |
18225 | unsafe fn test_mm512_maskz_mul_round_ph() { |
18226 | let a = _mm512_set_ph( |
18227 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18228 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
18229 | 31.0, 32.0, |
18230 | ); |
18231 | let b = _mm512_set_ph( |
18232 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
18233 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
18234 | 3.0, 2.0, 1.0, |
18235 | ); |
18236 | let r = _mm512_maskz_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18237 | 0b01010101010101010101010101010101, |
18238 | a, |
18239 | b, |
18240 | ); |
18241 | let e = _mm512_set_ph( |
18242 | 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0., |
18243 | 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32., |
18244 | ); |
18245 | assert_eq_m512h(r, e); |
18246 | } |
18247 | |
18248 | #[simd_test(enable = "avx512fp16" )] |
18249 | unsafe fn test_mm_mul_round_sh() { |
18250 | let a = _mm_set_sh(1.0); |
18251 | let b = _mm_set_sh(2.0); |
18252 | let r = _mm_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
18253 | let e = _mm_set_sh(2.0); |
18254 | assert_eq_m128h(r, e); |
18255 | } |
18256 | |
18257 | #[simd_test(enable = "avx512fp16" )] |
18258 | unsafe fn test_mm_mask_mul_round_sh() { |
18259 | let a = _mm_set_sh(1.0); |
18260 | let b = _mm_set_sh(2.0); |
18261 | let src = _mm_set_sh(4.0); |
18262 | let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18263 | src, 0, a, b, |
18264 | ); |
18265 | let e = _mm_set_sh(4.0); |
18266 | assert_eq_m128h(r, e); |
18267 | let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18268 | src, 1, a, b, |
18269 | ); |
18270 | let e = _mm_set_sh(2.0); |
18271 | assert_eq_m128h(r, e); |
18272 | } |
18273 | |
18274 | #[simd_test(enable = "avx512fp16" )] |
18275 | unsafe fn test_mm_maskz_mul_round_sh() { |
18276 | let a = _mm_set_sh(1.0); |
18277 | let b = _mm_set_sh(2.0); |
18278 | let r = |
18279 | _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
18280 | let e = _mm_set_sh(0.0); |
18281 | assert_eq_m128h(r, e); |
18282 | let r = |
18283 | _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
18284 | let e = _mm_set_sh(2.0); |
18285 | assert_eq_m128h(r, e); |
18286 | } |
18287 | |
18288 | #[simd_test(enable = "avx512fp16" )] |
18289 | unsafe fn test_mm_mul_sh() { |
18290 | let a = _mm_set_sh(1.0); |
18291 | let b = _mm_set_sh(2.0); |
18292 | let r = _mm_mul_sh(a, b); |
18293 | let e = _mm_set_sh(2.0); |
18294 | assert_eq_m128h(r, e); |
18295 | } |
18296 | |
18297 | #[simd_test(enable = "avx512fp16" )] |
18298 | unsafe fn test_mm_mask_mul_sh() { |
18299 | let a = _mm_set_sh(1.0); |
18300 | let b = _mm_set_sh(2.0); |
18301 | let src = _mm_set_sh(4.0); |
18302 | let r = _mm_mask_mul_sh(src, 0, a, b); |
18303 | let e = _mm_set_sh(4.0); |
18304 | assert_eq_m128h(r, e); |
18305 | let r = _mm_mask_mul_sh(src, 1, a, b); |
18306 | let e = _mm_set_sh(2.0); |
18307 | assert_eq_m128h(r, e); |
18308 | } |
18309 | |
18310 | #[simd_test(enable = "avx512fp16" )] |
18311 | unsafe fn test_mm_maskz_mul_sh() { |
18312 | let a = _mm_set_sh(1.0); |
18313 | let b = _mm_set_sh(2.0); |
18314 | let r = _mm_maskz_mul_sh(0, a, b); |
18315 | let e = _mm_set_sh(0.0); |
18316 | assert_eq_m128h(r, e); |
18317 | let r = _mm_maskz_mul_sh(1, a, b); |
18318 | let e = _mm_set_sh(2.0); |
18319 | assert_eq_m128h(r, e); |
18320 | } |
18321 | |
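    // The division tests splat 1.0 and 2.0, so every active lane is simply 0.5; only the
    // masking pattern and the `src` fill values vary between the variants.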
18322 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18323 | unsafe fn test_mm_div_ph() { |
18324 | let a = _mm_set1_ph(1.0); |
18325 | let b = _mm_set1_ph(2.0); |
18326 | let r = _mm_div_ph(a, b); |
18327 | let e = _mm_set1_ph(0.5); |
18328 | assert_eq_m128h(r, e); |
18329 | } |
18330 | |
18331 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18332 | unsafe fn test_mm_mask_div_ph() { |
18333 | let a = _mm_set1_ph(1.0); |
18334 | let b = _mm_set1_ph(2.0); |
18335 | let src = _mm_set_ph(4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0); |
18336 | let r = _mm_mask_div_ph(src, 0b01010101, a, b); |
18337 | let e = _mm_set_ph(4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5); |
18338 | assert_eq_m128h(r, e); |
18339 | } |
18340 | |
18341 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18342 | unsafe fn test_mm_maskz_div_ph() { |
18343 | let a = _mm_set1_ph(1.0); |
18344 | let b = _mm_set1_ph(2.0); |
18345 | let r = _mm_maskz_div_ph(0b01010101, a, b); |
18346 | let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5); |
18347 | assert_eq_m128h(r, e); |
18348 | } |
18349 | |
18350 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18351 | unsafe fn test_mm256_div_ph() { |
18352 | let a = _mm256_set1_ph(1.0); |
18353 | let b = _mm256_set1_ph(2.0); |
18354 | let r = _mm256_div_ph(a, b); |
18355 | let e = _mm256_set1_ph(0.5); |
18356 | assert_eq_m256h(r, e); |
18357 | } |
18358 | |
18359 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18360 | unsafe fn test_mm256_mask_div_ph() { |
18361 | let a = _mm256_set1_ph(1.0); |
18362 | let b = _mm256_set1_ph(2.0); |
18363 | let src = _mm256_set_ph( |
18364 | 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, |
18365 | 19.0, |
18366 | ); |
18367 | let r = _mm256_mask_div_ph(src, 0b0101010101010101, a, b); |
18368 | let e = _mm256_set_ph( |
18369 | 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5, |
18370 | ); |
18371 | assert_eq_m256h(r, e); |
18372 | } |
18373 | |
18374 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18375 | unsafe fn test_mm256_maskz_div_ph() { |
18376 | let a = _mm256_set1_ph(1.0); |
18377 | let b = _mm256_set1_ph(2.0); |
18378 | let r = _mm256_maskz_div_ph(0b0101010101010101, a, b); |
18379 | let e = _mm256_set_ph( |
18380 | 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, |
18381 | ); |
18382 | assert_eq_m256h(r, e); |
18383 | } |
18384 | |
18385 | #[simd_test(enable = "avx512fp16" )] |
18386 | unsafe fn test_mm512_div_ph() { |
18387 | let a = _mm512_set1_ph(1.0); |
18388 | let b = _mm512_set1_ph(2.0); |
18389 | let r = _mm512_div_ph(a, b); |
18390 | let e = _mm512_set1_ph(0.5); |
18391 | assert_eq_m512h(r, e); |
18392 | } |
18393 | |
18394 | #[simd_test(enable = "avx512fp16" )] |
18395 | unsafe fn test_mm512_mask_div_ph() { |
18396 | let a = _mm512_set1_ph(1.0); |
18397 | let b = _mm512_set1_ph(2.0); |
18398 | let src = _mm512_set_ph( |
18399 | 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, |
18400 | 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, |
18401 | 33.0, 34.0, 35.0, |
18402 | ); |
18403 | let r = _mm512_mask_div_ph(src, 0b01010101010101010101010101010101, a, b); |
18404 | let e = _mm512_set_ph( |
18405 | 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5, |
18406 | 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5, |
18407 | ); |
18408 | assert_eq_m512h(r, e); |
18409 | } |
18410 | |
18411 | #[simd_test(enable = "avx512fp16" )] |
18412 | unsafe fn test_mm512_maskz_div_ph() { |
18413 | let a = _mm512_set1_ph(1.0); |
18414 | let b = _mm512_set1_ph(2.0); |
18415 | let r = _mm512_maskz_div_ph(0b01010101010101010101010101010101, a, b); |
18416 | let e = _mm512_set_ph( |
18417 | 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, |
18418 | 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, |
18419 | ); |
18420 | assert_eq_m512h(r, e); |
18421 | } |
18422 | |
18423 | #[simd_test(enable = "avx512fp16" )] |
18424 | unsafe fn test_mm512_div_round_ph() { |
18425 | let a = _mm512_set1_ph(1.0); |
18426 | let b = _mm512_set1_ph(2.0); |
18427 | let r = _mm512_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
18428 | let e = _mm512_set1_ph(0.5); |
18429 | assert_eq_m512h(r, e); |
18430 | } |
18431 | |
18432 | #[simd_test(enable = "avx512fp16" )] |
18433 | unsafe fn test_mm512_mask_div_round_ph() { |
18434 | let a = _mm512_set1_ph(1.0); |
18435 | let b = _mm512_set1_ph(2.0); |
18436 | let src = _mm512_set_ph( |
18437 | 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, |
18438 | 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, |
18439 | 33.0, 34.0, 35.0, |
18440 | ); |
18441 | let r = _mm512_mask_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18442 | src, |
18443 | 0b01010101010101010101010101010101, |
18444 | a, |
18445 | b, |
18446 | ); |
18447 | let e = _mm512_set_ph( |
18448 | 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5, |
18449 | 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5, |
18450 | ); |
18451 | assert_eq_m512h(r, e); |
18452 | } |
18453 | |
18454 | #[simd_test(enable = "avx512fp16" )] |
18455 | unsafe fn test_mm512_maskz_div_round_ph() { |
18456 | let a = _mm512_set1_ph(1.0); |
18457 | let b = _mm512_set1_ph(2.0); |
18458 | let r = _mm512_maskz_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18459 | 0b01010101010101010101010101010101, |
18460 | a, |
18461 | b, |
18462 | ); |
18463 | let e = _mm512_set_ph( |
18464 | 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, |
18465 | 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, |
18466 | ); |
18467 | assert_eq_m512h(r, e); |
18468 | } |
18469 | |
18470 | #[simd_test(enable = "avx512fp16" )] |
18471 | unsafe fn test_mm_div_round_sh() { |
18472 | let a = _mm_set_sh(1.0); |
18473 | let b = _mm_set_sh(2.0); |
18474 | let r = _mm_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
18475 | let e = _mm_set_sh(0.5); |
18476 | assert_eq_m128h(r, e); |
18477 | } |
18478 | |
18479 | #[simd_test(enable = "avx512fp16" )] |
18480 | unsafe fn test_mm_mask_div_round_sh() { |
18481 | let a = _mm_set_sh(1.0); |
18482 | let b = _mm_set_sh(2.0); |
18483 | let src = _mm_set_sh(4.0); |
18484 | let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18485 | src, 0, a, b, |
18486 | ); |
18487 | let e = _mm_set_sh(4.0); |
18488 | assert_eq_m128h(r, e); |
18489 | let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18490 | src, 1, a, b, |
18491 | ); |
18492 | let e = _mm_set_sh(0.5); |
18493 | assert_eq_m128h(r, e); |
18494 | } |
18495 | |
18496 | #[simd_test(enable = "avx512fp16" )] |
18497 | unsafe fn test_mm_maskz_div_round_sh() { |
18498 | let a = _mm_set_sh(1.0); |
18499 | let b = _mm_set_sh(2.0); |
18500 | let r = |
18501 | _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
18502 | let e = _mm_set_sh(0.0); |
18503 | assert_eq_m128h(r, e); |
18504 | let r = |
18505 | _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
18506 | let e = _mm_set_sh(0.5); |
18507 | assert_eq_m128h(r, e); |
18508 | } |
18509 | |
18510 | #[simd_test(enable = "avx512fp16" )] |
18511 | unsafe fn test_mm_div_sh() { |
18512 | let a = _mm_set_sh(1.0); |
18513 | let b = _mm_set_sh(2.0); |
18514 | let r = _mm_div_sh(a, b); |
18515 | let e = _mm_set_sh(0.5); |
18516 | assert_eq_m128h(r, e); |
18517 | } |
18518 | |
18519 | #[simd_test(enable = "avx512fp16" )] |
18520 | unsafe fn test_mm_mask_div_sh() { |
18521 | let a = _mm_set_sh(1.0); |
18522 | let b = _mm_set_sh(2.0); |
18523 | let src = _mm_set_sh(4.0); |
18524 | let r = _mm_mask_div_sh(src, 0, a, b); |
18525 | let e = _mm_set_sh(4.0); |
18526 | assert_eq_m128h(r, e); |
18527 | let r = _mm_mask_div_sh(src, 1, a, b); |
18528 | let e = _mm_set_sh(0.5); |
18529 | assert_eq_m128h(r, e); |
18530 | } |
18531 | |
18532 | #[simd_test(enable = "avx512fp16" )] |
18533 | unsafe fn test_mm_maskz_div_sh() { |
18534 | let a = _mm_set_sh(1.0); |
18535 | let b = _mm_set_sh(2.0); |
18536 | let r = _mm_maskz_div_sh(0, a, b); |
18537 | let e = _mm_set_sh(0.0); |
18538 | assert_eq_m128h(r, e); |
18539 | let r = _mm_maskz_div_sh(1, a, b); |
18540 | let e = _mm_set_sh(0.5); |
18541 | assert_eq_m128h(r, e); |
18542 | } |
18543 | |
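    // Packed complex (`pch`) tests: each complex element is a pair of f16 lanes, with the real
    // part in the even lane and the imaginary part in the odd lane, so a __m128h holds 4
    // complex values and each mask bit covers one (real, imaginary) pair. The inputs below are
    // all 0.0 + 1.0i, and i * i = -1, hence the expected (-1.0, 0.0) pairs.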
18544 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18545 | unsafe fn test_mm_mul_pch() { |
18546 | let a = _mm_set1_pch(0.0, 1.0); |
18547 | let b = _mm_set1_pch(0.0, 1.0); |
18548 | let r = _mm_mul_pch(a, b); |
18549 | let e = _mm_set1_pch(-1.0, 0.0); |
18550 | assert_eq_m128h(r, e); |
18551 | } |
18552 | |
18553 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18554 | unsafe fn test_mm_mask_mul_pch() { |
18555 | let a = _mm_set1_pch(0.0, 1.0); |
18556 | let b = _mm_set1_pch(0.0, 1.0); |
18557 | let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0); |
18558 | let r = _mm_mask_mul_pch(src, 0b0101, a, b); |
18559 | let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0); |
18560 | assert_eq_m128h(r, e); |
18561 | } |
18562 | |
18563 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18564 | unsafe fn test_mm_maskz_mul_pch() { |
18565 | let a = _mm_set1_pch(0.0, 1.0); |
18566 | let b = _mm_set1_pch(0.0, 1.0); |
18567 | let r = _mm_maskz_mul_pch(0b0101, a, b); |
18568 | let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0); |
18569 | assert_eq_m128h(r, e); |
18570 | } |
18571 | |
18572 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18573 | unsafe fn test_mm256_mul_pch() { |
18574 | let a = _mm256_set1_pch(0.0, 1.0); |
18575 | let b = _mm256_set1_pch(0.0, 1.0); |
18576 | let r = _mm256_mul_pch(a, b); |
18577 | let e = _mm256_set1_pch(-1.0, 0.0); |
18578 | assert_eq_m256h(r, e); |
18579 | } |
18580 | |
18581 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18582 | unsafe fn test_mm256_mask_mul_pch() { |
18583 | let a = _mm256_set1_pch(0.0, 1.0); |
18584 | let b = _mm256_set1_pch(0.0, 1.0); |
18585 | let src = _mm256_setr_ph( |
18586 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
18587 | ); |
18588 | let r = _mm256_mask_mul_pch(src, 0b01010101, a, b); |
18589 | let e = _mm256_setr_ph( |
18590 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
18591 | ); |
18592 | assert_eq_m256h(r, e); |
18593 | } |
18594 | |
18595 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18596 | unsafe fn test_mm256_maskz_mul_pch() { |
18597 | let a = _mm256_set1_pch(0.0, 1.0); |
18598 | let b = _mm256_set1_pch(0.0, 1.0); |
18599 | let r = _mm256_maskz_mul_pch(0b01010101, a, b); |
18600 | let e = _mm256_setr_ph( |
18601 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18602 | ); |
18603 | assert_eq_m256h(r, e); |
18604 | } |
18605 | |
18606 | #[simd_test(enable = "avx512fp16" )] |
18607 | unsafe fn test_mm512_mul_pch() { |
18608 | let a = _mm512_set1_pch(0.0, 1.0); |
18609 | let b = _mm512_set1_pch(0.0, 1.0); |
18610 | let r = _mm512_mul_pch(a, b); |
18611 | let e = _mm512_set1_pch(-1.0, 0.0); |
18612 | assert_eq_m512h(r, e); |
18613 | } |
18614 | |
18615 | #[simd_test(enable = "avx512fp16" )] |
18616 | unsafe fn test_mm512_mask_mul_pch() { |
18617 | let a = _mm512_set1_pch(0.0, 1.0); |
18618 | let b = _mm512_set1_pch(0.0, 1.0); |
18619 | let src = _mm512_setr_ph( |
18620 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
18621 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
18622 | 32.0, 33.0, |
18623 | ); |
18624 | let r = _mm512_mask_mul_pch(src, 0b0101010101010101, a, b); |
18625 | let e = _mm512_setr_ph( |
18626 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
18627 | -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, |
18628 | 33.0, |
18629 | ); |
18630 | assert_eq_m512h(r, e); |
18631 | } |
18632 | |
18633 | #[simd_test(enable = "avx512fp16" )] |
18634 | unsafe fn test_mm512_maskz_mul_pch() { |
18635 | let a = _mm512_set1_pch(0.0, 1.0); |
18636 | let b = _mm512_set1_pch(0.0, 1.0); |
18637 | let r = _mm512_maskz_mul_pch(0b0101010101010101, a, b); |
18638 | let e = _mm512_setr_ph( |
18639 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18640 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18641 | ); |
18642 | assert_eq_m512h(r, e); |
18643 | } |
18644 | |
18645 | #[simd_test(enable = "avx512fp16" )] |
18646 | unsafe fn test_mm512_mul_round_pch() { |
18647 | let a = _mm512_set1_pch(0.0, 1.0); |
18648 | let b = _mm512_set1_pch(0.0, 1.0); |
18649 | let r = _mm512_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
18650 | let e = _mm512_set1_pch(-1.0, 0.0); |
18651 | assert_eq_m512h(r, e); |
18652 | } |
18653 | |
18654 | #[simd_test(enable = "avx512fp16" )] |
18655 | unsafe fn test_mm512_mask_mul_round_pch() { |
18656 | let a = _mm512_set1_pch(0.0, 1.0); |
18657 | let b = _mm512_set1_pch(0.0, 1.0); |
18658 | let src = _mm512_setr_ph( |
18659 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
18660 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
18661 | 32.0, 33.0, |
18662 | ); |
18663 | let r = _mm512_mask_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18664 | src, |
18665 | 0b0101010101010101, |
18666 | a, |
18667 | b, |
18668 | ); |
18669 | let e = _mm512_setr_ph( |
18670 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
18671 | -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, |
18672 | 33.0, |
18673 | ); |
18674 | assert_eq_m512h(r, e); |
18675 | } |
18676 | |
18677 | #[simd_test(enable = "avx512fp16" )] |
18678 | unsafe fn test_mm512_maskz_mul_round_pch() { |
18679 | let a = _mm512_set1_pch(0.0, 1.0); |
18680 | let b = _mm512_set1_pch(0.0, 1.0); |
18681 | let r = _mm512_maskz_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18682 | 0b0101010101010101, |
18683 | a, |
18684 | b, |
18685 | ); |
18686 | let e = _mm512_setr_ph( |
18687 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18688 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18689 | ); |
18690 | assert_eq_m512h(r, e); |
18691 | } |
18692 | |
18693 | #[simd_test(enable = "avx512fp16" )] |
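    // Scalar complex (`sch`) tests: only the lowest (real, imaginary) pair is computed; the
    // remaining upper lanes are copied from `a`. In the masked forms a clear mask bit takes
    // the low pair from `src` (or zero for the `maskz` forms) while the upper lanes still
    // come from `a`.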
18694 | unsafe fn test_mm_mul_round_sch() { |
18695 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18696 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18697 | let r = _mm_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
18698 | let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18699 | assert_eq_m128h(r, e); |
18700 | } |
18701 | |
18702 | #[simd_test(enable = "avx512fp16" )] |
18703 | unsafe fn test_mm_mask_mul_round_sch() { |
18704 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18705 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18706 | let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); |
18707 | let r = _mm_mask_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18708 | src, 0, a, b, |
18709 | ); |
18710 | let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18711 | assert_eq_m128h(r, e); |
18712 | } |
18713 | |
18714 | #[simd_test(enable = "avx512fp16" )] |
18715 | unsafe fn test_mm_maskz_mul_round_sch() { |
18716 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18717 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18718 | let r = |
18719 | _mm_maskz_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
18720 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18721 | assert_eq_m128h(r, e); |
18722 | } |
18723 | |
18724 | #[simd_test(enable = "avx512fp16" )] |
18725 | unsafe fn test_mm_mul_sch() { |
18726 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18727 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18728 | let r = _mm_mul_sch(a, b); |
18729 | let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18730 | assert_eq_m128h(r, e); |
18731 | } |
18732 | |
18733 | #[simd_test(enable = "avx512fp16" )] |
18734 | unsafe fn test_mm_mask_mul_sch() { |
18735 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18736 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18737 | let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); |
18738 | let r = _mm_mask_mul_sch(src, 0, a, b); |
18739 | let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18740 | assert_eq_m128h(r, e); |
18741 | } |
18742 | |
18743 | #[simd_test(enable = "avx512fp16" )] |
18744 | unsafe fn test_mm_maskz_mul_sch() { |
18745 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18746 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18747 | let r = _mm_maskz_mul_sch(0, a, b); |
18748 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18749 | assert_eq_m128h(r, e); |
18750 | } |
18751 | |
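    // The `fmul_pch`/`fmul_sch` intrinsics perform the same complex multiplication as the
    // `mul_pch`/`mul_sch` ones, so the tests below repeat the cases above with identical
    // inputs and expected results.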
18752 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18753 | unsafe fn test_mm_fmul_pch() { |
18754 | let a = _mm_set1_pch(0.0, 1.0); |
18755 | let b = _mm_set1_pch(0.0, 1.0); |
18756 | let r = _mm_fmul_pch(a, b); |
18757 | let e = _mm_set1_pch(-1.0, 0.0); |
18758 | assert_eq_m128h(r, e); |
18759 | } |
18760 | |
18761 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18762 | unsafe fn test_mm_mask_fmul_pch() { |
18763 | let a = _mm_set1_pch(0.0, 1.0); |
18764 | let b = _mm_set1_pch(0.0, 1.0); |
18765 | let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0); |
18766 | let r = _mm_mask_fmul_pch(src, 0b0101, a, b); |
18767 | let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0); |
18768 | assert_eq_m128h(r, e); |
18769 | } |
18770 | |
18771 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18772 | unsafe fn test_mm_maskz_fmul_pch() { |
18773 | let a = _mm_set1_pch(0.0, 1.0); |
18774 | let b = _mm_set1_pch(0.0, 1.0); |
18775 | let r = _mm_maskz_fmul_pch(0b0101, a, b); |
18776 | let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0); |
18777 | assert_eq_m128h(r, e); |
18778 | } |
18779 | |
18780 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18781 | unsafe fn test_mm256_fmul_pch() { |
18782 | let a = _mm256_set1_pch(0.0, 1.0); |
18783 | let b = _mm256_set1_pch(0.0, 1.0); |
18784 | let r = _mm256_fmul_pch(a, b); |
18785 | let e = _mm256_set1_pch(-1.0, 0.0); |
18786 | assert_eq_m256h(r, e); |
18787 | } |
18788 | |
18789 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18790 | unsafe fn test_mm256_mask_fmul_pch() { |
18791 | let a = _mm256_set1_pch(0.0, 1.0); |
18792 | let b = _mm256_set1_pch(0.0, 1.0); |
18793 | let src = _mm256_setr_ph( |
18794 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
18795 | ); |
18796 | let r = _mm256_mask_fmul_pch(src, 0b01010101, a, b); |
18797 | let e = _mm256_setr_ph( |
18798 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
18799 | ); |
18800 | assert_eq_m256h(r, e); |
18801 | } |
18802 | |
18803 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18804 | unsafe fn test_mm256_maskz_fmul_pch() { |
18805 | let a = _mm256_set1_pch(0.0, 1.0); |
18806 | let b = _mm256_set1_pch(0.0, 1.0); |
18807 | let r = _mm256_maskz_fmul_pch(0b01010101, a, b); |
18808 | let e = _mm256_setr_ph( |
18809 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18810 | ); |
18811 | assert_eq_m256h(r, e); |
18812 | } |
18813 | |
18814 | #[simd_test(enable = "avx512fp16" )] |
18815 | unsafe fn test_mm512_fmul_pch() { |
18816 | let a = _mm512_set1_pch(0.0, 1.0); |
18817 | let b = _mm512_set1_pch(0.0, 1.0); |
18818 | let r = _mm512_fmul_pch(a, b); |
18819 | let e = _mm512_set1_pch(-1.0, 0.0); |
18820 | assert_eq_m512h(r, e); |
18821 | } |
18822 | |
18823 | #[simd_test(enable = "avx512fp16" )] |
18824 | unsafe fn test_mm512_mask_fmul_pch() { |
18825 | let a = _mm512_set1_pch(0.0, 1.0); |
18826 | let b = _mm512_set1_pch(0.0, 1.0); |
18827 | let src = _mm512_setr_ph( |
18828 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
18829 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
18830 | 32.0, 33.0, |
18831 | ); |
18832 | let r = _mm512_mask_fmul_pch(src, 0b0101010101010101, a, b); |
18833 | let e = _mm512_setr_ph( |
18834 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
18835 | -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, |
18836 | 33.0, |
18837 | ); |
18838 | assert_eq_m512h(r, e); |
18839 | } |
18840 | |
18841 | #[simd_test(enable = "avx512fp16" )] |
18842 | unsafe fn test_mm512_maskz_fmul_pch() { |
18843 | let a = _mm512_set1_pch(0.0, 1.0); |
18844 | let b = _mm512_set1_pch(0.0, 1.0); |
18845 | let r = _mm512_maskz_fmul_pch(0b0101010101010101, a, b); |
18846 | let e = _mm512_setr_ph( |
18847 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18848 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18849 | ); |
18850 | assert_eq_m512h(r, e); |
18851 | } |
18852 | |
18853 | #[simd_test(enable = "avx512fp16" )] |
18854 | unsafe fn test_mm512_fmul_round_pch() { |
18855 | let a = _mm512_set1_pch(0.0, 1.0); |
18856 | let b = _mm512_set1_pch(0.0, 1.0); |
18857 | let r = _mm512_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
18858 | let e = _mm512_set1_pch(-1.0, 0.0); |
18859 | assert_eq_m512h(r, e); |
18860 | } |
18861 | |
18862 | #[simd_test(enable = "avx512fp16" )] |
18863 | unsafe fn test_mm512_mask_fmul_round_pch() { |
18864 | let a = _mm512_set1_pch(0.0, 1.0); |
18865 | let b = _mm512_set1_pch(0.0, 1.0); |
18866 | let src = _mm512_setr_ph( |
18867 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
18868 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
18869 | 32.0, 33.0, |
18870 | ); |
18871 | let r = _mm512_mask_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18872 | src, |
18873 | 0b0101010101010101, |
18874 | a, |
18875 | b, |
18876 | ); |
18877 | let e = _mm512_setr_ph( |
18878 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
18879 | -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, |
18880 | 33.0, |
18881 | ); |
18882 | assert_eq_m512h(r, e); |
18883 | } |
18884 | |
18885 | #[simd_test(enable = "avx512fp16" )] |
18886 | unsafe fn test_mm512_maskz_fmul_round_pch() { |
18887 | let a = _mm512_set1_pch(0.0, 1.0); |
18888 | let b = _mm512_set1_pch(0.0, 1.0); |
18889 | let r = _mm512_maskz_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18890 | 0b0101010101010101, |
18891 | a, |
18892 | b, |
18893 | ); |
18894 | let e = _mm512_setr_ph( |
18895 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18896 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18897 | ); |
18898 | assert_eq_m512h(r, e); |
18899 | } |
18900 | |
18901 | #[simd_test(enable = "avx512fp16" )] |
18902 | unsafe fn test_mm_fmul_round_sch() { |
18903 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18904 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18905 | let r = _mm_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
18906 | let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18907 | assert_eq_m128h(r, e); |
18908 | } |
18909 | |
18910 | #[simd_test(enable = "avx512fp16" )] |
18911 | unsafe fn test_mm_mask_fmul_round_sch() { |
18912 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18913 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18914 | let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); |
18915 | let r = _mm_mask_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18916 | src, 0, a, b, |
18917 | ); |
18918 | let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18919 | assert_eq_m128h(r, e); |
18920 | } |
18921 | |
18922 | #[simd_test(enable = "avx512fp16" )] |
18923 | unsafe fn test_mm_maskz_fmul_round_sch() { |
18924 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18925 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18926 | let r = |
18927 | _mm_maskz_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
18928 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18929 | assert_eq_m128h(r, e); |
18930 | } |
18931 | |
18932 | #[simd_test(enable = "avx512fp16" )] |
18933 | unsafe fn test_mm_fmul_sch() { |
18934 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18935 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18936 | let r = _mm_fmul_sch(a, b); |
18937 | let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18938 | assert_eq_m128h(r, e); |
18939 | } |
18940 | |
18941 | #[simd_test(enable = "avx512fp16" )] |
18942 | unsafe fn test_mm_mask_fmul_sch() { |
18943 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18944 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18945 | let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); |
18946 | let r = _mm_mask_fmul_sch(src, 0, a, b); |
18947 | let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18948 | assert_eq_m128h(r, e); |
18949 | } |
18950 | |
18951 | #[simd_test(enable = "avx512fp16" )] |
18952 | unsafe fn test_mm_maskz_fmul_sch() { |
18953 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18954 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18955 | let r = _mm_maskz_fmul_sch(0, a, b); |
18956 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18957 | assert_eq_m128h(r, e); |
18958 | } |
18959 | |
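    // The `cmul`/`fcmul` tests multiply by the complex conjugate of `b`: with a = i and
    // b = -i, conj(b) = i, so a * conj(b) = i * i = -1, giving the same (-1.0, 0.0) pairs
    // as the plain complex multiplies above.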
18960 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18961 | unsafe fn test_mm_cmul_pch() { |
18962 | let a = _mm_set1_pch(0.0, 1.0); |
18963 | let b = _mm_set1_pch(0.0, -1.0); |
18964 | let r = _mm_cmul_pch(a, b); |
18965 | let e = _mm_set1_pch(-1.0, 0.0); |
18966 | assert_eq_m128h(r, e); |
18967 | } |
18968 | |
18969 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18970 | unsafe fn test_mm_mask_cmul_pch() { |
18971 | let a = _mm_set1_pch(0.0, 1.0); |
18972 | let b = _mm_set1_pch(0.0, -1.0); |
18973 | let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0); |
18974 | let r = _mm_mask_cmul_pch(src, 0b0101, a, b); |
18975 | let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0); |
18976 | assert_eq_m128h(r, e); |
18977 | } |
18978 | |
18979 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18980 | unsafe fn test_mm_maskz_cmul_pch() { |
18981 | let a = _mm_set1_pch(0.0, 1.0); |
18982 | let b = _mm_set1_pch(0.0, -1.0); |
18983 | let r = _mm_maskz_cmul_pch(0b0101, a, b); |
18984 | let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0); |
18985 | assert_eq_m128h(r, e); |
18986 | } |
18987 | |
18988 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18989 | unsafe fn test_mm256_cmul_pch() { |
18990 | let a = _mm256_set1_pch(0.0, 1.0); |
18991 | let b = _mm256_set1_pch(0.0, -1.0); |
18992 | let r = _mm256_cmul_pch(a, b); |
18993 | let e = _mm256_set1_pch(-1.0, 0.0); |
18994 | assert_eq_m256h(r, e); |
18995 | } |
18996 | |
18997 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18998 | unsafe fn test_mm256_mask_cmul_pch() { |
18999 | let a = _mm256_set1_pch(0.0, 1.0); |
19000 | let b = _mm256_set1_pch(0.0, -1.0); |
19001 | let src = _mm256_setr_ph( |
19002 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
19003 | ); |
19004 | let r = _mm256_mask_cmul_pch(src, 0b01010101, a, b); |
19005 | let e = _mm256_setr_ph( |
19006 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
19007 | ); |
19008 | assert_eq_m256h(r, e); |
19009 | } |
19010 | |
19011 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19012 | unsafe fn test_mm256_maskz_cmul_pch() { |
19013 | let a = _mm256_set1_pch(0.0, 1.0); |
19014 | let b = _mm256_set1_pch(0.0, -1.0); |
19015 | let r = _mm256_maskz_cmul_pch(0b01010101, a, b); |
19016 | let e = _mm256_setr_ph( |
19017 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19018 | ); |
19019 | assert_eq_m256h(r, e); |
19020 | } |
19021 | |
19022 | #[simd_test(enable = "avx512fp16" )] |
19023 | unsafe fn test_mm512_cmul_pch() { |
19024 | let a = _mm512_set1_pch(0.0, 1.0); |
19025 | let b = _mm512_set1_pch(0.0, -1.0); |
19026 | let r = _mm512_cmul_pch(a, b); |
19027 | let e = _mm512_set1_pch(-1.0, 0.0); |
19028 | assert_eq_m512h(r, e); |
19029 | } |
19030 | |
19031 | #[simd_test(enable = "avx512fp16" )] |
19032 | unsafe fn test_mm512_mask_cmul_pch() { |
19033 | let a = _mm512_set1_pch(0.0, 1.0); |
19034 | let b = _mm512_set1_pch(0.0, -1.0); |
19035 | let src = _mm512_setr_ph( |
19036 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
19037 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
19038 | 32.0, 33.0, |
19039 | ); |
19040 | let r = _mm512_mask_cmul_pch(src, 0b0101010101010101, a, b); |
19041 | let e = _mm512_setr_ph( |
19042 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
19043 | -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, |
19044 | 33.0, |
19045 | ); |
19046 | assert_eq_m512h(r, e); |
19047 | } |
19048 | |
19049 | #[simd_test(enable = "avx512fp16" )] |
19050 | unsafe fn test_mm512_maskz_cmul_pch() { |
19051 | let a = _mm512_set1_pch(0.0, 1.0); |
19052 | let b = _mm512_set1_pch(0.0, -1.0); |
19053 | let r = _mm512_maskz_cmul_pch(0b0101010101010101, a, b); |
19054 | let e = _mm512_setr_ph( |
19055 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19056 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19057 | ); |
19058 | assert_eq_m512h(r, e); |
19059 | } |
19060 | |
19061 | #[simd_test(enable = "avx512fp16" )] |
19062 | unsafe fn test_mm512_cmul_round_pch() { |
19063 | let a = _mm512_set1_pch(0.0, 1.0); |
19064 | let b = _mm512_set1_pch(0.0, -1.0); |
19065 | let r = _mm512_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
19066 | let e = _mm512_set1_pch(-1.0, 0.0); |
19067 | assert_eq_m512h(r, e); |
19068 | } |
19069 | |
19070 | #[simd_test(enable = "avx512fp16" )] |
19071 | unsafe fn test_mm512_mask_cmul_round_pch() { |
19072 | let a = _mm512_set1_pch(0.0, 1.0); |
19073 | let b = _mm512_set1_pch(0.0, -1.0); |
19074 | let src = _mm512_setr_ph( |
19075 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
19076 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
19077 | 32.0, 33.0, |
19078 | ); |
19079 | let r = _mm512_mask_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19080 | src, |
19081 | 0b0101010101010101, |
19082 | a, |
19083 | b, |
19084 | ); |
19085 | let e = _mm512_setr_ph( |
19086 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
19087 | -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, |
19088 | 33.0, |
19089 | ); |
19090 | assert_eq_m512h(r, e); |
19091 | } |
19092 | |
19093 | #[simd_test(enable = "avx512fp16" )] |
19094 | unsafe fn test_mm512_maskz_cmul_round_pch() { |
19095 | let a = _mm512_set1_pch(0.0, 1.0); |
19096 | let b = _mm512_set1_pch(0.0, -1.0); |
19097 | let r = _mm512_maskz_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19098 | 0b0101010101010101, |
19099 | a, |
19100 | b, |
19101 | ); |
19102 | let e = _mm512_setr_ph( |
19103 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19104 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19105 | ); |
19106 | assert_eq_m512h(r, e); |
19107 | } |
19108 | |
19109 | #[simd_test(enable = "avx512fp16" )] |
19110 | unsafe fn test_mm_cmul_sch() { |
19111 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19112 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19113 | let r = _mm_cmul_sch(a, b); |
19114 | let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19115 | assert_eq_m128h(r, e); |
19116 | } |
19117 | |
19118 | #[simd_test(enable = "avx512fp16" )] |
19119 | unsafe fn test_mm_mask_cmul_sch() { |
19120 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19121 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19122 | let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); |
19123 | let r = _mm_mask_cmul_sch(src, 0, a, b); |
19124 | let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19125 | assert_eq_m128h(r, e); |
19126 | } |
19127 | |
19128 | #[simd_test(enable = "avx512fp16" )] |
19129 | unsafe fn test_mm_maskz_cmul_sch() { |
19130 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19131 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19132 | let r = _mm_maskz_cmul_sch(0, a, b); |
19133 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19134 | assert_eq_m128h(r, e); |
19135 | } |
19136 | |
19137 | #[simd_test(enable = "avx512fp16" )] |
19138 | unsafe fn test_mm_cmul_round_sch() { |
19139 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19140 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19141 | let r = _mm_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
19142 | let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19143 | assert_eq_m128h(r, e); |
19144 | } |
19145 | |
19146 | #[simd_test(enable = "avx512fp16" )] |
19147 | unsafe fn test_mm_mask_cmul_round_sch() { |
19148 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19149 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19150 | let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); |
19151 | let r = _mm_mask_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19152 | src, 0, a, b, |
19153 | ); |
19154 | let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19155 | assert_eq_m128h(r, e); |
19156 | } |
19157 | |
19158 | #[simd_test(enable = "avx512fp16" )] |
19159 | unsafe fn test_mm_maskz_cmul_round_sch() { |
19160 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19161 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19162 | let r = |
19163 | _mm_maskz_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
19164 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19165 | assert_eq_m128h(r, e); |
19166 | } |
19167 | |
19168 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19169 | unsafe fn test_mm_fcmul_pch() { |
19170 | let a = _mm_set1_pch(0.0, 1.0); |
19171 | let b = _mm_set1_pch(0.0, -1.0); |
19172 | let r = _mm_fcmul_pch(a, b); |
19173 | let e = _mm_set1_pch(-1.0, 0.0); |
19174 | assert_eq_m128h(r, e); |
19175 | } |
19176 | |
19177 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19178 | unsafe fn test_mm_mask_fcmul_pch() { |
19179 | let a = _mm_set1_pch(0.0, 1.0); |
19180 | let b = _mm_set1_pch(0.0, -1.0); |
19181 | let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0); |
19182 | let r = _mm_mask_fcmul_pch(src, 0b0101, a, b); |
19183 | let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0); |
19184 | assert_eq_m128h(r, e); |
19185 | } |
19186 | |
19187 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19188 | unsafe fn test_mm_maskz_fcmul_pch() { |
19189 | let a = _mm_set1_pch(0.0, 1.0); |
19190 | let b = _mm_set1_pch(0.0, -1.0); |
19191 | let r = _mm_maskz_fcmul_pch(0b0101, a, b); |
19192 | let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0); |
19193 | assert_eq_m128h(r, e); |
19194 | } |
19195 | |
19196 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19197 | unsafe fn test_mm256_fcmul_pch() { |
19198 | let a = _mm256_set1_pch(0.0, 1.0); |
19199 | let b = _mm256_set1_pch(0.0, -1.0); |
19200 | let r = _mm256_fcmul_pch(a, b); |
19201 | let e = _mm256_set1_pch(-1.0, 0.0); |
19202 | assert_eq_m256h(r, e); |
19203 | } |
19204 | |
19205 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19206 | unsafe fn test_mm256_mask_fcmul_pch() { |
19207 | let a = _mm256_set1_pch(0.0, 1.0); |
19208 | let b = _mm256_set1_pch(0.0, -1.0); |
19209 | let src = _mm256_setr_ph( |
19210 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
19211 | ); |
19212 | let r = _mm256_mask_fcmul_pch(src, 0b01010101, a, b); |
19213 | let e = _mm256_setr_ph( |
19214 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
19215 | ); |
19216 | assert_eq_m256h(r, e); |
19217 | } |
19218 | |
19219 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19220 | unsafe fn test_mm256_maskz_fcmul_pch() { |
19221 | let a = _mm256_set1_pch(0.0, 1.0); |
19222 | let b = _mm256_set1_pch(0.0, -1.0); |
19223 | let r = _mm256_maskz_fcmul_pch(0b01010101, a, b); |
19224 | let e = _mm256_setr_ph( |
19225 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19226 | ); |
19227 | assert_eq_m256h(r, e); |
19228 | } |
19229 | |
19230 | #[simd_test(enable = "avx512fp16" )] |
19231 | unsafe fn test_mm512_fcmul_pch() { |
19232 | let a = _mm512_set1_pch(0.0, 1.0); |
19233 | let b = _mm512_set1_pch(0.0, -1.0); |
19234 | let r = _mm512_fcmul_pch(a, b); |
19235 | let e = _mm512_set1_pch(-1.0, 0.0); |
19236 | assert_eq_m512h(r, e); |
19237 | } |
19238 | |
19239 | #[simd_test(enable = "avx512fp16" )] |
19240 | unsafe fn test_mm512_mask_fcmul_pch() { |
19241 | let a = _mm512_set1_pch(0.0, 1.0); |
19242 | let b = _mm512_set1_pch(0.0, -1.0); |
19243 | let src = _mm512_setr_ph( |
19244 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
19245 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
19246 | 32.0, 33.0, |
19247 | ); |
19248 | let r = _mm512_mask_fcmul_pch(src, 0b0101010101010101, a, b); |
19249 | let e = _mm512_setr_ph( |
19250 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
19251 | -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, |
19252 | 33.0, |
19253 | ); |
19254 | assert_eq_m512h(r, e); |
19255 | } |
19256 | |
19257 | #[simd_test(enable = "avx512fp16" )] |
19258 | unsafe fn test_mm512_maskz_fcmul_pch() { |
19259 | let a = _mm512_set1_pch(0.0, 1.0); |
19260 | let b = _mm512_set1_pch(0.0, -1.0); |
19261 | let r = _mm512_maskz_fcmul_pch(0b0101010101010101, a, b); |
19262 | let e = _mm512_setr_ph( |
19263 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19264 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19265 | ); |
19266 | assert_eq_m512h(r, e); |
19267 | } |
19268 | |
19269 | #[simd_test(enable = "avx512fp16" )] |
19270 | unsafe fn test_mm512_fcmul_round_pch() { |
19271 | let a = _mm512_set1_pch(0.0, 1.0); |
19272 | let b = _mm512_set1_pch(0.0, -1.0); |
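// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC selects round-to-nearest-even and suppresses exceptions (SAE).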
19273 | let r = _mm512_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
19274 | let e = _mm512_set1_pch(-1.0, 0.0); |
19275 | assert_eq_m512h(r, e); |
19276 | } |
19277 | |
19278 | #[simd_test(enable = "avx512fp16" )] |
19279 | unsafe fn test_mm512_mask_fcmul_round_pch() { |
19280 | let a = _mm512_set1_pch(0.0, 1.0); |
19281 | let b = _mm512_set1_pch(0.0, -1.0); |
19282 | let src = _mm512_setr_ph( |
19283 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
19284 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
19285 | 32.0, 33.0, |
19286 | ); |
19287 | let r = _mm512_mask_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19288 | src, |
19289 | 0b0101010101010101, |
19290 | a, |
19291 | b, |
19292 | ); |
19293 | let e = _mm512_setr_ph( |
19294 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
19295 | -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, |
19296 | 33.0, |
19297 | ); |
19298 | assert_eq_m512h(r, e); |
19299 | } |
19300 | |
19301 | #[simd_test(enable = "avx512fp16" )] |
19302 | unsafe fn test_mm512_maskz_fcmul_round_pch() { |
19303 | let a = _mm512_set1_pch(0.0, 1.0); |
19304 | let b = _mm512_set1_pch(0.0, -1.0); |
19305 | let r = _mm512_maskz_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19306 | 0b0101010101010101, |
19307 | a, |
19308 | b, |
19309 | ); |
19310 | let e = _mm512_setr_ph( |
19311 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19312 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19313 | ); |
19314 | assert_eq_m512h(r, e); |
19315 | } |
19316 | |
19317 | #[simd_test(enable = "avx512fp16" )] |
19318 | unsafe fn test_mm_fcmul_sch() { |
19319 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19320 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
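// Scalar complex variant: only the lowest pair is computed, a * conj(b) = -1+0i;
// the remaining six elements are copied from a.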
19321 | let r = _mm_fcmul_sch(a, b); |
19322 | let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19323 | assert_eq_m128h(r, e); |
19324 | } |
19325 | |
19326 | #[simd_test(enable = "avx512fp16" )] |
19327 | unsafe fn test_mm_mask_fcmul_sch() { |
19328 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19329 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19330 | let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); |
19331 | let r = _mm_mask_fcmul_sch(src, 0, a, b); |
19332 | let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19333 | assert_eq_m128h(r, e); |
19334 | } |
19335 | |
19336 | #[simd_test(enable = "avx512fp16" )] |
19337 | unsafe fn test_mm_maskz_fcmul_sch() { |
19338 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19339 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19340 | let r = _mm_maskz_fcmul_sch(0, a, b); |
19341 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19342 | assert_eq_m128h(r, e); |
19343 | } |
19344 | |
19345 | #[simd_test(enable = "avx512fp16" )] |
19346 | unsafe fn test_mm_fcmul_round_sch() { |
19347 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19348 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19349 | let r = _mm_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
19350 | let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19351 | assert_eq_m128h(r, e); |
19352 | } |
19353 | |
19354 | #[simd_test(enable = "avx512fp16" )] |
19355 | unsafe fn test_mm_mask_fcmul_round_sch() { |
19356 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19357 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19358 | let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); |
19359 | let r = _mm_mask_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19360 | src, 0, a, b, |
19361 | ); |
19362 | let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19363 | assert_eq_m128h(r, e); |
19364 | } |
19365 | |
19366 | #[simd_test(enable = "avx512fp16" )] |
19367 | unsafe fn test_mm_maskz_fcmul_round_sch() { |
19368 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19369 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19370 | let r = |
19371 | _mm_maskz_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
19372 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19373 | assert_eq_m128h(r, e); |
19374 | } |
19375 | |
19376 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19377 | unsafe fn test_mm_abs_ph() { |
19378 | let a = _mm_set_ph(-1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0); |
19379 | let r = _mm_abs_ph(a); |
19380 | let e = _mm_set_ph(1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0); |
19381 | assert_eq_m128h(r, e); |
19382 | } |
19383 | |
19384 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19385 | unsafe fn test_mm256_abs_ph() { |
19386 | let a = _mm256_set_ph( |
19387 | -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, |
19388 | -14.0, |
19389 | ); |
19390 | let r = _mm256_abs_ph(a); |
19391 | let e = _mm256_set_ph( |
19392 | 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, |
19393 | ); |
19394 | assert_eq_m256h(r, e); |
19395 | } |
19396 | |
19397 | #[simd_test(enable = "avx512fp16" )] |
19398 | unsafe fn test_mm512_abs_ph() { |
19399 | let a = _mm512_set_ph( |
19400 | -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, |
19401 | -14.0, 15.0, -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0, |
19402 | 27.0, -28.0, 29.0, -30.0, |
19403 | ); |
19404 | let r = _mm512_abs_ph(a); |
19405 | let e = _mm512_set_ph( |
19406 | 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, |
19407 | 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, |
19408 | 29.0, 30.0, |
19409 | ); |
19410 | assert_eq_m512h(r, e); |
19411 | } |
19412 | |
19413 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19414 | unsafe fn test_mm_conj_pch() { |
19415 | let a = _mm_set1_pch(0.0, 1.0); |
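// Conjugation flips the sign of the imaginary (odd-indexed) elements: 0+i becomes 0-i.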
19416 | let r = _mm_conj_pch(a); |
19417 | let e = _mm_set1_pch(0.0, -1.0); |
19418 | assert_eq_m128h(r, e); |
19419 | } |
19420 | |
19421 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19422 | unsafe fn test_mm_mask_conj_pch() { |
19423 | let a = _mm_set1_pch(0.0, 1.0); |
19424 | let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0); |
19425 | let r = _mm_mask_conj_pch(src, 0b0101, a); |
19426 | let e = _mm_setr_ph(0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0); |
19427 | assert_eq_m128h(r, e); |
19428 | } |
19429 | |
19430 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19431 | unsafe fn test_mm_maskz_conj_pch() { |
19432 | let a = _mm_set1_pch(0.0, 1.0); |
19433 | let r = _mm_maskz_conj_pch(0b0101, a); |
19434 | let e = _mm_setr_ph(0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0); |
19435 | assert_eq_m128h(r, e); |
19436 | } |
19437 | |
19438 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19439 | unsafe fn test_mm256_conj_pch() { |
19440 | let a = _mm256_set1_pch(0.0, 1.0); |
19441 | let r = _mm256_conj_pch(a); |
19442 | let e = _mm256_set1_pch(0.0, -1.0); |
19443 | assert_eq_m256h(r, e); |
19444 | } |
19445 | |
19446 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19447 | unsafe fn test_mm256_mask_conj_pch() { |
19448 | let a = _mm256_set1_pch(0.0, 1.0); |
19449 | let src = _mm256_setr_ph( |
19450 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
19451 | ); |
19452 | let r = _mm256_mask_conj_pch(src, 0b01010101, a); |
19453 | let e = _mm256_setr_ph( |
19454 | 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0, |
19455 | ); |
19456 | assert_eq_m256h(r, e); |
19457 | } |
19458 | |
19459 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19460 | unsafe fn test_mm256_maskz_conj_pch() { |
19461 | let a = _mm256_set1_pch(0.0, 1.0); |
19462 | let r = _mm256_maskz_conj_pch(0b01010101, a); |
19463 | let e = _mm256_setr_ph( |
19464 | 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, |
19465 | ); |
19466 | assert_eq_m256h(r, e); |
19467 | } |
19468 | |
19469 | #[simd_test(enable = "avx512fp16" )] |
19470 | unsafe fn test_mm512_conj_pch() { |
19471 | let a = _mm512_set1_pch(0.0, 1.0); |
19472 | let r = _mm512_conj_pch(a); |
19473 | let e = _mm512_set1_pch(0.0, -1.0); |
19474 | assert_eq_m512h(r, e); |
19475 | } |
19476 | |
19477 | #[simd_test(enable = "avx512fp16" )] |
19478 | unsafe fn test_mm512_mask_conj_pch() { |
19479 | let a = _mm512_set1_pch(0.0, 1.0); |
19480 | let src = _mm512_setr_ph( |
19481 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
19482 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
19483 | 32.0, 33.0, |
19484 | ); |
19485 | let r = _mm512_mask_conj_pch(src, 0b0101010101010101, a); |
19486 | let e = _mm512_setr_ph( |
19487 | 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0, |
19488 | 0.0, -1.0, 20.0, 21.0, 0.0, -1.0, 24.0, 25.0, 0.0, -1.0, 28.0, 29.0, 0.0, -1.0, 32.0, |
19489 | 33.0, |
19490 | ); |
19491 | assert_eq_m512h(r, e); |
19492 | } |
19493 | |
19494 | #[simd_test(enable = "avx512fp16" )] |
19495 | unsafe fn test_mm512_maskz_conj_pch() { |
19496 | let a = _mm512_set1_pch(0.0, 1.0); |
19497 | let r = _mm512_maskz_conj_pch(0b0101010101010101, a); |
19498 | let e = _mm512_setr_ph( |
19499 | 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, |
19500 | 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, |
19501 | ); |
19502 | assert_eq_m512h(r, e); |
19503 | } |
19504 | |
19505 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19506 | unsafe fn test_mm_fmadd_pch() { |
19507 | let a = _mm_set1_pch(0.0, 1.0); |
19508 | let b = _mm_set1_pch(0.0, 2.0); |
19509 | let c = _mm_set1_pch(0.0, 3.0); |
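// Complex fused multiply-add: (0+i)*(0+2i) + (0+3i) = -2+3i in every complex element.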
19510 | let r = _mm_fmadd_pch(a, b, c); |
19511 | let e = _mm_set1_pch(-2.0, 3.0); |
19512 | assert_eq_m128h(r, e); |
19513 | } |
19514 | |
19515 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19516 | unsafe fn test_mm_mask_fmadd_pch() { |
19517 | let a = _mm_set1_pch(0.0, 1.0); |
19518 | let b = _mm_set1_pch(0.0, 2.0); |
19519 | let c = _mm_set1_pch(0.0, 3.0); |
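// Cleared mask bits keep the corresponding complex number from a (0+i).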
19520 | let r = _mm_mask_fmadd_pch(a, 0b0101, b, c); |
19521 | let e = _mm_setr_ph(-2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0); |
19522 | assert_eq_m128h(r, e); |
19523 | } |
19524 | |
19525 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19526 | unsafe fn test_mm_mask3_fmadd_pch() { |
19527 | let a = _mm_set1_pch(0.0, 1.0); |
19528 | let b = _mm_set1_pch(0.0, 2.0); |
19529 | let c = _mm_set1_pch(0.0, 3.0); |
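// The mask3 form writes over c: cleared mask bits keep the complex number from c (0+3i).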
19530 | let r = _mm_mask3_fmadd_pch(a, b, c, 0b0101); |
19531 | let e = _mm_setr_ph(-2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0); |
19532 | assert_eq_m128h(r, e); |
19533 | } |
19534 | |
19535 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19536 | unsafe fn test_mm_maskz_fmadd_pch() { |
19537 | let a = _mm_set1_pch(0.0, 1.0); |
19538 | let b = _mm_set1_pch(0.0, 2.0); |
19539 | let c = _mm_set1_pch(0.0, 3.0); |
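// Cleared mask bits zero the whole complex number.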
19540 | let r = _mm_maskz_fmadd_pch(0b0101, a, b, c); |
19541 | let e = _mm_setr_ph(-2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0); |
19542 | assert_eq_m128h(r, e); |
19543 | } |
19544 | |
19545 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19546 | unsafe fn test_mm256_fmadd_pch() { |
19547 | let a = _mm256_set1_pch(0.0, 1.0); |
19548 | let b = _mm256_set1_pch(0.0, 2.0); |
19549 | let c = _mm256_set1_pch(0.0, 3.0); |
19550 | let r = _mm256_fmadd_pch(a, b, c); |
19551 | let e = _mm256_set1_pch(-2.0, 3.0); |
19552 | assert_eq_m256h(r, e); |
19553 | } |
19554 | |
19555 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19556 | unsafe fn test_mm256_mask_fmadd_pch() { |
19557 | let a = _mm256_set1_pch(0.0, 1.0); |
19558 | let b = _mm256_set1_pch(0.0, 2.0); |
19559 | let c = _mm256_set1_pch(0.0, 3.0); |
19560 | let r = _mm256_mask_fmadd_pch(a, 0b01010101, b, c); |
19561 | let e = _mm256_setr_ph( |
19562 | -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, |
19563 | ); |
19564 | assert_eq_m256h(r, e); |
19565 | } |
19566 | |
19567 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19568 | unsafe fn test_mm256_mask3_fmadd_pch() { |
19569 | let a = _mm256_set1_pch(0.0, 1.0); |
19570 | let b = _mm256_set1_pch(0.0, 2.0); |
19571 | let c = _mm256_set1_pch(0.0, 3.0); |
19572 | let r = _mm256_mask3_fmadd_pch(a, b, c, 0b01010101); |
19573 | let e = _mm256_setr_ph( |
19574 | -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, |
19575 | ); |
19576 | assert_eq_m256h(r, e); |
19577 | } |
19578 | |
19579 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19580 | unsafe fn test_mm256_maskz_fmadd_pch() { |
19581 | let a = _mm256_set1_pch(0.0, 1.0); |
19582 | let b = _mm256_set1_pch(0.0, 2.0); |
19583 | let c = _mm256_set1_pch(0.0, 3.0); |
19584 | let r = _mm256_maskz_fmadd_pch(0b01010101, a, b, c); |
19585 | let e = _mm256_setr_ph( |
19586 | -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, |
19587 | ); |
19588 | assert_eq_m256h(r, e); |
19589 | } |
19590 | |
19591 | #[simd_test(enable = "avx512fp16" )] |
19592 | unsafe fn test_mm512_fmadd_pch() { |
19593 | let a = _mm512_set1_pch(0.0, 1.0); |
19594 | let b = _mm512_set1_pch(0.0, 2.0); |
19595 | let c = _mm512_set1_pch(0.0, 3.0); |
19596 | let r = _mm512_fmadd_pch(a, b, c); |
19597 | let e = _mm512_set1_pch(-2.0, 3.0); |
19598 | assert_eq_m512h(r, e); |
19599 | } |
19600 | |
19601 | #[simd_test(enable = "avx512fp16" )] |
19602 | unsafe fn test_mm512_mask_fmadd_pch() { |
19603 | let a = _mm512_set1_pch(0.0, 1.0); |
19604 | let b = _mm512_set1_pch(0.0, 2.0); |
19605 | let c = _mm512_set1_pch(0.0, 3.0); |
19606 | let r = _mm512_mask_fmadd_pch(a, 0b0101010101010101, b, c); |
19607 | let e = _mm512_setr_ph( |
19608 | -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, |
19609 | -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, |
19610 | ); |
19611 | assert_eq_m512h(r, e); |
19612 | } |
19613 | |
19614 | #[simd_test(enable = "avx512fp16" )] |
19615 | unsafe fn test_mm512_mask3_fmadd_pch() { |
19616 | let a = _mm512_set1_pch(0.0, 1.0); |
19617 | let b = _mm512_set1_pch(0.0, 2.0); |
19618 | let c = _mm512_set1_pch(0.0, 3.0); |
19619 | let r = _mm512_mask3_fmadd_pch(a, b, c, 0b0101010101010101); |
19620 | let e = _mm512_setr_ph( |
19621 | -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, |
19622 | -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, |
19623 | ); |
19624 | assert_eq_m512h(r, e); |
19625 | } |
19626 | |
19627 | #[simd_test(enable = "avx512fp16" )] |
19628 | unsafe fn test_mm512_maskz_fmadd_pch() { |
19629 | let a = _mm512_set1_pch(0.0, 1.0); |
19630 | let b = _mm512_set1_pch(0.0, 2.0); |
19631 | let c = _mm512_set1_pch(0.0, 3.0); |
19632 | let r = _mm512_maskz_fmadd_pch(0b0101010101010101, a, b, c); |
19633 | let e = _mm512_setr_ph( |
19634 | -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, |
19635 | -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, |
19636 | ); |
19637 | assert_eq_m512h(r, e); |
19638 | } |
19639 | |
19640 | #[simd_test(enable = "avx512fp16" )] |
19641 | unsafe fn test_mm512_fmadd_round_pch() { |
19642 | let a = _mm512_set1_pch(0.0, 1.0); |
19643 | let b = _mm512_set1_pch(0.0, 2.0); |
19644 | let c = _mm512_set1_pch(0.0, 3.0); |
19645 | let r = |
19646 | _mm512_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
19647 | let e = _mm512_set1_pch(-2.0, 3.0); |
19648 | assert_eq_m512h(r, e); |
19649 | } |
19650 | |
19651 | #[simd_test(enable = "avx512fp16" )] |
19652 | unsafe fn test_mm512_mask_fmadd_round_pch() { |
19653 | let a = _mm512_set1_pch(0.0, 1.0); |
19654 | let b = _mm512_set1_pch(0.0, 2.0); |
19655 | let c = _mm512_set1_pch(0.0, 3.0); |
19656 | let r = _mm512_mask_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19657 | a, |
19658 | 0b0101010101010101, |
19659 | b, |
19660 | c, |
19661 | ); |
19662 | let e = _mm512_setr_ph( |
19663 | -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, |
19664 | -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, |
19665 | ); |
19666 | assert_eq_m512h(r, e); |
19667 | } |
19668 | |
19669 | #[simd_test(enable = "avx512fp16" )] |
19670 | unsafe fn test_mm512_mask3_fmadd_round_pch() { |
19671 | let a = _mm512_set1_pch(0.0, 1.0); |
19672 | let b = _mm512_set1_pch(0.0, 2.0); |
19673 | let c = _mm512_set1_pch(0.0, 3.0); |
19674 | let r = _mm512_mask3_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19675 | a, |
19676 | b, |
19677 | c, |
19678 | 0b0101010101010101, |
19679 | ); |
19680 | let e = _mm512_setr_ph( |
19681 | -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, |
19682 | -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, |
19683 | ); |
19684 | assert_eq_m512h(r, e); |
19685 | } |
19686 | |
19687 | #[simd_test(enable = "avx512fp16" )] |
19688 | unsafe fn test_mm512_maskz_fmadd_round_pch() { |
19689 | let a = _mm512_set1_pch(0.0, 1.0); |
19690 | let b = _mm512_set1_pch(0.0, 2.0); |
19691 | let c = _mm512_set1_pch(0.0, 3.0); |
19692 | let r = _mm512_maskz_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19693 | 0b0101010101010101, |
19694 | a, |
19695 | b, |
19696 | c, |
19697 | ); |
19698 | let e = _mm512_setr_ph( |
19699 | -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, |
19700 | -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, |
19701 | ); |
19702 | assert_eq_m512h(r, e); |
19703 | } |
19704 | |
19705 | #[simd_test(enable = "avx512fp16" )] |
19706 | unsafe fn test_mm_fmadd_sch() { |
19707 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19708 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
19709 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
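// Scalar complex variant: only the lowest pair is computed, (0+i)*(0+2i) + (0+3i) = -2+3i;
// elements 2..7 are copied from a.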
19710 | let r = _mm_fmadd_sch(a, b, c); |
19711 | let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19712 | assert_eq_m128h(r, e); |
19713 | } |
19714 | |
19715 | #[simd_test(enable = "avx512fp16" )] |
19716 | unsafe fn test_mm_mask_fmadd_sch() { |
19717 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19718 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
19719 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19720 | let r = _mm_mask_fmadd_sch(a, 0, b, c); |
19721 | let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19722 | assert_eq_m128h(r, e); |
19723 | let r = _mm_mask_fmadd_sch(a, 1, b, c); |
19724 | let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19725 | assert_eq_m128h(r, e); |
19726 | } |
19727 | |
19728 | #[simd_test(enable = "avx512fp16" )] |
19729 | unsafe fn test_mm_mask3_fmadd_sch() { |
19730 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19731 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
19732 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19733 | let r = _mm_mask3_fmadd_sch(a, b, c, 0); |
19734 | let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19735 | assert_eq_m128h(r, e); |
19736 | let r = _mm_mask3_fmadd_sch(a, b, c, 1); |
19737 | let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19738 | assert_eq_m128h(r, e); |
19739 | } |
19740 | |
19741 | #[simd_test(enable = "avx512fp16" )] |
19742 | unsafe fn test_mm_maskz_fmadd_sch() { |
19743 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19744 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
19745 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19746 | let r = _mm_maskz_fmadd_sch(0, a, b, c); |
19747 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19748 | assert_eq_m128h(r, e); |
19749 | let r = _mm_maskz_fmadd_sch(1, a, b, c); |
19750 | let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19751 | assert_eq_m128h(r, e); |
19752 | } |
19753 | |
19754 | #[simd_test(enable = "avx512fp16" )] |
19755 | unsafe fn test_mm_fmadd_round_sch() { |
19756 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19757 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
19758 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19759 | let r = _mm_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
19760 | let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19761 | assert_eq_m128h(r, e); |
19762 | } |
19763 | |
19764 | #[simd_test(enable = "avx512fp16" )] |
19765 | unsafe fn test_mm_mask_fmadd_round_sch() { |
19766 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19767 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
19768 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19769 | let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19770 | a, 0, b, c, |
19771 | ); |
19772 | let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19773 | assert_eq_m128h(r, e); |
19774 | let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19775 | a, 1, b, c, |
19776 | ); |
19777 | let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19778 | assert_eq_m128h(r, e); |
19779 | } |
19780 | |
19781 | #[simd_test(enable = "avx512fp16" )] |
19782 | unsafe fn test_mm_mask3_fmadd_round_sch() { |
19783 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19784 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
19785 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19786 | let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19787 | a, b, c, 0, |
19788 | ); |
19789 | let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19790 | assert_eq_m128h(r, e); |
19791 | let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19792 | a, b, c, 1, |
19793 | ); |
19794 | let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19795 | assert_eq_m128h(r, e); |
19796 | } |
19797 | |
19798 | #[simd_test(enable = "avx512fp16" )] |
19799 | unsafe fn test_mm_maskz_fmadd_round_sch() { |
19800 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19801 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
19802 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19803 | let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19804 | 0, a, b, c, |
19805 | ); |
19806 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19807 | assert_eq_m128h(r, e); |
19808 | let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19809 | 1, a, b, c, |
19810 | ); |
19811 | let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19812 | assert_eq_m128h(r, e); |
19813 | } |
19814 | |
19815 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19816 | unsafe fn test_mm_fcmadd_pch() { |
19817 | let a = _mm_set1_pch(0.0, 1.0); |
19818 | let b = _mm_set1_pch(0.0, 2.0); |
19819 | let c = _mm_set1_pch(0.0, 3.0); |
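// fcmadd uses the conjugate of b: (0+i)*conj(0+2i) + (0+3i) = (0+i)*(0-2i) + (0+3i) = 2+3i.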
19820 | let r = _mm_fcmadd_pch(a, b, c); |
19821 | let e = _mm_set1_pch(2.0, 3.0); |
19822 | assert_eq_m128h(r, e); |
19823 | } |
19824 | |
19825 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19826 | unsafe fn test_mm_mask_fcmadd_pch() { |
19827 | let a = _mm_set1_pch(0.0, 1.0); |
19828 | let b = _mm_set1_pch(0.0, 2.0); |
19829 | let c = _mm_set1_pch(0.0, 3.0); |
19830 | let r = _mm_mask_fcmadd_pch(a, 0b0101, b, c); |
19831 | let e = _mm_setr_ph(2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0); |
19832 | assert_eq_m128h(r, e); |
19833 | } |
19834 | |
19835 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19836 | unsafe fn test_mm_mask3_fcmadd_pch() { |
19837 | let a = _mm_set1_pch(0.0, 1.0); |
19838 | let b = _mm_set1_pch(0.0, 2.0); |
19839 | let c = _mm_set1_pch(0.0, 3.0); |
19840 | let r = _mm_mask3_fcmadd_pch(a, b, c, 0b0101); |
19841 | let e = _mm_setr_ph(2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0); |
19842 | assert_eq_m128h(r, e); |
19843 | } |
19844 | |
19845 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19846 | unsafe fn test_mm_maskz_fcmadd_pch() { |
19847 | let a = _mm_set1_pch(0.0, 1.0); |
19848 | let b = _mm_set1_pch(0.0, 2.0); |
19849 | let c = _mm_set1_pch(0.0, 3.0); |
19850 | let r = _mm_maskz_fcmadd_pch(0b0101, a, b, c); |
19851 | let e = _mm_setr_ph(2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0); |
19852 | assert_eq_m128h(r, e); |
19853 | } |
19854 | |
19855 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19856 | unsafe fn test_mm256_fcmadd_pch() { |
19857 | let a = _mm256_set1_pch(0.0, 1.0); |
19858 | let b = _mm256_set1_pch(0.0, 2.0); |
19859 | let c = _mm256_set1_pch(0.0, 3.0); |
19860 | let r = _mm256_fcmadd_pch(a, b, c); |
19861 | let e = _mm256_set1_pch(2.0, 3.0); |
19862 | assert_eq_m256h(r, e); |
19863 | } |
19864 | |
19865 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19866 | unsafe fn test_mm256_mask_fcmadd_pch() { |
19867 | let a = _mm256_set1_pch(0.0, 1.0); |
19868 | let b = _mm256_set1_pch(0.0, 2.0); |
19869 | let c = _mm256_set1_pch(0.0, 3.0); |
19870 | let r = _mm256_mask_fcmadd_pch(a, 0b01010101, b, c); |
19871 | let e = _mm256_setr_ph( |
19872 | 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, |
19873 | ); |
19874 | assert_eq_m256h(r, e); |
19875 | } |
19876 | |
19877 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19878 | unsafe fn test_mm256_mask3_fcmadd_pch() { |
19879 | let a = _mm256_set1_pch(0.0, 1.0); |
19880 | let b = _mm256_set1_pch(0.0, 2.0); |
19881 | let c = _mm256_set1_pch(0.0, 3.0); |
19882 | let r = _mm256_mask3_fcmadd_pch(a, b, c, 0b01010101); |
19883 | let e = _mm256_setr_ph( |
19884 | 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, |
19885 | ); |
19886 | assert_eq_m256h(r, e); |
19887 | } |
19888 | |
19889 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19890 | unsafe fn test_mm256_maskz_fcmadd_pch() { |
19891 | let a = _mm256_set1_pch(0.0, 1.0); |
19892 | let b = _mm256_set1_pch(0.0, 2.0); |
19893 | let c = _mm256_set1_pch(0.0, 3.0); |
19894 | let r = _mm256_maskz_fcmadd_pch(0b01010101, a, b, c); |
19895 | let e = _mm256_setr_ph( |
19896 | 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, |
19897 | ); |
19898 | assert_eq_m256h(r, e); |
19899 | } |
19900 | |
19901 | #[simd_test(enable = "avx512fp16" )] |
19902 | unsafe fn test_mm512_fcmadd_pch() { |
19903 | let a = _mm512_set1_pch(0.0, 1.0); |
19904 | let b = _mm512_set1_pch(0.0, 2.0); |
19905 | let c = _mm512_set1_pch(0.0, 3.0); |
19906 | let r = _mm512_fcmadd_pch(a, b, c); |
19907 | let e = _mm512_set1_pch(2.0, 3.0); |
19908 | assert_eq_m512h(r, e); |
19909 | } |
19910 | |
19911 | #[simd_test(enable = "avx512fp16" )] |
19912 | unsafe fn test_mm512_mask_fcmadd_pch() { |
19913 | let a = _mm512_set1_pch(0.0, 1.0); |
19914 | let b = _mm512_set1_pch(0.0, 2.0); |
19915 | let c = _mm512_set1_pch(0.0, 3.0); |
19916 | let r = _mm512_mask_fcmadd_pch(a, 0b0101010101010101, b, c); |
19917 | let e = _mm512_setr_ph( |
19918 | 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, |
19919 | 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, |
19920 | ); |
19921 | assert_eq_m512h(r, e); |
19922 | } |
19923 | |
19924 | #[simd_test(enable = "avx512fp16" )] |
19925 | unsafe fn test_mm512_mask3_fcmadd_pch() { |
19926 | let a = _mm512_set1_pch(0.0, 1.0); |
19927 | let b = _mm512_set1_pch(0.0, 2.0); |
19928 | let c = _mm512_set1_pch(0.0, 3.0); |
19929 | let r = _mm512_mask3_fcmadd_pch(a, b, c, 0b0101010101010101); |
19930 | let e = _mm512_setr_ph( |
19931 | 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, |
19932 | 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, |
19933 | ); |
19934 | assert_eq_m512h(r, e); |
19935 | } |
19936 | |
19937 | #[simd_test(enable = "avx512fp16" )] |
19938 | unsafe fn test_mm512_maskz_fcmadd_pch() { |
19939 | let a = _mm512_set1_pch(0.0, 1.0); |
19940 | let b = _mm512_set1_pch(0.0, 2.0); |
19941 | let c = _mm512_set1_pch(0.0, 3.0); |
19942 | let r = _mm512_maskz_fcmadd_pch(0b0101010101010101, a, b, c); |
19943 | let e = _mm512_setr_ph( |
19944 | 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, |
19945 | 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, |
19946 | ); |
19947 | assert_eq_m512h(r, e); |
19948 | } |
19949 | |
19950 | #[simd_test(enable = "avx512fp16" )] |
19951 | unsafe fn test_mm512_fcmadd_round_pch() { |
19952 | let a = _mm512_set1_pch(0.0, 1.0); |
19953 | let b = _mm512_set1_pch(0.0, 2.0); |
19954 | let c = _mm512_set1_pch(0.0, 3.0); |
19955 | let r = |
19956 | _mm512_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
19957 | let e = _mm512_set1_pch(2.0, 3.0); |
19958 | assert_eq_m512h(r, e); |
19959 | } |
19960 | |
19961 | #[simd_test(enable = "avx512fp16" )] |
19962 | unsafe fn test_mm512_mask_fcmadd_round_pch() { |
19963 | let a = _mm512_set1_pch(0.0, 1.0); |
19964 | let b = _mm512_set1_pch(0.0, 2.0); |
19965 | let c = _mm512_set1_pch(0.0, 3.0); |
19966 | let r = _mm512_mask_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19967 | a, |
19968 | 0b0101010101010101, |
19969 | b, |
19970 | c, |
19971 | ); |
19972 | let e = _mm512_setr_ph( |
19973 | 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, |
19974 | 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, |
19975 | ); |
19976 | assert_eq_m512h(r, e); |
19977 | } |
19978 | |
19979 | #[simd_test(enable = "avx512fp16" )] |
19980 | unsafe fn test_mm512_mask3_fcmadd_round_pch() { |
19981 | let a = _mm512_set1_pch(0.0, 1.0); |
19982 | let b = _mm512_set1_pch(0.0, 2.0); |
19983 | let c = _mm512_set1_pch(0.0, 3.0); |
19984 | let r = _mm512_mask3_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19985 | a, |
19986 | b, |
19987 | c, |
19988 | 0b0101010101010101, |
19989 | ); |
19990 | let e = _mm512_setr_ph( |
19991 | 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, |
19992 | 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, |
19993 | ); |
19994 | assert_eq_m512h(r, e); |
19995 | } |
19996 | |
19997 | #[simd_test(enable = "avx512fp16" )] |
19998 | unsafe fn test_mm512_maskz_fcmadd_round_pch() { |
19999 | let a = _mm512_set1_pch(0.0, 1.0); |
20000 | let b = _mm512_set1_pch(0.0, 2.0); |
20001 | let c = _mm512_set1_pch(0.0, 3.0); |
20002 | let r = _mm512_maskz_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20003 | 0b0101010101010101, |
20004 | a, |
20005 | b, |
20006 | c, |
20007 | ); |
20008 | let e = _mm512_setr_ph( |
20009 | 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, |
20010 | 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, |
20011 | ); |
20012 | assert_eq_m512h(r, e); |
20013 | } |
20014 | |
20015 | #[simd_test(enable = "avx512fp16" )] |
20016 | unsafe fn test_mm_fcmadd_sch() { |
20017 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20018 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
20019 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20020 | let r = _mm_fcmadd_sch(a, b, c); |
20021 | let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20022 | assert_eq_m128h(r, e); |
20023 | } |
20024 | |
20025 | #[simd_test(enable = "avx512fp16" )] |
20026 | unsafe fn test_mm_mask_fcmadd_sch() { |
20027 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20028 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
20029 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20030 | let r = _mm_mask_fcmadd_sch(a, 0, b, c); |
20031 | let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20032 | assert_eq_m128h(r, e); |
20033 | let r = _mm_mask_fcmadd_sch(a, 1, b, c); |
20034 | let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20035 | assert_eq_m128h(r, e); |
20036 | } |
20037 | |
20038 | #[simd_test(enable = "avx512fp16" )] |
20039 | unsafe fn test_mm_mask3_fcmadd_sch() { |
20040 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20041 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
20042 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20043 | let r = _mm_mask3_fcmadd_sch(a, b, c, 0); |
20044 | let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20045 | assert_eq_m128h(r, e); |
20046 | let r = _mm_mask3_fcmadd_sch(a, b, c, 1); |
20047 | let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20048 | assert_eq_m128h(r, e); |
20049 | } |
20050 | |
20051 | #[simd_test(enable = "avx512fp16" )] |
20052 | unsafe fn test_mm_maskz_fcmadd_sch() { |
20053 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20054 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
20055 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20056 | let r = _mm_maskz_fcmadd_sch(0, a, b, c); |
20057 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20058 | assert_eq_m128h(r, e); |
20059 | let r = _mm_maskz_fcmadd_sch(1, a, b, c); |
20060 | let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20061 | assert_eq_m128h(r, e); |
20062 | } |
20063 | |
20064 | #[simd_test(enable = "avx512fp16" )] |
20065 | unsafe fn test_mm_fcmadd_round_sch() { |
20066 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20067 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
20068 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20069 | let r = _mm_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
20070 | let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20071 | assert_eq_m128h(r, e); |
20072 | } |
20073 | |
20074 | #[simd_test(enable = "avx512fp16" )] |
20075 | unsafe fn test_mm_mask_fcmadd_round_sch() { |
20076 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20077 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
20078 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20079 | let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20080 | a, 0, b, c, |
20081 | ); |
20082 | let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20083 | assert_eq_m128h(r, e); |
20084 | let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20085 | a, 1, b, c, |
20086 | ); |
20087 | let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20088 | assert_eq_m128h(r, e); |
20089 | } |
20090 | |
20091 | #[simd_test(enable = "avx512fp16" )] |
20092 | unsafe fn test_mm_mask3_fcmadd_round_sch() { |
20093 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20094 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
20095 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20096 | let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20097 | a, b, c, 0, |
20098 | ); |
20099 | let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20100 | assert_eq_m128h(r, e); |
20101 | let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20102 | a, b, c, 1, |
20103 | ); |
20104 | let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20105 | assert_eq_m128h(r, e); |
20106 | } |
20107 | |
20108 | #[simd_test(enable = "avx512fp16" )] |
20109 | unsafe fn test_mm_maskz_fcmadd_round_sch() { |
20110 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20111 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
20112 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20113 | let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20114 | 0, a, b, c, |
20115 | ); |
20116 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20117 | assert_eq_m128h(r, e); |
20118 | let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20119 | 1, a, b, c, |
20120 | ); |
20121 | let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20122 | assert_eq_m128h(r, e); |
20123 | } |
20124 | |
20125 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20126 | unsafe fn test_mm_fmadd_ph() { |
20127 | let a = _mm_set1_ph(1.0); |
20128 | let b = _mm_set1_ph(2.0); |
20129 | let c = _mm_set1_ph(3.0); |
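// Real (element-wise) fused multiply-add: 1.0 * 2.0 + 3.0 = 5.0 in every lane.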
20130 | let r = _mm_fmadd_ph(a, b, c); |
20131 | let e = _mm_set1_ph(5.0); |
20132 | assert_eq_m128h(r, e); |
20133 | } |
20134 | |
20135 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20136 | unsafe fn test_mm_mask_fmadd_ph() { |
20137 | let a = _mm_set1_ph(1.0); |
20138 | let b = _mm_set1_ph(2.0); |
20139 | let c = _mm_set1_ph(3.0); |
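// For the real _ph form each mask bit covers a single f16 element (not a complex pair);
// cleared bits keep the element from a.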
20140 | let r = _mm_mask_fmadd_ph(a, 0b01010101, b, c); |
20141 | let e = _mm_set_ph(1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0); |
20142 | assert_eq_m128h(r, e); |
20143 | } |
20144 | |
20145 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20146 | unsafe fn test_mm_mask3_fmadd_ph() { |
20147 | let a = _mm_set1_ph(1.0); |
20148 | let b = _mm_set1_ph(2.0); |
20149 | let c = _mm_set1_ph(3.0); |
20150 | let r = _mm_mask3_fmadd_ph(a, b, c, 0b01010101); |
20151 | let e = _mm_set_ph(3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0); |
20152 | assert_eq_m128h(r, e); |
20153 | } |
20154 | |
20155 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20156 | unsafe fn test_mm_maskz_fmadd_ph() { |
20157 | let a = _mm_set1_ph(1.0); |
20158 | let b = _mm_set1_ph(2.0); |
20159 | let c = _mm_set1_ph(3.0); |
20160 | let r = _mm_maskz_fmadd_ph(0b01010101, a, b, c); |
20161 | let e = _mm_set_ph(0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0); |
20162 | assert_eq_m128h(r, e); |
20163 | } |
20164 | |
20165 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20166 | unsafe fn test_mm256_fmadd_ph() { |
20167 | let a = _mm256_set1_ph(1.0); |
20168 | let b = _mm256_set1_ph(2.0); |
20169 | let c = _mm256_set1_ph(3.0); |
20170 | let r = _mm256_fmadd_ph(a, b, c); |
20171 | let e = _mm256_set1_ph(5.0); |
20172 | assert_eq_m256h(r, e); |
20173 | } |
20174 | |
20175 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20176 | unsafe fn test_mm256_mask_fmadd_ph() { |
20177 | let a = _mm256_set1_ph(1.0); |
20178 | let b = _mm256_set1_ph(2.0); |
20179 | let c = _mm256_set1_ph(3.0); |
20180 | let r = _mm256_mask_fmadd_ph(a, 0b0101010101010101, b, c); |
20181 | let e = _mm256_set_ph( |
20182 | 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, |
20183 | ); |
20184 | assert_eq_m256h(r, e); |
20185 | } |
20186 | |
20187 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20188 | unsafe fn test_mm256_mask3_fmadd_ph() { |
20189 | let a = _mm256_set1_ph(1.0); |
20190 | let b = _mm256_set1_ph(2.0); |
20191 | let c = _mm256_set1_ph(3.0); |
20192 | let r = _mm256_mask3_fmadd_ph(a, b, c, 0b0101010101010101); |
20193 | let e = _mm256_set_ph( |
20194 | 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, |
20195 | ); |
20196 | assert_eq_m256h(r, e); |
20197 | } |
20198 | |
20199 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20200 | unsafe fn test_mm256_maskz_fmadd_ph() { |
20201 | let a = _mm256_set1_ph(1.0); |
20202 | let b = _mm256_set1_ph(2.0); |
20203 | let c = _mm256_set1_ph(3.0); |
20204 | let r = _mm256_maskz_fmadd_ph(0b0101010101010101, a, b, c); |
20205 | let e = _mm256_set_ph( |
20206 | 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, |
20207 | ); |
20208 | assert_eq_m256h(r, e); |
20209 | } |
20210 | |
20211 | #[simd_test(enable = "avx512fp16" )] |
20212 | unsafe fn test_mm512_fmadd_ph() { |
20213 | let a = _mm512_set1_ph(1.0); |
20214 | let b = _mm512_set1_ph(2.0); |
20215 | let c = _mm512_set1_ph(3.0); |
20216 | let r = _mm512_fmadd_ph(a, b, c); |
20217 | let e = _mm512_set1_ph(5.0); |
20218 | assert_eq_m512h(r, e); |
20219 | } |
20220 | |
20221 | #[simd_test(enable = "avx512fp16" )] |
20222 | unsafe fn test_mm512_mask_fmadd_ph() { |
20223 | let a = _mm512_set1_ph(1.0); |
20224 | let b = _mm512_set1_ph(2.0); |
20225 | let c = _mm512_set1_ph(3.0); |
20226 | let r = _mm512_mask_fmadd_ph(a, 0b01010101010101010101010101010101, b, c); |
20227 | let e = _mm512_set_ph( |
20228 | 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, |
20229 | 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, |
20230 | ); |
20231 | assert_eq_m512h(r, e); |
20232 | } |
20233 | |
20234 | #[simd_test(enable = "avx512fp16" )] |
20235 | unsafe fn test_mm512_mask3_fmadd_ph() { |
20236 | let a = _mm512_set1_ph(1.0); |
20237 | let b = _mm512_set1_ph(2.0); |
20238 | let c = _mm512_set1_ph(3.0); |
20239 | let r = _mm512_mask3_fmadd_ph(a, b, c, 0b01010101010101010101010101010101); |
20240 | let e = _mm512_set_ph( |
20241 | 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, |
20242 | 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, |
20243 | ); |
20244 | assert_eq_m512h(r, e); |
20245 | } |
20246 | |
20247 | #[simd_test(enable = "avx512fp16" )] |
20248 | unsafe fn test_mm512_maskz_fmadd_ph() { |
20249 | let a = _mm512_set1_ph(1.0); |
20250 | let b = _mm512_set1_ph(2.0); |
20251 | let c = _mm512_set1_ph(3.0); |
20252 | let r = _mm512_maskz_fmadd_ph(0b01010101010101010101010101010101, a, b, c); |
20253 | let e = _mm512_set_ph( |
20254 | 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, |
20255 | 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, |
20256 | ); |
20257 | assert_eq_m512h(r, e); |
20258 | } |
20259 | |
20260 | #[simd_test(enable = "avx512fp16" )] |
20261 | unsafe fn test_mm512_fmadd_round_ph() { |
20262 | let a = _mm512_set1_ph(1.0); |
20263 | let b = _mm512_set1_ph(2.0); |
20264 | let c = _mm512_set1_ph(3.0); |
20265 | let r = _mm512_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
20266 | let e = _mm512_set1_ph(5.0); |
20267 | assert_eq_m512h(r, e); |
20268 | } |
20269 | |
20270 | #[simd_test(enable = "avx512fp16" )] |
20271 | unsafe fn test_mm512_mask_fmadd_round_ph() { |
20272 | let a = _mm512_set1_ph(1.0); |
20273 | let b = _mm512_set1_ph(2.0); |
20274 | let c = _mm512_set1_ph(3.0); |
20275 | let r = _mm512_mask_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20276 | a, |
20277 | 0b01010101010101010101010101010101, |
20278 | b, |
20279 | c, |
20280 | ); |
20281 | let e = _mm512_set_ph( |
20282 | 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, |
20283 | 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, |
20284 | ); |
20285 | assert_eq_m512h(r, e); |
20286 | } |
20287 | |
20288 | #[simd_test(enable = "avx512fp16" )] |
20289 | unsafe fn test_mm512_mask3_fmadd_round_ph() { |
20290 | let a = _mm512_set1_ph(1.0); |
20291 | let b = _mm512_set1_ph(2.0); |
20292 | let c = _mm512_set1_ph(3.0); |
20293 | let r = _mm512_mask3_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20294 | a, |
20295 | b, |
20296 | c, |
20297 | 0b01010101010101010101010101010101, |
20298 | ); |
20299 | let e = _mm512_set_ph( |
20300 | 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, |
20301 | 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, |
20302 | ); |
20303 | assert_eq_m512h(r, e); |
20304 | } |
20305 | |
20306 | #[simd_test(enable = "avx512fp16" )] |
20307 | unsafe fn test_mm512_maskz_fmadd_round_ph() { |
20308 | let a = _mm512_set1_ph(1.0); |
20309 | let b = _mm512_set1_ph(2.0); |
20310 | let c = _mm512_set1_ph(3.0); |
20311 | let r = _mm512_maskz_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20312 | 0b01010101010101010101010101010101, |
20313 | a, |
20314 | b, |
20315 | c, |
20316 | ); |
20317 | let e = _mm512_set_ph( |
20318 | 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, |
20319 | 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, |
20320 | ); |
20321 | assert_eq_m512h(r, e); |
20322 | } |
20323 | |
20324 | #[simd_test(enable = "avx512fp16" )] |
20325 | unsafe fn test_mm_fmadd_sh() { |
20326 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20327 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20328 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
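// Scalar variant: only element 0 is computed (1.0 * 2.0 + 3.0 = 5.0); elements 1..7 are copied from a.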
20329 | let r = _mm_fmadd_sh(a, b, c); |
20330 | let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.); |
20331 | assert_eq_m128h(r, e); |
20332 | } |
20333 | |
20334 | #[simd_test(enable = "avx512fp16" )] |
20335 | unsafe fn test_mm_mask_fmadd_sh() { |
20336 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20337 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20338 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20339 | let r = _mm_mask_fmadd_sh(a, 0, b, c); |
20340 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20341 | assert_eq_m128h(r, e); |
20342 | let r = _mm_mask_fmadd_sh(a, 1, b, c); |
20343 | let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.); |
20344 | assert_eq_m128h(r, e); |
20345 | } |
20346 | |
20347 | #[simd_test(enable = "avx512fp16" )] |
20348 | unsafe fn test_mm_mask3_fmadd_sh() { |
20349 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20350 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20351 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20352 | let r = _mm_mask3_fmadd_sh(a, b, c, 0); |
20353 | let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20354 | assert_eq_m128h(r, e); |
20355 | let r = _mm_mask3_fmadd_sh(a, b, c, 1); |
20356 | let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.); |
20357 | assert_eq_m128h(r, e); |
20358 | } |
20359 | |
20360 | #[simd_test(enable = "avx512fp16" )] |
20361 | unsafe fn test_mm_maskz_fmadd_sh() { |
20362 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20363 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20364 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20365 | let r = _mm_maskz_fmadd_sh(0, a, b, c); |
20366 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
20367 | assert_eq_m128h(r, e); |
20368 | let r = _mm_maskz_fmadd_sh(1, a, b, c); |
20369 | let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.); |
20370 | assert_eq_m128h(r, e); |
20371 | } |
20372 | |
20373 | #[simd_test(enable = "avx512fp16" )] |
20374 | unsafe fn test_mm_fmadd_round_sh() { |
20375 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20376 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20377 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20378 | let r = _mm_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
20379 | let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.); |
20380 | assert_eq_m128h(r, e); |
20381 | } |
20382 | |
20383 | #[simd_test(enable = "avx512fp16" )] |
20384 | unsafe fn test_mm_mask_fmadd_round_sh() { |
20385 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20386 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20387 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20388 | let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20389 | a, 0, b, c, |
20390 | ); |
20391 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20392 | assert_eq_m128h(r, e); |
20393 | let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20394 | a, 1, b, c, |
20395 | ); |
20396 | let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.); |
20397 | assert_eq_m128h(r, e); |
20398 | } |
20399 | |
20400 | #[simd_test(enable = "avx512fp16" )] |
20401 | unsafe fn test_mm_mask3_fmadd_round_sh() { |
20402 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20403 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20404 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20405 | let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20406 | a, b, c, 0, |
20407 | ); |
20408 | let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20409 | assert_eq_m128h(r, e); |
20410 | let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20411 | a, b, c, 1, |
20412 | ); |
20413 | let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.); |
20414 | assert_eq_m128h(r, e); |
20415 | } |
20416 | |
20417 | #[simd_test(enable = "avx512fp16" )] |
20418 | unsafe fn test_mm_maskz_fmadd_round_sh() { |
20419 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20420 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20421 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20422 | let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20423 | 0, a, b, c, |
20424 | ); |
20425 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
20426 | assert_eq_m128h(r, e); |
20427 | let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20428 | 1, a, b, c, |
20429 | ); |
20430 | let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.); |
20431 | assert_eq_m128h(r, e); |
20432 | } |
20433 | |
20434 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20435 | unsafe fn test_mm_fmsub_ph() { |
20436 | let a = _mm_set1_ph(1.0); |
20437 | let b = _mm_set1_ph(2.0); |
20438 | let c = _mm_set1_ph(3.0); |
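// Fused multiply-subtract: 1.0 * 2.0 - 3.0 = -1.0 in every lane.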
20439 | let r = _mm_fmsub_ph(a, b, c); |
20440 | let e = _mm_set1_ph(-1.0); |
20441 | assert_eq_m128h(r, e); |
20442 | } |
20443 | |
20444 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20445 | unsafe fn test_mm_mask_fmsub_ph() { |
20446 | let a = _mm_set1_ph(1.0); |
20447 | let b = _mm_set1_ph(2.0); |
20448 | let c = _mm_set1_ph(3.0); |
20449 | let r = _mm_mask_fmsub_ph(a, 0b01010101, b, c); |
20450 | let e = _mm_set_ph(1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0); |
20451 | assert_eq_m128h(r, e); |
20452 | } |
20453 | |
20454 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20455 | unsafe fn test_mm_mask3_fmsub_ph() { |
20456 | let a = _mm_set1_ph(1.0); |
20457 | let b = _mm_set1_ph(2.0); |
20458 | let c = _mm_set1_ph(3.0); |
20459 | let r = _mm_mask3_fmsub_ph(a, b, c, 0b01010101); |
20460 | let e = _mm_set_ph(3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0); |
20461 | assert_eq_m128h(r, e); |
20462 | } |
20463 | |
20464 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20465 | unsafe fn test_mm_maskz_fmsub_ph() { |
20466 | let a = _mm_set1_ph(1.0); |
20467 | let b = _mm_set1_ph(2.0); |
20468 | let c = _mm_set1_ph(3.0); |
20469 | let r = _mm_maskz_fmsub_ph(0b01010101, a, b, c); |
20470 | let e = _mm_set_ph(0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0); |
20471 | assert_eq_m128h(r, e); |
20472 | } |
20473 | |
20474 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20475 | unsafe fn test_mm256_fmsub_ph() { |
20476 | let a = _mm256_set1_ph(1.0); |
20477 | let b = _mm256_set1_ph(2.0); |
20478 | let c = _mm256_set1_ph(3.0); |
20479 | let r = _mm256_fmsub_ph(a, b, c); |
20480 | let e = _mm256_set1_ph(-1.0); |
20481 | assert_eq_m256h(r, e); |
20482 | } |
20483 | |
20484 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20485 | unsafe fn test_mm256_mask_fmsub_ph() { |
20486 | let a = _mm256_set1_ph(1.0); |
20487 | let b = _mm256_set1_ph(2.0); |
20488 | let c = _mm256_set1_ph(3.0); |
20489 | let r = _mm256_mask_fmsub_ph(a, 0b0101010101010101, b, c); |
20490 | let e = _mm256_set_ph( |
20491 | 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, |
20492 | ); |
20493 | assert_eq_m256h(r, e); |
20494 | } |
20495 | |
20496 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20497 | unsafe fn test_mm256_mask3_fmsub_ph() { |
20498 | let a = _mm256_set1_ph(1.0); |
20499 | let b = _mm256_set1_ph(2.0); |
20500 | let c = _mm256_set1_ph(3.0); |
20501 | let r = _mm256_mask3_fmsub_ph(a, b, c, 0b0101010101010101); |
20502 | let e = _mm256_set_ph( |
20503 | 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, |
20504 | ); |
20505 | assert_eq_m256h(r, e); |
20506 | } |
20507 | |
20508 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20509 | unsafe fn test_mm256_maskz_fmsub_ph() { |
20510 | let a = _mm256_set1_ph(1.0); |
20511 | let b = _mm256_set1_ph(2.0); |
20512 | let c = _mm256_set1_ph(3.0); |
20513 | let r = _mm256_maskz_fmsub_ph(0b0101010101010101, a, b, c); |
20514 | let e = _mm256_set_ph( |
20515 | 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, |
20516 | ); |
20517 | assert_eq_m256h(r, e); |
20518 | } |
20519 | |
20520 | #[simd_test(enable = "avx512fp16" )] |
20521 | unsafe fn test_mm512_fmsub_ph() { |
20522 | let a = _mm512_set1_ph(1.0); |
20523 | let b = _mm512_set1_ph(2.0); |
20524 | let c = _mm512_set1_ph(3.0); |
20525 | let r = _mm512_fmsub_ph(a, b, c); |
20526 | let e = _mm512_set1_ph(-1.0); |
20527 | assert_eq_m512h(r, e); |
20528 | } |
20529 | |
20530 | #[simd_test(enable = "avx512fp16" )] |
20531 | unsafe fn test_mm512_mask_fmsub_ph() { |
20532 | let a = _mm512_set1_ph(1.0); |
20533 | let b = _mm512_set1_ph(2.0); |
20534 | let c = _mm512_set1_ph(3.0); |
20535 | let r = _mm512_mask_fmsub_ph(a, 0b01010101010101010101010101010101, b, c); |
20536 | let e = _mm512_set_ph( |
20537 | 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, |
20538 | 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, |
20539 | ); |
20540 | assert_eq_m512h(r, e); |
20541 | } |
20542 | |
20543 | #[simd_test(enable = "avx512fp16" )] |
20544 | unsafe fn test_mm512_mask3_fmsub_ph() { |
20545 | let a = _mm512_set1_ph(1.0); |
20546 | let b = _mm512_set1_ph(2.0); |
20547 | let c = _mm512_set1_ph(3.0); |
20548 | let r = _mm512_mask3_fmsub_ph(a, b, c, 0b01010101010101010101010101010101); |
20549 | let e = _mm512_set_ph( |
20550 | 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, |
20551 | 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, |
20552 | ); |
20553 | assert_eq_m512h(r, e); |
20554 | } |
20555 | |
20556 | #[simd_test(enable = "avx512fp16" )] |
20557 | unsafe fn test_mm512_maskz_fmsub_ph() { |
20558 | let a = _mm512_set1_ph(1.0); |
20559 | let b = _mm512_set1_ph(2.0); |
20560 | let c = _mm512_set1_ph(3.0); |
20561 | let r = _mm512_maskz_fmsub_ph(0b01010101010101010101010101010101, a, b, c); |
20562 | let e = _mm512_set_ph( |
20563 | 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, |
20564 | 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, |
20565 | ); |
20566 | assert_eq_m512h(r, e); |
20567 | } |
20568 | |
20569 | #[simd_test(enable = "avx512fp16" )] |
20570 | unsafe fn test_mm512_fmsub_round_ph() { |
20571 | let a = _mm512_set1_ph(1.0); |
20572 | let b = _mm512_set1_ph(2.0); |
20573 | let c = _mm512_set1_ph(3.0); |
20574 | let r = _mm512_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
20575 | let e = _mm512_set1_ph(-1.0); |
20576 | assert_eq_m512h(r, e); |
20577 | } |
20578 | |
20579 | #[simd_test(enable = "avx512fp16" )] |
20580 | unsafe fn test_mm512_mask_fmsub_round_ph() { |
20581 | let a = _mm512_set1_ph(1.0); |
20582 | let b = _mm512_set1_ph(2.0); |
20583 | let c = _mm512_set1_ph(3.0); |
20584 | let r = _mm512_mask_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20585 | a, |
20586 | 0b01010101010101010101010101010101, |
20587 | b, |
20588 | c, |
20589 | ); |
20590 | let e = _mm512_set_ph( |
20591 | 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, |
20592 | 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, |
20593 | ); |
20594 | assert_eq_m512h(r, e); |
20595 | } |
20596 | |
20597 | #[simd_test(enable = "avx512fp16" )] |
20598 | unsafe fn test_mm512_mask3_fmsub_round_ph() { |
20599 | let a = _mm512_set1_ph(1.0); |
20600 | let b = _mm512_set1_ph(2.0); |
20601 | let c = _mm512_set1_ph(3.0); |
20602 | let r = _mm512_mask3_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20603 | a, |
20604 | b, |
20605 | c, |
20606 | 0b01010101010101010101010101010101, |
20607 | ); |
20608 | let e = _mm512_set_ph( |
20609 | 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, |
20610 | 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, |
20611 | ); |
20612 | assert_eq_m512h(r, e); |
20613 | } |
20614 | |
20615 | #[simd_test(enable = "avx512fp16" )] |
20616 | unsafe fn test_mm512_maskz_fmsub_round_ph() { |
20617 | let a = _mm512_set1_ph(1.0); |
20618 | let b = _mm512_set1_ph(2.0); |
20619 | let c = _mm512_set1_ph(3.0); |
20620 | let r = _mm512_maskz_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20621 | 0b01010101010101010101010101010101, |
20622 | a, |
20623 | b, |
20624 | c, |
20625 | ); |
20626 | let e = _mm512_set_ph( |
20627 | 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, |
20628 | 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, |
20629 | ); |
20630 | assert_eq_m512h(r, e); |
20631 | } |
20632 | |
20633 | #[simd_test(enable = "avx512fp16" )] |
20634 | unsafe fn test_mm_fmsub_sh() { |
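| // Scalar form: only lane 0 computes a * b - c (-1.0); lanes 1..7 are copied from a. |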
20635 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20636 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20637 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20638 | let r = _mm_fmsub_sh(a, b, c); |
20639 | let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.); |
20640 | assert_eq_m128h(r, e); |
20641 | } |
20642 | |
20643 | #[simd_test(enable = "avx512fp16" )] |
20644 | unsafe fn test_mm_mask_fmsub_sh() { |
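| // Scalar mask form: bit 0 selects between a[0] (clear) and a[0] * b[0] - c[0] (set); |
| // the upper lanes always come from a. |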
20645 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20646 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20647 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20648 | let r = _mm_mask_fmsub_sh(a, 0, b, c); |
20649 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20650 | assert_eq_m128h(r, e); |
20651 | let r = _mm_mask_fmsub_sh(a, 1, b, c); |
20652 | let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.); |
20653 | assert_eq_m128h(r, e); |
20654 | } |
20655 | |
20656 | #[simd_test(enable = "avx512fp16" )] |
20657 | unsafe fn test_mm_mask3_fmsub_sh() { |
20658 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20659 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20660 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20661 | let r = _mm_mask3_fmsub_sh(a, b, c, 0); |
20662 | let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20663 | assert_eq_m128h(r, e); |
20664 | let r = _mm_mask3_fmsub_sh(a, b, c, 1); |
20665 | let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.); |
20666 | assert_eq_m128h(r, e); |
20667 | } |
20668 | |
20669 | #[simd_test(enable = "avx512fp16" )] |
20670 | unsafe fn test_mm_maskz_fmsub_sh() { |
20671 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20672 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20673 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20674 | let r = _mm_maskz_fmsub_sh(0, a, b, c); |
20675 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
20676 | assert_eq_m128h(r, e); |
20677 | let r = _mm_maskz_fmsub_sh(1, a, b, c); |
20678 | let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.); |
20679 | assert_eq_m128h(r, e); |
20680 | } |
20681 | |
20682 | #[simd_test(enable = "avx512fp16" )] |
20683 | unsafe fn test_mm_fmsub_round_sh() { |
20684 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20685 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20686 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20687 | let r = _mm_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
20688 | let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.); |
20689 | assert_eq_m128h(r, e); |
20690 | } |
20691 | |
20692 | #[simd_test(enable = "avx512fp16" )] |
20693 | unsafe fn test_mm_mask_fmsub_round_sh() { |
20694 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20695 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20696 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20697 | let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20698 | a, 0, b, c, |
20699 | ); |
20700 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20701 | assert_eq_m128h(r, e); |
20702 | let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20703 | a, 1, b, c, |
20704 | ); |
20705 | let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.); |
20706 | assert_eq_m128h(r, e); |
20707 | } |
20708 | |
20709 | #[simd_test(enable = "avx512fp16" )] |
20710 | unsafe fn test_mm_mask3_fmsub_round_sh() { |
20711 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20712 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20713 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20714 | let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20715 | a, b, c, 0, |
20716 | ); |
20717 | let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20718 | assert_eq_m128h(r, e); |
20719 | let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20720 | a, b, c, 1, |
20721 | ); |
20722 | let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.); |
20723 | assert_eq_m128h(r, e); |
20724 | } |
20725 | |
20726 | #[simd_test(enable = "avx512fp16" )] |
20727 | unsafe fn test_mm_maskz_fmsub_round_sh() { |
20728 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20729 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20730 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20731 | let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20732 | 0, a, b, c, |
20733 | ); |
20734 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
20735 | assert_eq_m128h(r, e); |
20736 | let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20737 | 1, a, b, c, |
20738 | ); |
20739 | let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.); |
20740 | assert_eq_m128h(r, e); |
20741 | } |
20742 | |
20743 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20744 | unsafe fn test_mm_fnmadd_ph() { |
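| // FNMADD computes -(a * b) + c in every lane: -(1.0 * 2.0) + 3.0 = 1.0. |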
20745 | let a = _mm_set1_ph(1.0); |
20746 | let b = _mm_set1_ph(2.0); |
20747 | let c = _mm_set1_ph(3.0); |
20748 | let r = _mm_fnmadd_ph(a, b, c); |
20749 | let e = _mm_set1_ph(1.0); |
20750 | assert_eq_m128h(r, e); |
20751 | } |
20752 | |
20753 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20754 | unsafe fn test_mm_mask_fnmadd_ph() { |
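| // Here the fused result (1.0) happens to equal the pass-through value from a, so the |
| // expected vector is uniform even though only half the lanes are selected. |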
20755 | let a = _mm_set1_ph(1.0); |
20756 | let b = _mm_set1_ph(2.0); |
20757 | let c = _mm_set1_ph(3.0); |
20758 | let r = _mm_mask_fnmadd_ph(a, 0b01010101, b, c); |
20759 | let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0); |
20760 | assert_eq_m128h(r, e); |
20761 | } |
20762 | |
20763 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20764 | unsafe fn test_mm_mask3_fnmadd_ph() { |
20765 | let a = _mm_set1_ph(1.0); |
20766 | let b = _mm_set1_ph(2.0); |
20767 | let c = _mm_set1_ph(3.0); |
20768 | let r = _mm_mask3_fnmadd_ph(a, b, c, 0b01010101); |
20769 | let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0); |
20770 | assert_eq_m128h(r, e); |
20771 | } |
20772 | |
20773 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20774 | unsafe fn test_mm_maskz_fnmadd_ph() { |
20775 | let a = _mm_set1_ph(1.0); |
20776 | let b = _mm_set1_ph(2.0); |
20777 | let c = _mm_set1_ph(3.0); |
20778 | let r = _mm_maskz_fnmadd_ph(0b01010101, a, b, c); |
20779 | let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0); |
20780 | assert_eq_m128h(r, e); |
20781 | } |
20782 | |
20783 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20784 | unsafe fn test_mm256_fnmadd_ph() { |
20785 | let a = _mm256_set1_ph(1.0); |
20786 | let b = _mm256_set1_ph(2.0); |
20787 | let c = _mm256_set1_ph(3.0); |
20788 | let r = _mm256_fnmadd_ph(a, b, c); |
20789 | let e = _mm256_set1_ph(1.0); |
20790 | assert_eq_m256h(r, e); |
20791 | } |
20792 | |
20793 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20794 | unsafe fn test_mm256_mask_fnmadd_ph() { |
20795 | let a = _mm256_set1_ph(1.0); |
20796 | let b = _mm256_set1_ph(2.0); |
20797 | let c = _mm256_set1_ph(3.0); |
20798 | let r = _mm256_mask_fnmadd_ph(a, 0b0101010101010101, b, c); |
20799 | let e = _mm256_set_ph( |
20800 | 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, |
20801 | ); |
20802 | assert_eq_m256h(r, e); |
20803 | } |
20804 | |
20805 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20806 | unsafe fn test_mm256_mask3_fnmadd_ph() { |
20807 | let a = _mm256_set1_ph(1.0); |
20808 | let b = _mm256_set1_ph(2.0); |
20809 | let c = _mm256_set1_ph(3.0); |
20810 | let r = _mm256_mask3_fnmadd_ph(a, b, c, 0b0101010101010101); |
20811 | let e = _mm256_set_ph( |
20812 | 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, |
20813 | ); |
20814 | assert_eq_m256h(r, e); |
20815 | } |
20816 | |
20817 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20818 | unsafe fn test_mm256_maskz_fnmadd_ph() { |
20819 | let a = _mm256_set1_ph(1.0); |
20820 | let b = _mm256_set1_ph(2.0); |
20821 | let c = _mm256_set1_ph(3.0); |
20822 | let r = _mm256_maskz_fnmadd_ph(0b0101010101010101, a, b, c); |
20823 | let e = _mm256_set_ph( |
20824 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
20825 | ); |
20826 | assert_eq_m256h(r, e); |
20827 | } |
20828 | |
20829 | #[simd_test(enable = "avx512fp16" )] |
20830 | unsafe fn test_mm512_fnmadd_ph() { |
20831 | let a = _mm512_set1_ph(1.0); |
20832 | let b = _mm512_set1_ph(2.0); |
20833 | let c = _mm512_set1_ph(3.0); |
20834 | let r = _mm512_fnmadd_ph(a, b, c); |
20835 | let e = _mm512_set1_ph(1.0); |
20836 | assert_eq_m512h(r, e); |
20837 | } |
20838 | |
20839 | #[simd_test(enable = "avx512fp16" )] |
20840 | unsafe fn test_mm512_mask_fnmadd_ph() { |
20841 | let a = _mm512_set1_ph(1.0); |
20842 | let b = _mm512_set1_ph(2.0); |
20843 | let c = _mm512_set1_ph(3.0); |
20844 | let r = _mm512_mask_fnmadd_ph(a, 0b01010101010101010101010101010101, b, c); |
20845 | let e = _mm512_set_ph( |
20846 | 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, |
20847 | 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, |
20848 | ); |
20849 | assert_eq_m512h(r, e); |
20850 | } |
20851 | |
20852 | #[simd_test(enable = "avx512fp16" )] |
20853 | unsafe fn test_mm512_mask3_fnmadd_ph() { |
20854 | let a = _mm512_set1_ph(1.0); |
20855 | let b = _mm512_set1_ph(2.0); |
20856 | let c = _mm512_set1_ph(3.0); |
20857 | let r = _mm512_mask3_fnmadd_ph(a, b, c, 0b01010101010101010101010101010101); |
20858 | let e = _mm512_set_ph( |
20859 | 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, |
20860 | 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, |
20861 | ); |
20862 | assert_eq_m512h(r, e); |
20863 | } |
20864 | |
20865 | #[simd_test(enable = "avx512fp16" )] |
20866 | unsafe fn test_mm512_maskz_fnmadd_ph() { |
20867 | let a = _mm512_set1_ph(1.0); |
20868 | let b = _mm512_set1_ph(2.0); |
20869 | let c = _mm512_set1_ph(3.0); |
20870 | let r = _mm512_maskz_fnmadd_ph(0b01010101010101010101010101010101, a, b, c); |
20871 | let e = _mm512_set_ph( |
20872 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, |
20873 | 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
20874 | ); |
20875 | assert_eq_m512h(r, e); |
20876 | } |
20877 | |
20878 | #[simd_test(enable = "avx512fp16" )] |
20879 | unsafe fn test_mm512_fnmadd_round_ph() { |
20880 | let a = _mm512_set1_ph(1.0); |
20881 | let b = _mm512_set1_ph(2.0); |
20882 | let c = _mm512_set1_ph(3.0); |
20883 | let r = |
20884 | _mm512_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
20885 | let e = _mm512_set1_ph(1.0); |
20886 | assert_eq_m512h(r, e); |
20887 | } |
20888 | |
20889 | #[simd_test(enable = "avx512fp16" )] |
20890 | unsafe fn test_mm512_mask_fnmadd_round_ph() { |
20891 | let a = _mm512_set1_ph(1.0); |
20892 | let b = _mm512_set1_ph(2.0); |
20893 | let c = _mm512_set1_ph(3.0); |
20894 | let r = _mm512_mask_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20895 | a, |
20896 | 0b01010101010101010101010101010101, |
20897 | b, |
20898 | c, |
20899 | ); |
20900 | let e = _mm512_set_ph( |
20901 | 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, |
20902 | 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, |
20903 | ); |
20904 | assert_eq_m512h(r, e); |
20905 | } |
20906 | |
20907 | #[simd_test(enable = "avx512fp16" )] |
20908 | unsafe fn test_mm512_mask3_fnmadd_round_ph() { |
20909 | let a = _mm512_set1_ph(1.0); |
20910 | let b = _mm512_set1_ph(2.0); |
20911 | let c = _mm512_set1_ph(3.0); |
20912 | let r = _mm512_mask3_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20913 | a, |
20914 | b, |
20915 | c, |
20916 | 0b01010101010101010101010101010101, |
20917 | ); |
20918 | let e = _mm512_set_ph( |
20919 | 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, |
20920 | 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, |
20921 | ); |
20922 | assert_eq_m512h(r, e); |
20923 | } |
20924 | |
20925 | #[simd_test(enable = "avx512fp16" )] |
20926 | unsafe fn test_mm512_maskz_fnmadd_round_ph() { |
20927 | let a = _mm512_set1_ph(1.0); |
20928 | let b = _mm512_set1_ph(2.0); |
20929 | let c = _mm512_set1_ph(3.0); |
20930 | let r = _mm512_maskz_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20931 | 0b01010101010101010101010101010101, |
20932 | a, |
20933 | b, |
20934 | c, |
20935 | ); |
20936 | let e = _mm512_set_ph( |
20937 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, |
20938 | 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
20939 | ); |
20940 | assert_eq_m512h(r, e); |
20941 | } |
20942 | |
20943 | #[simd_test(enable = "avx512fp16" )] |
20944 | unsafe fn test_mm_fnmadd_sh() { |
20945 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20946 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20947 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20948 | let r = _mm_fnmadd_sh(a, b, c); |
20949 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20950 | assert_eq_m128h(r, e); |
20951 | } |
20952 | |
20953 | #[simd_test(enable = "avx512fp16" )] |
20954 | unsafe fn test_mm_mask_fnmadd_sh() { |
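| // -(1.0 * 2.0) + 3.0 = 1.0 equals a[0], so both mask values produce the same lane 0. |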
20955 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20956 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20957 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20958 | let r = _mm_mask_fnmadd_sh(a, 0, b, c); |
20959 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20960 | assert_eq_m128h(r, e); |
20961 | let r = _mm_mask_fnmadd_sh(a, 1, b, c); |
20962 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20963 | assert_eq_m128h(r, e); |
20964 | } |
20965 | |
20966 | #[simd_test(enable = "avx512fp16" )] |
20967 | unsafe fn test_mm_mask3_fnmadd_sh() { |
20968 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20969 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20970 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20971 | let r = _mm_mask3_fnmadd_sh(a, b, c, 0); |
20972 | let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20973 | assert_eq_m128h(r, e); |
20974 | let r = _mm_mask3_fnmadd_sh(a, b, c, 1); |
20975 | let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.); |
20976 | assert_eq_m128h(r, e); |
20977 | } |
20978 | |
20979 | #[simd_test(enable = "avx512fp16" )] |
20980 | unsafe fn test_mm_maskz_fnmadd_sh() { |
20981 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20982 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20983 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20984 | let r = _mm_maskz_fnmadd_sh(0, a, b, c); |
20985 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
20986 | assert_eq_m128h(r, e); |
20987 | let r = _mm_maskz_fnmadd_sh(1, a, b, c); |
20988 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20989 | assert_eq_m128h(r, e); |
20990 | } |
20991 | |
20992 | #[simd_test(enable = "avx512fp16" )] |
20993 | unsafe fn test_mm_fnmadd_round_sh() { |
20994 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20995 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20996 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20997 | let r = _mm_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
20998 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20999 | assert_eq_m128h(r, e); |
21000 | } |
21001 | |
21002 | #[simd_test(enable = "avx512fp16" )] |
21003 | unsafe fn test_mm_mask_fnmadd_round_sh() { |
21004 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21005 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21006 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21007 | let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21008 | a, 0, b, c, |
21009 | ); |
21010 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21011 | assert_eq_m128h(r, e); |
21012 | let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21013 | a, 1, b, c, |
21014 | ); |
21015 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21016 | assert_eq_m128h(r, e); |
21017 | } |
21018 | |
21019 | #[simd_test(enable = "avx512fp16" )] |
21020 | unsafe fn test_mm_mask3_fnmadd_round_sh() { |
21021 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21022 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21023 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21024 | let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21025 | a, b, c, 0, |
21026 | ); |
21027 | let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21028 | assert_eq_m128h(r, e); |
21029 | let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21030 | a, b, c, 1, |
21031 | ); |
21032 | let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.); |
21033 | assert_eq_m128h(r, e); |
21034 | } |
21035 | |
21036 | #[simd_test(enable = "avx512fp16" )] |
21037 | unsafe fn test_mm_maskz_fnmadd_round_sh() { |
21038 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21039 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21040 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21041 | let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21042 | 0, a, b, c, |
21043 | ); |
21044 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
21045 | assert_eq_m128h(r, e); |
21046 | let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21047 | 1, a, b, c, |
21048 | ); |
21049 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21050 | assert_eq_m128h(r, e); |
21051 | } |
21052 | |
21053 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21054 | unsafe fn test_mm_fnmsub_ph() { |
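| // FNMSUB computes -(a * b) - c in every lane: -(1.0 * 2.0) - 3.0 = -5.0. |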
21055 | let a = _mm_set1_ph(1.0); |
21056 | let b = _mm_set1_ph(2.0); |
21057 | let c = _mm_set1_ph(3.0); |
21058 | let r = _mm_fnmsub_ph(a, b, c); |
21059 | let e = _mm_set1_ph(-5.0); |
21060 | assert_eq_m128h(r, e); |
21061 | } |
21062 | |
21063 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21064 | unsafe fn test_mm_mask_fnmsub_ph() { |
21065 | let a = _mm_set1_ph(1.0); |
21066 | let b = _mm_set1_ph(2.0); |
21067 | let c = _mm_set1_ph(3.0); |
21068 | let r = _mm_mask_fnmsub_ph(a, 0b01010101, b, c); |
21069 | let e = _mm_set_ph(1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0); |
21070 | assert_eq_m128h(r, e); |
21071 | } |
21072 | |
21073 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21074 | unsafe fn test_mm_mask3_fnmsub_ph() { |
21075 | let a = _mm_set1_ph(1.0); |
21076 | let b = _mm_set1_ph(2.0); |
21077 | let c = _mm_set1_ph(3.0); |
21078 | let r = _mm_mask3_fnmsub_ph(a, b, c, 0b01010101); |
21079 | let e = _mm_set_ph(3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0); |
21080 | assert_eq_m128h(r, e); |
21081 | } |
21082 | |
21083 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21084 | unsafe fn test_mm_maskz_fnmsub_ph() { |
21085 | let a = _mm_set1_ph(1.0); |
21086 | let b = _mm_set1_ph(2.0); |
21087 | let c = _mm_set1_ph(3.0); |
21088 | let r = _mm_maskz_fnmsub_ph(0b01010101, a, b, c); |
21089 | let e = _mm_set_ph(0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0); |
21090 | assert_eq_m128h(r, e); |
21091 | } |
21092 | |
21093 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21094 | unsafe fn test_mm256_fnmsub_ph() { |
21095 | let a = _mm256_set1_ph(1.0); |
21096 | let b = _mm256_set1_ph(2.0); |
21097 | let c = _mm256_set1_ph(3.0); |
21098 | let r = _mm256_fnmsub_ph(a, b, c); |
21099 | let e = _mm256_set1_ph(-5.0); |
21100 | assert_eq_m256h(r, e); |
21101 | } |
21102 | |
21103 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21104 | unsafe fn test_mm256_mask_fnmsub_ph() { |
21105 | let a = _mm256_set1_ph(1.0); |
21106 | let b = _mm256_set1_ph(2.0); |
21107 | let c = _mm256_set1_ph(3.0); |
21108 | let r = _mm256_mask_fnmsub_ph(a, 0b0101010101010101, b, c); |
21109 | let e = _mm256_set_ph( |
21110 | 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, |
21111 | ); |
21112 | assert_eq_m256h(r, e); |
21113 | } |
21114 | |
21115 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21116 | unsafe fn test_mm256_mask3_fnmsub_ph() { |
21117 | let a = _mm256_set1_ph(1.0); |
21118 | let b = _mm256_set1_ph(2.0); |
21119 | let c = _mm256_set1_ph(3.0); |
21120 | let r = _mm256_mask3_fnmsub_ph(a, b, c, 0b0101010101010101); |
21121 | let e = _mm256_set_ph( |
21122 | 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, |
21123 | ); |
21124 | assert_eq_m256h(r, e); |
21125 | } |
21126 | |
21127 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21128 | unsafe fn test_mm256_maskz_fnmsub_ph() { |
21129 | let a = _mm256_set1_ph(1.0); |
21130 | let b = _mm256_set1_ph(2.0); |
21131 | let c = _mm256_set1_ph(3.0); |
21132 | let r = _mm256_maskz_fnmsub_ph(0b0101010101010101, a, b, c); |
21133 | let e = _mm256_set_ph( |
21134 | 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, |
21135 | ); |
21136 | assert_eq_m256h(r, e); |
21137 | } |
21138 | |
21139 | #[simd_test(enable = "avx512fp16" )] |
21140 | unsafe fn test_mm512_fnmsub_ph() { |
21141 | let a = _mm512_set1_ph(1.0); |
21142 | let b = _mm512_set1_ph(2.0); |
21143 | let c = _mm512_set1_ph(3.0); |
21144 | let r = _mm512_fnmsub_ph(a, b, c); |
21145 | let e = _mm512_set1_ph(-5.0); |
21146 | assert_eq_m512h(r, e); |
21147 | } |
21148 | |
21149 | #[simd_test(enable = "avx512fp16" )] |
21150 | unsafe fn test_mm512_mask_fnmsub_ph() { |
21151 | let a = _mm512_set1_ph(1.0); |
21152 | let b = _mm512_set1_ph(2.0); |
21153 | let c = _mm512_set1_ph(3.0); |
21154 | let r = _mm512_mask_fnmsub_ph(a, 0b01010101010101010101010101010101, b, c); |
21155 | let e = _mm512_set_ph( |
21156 | 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, |
21157 | 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, |
21158 | ); |
21159 | assert_eq_m512h(r, e); |
21160 | } |
21161 | |
21162 | #[simd_test(enable = "avx512fp16" )] |
21163 | unsafe fn test_mm512_mask3_fnmsub_ph() { |
21164 | let a = _mm512_set1_ph(1.0); |
21165 | let b = _mm512_set1_ph(2.0); |
21166 | let c = _mm512_set1_ph(3.0); |
21167 | let r = _mm512_mask3_fnmsub_ph(a, b, c, 0b01010101010101010101010101010101); |
21168 | let e = _mm512_set_ph( |
21169 | 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, |
21170 | 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, |
21171 | ); |
21172 | assert_eq_m512h(r, e); |
21173 | } |
21174 | |
21175 | #[simd_test(enable = "avx512fp16" )] |
21176 | unsafe fn test_mm512_maskz_fnmsub_ph() { |
21177 | let a = _mm512_set1_ph(1.0); |
21178 | let b = _mm512_set1_ph(2.0); |
21179 | let c = _mm512_set1_ph(3.0); |
21180 | let r = _mm512_maskz_fnmsub_ph(0b01010101010101010101010101010101, a, b, c); |
21181 | let e = _mm512_set_ph( |
21182 | 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, |
21183 | 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, |
21184 | ); |
21185 | assert_eq_m512h(r, e); |
21186 | } |
21187 | |
21188 | #[simd_test(enable = "avx512fp16" )] |
21189 | unsafe fn test_mm512_fnmsub_round_ph() { |
21190 | let a = _mm512_set1_ph(1.0); |
21191 | let b = _mm512_set1_ph(2.0); |
21192 | let c = _mm512_set1_ph(3.0); |
21193 | let r = |
21194 | _mm512_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
21195 | let e = _mm512_set1_ph(-5.0); |
21196 | assert_eq_m512h(r, e); |
21197 | } |
21198 | |
21199 | #[simd_test(enable = "avx512fp16" )] |
21200 | unsafe fn test_mm512_mask_fnmsub_round_ph() { |
21201 | let a = _mm512_set1_ph(1.0); |
21202 | let b = _mm512_set1_ph(2.0); |
21203 | let c = _mm512_set1_ph(3.0); |
21204 | let r = _mm512_mask_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21205 | a, |
21206 | 0b01010101010101010101010101010101, |
21207 | b, |
21208 | c, |
21209 | ); |
21210 | let e = _mm512_set_ph( |
21211 | 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, |
21212 | 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, |
21213 | ); |
21214 | assert_eq_m512h(r, e); |
21215 | } |
21216 | |
21217 | #[simd_test(enable = "avx512fp16" )] |
21218 | unsafe fn test_mm512_mask3_fnmsub_round_ph() { |
21219 | let a = _mm512_set1_ph(1.0); |
21220 | let b = _mm512_set1_ph(2.0); |
21221 | let c = _mm512_set1_ph(3.0); |
21222 | let r = _mm512_mask3_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21223 | a, |
21224 | b, |
21225 | c, |
21226 | 0b01010101010101010101010101010101, |
21227 | ); |
21228 | let e = _mm512_set_ph( |
21229 | 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, |
21230 | 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, |
21231 | ); |
21232 | assert_eq_m512h(r, e); |
21233 | } |
21234 | |
21235 | #[simd_test(enable = "avx512fp16" )] |
21236 | unsafe fn test_mm512_maskz_fnmsub_round_ph() { |
21237 | let a = _mm512_set1_ph(1.0); |
21238 | let b = _mm512_set1_ph(2.0); |
21239 | let c = _mm512_set1_ph(3.0); |
21240 | let r = _mm512_maskz_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21241 | 0b01010101010101010101010101010101, |
21242 | a, |
21243 | b, |
21244 | c, |
21245 | ); |
21246 | let e = _mm512_set_ph( |
21247 | 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, |
21248 | 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, |
21249 | ); |
21250 | assert_eq_m512h(r, e); |
21251 | } |
21252 | |
21253 | #[simd_test(enable = "avx512fp16" )] |
21254 | unsafe fn test_mm_fnmsub_sh() { |
21255 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21256 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21257 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21258 | let r = _mm_fnmsub_sh(a, b, c); |
21259 | let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.); |
21260 | assert_eq_m128h(r, e); |
21261 | } |
21262 | |
21263 | #[simd_test(enable = "avx512fp16" )] |
21264 | unsafe fn test_mm_mask_fnmsub_sh() { |
21265 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21266 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21267 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21268 | let r = _mm_mask_fnmsub_sh(a, 0, b, c); |
21269 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21270 | assert_eq_m128h(r, e); |
21271 | let r = _mm_mask_fnmsub_sh(a, 1, b, c); |
21272 | let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.); |
21273 | assert_eq_m128h(r, e); |
21274 | } |
21275 | |
21276 | #[simd_test(enable = "avx512fp16" )] |
21277 | unsafe fn test_mm_mask3_fnmsub_sh() { |
21278 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21279 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21280 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21281 | let r = _mm_mask3_fnmsub_sh(a, b, c, 0); |
21282 | let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21283 | assert_eq_m128h(r, e); |
21284 | let r = _mm_mask3_fnmsub_sh(a, b, c, 1); |
21285 | let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.); |
21286 | assert_eq_m128h(r, e); |
21287 | } |
21288 | |
21289 | #[simd_test(enable = "avx512fp16" )] |
21290 | unsafe fn test_mm_maskz_fnmsub_sh() { |
21291 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21292 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21293 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21294 | let r = _mm_maskz_fnmsub_sh(0, a, b, c); |
21295 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
21296 | assert_eq_m128h(r, e); |
21297 | let r = _mm_maskz_fnmsub_sh(1, a, b, c); |
21298 | let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.); |
21299 | assert_eq_m128h(r, e); |
21300 | } |
21301 | |
21302 | #[simd_test(enable = "avx512fp16" )] |
21303 | unsafe fn test_mm_fnmsub_round_sh() { |
21304 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21305 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21306 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21307 | let r = _mm_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
21308 | let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.); |
21309 | assert_eq_m128h(r, e); |
21310 | } |
21311 | |
21312 | #[simd_test(enable = "avx512fp16" )] |
21313 | unsafe fn test_mm_mask_fnmsub_round_sh() { |
21314 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21315 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21316 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21317 | let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21318 | a, 0, b, c, |
21319 | ); |
21320 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21321 | assert_eq_m128h(r, e); |
21322 | let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21323 | a, 1, b, c, |
21324 | ); |
21325 | let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.); |
21326 | assert_eq_m128h(r, e); |
21327 | } |
21328 | |
21329 | #[simd_test(enable = "avx512fp16" )] |
21330 | unsafe fn test_mm_mask3_fnmsub_round_sh() { |
21331 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21332 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21333 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21334 | let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21335 | a, b, c, 0, |
21336 | ); |
21337 | let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21338 | assert_eq_m128h(r, e); |
21339 | let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21340 | a, b, c, 1, |
21341 | ); |
21342 | let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.); |
21343 | assert_eq_m128h(r, e); |
21344 | } |
21345 | |
21346 | #[simd_test(enable = "avx512fp16" )] |
21347 | unsafe fn test_mm_maskz_fnmsub_round_sh() { |
21348 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21349 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21350 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21351 | let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21352 | 0, a, b, c, |
21353 | ); |
21354 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
21355 | assert_eq_m128h(r, e); |
21356 | let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21357 | 1, a, b, c, |
21358 | ); |
21359 | let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.); |
21360 | assert_eq_m128h(r, e); |
21361 | } |
21362 | |
21363 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21364 | unsafe fn test_mm_fmaddsub_ph() { |
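| // FMADDSUB alternates per lane: even-indexed lanes get a * b - c = -1.0 and odd-indexed |
| // lanes get a * b + c = 5.0. The set_ph constructors list lanes from the highest index |
| // down, so the trailing -1.0 below is lane 0. |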
21365 | let a = _mm_set1_ph(1.0); |
21366 | let b = _mm_set1_ph(2.0); |
21367 | let c = _mm_set1_ph(3.0); |
21368 | let r = _mm_fmaddsub_ph(a, b, c); |
21369 | let e = _mm_set_ph(5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0); |
21370 | assert_eq_m128h(r, e); |
21371 | } |
21372 | |
21373 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21374 | unsafe fn test_mm_mask_fmaddsub_ph() { |
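| // The 0b00110011 mask selects lanes in adjacent pairs, so the selected lanes show both |
| // the subtract (-1.0) and the add (5.0) halves of the pattern. |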
21375 | let a = _mm_set1_ph(1.0); |
21376 | let b = _mm_set1_ph(2.0); |
21377 | let c = _mm_set1_ph(3.0); |
21378 | let r = _mm_mask_fmaddsub_ph(a, 0b00110011, b, c); |
21379 | let e = _mm_set_ph(1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0); |
21380 | assert_eq_m128h(r, e); |
21381 | } |
21382 | |
21383 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21384 | unsafe fn test_mm_mask3_fmaddsub_ph() { |
21385 | let a = _mm_set1_ph(1.0); |
21386 | let b = _mm_set1_ph(2.0); |
21387 | let c = _mm_set1_ph(3.0); |
21388 | let r = _mm_mask3_fmaddsub_ph(a, b, c, 0b00110011); |
21389 | let e = _mm_set_ph(3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0); |
21390 | assert_eq_m128h(r, e); |
21391 | } |
21392 | |
21393 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21394 | unsafe fn test_mm_maskz_fmaddsub_ph() { |
21395 | let a = _mm_set1_ph(1.0); |
21396 | let b = _mm_set1_ph(2.0); |
21397 | let c = _mm_set1_ph(3.0); |
21398 | let r = _mm_maskz_fmaddsub_ph(0b00110011, a, b, c); |
21399 | let e = _mm_set_ph(0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0); |
21400 | assert_eq_m128h(r, e); |
21401 | } |
21402 | |
21403 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21404 | unsafe fn test_mm256_fmaddsub_ph() { |
21405 | let a = _mm256_set1_ph(1.0); |
21406 | let b = _mm256_set1_ph(2.0); |
21407 | let c = _mm256_set1_ph(3.0); |
21408 | let r = _mm256_fmaddsub_ph(a, b, c); |
21409 | let e = _mm256_set_ph( |
21410 | 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, |
21411 | ); |
21412 | assert_eq_m256h(r, e); |
21413 | } |
21414 | |
21415 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21416 | unsafe fn test_mm256_mask_fmaddsub_ph() { |
21417 | let a = _mm256_set1_ph(1.0); |
21418 | let b = _mm256_set1_ph(2.0); |
21419 | let c = _mm256_set1_ph(3.0); |
21420 | let r = _mm256_mask_fmaddsub_ph(a, 0b0011001100110011, b, c); |
21421 | let e = _mm256_set_ph( |
21422 | 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, |
21423 | ); |
21424 | assert_eq_m256h(r, e); |
21425 | } |
21426 | |
21427 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21428 | unsafe fn test_mm256_mask3_fmaddsub_ph() { |
21429 | let a = _mm256_set1_ph(1.0); |
21430 | let b = _mm256_set1_ph(2.0); |
21431 | let c = _mm256_set1_ph(3.0); |
21432 | let r = _mm256_mask3_fmaddsub_ph(a, b, c, 0b0011001100110011); |
21433 | let e = _mm256_set_ph( |
21434 | 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, |
21435 | ); |
21436 | assert_eq_m256h(r, e); |
21437 | } |
21438 | |
21439 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21440 | unsafe fn test_mm256_maskz_fmaddsub_ph() { |
21441 | let a = _mm256_set1_ph(1.0); |
21442 | let b = _mm256_set1_ph(2.0); |
21443 | let c = _mm256_set1_ph(3.0); |
21444 | let r = _mm256_maskz_fmaddsub_ph(0b0011001100110011, a, b, c); |
21445 | let e = _mm256_set_ph( |
21446 | 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, |
21447 | ); |
21448 | assert_eq_m256h(r, e); |
21449 | } |
21450 | |
21451 | #[simd_test(enable = "avx512fp16" )] |
21452 | unsafe fn test_mm512_fmaddsub_ph() { |
21453 | let a = _mm512_set1_ph(1.0); |
21454 | let b = _mm512_set1_ph(2.0); |
21455 | let c = _mm512_set1_ph(3.0); |
21456 | let r = _mm512_fmaddsub_ph(a, b, c); |
21457 | let e = _mm512_set_ph( |
21458 | 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, |
21459 | 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, |
21460 | ); |
21461 | assert_eq_m512h(r, e); |
21462 | } |
21463 | |
21464 | #[simd_test(enable = "avx512fp16" )] |
21465 | unsafe fn test_mm512_mask_fmaddsub_ph() { |
21466 | let a = _mm512_set1_ph(1.0); |
21467 | let b = _mm512_set1_ph(2.0); |
21468 | let c = _mm512_set1_ph(3.0); |
21469 | let r = _mm512_mask_fmaddsub_ph(a, 0b00110011001100110011001100110011, b, c); |
21470 | let e = _mm512_set_ph( |
21471 | 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, |
21472 | 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, |
21473 | ); |
21474 | assert_eq_m512h(r, e); |
21475 | } |
21476 | |
21477 | #[simd_test(enable = "avx512fp16" )] |
21478 | unsafe fn test_mm512_mask3_fmaddsub_ph() { |
21479 | let a = _mm512_set1_ph(1.0); |
21480 | let b = _mm512_set1_ph(2.0); |
21481 | let c = _mm512_set1_ph(3.0); |
21482 | let r = _mm512_mask3_fmaddsub_ph(a, b, c, 0b00110011001100110011001100110011); |
21483 | let e = _mm512_set_ph( |
21484 | 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, |
21485 | 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, |
21486 | ); |
21487 | assert_eq_m512h(r, e); |
21488 | } |
21489 | |
21490 | #[simd_test(enable = "avx512fp16" )] |
21491 | unsafe fn test_mm512_maskz_fmaddsub_ph() { |
21492 | let a = _mm512_set1_ph(1.0); |
21493 | let b = _mm512_set1_ph(2.0); |
21494 | let c = _mm512_set1_ph(3.0); |
21495 | let r = _mm512_maskz_fmaddsub_ph(0b00110011001100110011001100110011, a, b, c); |
21496 | let e = _mm512_set_ph( |
21497 | 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, |
21498 | 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, |
21499 | ); |
21500 | assert_eq_m512h(r, e); |
21501 | } |
21502 | |
21503 | #[simd_test(enable = "avx512fp16" )] |
21504 | unsafe fn test_mm512_fmaddsub_round_ph() { |
21505 | let a = _mm512_set1_ph(1.0); |
21506 | let b = _mm512_set1_ph(2.0); |
21507 | let c = _mm512_set1_ph(3.0); |
21508 | let r = |
21509 | _mm512_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
21510 | let e = _mm512_set_ph( |
21511 | 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, |
21512 | 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, |
21513 | ); |
21514 | assert_eq_m512h(r, e); |
21515 | } |
21516 | |
21517 | #[simd_test(enable = "avx512fp16" )] |
21518 | unsafe fn test_mm512_mask_fmaddsub_round_ph() { |
21519 | let a = _mm512_set1_ph(1.0); |
21520 | let b = _mm512_set1_ph(2.0); |
21521 | let c = _mm512_set1_ph(3.0); |
21522 | let r = _mm512_mask_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21523 | a, |
21524 | 0b00110011001100110011001100110011, |
21525 | b, |
21526 | c, |
21527 | ); |
21528 | let e = _mm512_set_ph( |
21529 | 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, |
21530 | 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, |
21531 | ); |
21532 | assert_eq_m512h(r, e); |
21533 | } |
21534 | |
21535 | #[simd_test(enable = "avx512fp16" )] |
21536 | unsafe fn test_mm512_mask3_fmaddsub_round_ph() { |
21537 | let a = _mm512_set1_ph(1.0); |
21538 | let b = _mm512_set1_ph(2.0); |
21539 | let c = _mm512_set1_ph(3.0); |
21540 | let r = _mm512_mask3_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21541 | a, |
21542 | b, |
21543 | c, |
21544 | 0b00110011001100110011001100110011, |
21545 | ); |
21546 | let e = _mm512_set_ph( |
21547 | 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, |
21548 | 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, |
21549 | ); |
21550 | assert_eq_m512h(r, e); |
21551 | } |
21552 | |
21553 | #[simd_test(enable = "avx512fp16" )] |
21554 | unsafe fn test_mm512_maskz_fmaddsub_round_ph() { |
21555 | let a = _mm512_set1_ph(1.0); |
21556 | let b = _mm512_set1_ph(2.0); |
21557 | let c = _mm512_set1_ph(3.0); |
21558 | let r = _mm512_maskz_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21559 | 0b00110011001100110011001100110011, |
21560 | a, |
21561 | b, |
21562 | c, |
21563 | ); |
21564 | let e = _mm512_set_ph( |
21565 | 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, |
21566 | 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, |
21567 | ); |
21568 | assert_eq_m512h(r, e); |
21569 | } |
21570 | |
21571 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21572 | unsafe fn test_mm_fmsubadd_ph() { |
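| // FMSUBADD is the opposite alternation: even-indexed lanes get a * b + c = 5.0 and |
| // odd-indexed lanes get a * b - c = -1.0. |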
21573 | let a = _mm_set1_ph(1.0); |
21574 | let b = _mm_set1_ph(2.0); |
21575 | let c = _mm_set1_ph(3.0); |
21576 | let r = _mm_fmsubadd_ph(a, b, c); |
21577 | let e = _mm_set_ph(-1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0); |
21578 | assert_eq_m128h(r, e); |
21579 | } |
21580 | |
21581 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21582 | unsafe fn test_mm_mask_fmsubadd_ph() { |
21583 | let a = _mm_set1_ph(1.0); |
21584 | let b = _mm_set1_ph(2.0); |
21585 | let c = _mm_set1_ph(3.0); |
21586 | let r = _mm_mask_fmsubadd_ph(a, 0b00110011, b, c); |
21587 | let e = _mm_set_ph(1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0); |
21588 | assert_eq_m128h(r, e); |
21589 | } |
21590 | |
21591 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21592 | unsafe fn test_mm_mask3_fmsubadd_ph() { |
21593 | let a = _mm_set1_ph(1.0); |
21594 | let b = _mm_set1_ph(2.0); |
21595 | let c = _mm_set1_ph(3.0); |
21596 | let r = _mm_mask3_fmsubadd_ph(a, b, c, 0b00110011); |
21597 | let e = _mm_set_ph(3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0); |
21598 | assert_eq_m128h(r, e); |
21599 | } |
21600 | |
21601 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21602 | unsafe fn test_mm_maskz_fmsubadd_ph() { |
21603 | let a = _mm_set1_ph(1.0); |
21604 | let b = _mm_set1_ph(2.0); |
21605 | let c = _mm_set1_ph(3.0); |
21606 | let r = _mm_maskz_fmsubadd_ph(0b00110011, a, b, c); |
21607 | let e = _mm_set_ph(0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0); |
21608 | assert_eq_m128h(r, e); |
21609 | } |
21610 | |
21611 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21612 | unsafe fn test_mm256_fmsubadd_ph() { |
21613 | let a = _mm256_set1_ph(1.0); |
21614 | let b = _mm256_set1_ph(2.0); |
21615 | let c = _mm256_set1_ph(3.0); |
21616 | let r = _mm256_fmsubadd_ph(a, b, c); |
21617 | let e = _mm256_set_ph( |
21618 | -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, |
21619 | ); |
21620 | assert_eq_m256h(r, e); |
21621 | } |
21622 | |
21623 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21624 | unsafe fn test_mm256_mask_fmsubadd_ph() { |
21625 | let a = _mm256_set1_ph(1.0); |
21626 | let b = _mm256_set1_ph(2.0); |
21627 | let c = _mm256_set1_ph(3.0); |
21628 | let r = _mm256_mask_fmsubadd_ph(a, 0b0011001100110011, b, c); |
21629 | let e = _mm256_set_ph( |
21630 | 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, |
21631 | ); |
21632 | assert_eq_m256h(r, e); |
21633 | } |
21634 | |
21635 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21636 | unsafe fn test_mm256_mask3_fmsubadd_ph() { |
21637 | let a = _mm256_set1_ph(1.0); |
21638 | let b = _mm256_set1_ph(2.0); |
21639 | let c = _mm256_set1_ph(3.0); |
21640 | let r = _mm256_mask3_fmsubadd_ph(a, b, c, 0b0011001100110011); |
21641 | let e = _mm256_set_ph( |
21642 | 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, |
21643 | ); |
21644 | assert_eq_m256h(r, e); |
21645 | } |
21646 | |
21647 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21648 | unsafe fn test_mm256_maskz_fmsubadd_ph() { |
21649 | let a = _mm256_set1_ph(1.0); |
21650 | let b = _mm256_set1_ph(2.0); |
21651 | let c = _mm256_set1_ph(3.0); |
21652 | let r = _mm256_maskz_fmsubadd_ph(0b0011001100110011, a, b, c); |
21653 | let e = _mm256_set_ph( |
21654 | 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, |
21655 | ); |
21656 | assert_eq_m256h(r, e); |
21657 | } |
21658 | |
21659 | #[simd_test(enable = "avx512fp16" )] |
21660 | unsafe fn test_mm512_fmsubadd_ph() { |
21661 | let a = _mm512_set1_ph(1.0); |
21662 | let b = _mm512_set1_ph(2.0); |
21663 | let c = _mm512_set1_ph(3.0); |
21664 | let r = _mm512_fmsubadd_ph(a, b, c); |
21665 | let e = _mm512_set_ph( |
21666 | -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, |
21667 | -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, |
21668 | ); |
21669 | assert_eq_m512h(r, e); |
21670 | } |
21671 | |
21672 | #[simd_test(enable = "avx512fp16" )] |
21673 | unsafe fn test_mm512_mask_fmsubadd_ph() { |
21674 | let a = _mm512_set1_ph(1.0); |
21675 | let b = _mm512_set1_ph(2.0); |
21676 | let c = _mm512_set1_ph(3.0); |
21677 | let r = _mm512_mask_fmsubadd_ph(a, 0b00110011001100110011001100110011, b, c); |
21678 | let e = _mm512_set_ph( |
21679 | 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, |
21680 | 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, |
21681 | ); |
21682 | assert_eq_m512h(r, e); |
21683 | } |
21684 | |
21685 | #[simd_test(enable = "avx512fp16" )] |
21686 | unsafe fn test_mm512_mask3_fmsubadd_ph() { |
21687 | let a = _mm512_set1_ph(1.0); |
21688 | let b = _mm512_set1_ph(2.0); |
21689 | let c = _mm512_set1_ph(3.0); |
21690 | let r = _mm512_mask3_fmsubadd_ph(a, b, c, 0b00110011001100110011001100110011); |
21691 | let e = _mm512_set_ph( |
21692 | 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, |
21693 | 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, |
21694 | ); |
21695 | assert_eq_m512h(r, e); |
21696 | } |
21697 | |
21698 | #[simd_test(enable = "avx512fp16" )] |
21699 | unsafe fn test_mm512_maskz_fmsubadd_ph() { |
21700 | let a = _mm512_set1_ph(1.0); |
21701 | let b = _mm512_set1_ph(2.0); |
21702 | let c = _mm512_set1_ph(3.0); |
21703 | let r = _mm512_maskz_fmsubadd_ph(0b00110011001100110011001100110011, a, b, c); |
21704 | let e = _mm512_set_ph( |
21705 | 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, |
21706 | 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, |
21707 | ); |
21708 | assert_eq_m512h(r, e); |
21709 | } |
21710 | |
21711 | #[simd_test(enable = "avx512fp16" )] |
21712 | unsafe fn test_mm512_fmsubadd_round_ph() { |
21713 | let a = _mm512_set1_ph(1.0); |
21714 | let b = _mm512_set1_ph(2.0); |
21715 | let c = _mm512_set1_ph(3.0); |
21716 | let r = |
21717 | _mm512_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
21718 | let e = _mm512_set_ph( |
21719 | -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, |
21720 | -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, |
21721 | ); |
21722 | assert_eq_m512h(r, e); |
21723 | } |
21724 | |
21725 | #[simd_test(enable = "avx512fp16" )] |
21726 | unsafe fn test_mm512_mask_fmsubadd_round_ph() { |
21727 | let a = _mm512_set1_ph(1.0); |
21728 | let b = _mm512_set1_ph(2.0); |
21729 | let c = _mm512_set1_ph(3.0); |
21730 | let r = _mm512_mask_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21731 | a, |
21732 | 0b00110011001100110011001100110011, |
21733 | b, |
21734 | c, |
21735 | ); |
21736 | let e = _mm512_set_ph( |
21737 | 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, |
21738 | 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, |
21739 | ); |
21740 | assert_eq_m512h(r, e); |
21741 | } |
21742 | |
21743 | #[simd_test(enable = "avx512fp16" )] |
21744 | unsafe fn test_mm512_mask3_fmsubadd_round_ph() { |
21745 | let a = _mm512_set1_ph(1.0); |
21746 | let b = _mm512_set1_ph(2.0); |
21747 | let c = _mm512_set1_ph(3.0); |
21748 | let r = _mm512_mask3_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21749 | a, |
21750 | b, |
21751 | c, |
21752 | 0b00110011001100110011001100110011, |
21753 | ); |
21754 | let e = _mm512_set_ph( |
21755 | 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, |
21756 | 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, |
21757 | ); |
21758 | assert_eq_m512h(r, e); |
21759 | } |
21760 | |
21761 | #[simd_test(enable = "avx512fp16" )] |
21762 | unsafe fn test_mm512_maskz_fmsubadd_round_ph() { |
21763 | let a = _mm512_set1_ph(1.0); |
21764 | let b = _mm512_set1_ph(2.0); |
21765 | let c = _mm512_set1_ph(3.0); |
21766 | let r = _mm512_maskz_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21767 | 0b00110011001100110011001100110011, |
21768 | a, |
21769 | b, |
21770 | c, |
21771 | ); |
21772 | let e = _mm512_set_ph( |
21773 | 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, |
21774 | 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, |
21775 | ); |
21776 | assert_eq_m512h(r, e); |
21777 | } |
21778 | |
21779 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21780 | unsafe fn test_mm_rcp_ph() { |
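| // RCP is an approximate reciprocal; for the power-of-two input 2.0 the approximation |
| // comes out as exactly 0.5, so an exact comparison is safe here. |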
21781 | let a = _mm_set1_ph(2.0); |
21782 | let r = _mm_rcp_ph(a); |
21783 | let e = _mm_set1_ph(0.5); |
21784 | assert_eq_m128h(r, e); |
21785 | } |
21786 | |
21787 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21788 | unsafe fn test_mm_mask_rcp_ph() { |
21789 | let a = _mm_set1_ph(2.0); |
21790 | let src = _mm_set1_ph(1.0); |
21791 | let r = _mm_mask_rcp_ph(src, 0b01010101, a); |
21792 | let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5); |
21793 | assert_eq_m128h(r, e); |
21794 | } |
21795 | |
21796 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21797 | unsafe fn test_mm_maskz_rcp_ph() { |
21798 | let a = _mm_set1_ph(2.0); |
21799 | let r = _mm_maskz_rcp_ph(0b01010101, a); |
21800 | let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5); |
21801 | assert_eq_m128h(r, e); |
21802 | } |
21803 | |
21804 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21805 | unsafe fn test_mm256_rcp_ph() { |
21806 | let a = _mm256_set1_ph(2.0); |
21807 | let r = _mm256_rcp_ph(a); |
21808 | let e = _mm256_set1_ph(0.5); |
21809 | assert_eq_m256h(r, e); |
21810 | } |
21811 | |
21812 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21813 | unsafe fn test_mm256_mask_rcp_ph() { |
21814 | let a = _mm256_set1_ph(2.0); |
21815 | let src = _mm256_set1_ph(1.0); |
21816 | let r = _mm256_mask_rcp_ph(src, 0b0101010101010101, a); |
21817 | let e = _mm256_set_ph( |
21818 | 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, |
21819 | ); |
21820 | assert_eq_m256h(r, e); |
21821 | } |
21822 | |
21823 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21824 | unsafe fn test_mm256_maskz_rcp_ph() { |
21825 | let a = _mm256_set1_ph(2.0); |
21826 | let r = _mm256_maskz_rcp_ph(0b0101010101010101, a); |
21827 | let e = _mm256_set_ph( |
21828 | 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, |
21829 | ); |
21830 | assert_eq_m256h(r, e); |
21831 | } |
21832 | |
21833 | #[simd_test(enable = "avx512fp16" )] |
21834 | unsafe fn test_mm512_rcp_ph() { |
21835 | let a = _mm512_set1_ph(2.0); |
21836 | let r = _mm512_rcp_ph(a); |
21837 | let e = _mm512_set1_ph(0.5); |
21838 | assert_eq_m512h(r, e); |
21839 | } |
21840 | |
21841 | #[simd_test(enable = "avx512fp16" )] |
21842 | unsafe fn test_mm512_mask_rcp_ph() { |
21843 | let a = _mm512_set1_ph(2.0); |
21844 | let src = _mm512_set1_ph(1.0); |
21845 | let r = _mm512_mask_rcp_ph(src, 0b01010101010101010101010101010101, a); |
21846 | let e = _mm512_set_ph( |
21847 | 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, |
21848 | 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, |
21849 | ); |
21850 | assert_eq_m512h(r, e); |
21851 | } |
21852 | |
21853 | #[simd_test(enable = "avx512fp16" )] |
21854 | unsafe fn test_mm512_maskz_rcp_ph() { |
21855 | let a = _mm512_set1_ph(2.0); |
21856 | let r = _mm512_maskz_rcp_ph(0b01010101010101010101010101010101, a); |
21857 | let e = _mm512_set_ph( |
21858 | 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, |
21859 | 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, |
21860 | ); |
21861 | assert_eq_m512h(r, e); |
21862 | } |
21863 | |
21864 | #[simd_test(enable = "avx512fp16" )] |
21865 | unsafe fn test_mm_rcp_sh() { |
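| // Scalar form: lane 0 holds the approximate reciprocal of b[0] (1.0 / 2.0 = 0.5); |
| // lanes 1..7 are copied from a. |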
21866 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21867 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
21868 | let r = _mm_rcp_sh(a, b); |
21869 | let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21870 | assert_eq_m128h(r, e); |
21871 | } |
21872 | |
21873 | #[simd_test(enable = "avx512fp16" )] |
21874 | unsafe fn test_mm_mask_rcp_sh() { |
21875 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21876 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
21877 | let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); |
21878 | let r = _mm_mask_rcp_sh(src, 0, a, b); |
21879 | let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21880 | assert_eq_m128h(r, e); |
21881 | let r = _mm_mask_rcp_sh(src, 1, a, b); |
21882 | let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21883 | assert_eq_m128h(r, e); |
21884 | } |
21885 | |
21886 | #[simd_test(enable = "avx512fp16" )] |
21887 | unsafe fn test_mm_maskz_rcp_sh() { |
21888 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21889 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
21890 | let r = _mm_maskz_rcp_sh(0, a, b); |
21891 | let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21892 | assert_eq_m128h(r, e); |
21893 | let r = _mm_maskz_rcp_sh(1, a, b); |
21894 | let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21895 | assert_eq_m128h(r, e); |
21896 | } |
21897 | |
21898 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21899 | unsafe fn test_mm_rsqrt_ph() { |
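| // RSQRT is an approximate reciprocal square root: 1.0 / sqrt(4.0) = 0.5 in every lane. |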
21900 | let a = _mm_set1_ph(4.0); |
21901 | let r = _mm_rsqrt_ph(a); |
21902 | let e = _mm_set1_ph(0.5); |
21903 | assert_eq_m128h(r, e); |
21904 | } |
21905 | |
21906 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21907 | unsafe fn test_mm_mask_rsqrt_ph() { |
21908 | let a = _mm_set1_ph(4.0); |
21909 | let src = _mm_set1_ph(1.0); |
21910 | let r = _mm_mask_rsqrt_ph(src, 0b01010101, a); |
21911 | let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5); |
21912 | assert_eq_m128h(r, e); |
21913 | } |
21914 | |
21915 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21916 | unsafe fn test_mm_maskz_rsqrt_ph() { |
21917 | let a = _mm_set1_ph(4.0); |
21918 | let r = _mm_maskz_rsqrt_ph(0b01010101, a); |
21919 | let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5); |
21920 | assert_eq_m128h(r, e); |
21921 | } |
21922 | |
21923 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21924 | unsafe fn test_mm256_rsqrt_ph() { |
21925 | let a = _mm256_set1_ph(4.0); |
21926 | let r = _mm256_rsqrt_ph(a); |
21927 | let e = _mm256_set1_ph(0.5); |
21928 | assert_eq_m256h(r, e); |
21929 | } |
21930 | |
21931 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21932 | unsafe fn test_mm256_mask_rsqrt_ph() { |
21933 | let a = _mm256_set1_ph(4.0); |
21934 | let src = _mm256_set1_ph(1.0); |
21935 | let r = _mm256_mask_rsqrt_ph(src, 0b0101010101010101, a); |
21936 | let e = _mm256_set_ph( |
21937 | 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, |
21938 | ); |
21939 | assert_eq_m256h(r, e); |
21940 | } |
21941 | |
21942 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21943 | unsafe fn test_mm256_maskz_rsqrt_ph() { |
21944 | let a = _mm256_set1_ph(4.0); |
21945 | let r = _mm256_maskz_rsqrt_ph(0b0101010101010101, a); |
21946 | let e = _mm256_set_ph( |
21947 | 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, |
21948 | ); |
21949 | assert_eq_m256h(r, e); |
21950 | } |
21951 | |
21952 | #[simd_test(enable = "avx512fp16" )] |
21953 | unsafe fn test_mm512_rsqrt_ph() { |
21954 | let a = _mm512_set1_ph(4.0); |
21955 | let r = _mm512_rsqrt_ph(a); |
21956 | let e = _mm512_set1_ph(0.5); |
21957 | assert_eq_m512h(r, e); |
21958 | } |
21959 | |
21960 | #[simd_test(enable = "avx512fp16" )] |
21961 | unsafe fn test_mm512_mask_rsqrt_ph() { |
21962 | let a = _mm512_set1_ph(4.0); |
21963 | let src = _mm512_set1_ph(1.0); |
21964 | let r = _mm512_mask_rsqrt_ph(src, 0b01010101010101010101010101010101, a); |
21965 | let e = _mm512_set_ph( |
21966 | 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, |
21967 | 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, |
21968 | ); |
21969 | assert_eq_m512h(r, e); |
21970 | } |
21971 | |
21972 | #[simd_test(enable = "avx512fp16" )] |
21973 | unsafe fn test_mm512_maskz_rsqrt_ph() { |
21974 | let a = _mm512_set1_ph(4.0); |
21975 | let r = _mm512_maskz_rsqrt_ph(0b01010101010101010101010101010101, a); |
21976 | let e = _mm512_set_ph( |
21977 | 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, |
21978 | 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, |
21979 | ); |
21980 | assert_eq_m512h(r, e); |
21981 | } |
21982 | |
21983 | #[simd_test(enable = "avx512fp16" )] |
21984 | unsafe fn test_mm_rsqrt_sh() { |
21985 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21986 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
21987 | let r = _mm_rsqrt_sh(a, b); |
21988 | let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21989 | assert_eq_m128h(r, e); |
21990 | } |
21991 | |
21992 | #[simd_test(enable = "avx512fp16" )] |
21993 | unsafe fn test_mm_mask_rsqrt_sh() { |
21994 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21995 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
21996 | let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); |
21997 | let r = _mm_mask_rsqrt_sh(src, 0, a, b); |
21998 | let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21999 | assert_eq_m128h(r, e); |
22000 | let r = _mm_mask_rsqrt_sh(src, 1, a, b); |
22001 | let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22002 | assert_eq_m128h(r, e); |
22003 | } |
22004 | |
22005 | #[simd_test(enable = "avx512fp16" )] |
22006 | unsafe fn test_mm_maskz_rsqrt_sh() { |
22007 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22008 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
22009 | let r = _mm_maskz_rsqrt_sh(0, a, b); |
22010 | let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22011 | assert_eq_m128h(r, e); |
22012 | let r = _mm_maskz_rsqrt_sh(1, a, b); |
22013 | let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22014 | assert_eq_m128h(r, e); |
22015 | } |
22016 | |
22017 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22018 | unsafe fn test_mm_sqrt_ph() { |
22019 | let a = _mm_set1_ph(4.0); |
22020 | let r = _mm_sqrt_ph(a); |
22021 | let e = _mm_set1_ph(2.0); |
22022 | assert_eq_m128h(r, e); |
22023 | } |
22024 | |
22025 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22026 | unsafe fn test_mm_mask_sqrt_ph() { |
22027 | let a = _mm_set1_ph(4.0); |
22028 | let src = _mm_set1_ph(1.0); |
22029 | let r = _mm_mask_sqrt_ph(src, 0b01010101, a); |
22030 | let e = _mm_set_ph(1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0); |
22031 | assert_eq_m128h(r, e); |
22032 | } |
22033 | |
22034 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22035 | unsafe fn test_mm_maskz_sqrt_ph() { |
22036 | let a = _mm_set1_ph(4.0); |
22037 | let r = _mm_maskz_sqrt_ph(0b01010101, a); |
22038 | let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0); |
22039 | assert_eq_m128h(r, e); |
22040 | } |
22041 | |
22042 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22043 | unsafe fn test_mm256_sqrt_ph() { |
22044 | let a = _mm256_set1_ph(4.0); |
22045 | let r = _mm256_sqrt_ph(a); |
22046 | let e = _mm256_set1_ph(2.0); |
22047 | assert_eq_m256h(r, e); |
22048 | } |
22049 | |
22050 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22051 | unsafe fn test_mm256_mask_sqrt_ph() { |
22052 | let a = _mm256_set1_ph(4.0); |
22053 | let src = _mm256_set1_ph(1.0); |
22054 | let r = _mm256_mask_sqrt_ph(src, 0b0101010101010101, a); |
22055 | let e = _mm256_set_ph( |
22056 | 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, |
22057 | ); |
22058 | assert_eq_m256h(r, e); |
22059 | } |
22060 | |
22061 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22062 | unsafe fn test_mm256_maskz_sqrt_ph() { |
22063 | let a = _mm256_set1_ph(4.0); |
22064 | let r = _mm256_maskz_sqrt_ph(0b0101010101010101, a); |
22065 | let e = _mm256_set_ph( |
22066 | 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, |
22067 | ); |
22068 | assert_eq_m256h(r, e); |
22069 | } |
22070 | |
22071 | #[simd_test(enable = "avx512fp16" )] |
22072 | unsafe fn test_mm512_sqrt_ph() { |
22073 | let a = _mm512_set1_ph(4.0); |
22074 | let r = _mm512_sqrt_ph(a); |
22075 | let e = _mm512_set1_ph(2.0); |
22076 | assert_eq_m512h(r, e); |
22077 | } |
22078 | |
22079 | #[simd_test(enable = "avx512fp16" )] |
22080 | unsafe fn test_mm512_mask_sqrt_ph() { |
22081 | let a = _mm512_set1_ph(4.0); |
22082 | let src = _mm512_set1_ph(1.0); |
22083 | let r = _mm512_mask_sqrt_ph(src, 0b01010101010101010101010101010101, a); |
22084 | let e = _mm512_set_ph( |
22085 | 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, |
22086 | 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, |
22087 | ); |
22088 | assert_eq_m512h(r, e); |
22089 | } |
22090 | |
22091 | #[simd_test(enable = "avx512fp16" )] |
22092 | unsafe fn test_mm512_maskz_sqrt_ph() { |
22093 | let a = _mm512_set1_ph(4.0); |
22094 | let r = _mm512_maskz_sqrt_ph(0b01010101010101010101010101010101, a); |
22095 | let e = _mm512_set_ph( |
22096 | 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, |
22097 | 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, |
22098 | ); |
22099 | assert_eq_m512h(r, e); |
22100 | } |
22101 | |
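// The *_round variants take the rounding behaviour as a const generic;
// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC selects round-to-nearest-even with
// floating-point exceptions suppressed.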
22102 | #[simd_test(enable = "avx512fp16" )] |
22103 | unsafe fn test_mm512_sqrt_round_ph() { |
22104 | let a = _mm512_set1_ph(4.0); |
22105 | let r = _mm512_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
22106 | let e = _mm512_set1_ph(2.0); |
22107 | assert_eq_m512h(r, e); |
22108 | } |
22109 | |
22110 | #[simd_test(enable = "avx512fp16" )] |
22111 | unsafe fn test_mm512_mask_sqrt_round_ph() { |
22112 | let a = _mm512_set1_ph(4.0); |
22113 | let src = _mm512_set1_ph(1.0); |
22114 | let r = _mm512_mask_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22115 | src, |
22116 | 0b01010101010101010101010101010101, |
22117 | a, |
22118 | ); |
22119 | let e = _mm512_set_ph( |
22120 | 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, |
22121 | 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, |
22122 | ); |
22123 | assert_eq_m512h(r, e); |
22124 | } |
22125 | |
22126 | #[simd_test(enable = "avx512fp16" )] |
22127 | unsafe fn test_mm512_maskz_sqrt_round_ph() { |
22128 | let a = _mm512_set1_ph(4.0); |
22129 | let r = _mm512_maskz_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22130 | 0b01010101010101010101010101010101, |
22131 | a, |
22132 | ); |
22133 | let e = _mm512_set_ph( |
22134 | 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, |
22135 | 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, |
22136 | ); |
22137 | assert_eq_m512h(r, e); |
22138 | } |
22139 | |
22140 | #[simd_test(enable = "avx512fp16" )] |
22141 | unsafe fn test_mm_sqrt_sh() { |
22142 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22143 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
22144 | let r = _mm_sqrt_sh(a, b); |
22145 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22146 | assert_eq_m128h(r, e); |
22147 | } |
22148 | |
22149 | #[simd_test(enable = "avx512fp16" )] |
22150 | unsafe fn test_mm_mask_sqrt_sh() { |
22151 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22152 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
22153 | let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); |
22154 | let r = _mm_mask_sqrt_sh(src, 0, a, b); |
22155 | let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22156 | assert_eq_m128h(r, e); |
22157 | let r = _mm_mask_sqrt_sh(src, 1, a, b); |
22158 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22159 | assert_eq_m128h(r, e); |
22160 | } |
22161 | |
22162 | #[simd_test(enable = "avx512fp16" )] |
22163 | unsafe fn test_mm_maskz_sqrt_sh() { |
22164 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22165 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
22166 | let r = _mm_maskz_sqrt_sh(0, a, b); |
22167 | let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22168 | assert_eq_m128h(r, e); |
22169 | let r = _mm_maskz_sqrt_sh(1, a, b); |
22170 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22171 | assert_eq_m128h(r, e); |
22172 | } |
22173 | |
22174 | #[simd_test(enable = "avx512fp16" )] |
22175 | unsafe fn test_mm_sqrt_round_sh() { |
22176 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22177 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
22178 | let r = _mm_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
22179 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22180 | assert_eq_m128h(r, e); |
22181 | } |
22182 | |
22183 | #[simd_test(enable = "avx512fp16" )] |
22184 | unsafe fn test_mm_mask_sqrt_round_sh() { |
22185 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22186 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
22187 | let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); |
22188 | let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22189 | src, 0, a, b, |
22190 | ); |
22191 | let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22192 | assert_eq_m128h(r, e); |
22193 | let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22194 | src, 1, a, b, |
22195 | ); |
22196 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22197 | assert_eq_m128h(r, e); |
22198 | } |
22199 | |
22200 | #[simd_test(enable = "avx512fp16" )] |
22201 | unsafe fn test_mm_maskz_sqrt_round_sh() { |
22202 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22203 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
22204 | let r = |
22205 | _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
22206 | let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22207 | assert_eq_m128h(r, e); |
22208 | let r = |
22209 | _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
22210 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22211 | assert_eq_m128h(r, e); |
22212 | } |
22213 | |
22214 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22215 | unsafe fn test_mm_max_ph() { |
22216 | let a = _mm_set1_ph(2.0); |
22217 | let b = _mm_set1_ph(1.0); |
22218 | let r = _mm_max_ph(a, b); |
22219 | let e = _mm_set1_ph(2.0); |
22220 | assert_eq_m128h(r, e); |
22221 | } |
22222 | |
22223 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22224 | unsafe fn test_mm_mask_max_ph() { |
22225 | let a = _mm_set1_ph(2.0); |
22226 | let b = _mm_set1_ph(1.0); |
22227 | let src = _mm_set1_ph(3.0); |
22228 | let r = _mm_mask_max_ph(src, 0b01010101, a, b); |
22229 | let e = _mm_set_ph(3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0); |
22230 | assert_eq_m128h(r, e); |
22231 | } |
22232 | |
22233 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22234 | unsafe fn test_mm_maskz_max_ph() { |
22235 | let a = _mm_set1_ph(2.0); |
22236 | let b = _mm_set1_ph(1.0); |
22237 | let r = _mm_maskz_max_ph(0b01010101, a, b); |
22238 | let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0); |
22239 | assert_eq_m128h(r, e); |
22240 | } |
22241 | |
22242 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22243 | unsafe fn test_mm256_max_ph() { |
22244 | let a = _mm256_set1_ph(2.0); |
22245 | let b = _mm256_set1_ph(1.0); |
22246 | let r = _mm256_max_ph(a, b); |
22247 | let e = _mm256_set1_ph(2.0); |
22248 | assert_eq_m256h(r, e); |
22249 | } |
22250 | |
22251 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22252 | unsafe fn test_mm256_mask_max_ph() { |
22253 | let a = _mm256_set1_ph(2.0); |
22254 | let b = _mm256_set1_ph(1.0); |
22255 | let src = _mm256_set1_ph(3.0); |
22256 | let r = _mm256_mask_max_ph(src, 0b0101010101010101, a, b); |
22257 | let e = _mm256_set_ph( |
22258 | 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, |
22259 | ); |
22260 | assert_eq_m256h(r, e); |
22261 | } |
22262 | |
22263 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22264 | unsafe fn test_mm256_maskz_max_ph() { |
22265 | let a = _mm256_set1_ph(2.0); |
22266 | let b = _mm256_set1_ph(1.0); |
22267 | let r = _mm256_maskz_max_ph(0b0101010101010101, a, b); |
22268 | let e = _mm256_set_ph( |
22269 | 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, |
22270 | ); |
22271 | assert_eq_m256h(r, e); |
22272 | } |
22273 | |
22274 | #[simd_test(enable = "avx512fp16" )] |
22275 | unsafe fn test_mm512_max_ph() { |
22276 | let a = _mm512_set1_ph(2.0); |
22277 | let b = _mm512_set1_ph(1.0); |
22278 | let r = _mm512_max_ph(a, b); |
22279 | let e = _mm512_set1_ph(2.0); |
22280 | assert_eq_m512h(r, e); |
22281 | } |
22282 | |
22283 | #[simd_test(enable = "avx512fp16" )] |
22284 | unsafe fn test_mm512_mask_max_ph() { |
22285 | let a = _mm512_set1_ph(2.0); |
22286 | let b = _mm512_set1_ph(1.0); |
22287 | let src = _mm512_set1_ph(3.0); |
22288 | let r = _mm512_mask_max_ph(src, 0b01010101010101010101010101010101, a, b); |
22289 | let e = _mm512_set_ph( |
22290 | 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, |
22291 | 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, |
22292 | ); |
22293 | assert_eq_m512h(r, e); |
22294 | } |
22295 | |
22296 | #[simd_test(enable = "avx512fp16" )] |
22297 | unsafe fn test_mm512_maskz_max_ph() { |
22298 | let a = _mm512_set1_ph(2.0); |
22299 | let b = _mm512_set1_ph(1.0); |
22300 | let r = _mm512_maskz_max_ph(0b01010101010101010101010101010101, a, b); |
22301 | let e = _mm512_set_ph( |
22302 | 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, |
22303 | 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, |
22304 | ); |
22305 | assert_eq_m512h(r, e); |
22306 | } |
22307 | |
22308 | #[simd_test(enable = "avx512fp16" )] |
22309 | unsafe fn test_mm512_max_round_ph() { |
22310 | let a = _mm512_set1_ph(2.0); |
22311 | let b = _mm512_set1_ph(1.0); |
22312 | let r = _mm512_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
22313 | let e = _mm512_set1_ph(2.0); |
22314 | assert_eq_m512h(r, e); |
22315 | } |
22316 | |
22317 | #[simd_test(enable = "avx512fp16" )] |
22318 | unsafe fn test_mm512_mask_max_round_ph() { |
22319 | let a = _mm512_set1_ph(2.0); |
22320 | let b = _mm512_set1_ph(1.0); |
22321 | let src = _mm512_set1_ph(3.0); |
22322 | let r = _mm512_mask_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22323 | src, |
22324 | 0b01010101010101010101010101010101, |
22325 | a, |
22326 | b, |
22327 | ); |
22328 | let e = _mm512_set_ph( |
22329 | 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, |
22330 | 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, |
22331 | ); |
22332 | assert_eq_m512h(r, e); |
22333 | } |
22334 | |
22335 | #[simd_test(enable = "avx512fp16" )] |
22336 | unsafe fn test_mm512_maskz_max_round_ph() { |
22337 | let a = _mm512_set1_ph(2.0); |
22338 | let b = _mm512_set1_ph(1.0); |
22339 | let r = _mm512_maskz_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22340 | 0b01010101010101010101010101010101, |
22341 | a, |
22342 | b, |
22343 | ); |
22344 | let e = _mm512_set_ph( |
22345 | 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, |
22346 | 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, |
22347 | ); |
22348 | assert_eq_m512h(r, e); |
22349 | } |
22350 | |
22351 | #[simd_test(enable = "avx512fp16" )] |
22352 | unsafe fn test_mm_max_sh() { |
22353 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22354 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22355 | let r = _mm_max_sh(a, b); |
22356 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22357 | assert_eq_m128h(r, e); |
22358 | } |
22359 | |
22360 | #[simd_test(enable = "avx512fp16" )] |
22361 | unsafe fn test_mm_mask_max_sh() { |
22362 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22363 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22364 | let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); |
22365 | let r = _mm_mask_max_sh(src, 0, a, b); |
22366 | let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22367 | assert_eq_m128h(r, e); |
22368 | let r = _mm_mask_max_sh(src, 1, a, b); |
22369 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22370 | assert_eq_m128h(r, e); |
22371 | } |
22372 | |
22373 | #[simd_test(enable = "avx512fp16" )] |
22374 | unsafe fn test_mm_maskz_max_sh() { |
22375 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22376 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22377 | let r = _mm_maskz_max_sh(0, a, b); |
22378 | let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22379 | assert_eq_m128h(r, e); |
22380 | let r = _mm_maskz_max_sh(1, a, b); |
22381 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22382 | assert_eq_m128h(r, e); |
22383 | } |
22384 | |
22385 | #[simd_test(enable = "avx512fp16" )] |
22386 | unsafe fn test_mm_max_round_sh() { |
22387 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22388 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22389 | let r = _mm_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
22390 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22391 | assert_eq_m128h(r, e); |
22392 | } |
22393 | |
22394 | #[simd_test(enable = "avx512fp16" )] |
22395 | unsafe fn test_mm_mask_max_round_sh() { |
22396 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22397 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22398 | let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); |
22399 | let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22400 | src, 0, a, b, |
22401 | ); |
22402 | let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22403 | assert_eq_m128h(r, e); |
22404 | let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22405 | src, 1, a, b, |
22406 | ); |
22407 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22408 | assert_eq_m128h(r, e); |
22409 | } |
22410 | |
22411 | #[simd_test(enable = "avx512fp16" )] |
22412 | unsafe fn test_mm_maskz_max_round_sh() { |
22413 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22414 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22415 | let r = |
22416 | _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
22417 | let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22418 | assert_eq_m128h(r, e); |
22419 | let r = |
22420 | _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
22421 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22422 | assert_eq_m128h(r, e); |
22423 | } |
22424 | |
22425 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22426 | unsafe fn test_mm_min_ph() { |
22427 | let a = _mm_set1_ph(2.0); |
22428 | let b = _mm_set1_ph(1.0); |
22429 | let r = _mm_min_ph(a, b); |
22430 | let e = _mm_set1_ph(1.0); |
22431 | assert_eq_m128h(r, e); |
22432 | } |
22433 | |
22434 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22435 | unsafe fn test_mm_mask_min_ph() { |
22436 | let a = _mm_set1_ph(2.0); |
22437 | let b = _mm_set1_ph(1.0); |
22438 | let src = _mm_set1_ph(3.0); |
22439 | let r = _mm_mask_min_ph(src, 0b01010101, a, b); |
22440 | let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0); |
22441 | assert_eq_m128h(r, e); |
22442 | } |
22443 | |
22444 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22445 | unsafe fn test_mm_maskz_min_ph() { |
22446 | let a = _mm_set1_ph(2.0); |
22447 | let b = _mm_set1_ph(1.0); |
22448 | let r = _mm_maskz_min_ph(0b01010101, a, b); |
22449 | let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0); |
22450 | assert_eq_m128h(r, e); |
22451 | } |
22452 | |
22453 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22454 | unsafe fn test_mm256_min_ph() { |
22455 | let a = _mm256_set1_ph(2.0); |
22456 | let b = _mm256_set1_ph(1.0); |
22457 | let r = _mm256_min_ph(a, b); |
22458 | let e = _mm256_set1_ph(1.0); |
22459 | assert_eq_m256h(r, e); |
22460 | } |
22461 | |
22462 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22463 | unsafe fn test_mm256_mask_min_ph() { |
22464 | let a = _mm256_set1_ph(2.0); |
22465 | let b = _mm256_set1_ph(1.0); |
22466 | let src = _mm256_set1_ph(3.0); |
22467 | let r = _mm256_mask_min_ph(src, 0b0101010101010101, a, b); |
22468 | let e = _mm256_set_ph( |
22469 | 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, |
22470 | ); |
22471 | assert_eq_m256h(r, e); |
22472 | } |
22473 | |
22474 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22475 | unsafe fn test_mm256_maskz_min_ph() { |
22476 | let a = _mm256_set1_ph(2.0); |
22477 | let b = _mm256_set1_ph(1.0); |
22478 | let r = _mm256_maskz_min_ph(0b0101010101010101, a, b); |
22479 | let e = _mm256_set_ph( |
22480 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
22481 | ); |
22482 | assert_eq_m256h(r, e); |
22483 | } |
22484 | |
22485 | #[simd_test(enable = "avx512fp16" )] |
22486 | unsafe fn test_mm512_min_ph() { |
22487 | let a = _mm512_set1_ph(2.0); |
22488 | let b = _mm512_set1_ph(1.0); |
22489 | let r = _mm512_min_ph(a, b); |
22490 | let e = _mm512_set1_ph(1.0); |
22491 | assert_eq_m512h(r, e); |
22492 | } |
22493 | |
22494 | #[simd_test(enable = "avx512fp16" )] |
22495 | unsafe fn test_mm512_mask_min_ph() { |
22496 | let a = _mm512_set1_ph(2.0); |
22497 | let b = _mm512_set1_ph(1.0); |
22498 | let src = _mm512_set1_ph(3.0); |
22499 | let r = _mm512_mask_min_ph(src, 0b01010101010101010101010101010101, a, b); |
22500 | let e = _mm512_set_ph( |
22501 | 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, |
22502 | 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, |
22503 | ); |
22504 | assert_eq_m512h(r, e); |
22505 | } |
22506 | |
22507 | #[simd_test(enable = "avx512fp16" )] |
22508 | unsafe fn test_mm512_maskz_min_ph() { |
22509 | let a = _mm512_set1_ph(2.0); |
22510 | let b = _mm512_set1_ph(1.0); |
22511 | let r = _mm512_maskz_min_ph(0b01010101010101010101010101010101, a, b); |
22512 | let e = _mm512_set_ph( |
22513 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, |
22514 | 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
22515 | ); |
22516 | assert_eq_m512h(r, e); |
22517 | } |
22518 | |
22519 | #[simd_test(enable = "avx512fp16" )] |
22520 | unsafe fn test_mm512_min_round_ph() { |
22521 | let a = _mm512_set1_ph(2.0); |
22522 | let b = _mm512_set1_ph(1.0); |
22523 | let r = _mm512_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
22524 | let e = _mm512_set1_ph(1.0); |
22525 | assert_eq_m512h(r, e); |
22526 | } |
22527 | |
22528 | #[simd_test(enable = "avx512fp16" )] |
22529 | unsafe fn test_mm512_mask_min_round_ph() { |
22530 | let a = _mm512_set1_ph(2.0); |
22531 | let b = _mm512_set1_ph(1.0); |
22532 | let src = _mm512_set1_ph(3.0); |
22533 | let r = _mm512_mask_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22534 | src, |
22535 | 0b01010101010101010101010101010101, |
22536 | a, |
22537 | b, |
22538 | ); |
22539 | let e = _mm512_set_ph( |
22540 | 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, |
22541 | 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, |
22542 | ); |
22543 | assert_eq_m512h(r, e); |
22544 | } |
22545 | |
22546 | #[simd_test(enable = "avx512fp16" )] |
22547 | unsafe fn test_mm512_maskz_min_round_ph() { |
22548 | let a = _mm512_set1_ph(2.0); |
22549 | let b = _mm512_set1_ph(1.0); |
22550 | let r = _mm512_maskz_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22551 | 0b01010101010101010101010101010101, |
22552 | a, |
22553 | b, |
22554 | ); |
22555 | let e = _mm512_set_ph( |
22556 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, |
22557 | 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
22558 | ); |
22559 | assert_eq_m512h(r, e); |
22560 | } |
22561 | |
22562 | #[simd_test(enable = "avx512fp16" )] |
22563 | unsafe fn test_mm_min_sh() { |
22564 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22565 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22566 | let r = _mm_min_sh(a, b); |
22567 | let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22568 | assert_eq_m128h(r, e); |
22569 | } |
22570 | |
22571 | #[simd_test(enable = "avx512fp16" )] |
22572 | unsafe fn test_mm_mask_min_sh() { |
22573 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22574 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22575 | let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); |
22576 | let r = _mm_mask_min_sh(src, 0, a, b); |
22577 | let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22578 | assert_eq_m128h(r, e); |
22579 | let r = _mm_mask_min_sh(src, 1, a, b); |
22580 | let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22581 | assert_eq_m128h(r, e); |
22582 | } |
22583 | |
22584 | #[simd_test(enable = "avx512fp16" )] |
22585 | unsafe fn test_mm_maskz_min_sh() { |
22586 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22587 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22588 | let r = _mm_maskz_min_sh(0, a, b); |
22589 | let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22590 | assert_eq_m128h(r, e); |
22591 | let r = _mm_maskz_min_sh(1, a, b); |
22592 | let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22593 | assert_eq_m128h(r, e); |
22594 | } |
22595 | |
22596 | #[simd_test(enable = "avx512fp16" )] |
22597 | unsafe fn test_mm_min_round_sh() { |
22598 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22599 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22600 | let r = _mm_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
22601 | let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22602 | assert_eq_m128h(r, e); |
22603 | } |
22604 | |
22605 | #[simd_test(enable = "avx512fp16" )] |
22606 | unsafe fn test_mm_mask_min_round_sh() { |
22607 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22608 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22609 | let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); |
22610 | let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22611 | src, 0, a, b, |
22612 | ); |
22613 | let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22614 | assert_eq_m128h(r, e); |
22615 | let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22616 | src, 1, a, b, |
22617 | ); |
22618 | let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22619 | assert_eq_m128h(r, e); |
22620 | } |
22621 | |
22622 | #[simd_test(enable = "avx512fp16" )] |
22623 | unsafe fn test_mm_maskz_min_round_sh() { |
22624 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22625 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22626 | let r = |
22627 | _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
22628 | let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22629 | assert_eq_m128h(r, e); |
22630 | let r = |
22631 | _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
22632 | let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22633 | assert_eq_m128h(r, e); |
22634 | } |
22635 | |
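// getexp extracts the unbiased exponent, i.e. floor(log2(|x|)), as a half-precision value;
// for 3.0 that is 1.0.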
22636 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22637 | unsafe fn test_mm_getexp_ph() { |
22638 | let a = _mm_set1_ph(3.0); |
22639 | let r = _mm_getexp_ph(a); |
22640 | let e = _mm_set1_ph(1.0); |
22641 | assert_eq_m128h(r, e); |
22642 | } |
22643 | |
22644 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22645 | unsafe fn test_mm_mask_getexp_ph() { |
22646 | let a = _mm_set1_ph(3.0); |
22647 | let src = _mm_set1_ph(4.0); |
22648 | let r = _mm_mask_getexp_ph(src, 0b01010101, a); |
22649 | let e = _mm_set_ph(4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0); |
22650 | assert_eq_m128h(r, e); |
22651 | } |
22652 | |
22653 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22654 | unsafe fn test_mm_maskz_getexp_ph() { |
22655 | let a = _mm_set1_ph(3.0); |
22656 | let r = _mm_maskz_getexp_ph(0b01010101, a); |
22657 | let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0); |
22658 | assert_eq_m128h(r, e); |
22659 | } |
22660 | |
22661 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22662 | unsafe fn test_mm256_getexp_ph() { |
22663 | let a = _mm256_set1_ph(3.0); |
22664 | let r = _mm256_getexp_ph(a); |
22665 | let e = _mm256_set1_ph(1.0); |
22666 | assert_eq_m256h(r, e); |
22667 | } |
22668 | |
22669 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22670 | unsafe fn test_mm256_mask_getexp_ph() { |
22671 | let a = _mm256_set1_ph(3.0); |
22672 | let src = _mm256_set1_ph(4.0); |
22673 | let r = _mm256_mask_getexp_ph(src, 0b0101010101010101, a); |
22674 | let e = _mm256_set_ph( |
22675 | 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, |
22676 | ); |
22677 | assert_eq_m256h(r, e); |
22678 | } |
22679 | |
22680 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22681 | unsafe fn test_mm256_maskz_getexp_ph() { |
22682 | let a = _mm256_set1_ph(3.0); |
22683 | let r = _mm256_maskz_getexp_ph(0b0101010101010101, a); |
22684 | let e = _mm256_set_ph( |
22685 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
22686 | ); |
22687 | assert_eq_m256h(r, e); |
22688 | } |
22689 | |
22690 | #[simd_test(enable = "avx512fp16" )] |
22691 | unsafe fn test_mm512_getexp_ph() { |
22692 | let a = _mm512_set1_ph(3.0); |
22693 | let r = _mm512_getexp_ph(a); |
22694 | let e = _mm512_set1_ph(1.0); |
22695 | assert_eq_m512h(r, e); |
22696 | } |
22697 | |
22698 | #[simd_test(enable = "avx512fp16" )] |
22699 | unsafe fn test_mm512_mask_getexp_ph() { |
22700 | let a = _mm512_set1_ph(3.0); |
22701 | let src = _mm512_set1_ph(4.0); |
22702 | let r = _mm512_mask_getexp_ph(src, 0b01010101010101010101010101010101, a); |
22703 | let e = _mm512_set_ph( |
22704 | 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, |
22705 | 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, |
22706 | ); |
22707 | assert_eq_m512h(r, e); |
22708 | } |
22709 | |
22710 | #[simd_test(enable = "avx512fp16" )] |
22711 | unsafe fn test_mm512_maskz_getexp_ph() { |
22712 | let a = _mm512_set1_ph(3.0); |
22713 | let r = _mm512_maskz_getexp_ph(0b01010101010101010101010101010101, a); |
22714 | let e = _mm512_set_ph( |
22715 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, |
22716 | 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
22717 | ); |
22718 | assert_eq_m512h(r, e); |
22719 | } |
22720 | |
22721 | #[simd_test(enable = "avx512fp16" )] |
22722 | unsafe fn test_mm512_getexp_round_ph() { |
22723 | let a = _mm512_set1_ph(3.0); |
22724 | let r = _mm512_getexp_round_ph::<_MM_FROUND_NO_EXC>(a); |
22725 | let e = _mm512_set1_ph(1.0); |
22726 | assert_eq_m512h(r, e); |
22727 | } |
22728 | |
22729 | #[simd_test(enable = "avx512fp16" )] |
22730 | unsafe fn test_mm512_mask_getexp_round_ph() { |
22731 | let a = _mm512_set1_ph(3.0); |
22732 | let src = _mm512_set1_ph(4.0); |
22733 | let r = _mm512_mask_getexp_round_ph::<_MM_FROUND_NO_EXC>( |
22734 | src, |
22735 | 0b01010101010101010101010101010101, |
22736 | a, |
22737 | ); |
22738 | let e = _mm512_set_ph( |
22739 | 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, |
22740 | 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, |
22741 | ); |
22742 | assert_eq_m512h(r, e); |
22743 | } |
22744 | |
22745 | #[simd_test(enable = "avx512fp16" )] |
22746 | unsafe fn test_mm512_maskz_getexp_round_ph() { |
22747 | let a = _mm512_set1_ph(3.0); |
22748 | let r = _mm512_maskz_getexp_round_ph::<_MM_FROUND_NO_EXC>( |
22749 | 0b01010101010101010101010101010101, |
22750 | a, |
22751 | ); |
22752 | let e = _mm512_set_ph( |
22753 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, |
22754 | 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
22755 | ); |
22756 | assert_eq_m512h(r, e); |
22757 | } |
22758 | |
22759 | #[simd_test(enable = "avx512fp16" )] |
22760 | unsafe fn test_mm_getexp_sh() { |
22761 | let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); |
22762 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
22763 | let r = _mm_getexp_sh(a, b); |
22764 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
22765 | assert_eq_m128h(r, e); |
22766 | } |
22767 | |
22768 | #[simd_test(enable = "avx512fp16" )] |
22769 | unsafe fn test_mm_mask_getexp_sh() { |
22770 | let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); |
22771 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
22772 | let src = _mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.); |
22773 | let r = _mm_mask_getexp_sh(src, 0, a, b); |
22774 | let e = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); |
22775 | assert_eq_m128h(r, e); |
22776 | let r = _mm_mask_getexp_sh(src, 1, a, b); |
22777 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
22778 | assert_eq_m128h(r, e); |
22779 | } |
22780 | |
22781 | #[simd_test(enable = "avx512fp16" )] |
22782 | unsafe fn test_mm_maskz_getexp_sh() { |
22783 | let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); |
22784 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
22785 | let r = _mm_maskz_getexp_sh(0, a, b); |
22786 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
22787 | assert_eq_m128h(r, e); |
22788 | let r = _mm_maskz_getexp_sh(1, a, b); |
22789 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
22790 | assert_eq_m128h(r, e); |
22791 | } |
22792 | |
22793 | #[simd_test(enable = "avx512fp16" )] |
22794 | unsafe fn test_mm_getexp_round_sh() { |
22795 | let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); |
22796 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
22797 | let r = _mm_getexp_round_sh::<_MM_FROUND_NO_EXC>(a, b); |
22798 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
22799 | assert_eq_m128h(r, e); |
22800 | } |
22801 | |
22802 | #[simd_test(enable = "avx512fp16" )] |
22803 | unsafe fn test_mm_mask_getexp_round_sh() { |
22804 | let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); |
22805 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
22806 | let src = _mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.); |
22807 | let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, 0, a, b); |
22808 | let e = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); |
22809 | assert_eq_m128h(r, e); |
22810 | let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, 1, a, b); |
22811 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
22812 | assert_eq_m128h(r, e); |
22813 | } |
22814 | |
22815 | #[simd_test(enable = "avx512fp16" )] |
22816 | unsafe fn test_mm_maskz_getexp_round_sh() { |
22817 | let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); |
22818 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
22819 | let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(0, a, b); |
22820 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
22821 | assert_eq_m128h(r, e); |
22822 | let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(1, a, b); |
22823 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
22824 | assert_eq_m128h(r, e); |
22825 | } |
22826 | |
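// getmant with _MM_MANT_NORM_P75_1P5 normalizes the mantissa into [0.75, 1.5):
// 10.0 = 1.25 * 2^3, so the expected mantissa is 1.25. The _MM_MANT_SIGN_NAN sign control
// only matters for negative inputs, which these tests do not exercise.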
22827 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22828 | unsafe fn test_mm_getmant_ph() { |
22829 | let a = _mm_set1_ph(10.0); |
22830 | let r = _mm_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a); |
22831 | let e = _mm_set1_ph(1.25); |
22832 | assert_eq_m128h(r, e); |
22833 | } |
22834 | |
22835 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22836 | unsafe fn test_mm_mask_getmant_ph() { |
22837 | let a = _mm_set1_ph(10.0); |
22838 | let src = _mm_set1_ph(20.0); |
22839 | let r = _mm_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 0b01010101, a); |
22840 | let e = _mm_set_ph(20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25); |
22841 | assert_eq_m128h(r, e); |
22842 | } |
22843 | |
22844 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22845 | unsafe fn test_mm_maskz_getmant_ph() { |
22846 | let a = _mm_set1_ph(10.0); |
22847 | let r = _mm_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0b01010101, a); |
22848 | let e = _mm_set_ph(0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25); |
22849 | assert_eq_m128h(r, e); |
22850 | } |
22851 | |
22852 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22853 | unsafe fn test_mm256_getmant_ph() { |
22854 | let a = _mm256_set1_ph(10.0); |
22855 | let r = _mm256_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a); |
22856 | let e = _mm256_set1_ph(1.25); |
22857 | assert_eq_m256h(r, e); |
22858 | } |
22859 | |
22860 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22861 | unsafe fn test_mm256_mask_getmant_ph() { |
22862 | let a = _mm256_set1_ph(10.0); |
22863 | let src = _mm256_set1_ph(20.0); |
22864 | let r = _mm256_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>( |
22865 | src, |
22866 | 0b0101010101010101, |
22867 | a, |
22868 | ); |
22869 | let e = _mm256_set_ph( |
22870 | 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, |
22871 | 20.0, 1.25, |
22872 | ); |
22873 | assert_eq_m256h(r, e); |
22874 | } |
22875 | |
22876 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22877 | unsafe fn test_mm256_maskz_getmant_ph() { |
22878 | let a = _mm256_set1_ph(10.0); |
22879 | let r = _mm256_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>( |
22880 | 0b0101010101010101, |
22881 | a, |
22882 | ); |
22883 | let e = _mm256_set_ph( |
22884 | 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, |
22885 | ); |
22886 | assert_eq_m256h(r, e); |
22887 | } |
22888 | |
22889 | #[simd_test(enable = "avx512fp16" )] |
22890 | unsafe fn test_mm512_getmant_ph() { |
22891 | let a = _mm512_set1_ph(10.0); |
22892 | let r = _mm512_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a); |
22893 | let e = _mm512_set1_ph(1.25); |
22894 | assert_eq_m512h(r, e); |
22895 | } |
22896 | |
22897 | #[simd_test(enable = "avx512fp16" )] |
22898 | unsafe fn test_mm512_mask_getmant_ph() { |
22899 | let a = _mm512_set1_ph(10.0); |
22900 | let src = _mm512_set1_ph(20.0); |
22901 | let r = _mm512_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>( |
22902 | src, |
22903 | 0b01010101010101010101010101010101, |
22904 | a, |
22905 | ); |
22906 | let e = _mm512_set_ph( |
22907 | 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, |
22908 | 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, |
22909 | 20.0, 1.25, 20.0, 1.25, |
22910 | ); |
22911 | assert_eq_m512h(r, e); |
22912 | } |
22913 | |
22914 | #[simd_test(enable = "avx512fp16" )] |
22915 | unsafe fn test_mm512_maskz_getmant_ph() { |
22916 | let a = _mm512_set1_ph(10.0); |
22917 | let r = _mm512_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>( |
22918 | 0b01010101010101010101010101010101, |
22919 | a, |
22920 | ); |
22921 | let e = _mm512_set_ph( |
22922 | 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, |
22923 | 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, |
22924 | ); |
22925 | assert_eq_m512h(r, e); |
22926 | } |
22927 | |
22928 | #[simd_test(enable = "avx512fp16" )] |
22929 | unsafe fn test_mm512_getmant_round_ph() { |
22930 | let a = _mm512_set1_ph(10.0); |
22931 | let r = |
22932 | _mm512_getmant_round_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>( |
22933 | a, |
22934 | ); |
22935 | let e = _mm512_set1_ph(1.25); |
22936 | assert_eq_m512h(r, e); |
22937 | } |
22938 | |
22939 | #[simd_test(enable = "avx512fp16" )] |
22940 | unsafe fn test_mm512_mask_getmant_round_ph() { |
22941 | let a = _mm512_set1_ph(10.0); |
22942 | let src = _mm512_set1_ph(20.0); |
22943 | let r = _mm512_mask_getmant_round_ph::< |
22944 | _MM_MANT_NORM_P75_1P5, |
22945 | _MM_MANT_SIGN_NAN, |
22946 | _MM_FROUND_NO_EXC, |
22947 | >(src, 0b01010101010101010101010101010101, a); |
22948 | let e = _mm512_set_ph( |
22949 | 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, |
22950 | 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, |
22951 | 20.0, 1.25, 20.0, 1.25, |
22952 | ); |
22953 | assert_eq_m512h(r, e); |
22954 | } |
22955 | |
22956 | #[simd_test(enable = "avx512fp16" )] |
22957 | unsafe fn test_mm512_maskz_getmant_round_ph() { |
22958 | let a = _mm512_set1_ph(10.0); |
22959 | let r = _mm512_maskz_getmant_round_ph::< |
22960 | _MM_MANT_NORM_P75_1P5, |
22961 | _MM_MANT_SIGN_NAN, |
22962 | _MM_FROUND_NO_EXC, |
22963 | >(0b01010101010101010101010101010101, a); |
22964 | let e = _mm512_set_ph( |
22965 | 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, |
22966 | 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, |
22967 | ); |
22968 | assert_eq_m512h(r, e); |
22969 | } |
22970 | |
22971 | #[simd_test(enable = "avx512fp16" )] |
22972 | unsafe fn test_mm_getmant_sh() { |
22973 | let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.); |
22974 | let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.); |
22975 | let r = _mm_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a, b); |
22976 | let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.); |
22977 | assert_eq_m128h(r, e); |
22978 | } |
22979 | |
22980 | #[simd_test(enable = "avx512fp16" )] |
22981 | unsafe fn test_mm_mask_getmant_sh() { |
22982 | let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.); |
22983 | let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.); |
22984 | let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.); |
22985 | let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 0, a, b); |
22986 | let e = _mm_setr_ph(20.0, 10., 11., 12., 13., 14., 15., 16.); |
22987 | assert_eq_m128h(r, e); |
22988 | let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 1, a, b); |
22989 | let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.); |
22990 | assert_eq_m128h(r, e); |
22991 | } |
22992 | |
22993 | #[simd_test(enable = "avx512fp16" )] |
22994 | unsafe fn test_mm_maskz_getmant_sh() { |
22995 | let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.); |
22996 | let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.); |
22997 | let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0, a, b); |
22998 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
22999 | assert_eq_m128h(r, e); |
23000 | let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(1, a, b); |
23001 | let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.); |
23002 | assert_eq_m128h(r, e); |
23003 | } |
23004 | |
23005 | #[simd_test(enable = "avx512fp16" )] |
23006 | unsafe fn test_mm_getmant_round_sh() { |
23007 | let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.); |
23008 | let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.); |
23009 | let r = _mm_getmant_round_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>( |
23010 | a, b, |
23011 | ); |
23012 | let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.); |
23013 | assert_eq_m128h(r, e); |
23014 | } |
23015 | |
23016 | #[simd_test(enable = "avx512fp16" )] |
23017 | unsafe fn test_mm_mask_getmant_round_sh() { |
23018 | let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.); |
23019 | let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.); |
23020 | let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.); |
23021 | let r = _mm_mask_getmant_round_sh::< |
23022 | _MM_MANT_NORM_P75_1P5, |
23023 | _MM_MANT_SIGN_NAN, |
23024 | _MM_FROUND_NO_EXC, |
23025 | >(src, 0, a, b); |
23026 | let e = _mm_setr_ph(20.0, 10., 11., 12., 13., 14., 15., 16.); |
23027 | assert_eq_m128h(r, e); |
23028 | let r = _mm_mask_getmant_round_sh::< |
23029 | _MM_MANT_NORM_P75_1P5, |
23030 | _MM_MANT_SIGN_NAN, |
23031 | _MM_FROUND_NO_EXC, |
23032 | >(src, 1, a, b); |
23033 | let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.); |
23034 | assert_eq_m128h(r, e); |
23035 | } |
23036 | |
23037 | #[simd_test(enable = "avx512fp16" )] |
23038 | unsafe fn test_mm_maskz_getmant_round_sh() { |
23039 | let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.); |
23040 | let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.); |
23041 | let r = _mm_maskz_getmant_round_sh::< |
23042 | _MM_MANT_NORM_P75_1P5, |
23043 | _MM_MANT_SIGN_NAN, |
23044 | _MM_FROUND_NO_EXC, |
23045 | >(0, a, b); |
23046 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
23047 | assert_eq_m128h(r, e); |
23048 | let r = _mm_maskz_getmant_round_sh::< |
23049 | _MM_MANT_NORM_P75_1P5, |
23050 | _MM_MANT_SIGN_NAN, |
23051 | _MM_FROUND_NO_EXC, |
23052 | >(1, a, b); |
23053 | let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.); |
23054 | assert_eq_m128h(r, e); |
23055 | } |
23056 | |
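// roundscale with IMM8 = 0 keeps zero fraction bits and rounds to nearest, so 1.1 rounds to 1.0.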
23057 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23058 | unsafe fn test_mm_roundscale_ph() { |
23059 | let a = _mm_set1_ph(1.1); |
23060 | let r = _mm_roundscale_ph::<0>(a); |
23061 | let e = _mm_set1_ph(1.0); |
23062 | assert_eq_m128h(r, e); |
23063 | } |
23064 | |
23065 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23066 | unsafe fn test_mm_mask_roundscale_ph() { |
23067 | let a = _mm_set1_ph(1.1); |
23068 | let src = _mm_set1_ph(2.0); |
23069 | let r = _mm_mask_roundscale_ph::<0>(src, 0b01010101, a); |
23070 | let e = _mm_set_ph(2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0); |
23071 | assert_eq_m128h(r, e); |
23072 | } |
23073 | |
23074 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23075 | unsafe fn test_mm_maskz_roundscale_ph() { |
23076 | let a = _mm_set1_ph(1.1); |
23077 | let r = _mm_maskz_roundscale_ph::<0>(0b01010101, a); |
23078 | let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0); |
23079 | assert_eq_m128h(r, e); |
23080 | } |
23081 | |
23082 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23083 | unsafe fn test_mm256_roundscale_ph() { |
23084 | let a = _mm256_set1_ph(1.1); |
23085 | let r = _mm256_roundscale_ph::<0>(a); |
23086 | let e = _mm256_set1_ph(1.0); |
23087 | assert_eq_m256h(r, e); |
23088 | } |
23089 | |
23090 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23091 | unsafe fn test_mm256_mask_roundscale_ph() { |
23092 | let a = _mm256_set1_ph(1.1); |
23093 | let src = _mm256_set1_ph(2.0); |
23094 | let r = _mm256_mask_roundscale_ph::<0>(src, 0b0101010101010101, a); |
23095 | let e = _mm256_set_ph( |
23096 | 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, |
23097 | ); |
23098 | assert_eq_m256h(r, e); |
23099 | } |
23100 | |
23101 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23102 | unsafe fn test_mm256_maskz_roundscale_ph() { |
23103 | let a = _mm256_set1_ph(1.1); |
23104 | let r = _mm256_maskz_roundscale_ph::<0>(0b0101010101010101, a); |
23105 | let e = _mm256_set_ph( |
23106 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
23107 | ); |
23108 | assert_eq_m256h(r, e); |
23109 | } |
23110 | |
23111 | #[simd_test(enable = "avx512fp16" )] |
23112 | unsafe fn test_mm512_roundscale_ph() { |
23113 | let a = _mm512_set1_ph(1.1); |
23114 | let r = _mm512_roundscale_ph::<0>(a); |
23115 | let e = _mm512_set1_ph(1.0); |
23116 | assert_eq_m512h(r, e); |
23117 | } |
23118 | |
23119 | #[simd_test(enable = "avx512fp16" )] |
23120 | unsafe fn test_mm512_mask_roundscale_ph() { |
23121 | let a = _mm512_set1_ph(1.1); |
23122 | let src = _mm512_set1_ph(2.0); |
23123 | let r = _mm512_mask_roundscale_ph::<0>(src, 0b01010101010101010101010101010101, a); |
23124 | let e = _mm512_set_ph( |
23125 | 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, |
23126 | 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, |
23127 | ); |
23128 | assert_eq_m512h(r, e); |
23129 | } |
23130 | |
23131 | #[simd_test(enable = "avx512fp16" )] |
23132 | unsafe fn test_mm512_maskz_roundscale_ph() { |
23133 | let a = _mm512_set1_ph(1.1); |
23134 | let r = _mm512_maskz_roundscale_ph::<0>(0b01010101010101010101010101010101, a); |
23135 | let e = _mm512_set_ph( |
23136 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, |
23137 | 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
23138 | ); |
23139 | assert_eq_m512h(r, e); |
23140 | } |
23141 | |
23142 | #[simd_test(enable = "avx512fp16" )] |
23143 | unsafe fn test_mm512_roundscale_round_ph() { |
23144 | let a = _mm512_set1_ph(1.1); |
23145 | let r = _mm512_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(a); |
23146 | let e = _mm512_set1_ph(1.0); |
23147 | assert_eq_m512h(r, e); |
23148 | } |
23149 | |
23150 | #[simd_test(enable = "avx512fp16" )] |
23151 | unsafe fn test_mm512_mask_roundscale_round_ph() { |
23152 | let a = _mm512_set1_ph(1.1); |
23153 | let src = _mm512_set1_ph(2.0); |
23154 | let r = _mm512_mask_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>( |
23155 | src, |
23156 | 0b01010101010101010101010101010101, |
23157 | a, |
23158 | ); |
23159 | let e = _mm512_set_ph( |
23160 | 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, |
23161 | 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, |
23162 | ); |
23163 | assert_eq_m512h(r, e); |
23164 | } |
23165 | |
23166 | #[simd_test(enable = "avx512fp16" )] |
23167 | unsafe fn test_mm512_maskz_roundscale_round_ph() { |
23168 | let a = _mm512_set1_ph(1.1); |
23169 | let r = _mm512_maskz_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>( |
23170 | 0b01010101010101010101010101010101, |
23171 | a, |
23172 | ); |
23173 | let e = _mm512_set_ph( |
23174 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, |
23175 | 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
23176 | ); |
23177 | assert_eq_m512h(r, e); |
23178 | } |
23179 | |
23180 | #[simd_test(enable = "avx512fp16" )] |
23181 | unsafe fn test_mm_roundscale_sh() { |
23182 | let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23183 | let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.); |
23184 | let r = _mm_roundscale_sh::<0>(a, b); |
23185 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23186 | assert_eq_m128h(r, e); |
23187 | } |
23188 | |
23189 | #[simd_test(enable = "avx512fp16" )] |
23190 | unsafe fn test_mm_mask_roundscale_sh() { |
23191 | let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23192 | let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.); |
23193 | let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
23194 | let r = _mm_mask_roundscale_sh::<0>(src, 0, a, b); |
23195 | let e = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); |
23196 | assert_eq_m128h(r, e); |
23197 | let r = _mm_mask_roundscale_sh::<0>(src, 1, a, b); |
23198 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23199 | assert_eq_m128h(r, e); |
23200 | } |
23201 | |
23202 | #[simd_test(enable = "avx512fp16" )] |
23203 | unsafe fn test_mm_maskz_roundscale_sh() { |
23204 | let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23205 | let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.); |
23206 | let r = _mm_maskz_roundscale_sh::<0>(0, a, b); |
23207 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
23208 | assert_eq_m128h(r, e); |
23209 | let r = _mm_maskz_roundscale_sh::<0>(1, a, b); |
23210 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23211 | assert_eq_m128h(r, e); |
23212 | } |
23213 | |
23214 | #[simd_test(enable = "avx512fp16" )] |
23215 | unsafe fn test_mm_roundscale_round_sh() { |
23216 | let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23217 | let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.); |
23218 | let r = _mm_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(a, b); |
23219 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23220 | assert_eq_m128h(r, e); |
23221 | } |
23222 | |
23223 | #[simd_test(enable = "avx512fp16" )] |
23224 | unsafe fn test_mm_mask_roundscale_round_sh() { |
23225 | let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23226 | let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.); |
23227 | let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
23228 | let r = _mm_mask_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(src, 0, a, b); |
23229 | let e = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); |
23230 | assert_eq_m128h(r, e); |
23231 | let r = _mm_mask_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(src, 1, a, b); |
23232 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23233 | assert_eq_m128h(r, e); |
23234 | } |
23235 | |
23236 | #[simd_test(enable = "avx512fp16" )] |
23237 | unsafe fn test_mm_maskz_roundscale_round_sh() { |
23238 | let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23239 | let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.); |
23240 | let r = _mm_maskz_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(0, a, b); |
23241 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
23242 | assert_eq_m128h(r, e); |
23243 | let r = _mm_maskz_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(1, a, b); |
23244 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23245 | assert_eq_m128h(r, e); |
23246 | } |
23247 | |
23248 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23249 | unsafe fn test_mm_scalef_ph() { |
23250 | let a = _mm_set1_ph(1.); |
23251 | let b = _mm_set1_ph(3.); |
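// scalef computes a * 2^floor(b) per lane, so every lane becomes 1.0 * 2^3 = 8.0.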
23252 | let r = _mm_scalef_ph(a, b); |
23253 | let e = _mm_set1_ph(8.0); |
23254 | assert_eq_m128h(r, e); |
23255 | } |
23256 | |
23257 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23258 | unsafe fn test_mm_mask_scalef_ph() { |
23259 | let a = _mm_set1_ph(1.); |
23260 | let b = _mm_set1_ph(3.); |
23261 | let src = _mm_set1_ph(2.); |
23262 | let r = _mm_mask_scalef_ph(src, 0b01010101, a, b); |
23263 | let e = _mm_set_ph(2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0); |
23264 | assert_eq_m128h(r, e); |
23265 | } |
23266 | |
23267 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23268 | unsafe fn test_mm_maskz_scalef_ph() { |
23269 | let a = _mm_set1_ph(1.); |
23270 | let b = _mm_set1_ph(3.); |
23271 | let r = _mm_maskz_scalef_ph(0b01010101, a, b); |
23272 | let e = _mm_set_ph(0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0); |
23273 | assert_eq_m128h(r, e); |
23274 | } |
23275 | |
23276 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23277 | unsafe fn test_mm256_scalef_ph() { |
23278 | let a = _mm256_set1_ph(1.); |
23279 | let b = _mm256_set1_ph(3.); |
23280 | let r = _mm256_scalef_ph(a, b); |
23281 | let e = _mm256_set1_ph(8.0); |
23282 | assert_eq_m256h(r, e); |
23283 | } |
23284 | |
23285 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23286 | unsafe fn test_mm256_mask_scalef_ph() { |
23287 | let a = _mm256_set1_ph(1.); |
23288 | let b = _mm256_set1_ph(3.); |
23289 | let src = _mm256_set1_ph(2.); |
23290 | let r = _mm256_mask_scalef_ph(src, 0b0101010101010101, a, b); |
23291 | let e = _mm256_set_ph( |
23292 | 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, |
23293 | ); |
23294 | assert_eq_m256h(r, e); |
23295 | } |
23296 | |
23297 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23298 | unsafe fn test_mm256_maskz_scalef_ph() { |
23299 | let a = _mm256_set1_ph(1.); |
23300 | let b = _mm256_set1_ph(3.); |
23301 | let r = _mm256_maskz_scalef_ph(0b0101010101010101, a, b); |
23302 | let e = _mm256_set_ph( |
23303 | 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, |
23304 | ); |
23305 | assert_eq_m256h(r, e); |
23306 | } |
23307 | |
23308 | #[simd_test(enable = "avx512fp16" )] |
23309 | unsafe fn test_mm512_scalef_ph() { |
23310 | let a = _mm512_set1_ph(1.); |
23311 | let b = _mm512_set1_ph(3.); |
23312 | let r = _mm512_scalef_ph(a, b); |
23313 | let e = _mm512_set1_ph(8.0); |
23314 | assert_eq_m512h(r, e); |
23315 | } |
23316 | |
23317 | #[simd_test(enable = "avx512fp16" )] |
23318 | unsafe fn test_mm512_mask_scalef_ph() { |
23319 | let a = _mm512_set1_ph(1.); |
23320 | let b = _mm512_set1_ph(3.); |
23321 | let src = _mm512_set1_ph(2.); |
23322 | let r = _mm512_mask_scalef_ph(src, 0b01010101010101010101010101010101, a, b); |
23323 | let e = _mm512_set_ph( |
23324 | 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, |
23325 | 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, |
23326 | ); |
23327 | assert_eq_m512h(r, e); |
23328 | } |
23329 | |
23330 | #[simd_test(enable = "avx512fp16" )] |
23331 | unsafe fn test_mm512_maskz_scalef_ph() { |
23332 | let a = _mm512_set1_ph(1.); |
23333 | let b = _mm512_set1_ph(3.); |
23334 | let r = _mm512_maskz_scalef_ph(0b01010101010101010101010101010101, a, b); |
23335 | let e = _mm512_set_ph( |
23336 | 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, |
23337 | 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, |
23338 | ); |
23339 | assert_eq_m512h(r, e); |
23340 | } |
23341 | |
23342 | #[simd_test(enable = "avx512fp16" )] |
23343 | unsafe fn test_mm512_scalef_round_ph() { |
23344 | let a = _mm512_set1_ph(1.); |
23345 | let b = _mm512_set1_ph(3.); |
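// Explicit rounding control (round-to-nearest, exceptions suppressed) does not change
// the exact result: each lane is still 1.0 * 2^3 = 8.0.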
23346 | let r = _mm512_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
23347 | let e = _mm512_set1_ph(8.0); |
23348 | assert_eq_m512h(r, e); |
23349 | } |
23350 | |
23351 | #[simd_test(enable = "avx512fp16" )] |
23352 | unsafe fn test_mm512_mask_scalef_round_ph() { |
23353 | let a = _mm512_set1_ph(1.); |
23354 | let b = _mm512_set1_ph(3.); |
23355 | let src = _mm512_set1_ph(2.); |
23356 | let r = _mm512_mask_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
23357 | src, |
23358 | 0b01010101010101010101010101010101, |
23359 | a, |
23360 | b, |
23361 | ); |
23362 | let e = _mm512_set_ph( |
23363 | 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, |
23364 | 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, |
23365 | ); |
23366 | assert_eq_m512h(r, e); |
23367 | } |
23368 | |
23369 | #[simd_test(enable = "avx512fp16" )] |
23370 | unsafe fn test_mm512_maskz_scalef_round_ph() { |
23371 | let a = _mm512_set1_ph(1.); |
23372 | let b = _mm512_set1_ph(3.); |
23373 | let r = _mm512_maskz_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
23374 | 0b01010101010101010101010101010101, |
23375 | a, |
23376 | b, |
23377 | ); |
23378 | let e = _mm512_set_ph( |
23379 | 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, |
23380 | 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, |
23381 | ); |
23382 | assert_eq_m512h(r, e); |
23383 | } |
23384 | |
23385 | #[simd_test(enable = "avx512fp16" )] |
23386 | unsafe fn test_mm_scalef_sh() { |
23387 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23388 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
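// Scalar form: lane 0 becomes a[0] * 2^floor(b[0]) = 1.0 * 2^3 = 8.0; lanes 1..=7 come from a.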
23389 | let r = _mm_scalef_sh(a, b); |
23390 | let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.); |
23391 | assert_eq_m128h(r, e); |
23392 | } |
23393 | |
23394 | #[simd_test(enable = "avx512fp16" )] |
23395 | unsafe fn test_mm_mask_scalef_sh() { |
23396 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23397 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
23398 | let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.); |
23399 | let r = _mm_mask_scalef_sh(src, 0, a, b); |
23400 | let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23401 | assert_eq_m128h(r, e); |
23402 | let r = _mm_mask_scalef_sh(src, 1, a, b); |
23403 | let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.); |
23404 | assert_eq_m128h(r, e); |
23405 | } |
23406 | |
23407 | #[simd_test(enable = "avx512fp16" )] |
23408 | unsafe fn test_mm_maskz_scalef_sh() { |
23409 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23410 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
23411 | let r = _mm_maskz_scalef_sh(0, a, b); |
23412 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
23413 | assert_eq_m128h(r, e); |
23414 | let r = _mm_maskz_scalef_sh(1, a, b); |
23415 | let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.); |
23416 | assert_eq_m128h(r, e); |
23417 | } |
23418 | |
23419 | #[simd_test(enable = "avx512fp16" )] |
23420 | unsafe fn test_mm_scalef_round_sh() { |
23421 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23422 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
23423 | let r = _mm_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
23424 | let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.); |
23425 | assert_eq_m128h(r, e); |
23426 | } |
23427 | |
23428 | #[simd_test(enable = "avx512fp16" )] |
23429 | unsafe fn test_mm_mask_scalef_round_sh() { |
23430 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23431 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
23432 | let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.); |
23433 | let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
23434 | src, 0, a, b, |
23435 | ); |
23436 | let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23437 | assert_eq_m128h(r, e); |
23438 | let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
23439 | src, 1, a, b, |
23440 | ); |
23441 | let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.); |
23442 | assert_eq_m128h(r, e); |
23443 | } |
23444 | |
23445 | #[simd_test(enable = "avx512fp16" )] |
23446 | unsafe fn test_mm_maskz_scalef_round_sh() { |
23447 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23448 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
23449 | let r = |
23450 | _mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
23451 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
23452 | assert_eq_m128h(r, e); |
23453 | let r = |
23454 | _mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
23455 | let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.); |
23456 | assert_eq_m128h(r, e); |
23457 | } |
23458 | |
23459 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23460 | unsafe fn test_mm_reduce_ph() { |
23461 | let a = _mm_set1_ph(1.25); |
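// imm8 = 16 | _MM_FROUND_TO_ZERO keeps one fraction bit and truncates:
// 1.25 - trunc(1.25 * 2) / 2 = 1.25 - 1.0 = 0.25 in every lane.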
23462 | let r = _mm_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
23463 | let e = _mm_set1_ph(0.25); |
23464 | assert_eq_m128h(r, e); |
23465 | } |
23466 | |
23467 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23468 | unsafe fn test_mm_mask_reduce_ph() { |
23469 | let a = _mm_set1_ph(1.25); |
23470 | let src = _mm_set1_ph(2.0); |
23471 | let r = _mm_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01010101, a); |
23472 | let e = _mm_set_ph(2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25); |
23473 | assert_eq_m128h(r, e); |
23474 | } |
23475 | |
23476 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23477 | unsafe fn test_mm_maskz_reduce_ph() { |
23478 | let a = _mm_set1_ph(1.25); |
23479 | let r = _mm_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01010101, a); |
23480 | let e = _mm_set_ph(0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25); |
23481 | assert_eq_m128h(r, e); |
23482 | } |
23483 | |
23484 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23485 | unsafe fn test_mm256_reduce_ph() { |
23486 | let a = _mm256_set1_ph(1.25); |
23487 | let r = _mm256_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
23488 | let e = _mm256_set1_ph(0.25); |
23489 | assert_eq_m256h(r, e); |
23490 | } |
23491 | |
23492 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23493 | unsafe fn test_mm256_mask_reduce_ph() { |
23494 | let a = _mm256_set1_ph(1.25); |
23495 | let src = _mm256_set1_ph(2.0); |
23496 | let r = _mm256_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0101010101010101, a); |
23497 | let e = _mm256_set_ph( |
23498 | 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, |
23499 | ); |
23500 | assert_eq_m256h(r, e); |
23501 | } |
23502 | |
23503 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23504 | unsafe fn test_mm256_maskz_reduce_ph() { |
23505 | let a = _mm256_set1_ph(1.25); |
23506 | let r = _mm256_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0101010101010101, a); |
23507 | let e = _mm256_set_ph( |
23508 | 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, |
23509 | ); |
23510 | assert_eq_m256h(r, e); |
23511 | } |
23512 | |
23513 | #[simd_test(enable = "avx512fp16" )] |
23514 | unsafe fn test_mm512_reduce_ph() { |
23515 | let a = _mm512_set1_ph(1.25); |
23516 | let r = _mm512_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
23517 | let e = _mm512_set1_ph(0.25); |
23518 | assert_eq_m512h(r, e); |
23519 | } |
23520 | |
23521 | #[simd_test(enable = "avx512fp16" )] |
23522 | unsafe fn test_mm512_mask_reduce_ph() { |
23523 | let a = _mm512_set1_ph(1.25); |
23524 | let src = _mm512_set1_ph(2.0); |
23525 | let r = _mm512_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>( |
23526 | src, |
23527 | 0b01010101010101010101010101010101, |
23528 | a, |
23529 | ); |
23530 | let e = _mm512_set_ph( |
23531 | 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, |
23532 | 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, |
23533 | ); |
23534 | assert_eq_m512h(r, e); |
23535 | } |
23536 | |
23537 | #[simd_test(enable = "avx512fp16" )] |
23538 | unsafe fn test_mm512_maskz_reduce_ph() { |
23539 | let a = _mm512_set1_ph(1.25); |
23540 | let r = _mm512_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>( |
23541 | 0b01010101010101010101010101010101, |
23542 | a, |
23543 | ); |
23544 | let e = _mm512_set_ph( |
23545 | 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, |
23546 | 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, |
23547 | ); |
23548 | assert_eq_m512h(r, e); |
23549 | } |
23550 | |
23551 | #[simd_test(enable = "avx512fp16" )] |
23552 | unsafe fn test_mm512_reduce_round_ph() { |
23553 | let a = _mm512_set1_ph(1.25); |
23554 | let r = _mm512_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a); |
23555 | let e = _mm512_set1_ph(0.25); |
23556 | assert_eq_m512h(r, e); |
23557 | } |
23558 | |
23559 | #[simd_test(enable = "avx512fp16" )] |
23560 | unsafe fn test_mm512_mask_reduce_round_ph() { |
23561 | let a = _mm512_set1_ph(1.25); |
23562 | let src = _mm512_set1_ph(2.0); |
23563 | let r = _mm512_mask_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
23564 | src, |
23565 | 0b01010101010101010101010101010101, |
23566 | a, |
23567 | ); |
23568 | let e = _mm512_set_ph( |
23569 | 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, |
23570 | 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, |
23571 | ); |
23572 | assert_eq_m512h(r, e); |
23573 | } |
23574 | |
23575 | #[simd_test(enable = "avx512fp16" )] |
23576 | unsafe fn test_mm512_maskz_reduce_round_ph() { |
23577 | let a = _mm512_set1_ph(1.25); |
23578 | let r = _mm512_maskz_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
23579 | 0b01010101010101010101010101010101, |
23580 | a, |
23581 | ); |
23582 | let e = _mm512_set_ph( |
23583 | 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, |
23584 | 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, |
23585 | ); |
23586 | assert_eq_m512h(r, e); |
23587 | } |
23588 | |
23589 | #[simd_test(enable = "avx512fp16" )] |
23590 | unsafe fn test_mm_reduce_sh() { |
23591 | let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); |
23592 | let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.); |
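// Scalar reduce operates on the low lane of b (1.25 -> 0.25); the upper lanes come from a.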
23593 | let r = _mm_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b); |
23594 | let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.); |
23595 | assert_eq_m128h(r, e); |
23596 | } |
23597 | |
23598 | #[simd_test(enable = "avx512fp16" )] |
23599 | unsafe fn test_mm_mask_reduce_sh() { |
23600 | let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); |
23601 | let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.); |
23602 | let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.); |
23603 | let r = _mm_mask_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0, a, b); |
23604 | let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23605 | assert_eq_m128h(r, e); |
23606 | let r = _mm_mask_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 1, a, b); |
23607 | let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.); |
23608 | assert_eq_m128h(r, e); |
23609 | } |
23610 | |
23611 | #[simd_test(enable = "avx512fp16" )] |
23612 | unsafe fn test_mm_maskz_reduce_sh() { |
23613 | let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); |
23614 | let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.); |
23615 | let r = _mm_maskz_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(0, a, b); |
23616 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
23617 | assert_eq_m128h(r, e); |
23618 | let r = _mm_maskz_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(1, a, b); |
23619 | let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.); |
23620 | assert_eq_m128h(r, e); |
23621 | } |
23622 | |
23623 | #[simd_test(enable = "avx512fp16" )] |
23624 | unsafe fn test_mm_reduce_round_sh() { |
23625 | let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); |
23626 | let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.); |
23627 | let r = _mm_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b); |
23628 | let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.); |
23629 | assert_eq_m128h(r, e); |
23630 | } |
23631 | |
23632 | #[simd_test(enable = "avx512fp16" )] |
23633 | unsafe fn test_mm_mask_reduce_round_sh() { |
23634 | let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); |
23635 | let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.); |
23636 | let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.); |
23637 | let r = _mm_mask_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
23638 | src, 0, a, b, |
23639 | ); |
23640 | let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23641 | assert_eq_m128h(r, e); |
23642 | let r = _mm_mask_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
23643 | src, 1, a, b, |
23644 | ); |
23645 | let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.); |
23646 | assert_eq_m128h(r, e); |
23647 | } |
23648 | |
23649 | #[simd_test(enable = "avx512fp16" )] |
23650 | unsafe fn test_mm_maskz_reduce_round_sh() { |
23651 | let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); |
23652 | let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.); |
23653 | let r = |
23654 | _mm_maskz_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0, a, b); |
23655 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
23656 | assert_eq_m128h(r, e); |
23657 | let r = |
23658 | _mm_maskz_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(1, a, b); |
23659 | let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.); |
23660 | assert_eq_m128h(r, e); |
23661 | } |
23662 | |
23663 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23664 | unsafe fn test_mm_reduce_add_ph() { |
23665 | let a = _mm_set1_ph(2.0); |
23666 | let r = _mm_reduce_add_ph(a); |
23667 | assert_eq!(r, 16.0); |
23668 | } |
23669 | |
23670 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23671 | unsafe fn test_mm256_reduce_add_ph() { |
23672 | let a = _mm256_set1_ph(2.0); |
23673 | let r = _mm256_reduce_add_ph(a); |
23674 | assert_eq!(r, 32.0); |
23675 | } |
23676 | |
23677 | #[simd_test(enable = "avx512fp16" )] |
23678 | unsafe fn test_mm512_reduce_add_ph() { |
23679 | let a = _mm512_set1_ph(2.0); |
23680 | let r = _mm512_reduce_add_ph(a); |
23681 | assert_eq!(r, 64.0); |
23682 | } |
23683 | |
23684 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23685 | unsafe fn test_mm_reduce_mul_ph() { |
23686 | let a = _mm_set1_ph(2.0); |
23687 | let r = _mm_reduce_mul_ph(a); |
23688 | assert_eq!(r, 256.0); |
23689 | } |
23690 | |
23691 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23692 | unsafe fn test_mm256_reduce_mul_ph() { |
23693 | let a = _mm256_set1_ph(2.0); |
23694 | let r = _mm256_reduce_mul_ph(a); |
23695 | assert_eq!(r, 65536.0); |
23696 | } |
23697 | |
23698 | #[simd_test(enable = "avx512fp16" )] |
23699 | unsafe fn test_mm512_reduce_mul_ph() { |
23700 | let a = _mm512_set1_ph(2.0); |
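// 32 lanes of 2.0 multiply to 2.0^32 = 4294967296. This is far above f16::MAX (65504),
// so in half precision both the computed product and the expected literal end up as +infinity.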
23701 | let r = _mm512_reduce_mul_ph(a); |
assert_eq!(r, 4294967296.0);
23703 | } |
23704 | |
23705 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23706 | unsafe fn test_mm_reduce_max_ph() { |
23707 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
23708 | let r = _mm_reduce_max_ph(a); |
23709 | assert_eq!(r, 8.0); |
23710 | } |
23711 | |
23712 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23713 | unsafe fn test_mm256_reduce_max_ph() { |
23714 | let a = _mm256_set_ph( |
23715 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
23716 | ); |
23717 | let r = _mm256_reduce_max_ph(a); |
23718 | assert_eq!(r, 16.0); |
23719 | } |
23720 | |
23721 | #[simd_test(enable = "avx512fp16" )] |
23722 | unsafe fn test_mm512_reduce_max_ph() { |
23723 | let a = _mm512_set_ph( |
23724 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
23725 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
23726 | 31.0, 32.0, |
23727 | ); |
23728 | let r = _mm512_reduce_max_ph(a); |
23729 | assert_eq!(r, 32.0); |
23730 | } |
23731 | |
23732 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23733 | unsafe fn test_mm_reduce_min_ph() { |
23734 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
23735 | let r = _mm_reduce_min_ph(a); |
23736 | assert_eq!(r, 1.0); |
23737 | } |
23738 | |
23739 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23740 | unsafe fn test_mm256_reduce_min_ph() { |
23741 | let a = _mm256_set_ph( |
23742 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
23743 | ); |
23744 | let r = _mm256_reduce_min_ph(a); |
23745 | assert_eq!(r, 1.0); |
23746 | } |
23747 | |
23748 | #[simd_test(enable = "avx512fp16" )] |
23749 | unsafe fn test_mm512_reduce_min_ph() { |
23750 | let a = _mm512_set_ph( |
23751 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
23752 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
23753 | 31.0, 32.0, |
23754 | ); |
23755 | let r = _mm512_reduce_min_ph(a); |
23756 | assert_eq!(r, 1.0); |
23757 | } |
23758 | |
23759 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23760 | unsafe fn test_mm_fpclass_ph_mask() { |
23761 | let a = _mm_set_ph( |
23762 | 1., |
23763 | f16::INFINITY, |
23764 | f16::NEG_INFINITY, |
23765 | 0.0, |
23766 | -0.0, |
23767 | -2.0, |
23768 | f16::NAN, |
23769 | 5.9e-8, // Denormal |
23770 | ); |
23771 | let r = _mm_fpclass_ph_mask::<0x18>(a); // infinities |
23772 | assert_eq!(r, 0b01100000); |
23773 | } |
23774 | |
23775 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23776 | unsafe fn test_mm_mask_fpclass_ph_mask() { |
23777 | let a = _mm_set_ph( |
23778 | 1., |
23779 | f16::INFINITY, |
23780 | f16::NEG_INFINITY, |
23781 | 0.0, |
23782 | -0.0, |
23783 | -2.0, |
23784 | f16::NAN, |
23785 | 5.9e-8, // Denormal |
23786 | ); |
23787 | let r = _mm_mask_fpclass_ph_mask::<0x18>(0b01010101, a); |
23788 | assert_eq!(r, 0b01000000); |
23789 | } |
23790 | |
23791 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23792 | unsafe fn test_mm256_fpclass_ph_mask() { |
23793 | let a = _mm256_set_ph( |
23794 | 1., |
23795 | f16::INFINITY, |
23796 | f16::NEG_INFINITY, |
23797 | 0.0, |
23798 | -0.0, |
23799 | -2.0, |
23800 | f16::NAN, |
23801 | 5.9e-8, // Denormal |
23802 | 1., |
23803 | f16::INFINITY, |
23804 | f16::NEG_INFINITY, |
23805 | 0.0, |
23806 | -0.0, |
23807 | -2.0, |
23808 | f16::NAN, |
23809 | 5.9e-8, // Denormal |
23810 | ); |
23811 | let r = _mm256_fpclass_ph_mask::<0x18>(a); // infinities |
23812 | assert_eq!(r, 0b0110000001100000); |
23813 | } |
23814 | |
23815 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23816 | unsafe fn test_mm256_mask_fpclass_ph_mask() { |
23817 | let a = _mm256_set_ph( |
23818 | 1., |
23819 | f16::INFINITY, |
23820 | f16::NEG_INFINITY, |
23821 | 0.0, |
23822 | -0.0, |
23823 | -2.0, |
23824 | f16::NAN, |
23825 | 5.9e-8, // Denormal |
23826 | 1., |
23827 | f16::INFINITY, |
23828 | f16::NEG_INFINITY, |
23829 | 0.0, |
23830 | -0.0, |
23831 | -2.0, |
23832 | f16::NAN, |
23833 | 5.9e-8, // Denormal |
23834 | ); |
23835 | let r = _mm256_mask_fpclass_ph_mask::<0x18>(0b0101010101010101, a); |
23836 | assert_eq!(r, 0b0100000001000000); |
23837 | } |
23838 | |
23839 | #[simd_test(enable = "avx512fp16" )] |
23840 | unsafe fn test_mm512_fpclass_ph_mask() { |
23841 | let a = _mm512_set_ph( |
23842 | 1., |
23843 | f16::INFINITY, |
23844 | f16::NEG_INFINITY, |
23845 | 0.0, |
23846 | -0.0, |
23847 | -2.0, |
23848 | f16::NAN, |
23849 | 5.9e-8, // Denormal |
23850 | 1., |
23851 | f16::INFINITY, |
23852 | f16::NEG_INFINITY, |
23853 | 0.0, |
23854 | -0.0, |
23855 | -2.0, |
23856 | f16::NAN, |
23857 | 5.9e-8, // Denormal |
23858 | 1., |
23859 | f16::INFINITY, |
23860 | f16::NEG_INFINITY, |
23861 | 0.0, |
23862 | -0.0, |
23863 | -2.0, |
23864 | f16::NAN, |
23865 | 5.9e-8, // Denormal |
23866 | 1., |
23867 | f16::INFINITY, |
23868 | f16::NEG_INFINITY, |
23869 | 0.0, |
23870 | -0.0, |
23871 | -2.0, |
23872 | f16::NAN, |
23873 | 5.9e-8, // Denormal |
23874 | ); |
23875 | let r = _mm512_fpclass_ph_mask::<0x18>(a); // infinities |
23876 | assert_eq!(r, 0b01100000011000000110000001100000); |
23877 | } |
23878 | |
23879 | #[simd_test(enable = "avx512fp16" )] |
23880 | unsafe fn test_mm512_mask_fpclass_ph_mask() { |
23881 | let a = _mm512_set_ph( |
23882 | 1., |
23883 | f16::INFINITY, |
23884 | f16::NEG_INFINITY, |
23885 | 0.0, |
23886 | -0.0, |
23887 | -2.0, |
23888 | f16::NAN, |
23889 | 5.9e-8, // Denormal |
23890 | 1., |
23891 | f16::INFINITY, |
23892 | f16::NEG_INFINITY, |
23893 | 0.0, |
23894 | -0.0, |
23895 | -2.0, |
23896 | f16::NAN, |
23897 | 5.9e-8, // Denormal |
23898 | 1., |
23899 | f16::INFINITY, |
23900 | f16::NEG_INFINITY, |
23901 | 0.0, |
23902 | -0.0, |
23903 | -2.0, |
23904 | f16::NAN, |
23905 | 5.9e-8, // Denormal |
23906 | 1., |
23907 | f16::INFINITY, |
23908 | f16::NEG_INFINITY, |
23909 | 0.0, |
23910 | -0.0, |
23911 | -2.0, |
23912 | f16::NAN, |
23913 | 5.9e-8, // Denormal |
23914 | ); |
23915 | let r = _mm512_mask_fpclass_ph_mask::<0x18>(0b01010101010101010101010101010101, a); |
23916 | assert_eq!(r, 0b01000000010000000100000001000000); |
23917 | } |
23918 | |
23919 | #[simd_test(enable = "avx512fp16" )] |
23920 | unsafe fn test_mm_fpclass_sh_mask() { |
23921 | let a = _mm_set_sh(f16::INFINITY); |
23922 | let r = _mm_fpclass_sh_mask::<0x18>(a); |
23923 | assert_eq!(r, 1); |
23924 | } |
23925 | |
23926 | #[simd_test(enable = "avx512fp16" )] |
23927 | unsafe fn test_mm_mask_fpclass_sh_mask() { |
23928 | let a = _mm_set_sh(f16::INFINITY); |
23929 | let r = _mm_mask_fpclass_sh_mask::<0x18>(0, a); |
23930 | assert_eq!(r, 0); |
23931 | let r = _mm_mask_fpclass_sh_mask::<0x18>(1, a); |
23932 | assert_eq!(r, 1); |
23933 | } |
23934 | |
23935 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23936 | unsafe fn test_mm_mask_blend_ph() { |
23937 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
23938 | let b = _mm_set_ph(-1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0); |
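// For each lane, a set mask bit selects the element from b and a clear bit selects from a.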
23939 | let r = _mm_mask_blend_ph(0b01010101, a, b); |
23940 | let e = _mm_set_ph(1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0); |
23941 | assert_eq_m128h(r, e); |
23942 | } |
23943 | |
23944 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23945 | unsafe fn test_mm256_mask_blend_ph() { |
23946 | let a = _mm256_set_ph( |
23947 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
23948 | ); |
23949 | let b = _mm256_set_ph( |
23950 | -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0, |
23951 | -14.0, -15.0, -16.0, |
23952 | ); |
23953 | let r = _mm256_mask_blend_ph(0b0101010101010101, a, b); |
23954 | let e = _mm256_set_ph( |
23955 | 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, -14.0, 15.0, |
23956 | -16.0, |
23957 | ); |
23958 | assert_eq_m256h(r, e); |
23959 | } |
23960 | |
23961 | #[simd_test(enable = "avx512fp16" )] |
23962 | unsafe fn test_mm512_mask_blend_ph() { |
23963 | let a = _mm512_set_ph( |
23964 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
23965 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
23966 | 31.0, 32.0, |
23967 | ); |
23968 | let b = _mm512_set_ph( |
23969 | -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0, |
23970 | -14.0, -15.0, -16.0, -17.0, -18.0, -19.0, -20.0, -21.0, -22.0, -23.0, -24.0, -25.0, |
23971 | -26.0, -27.0, -28.0, -29.0, -30.0, -31.0, -32.0, |
23972 | ); |
23973 | let r = _mm512_mask_blend_ph(0b01010101010101010101010101010101, a, b); |
23974 | let e = _mm512_set_ph( |
23975 | 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, -14.0, 15.0, |
23976 | -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0, 27.0, -28.0, |
23977 | 29.0, -30.0, 31.0, -32.0, |
23978 | ); |
23979 | assert_eq_m512h(r, e); |
23980 | } |
23981 | |
23982 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23983 | unsafe fn test_mm_permutex2var_ph() { |
23984 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
23985 | let b = _mm_setr_ph(9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
23986 | let idx = _mm_setr_epi16(0, 2, 4, 6, 8, 10, 12, 14); |
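// Each index selects a lane from the 16-element concatenation of a (0..=7) and b (8..=15),
// so this gathers the even lanes of a followed by the even lanes of b.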
23987 | let r = _mm_permutex2var_ph(a, idx, b); |
23988 | let e = _mm_setr_ph(1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0); |
23989 | assert_eq_m128h(r, e); |
23990 | } |
23991 | |
23992 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23993 | unsafe fn test_mm256_permutex2var_ph() { |
23994 | let a = _mm256_setr_ph( |
23995 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
23996 | ); |
23997 | let b = _mm256_setr_ph( |
23998 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
23999 | 31.0, 32.0, |
24000 | ); |
24001 | let idx = _mm256_setr_epi16(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); |
24002 | let r = _mm256_permutex2var_ph(a, idx, b); |
24003 | let e = _mm256_setr_ph( |
24004 | 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0, |
24005 | 31.0, |
24006 | ); |
24007 | assert_eq_m256h(r, e); |
24008 | } |
24009 | |
24010 | #[simd_test(enable = "avx512fp16" )] |
24011 | unsafe fn test_mm512_permutex2var_ph() { |
24012 | let a = _mm512_setr_ph( |
24013 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24014 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
24015 | 31.0, 32.0, |
24016 | ); |
24017 | let b = _mm512_setr_ph( |
24018 | 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, |
24019 | 47.0, 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0, |
24020 | 61.0, 62.0, 63.0, 64.0, |
24021 | ); |
24022 | let idx = _mm512_set_epi16( |
24023 | 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, 30, 28, 26, 24, 22, 20, |
24024 | 18, 16, 14, 12, 10, 8, 6, 4, 2, 0, |
24025 | ); |
24026 | let r = _mm512_permutex2var_ph(a, idx, b); |
24027 | let e = _mm512_setr_ph( |
24028 | 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0, |
24029 | 31.0, 33.0, 35.0, 37.0, 39.0, 41.0, 43.0, 45.0, 47.0, 49.0, 51.0, 53.0, 55.0, 57.0, |
24030 | 59.0, 61.0, 63.0, |
24031 | ); |
24032 | assert_eq_m512h(r, e); |
24033 | } |
24034 | |
24035 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24036 | unsafe fn test_mm_permutexvar_ph() { |
24037 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24038 | let idx = _mm_set_epi16(0, 2, 4, 6, 1, 3, 5, 7); |
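// Lane i of the result is a[idx[i]].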
24039 | let r = _mm_permutexvar_ph(idx, a); |
24040 | let e = _mm_setr_ph(1.0, 3.0, 5.0, 7.0, 2.0, 4.0, 6.0, 8.0); |
24041 | assert_eq_m128h(r, e); |
24042 | } |
24043 | |
24044 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24045 | unsafe fn test_mm256_permutexvar_ph() { |
24046 | let a = _mm256_set_ph( |
24047 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24048 | ); |
24049 | let idx = _mm256_set_epi16(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15); |
24050 | let r = _mm256_permutexvar_ph(idx, a); |
24051 | let e = _mm256_setr_ph( |
24052 | 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, |
24053 | ); |
24054 | assert_eq_m256h(r, e); |
24055 | } |
24056 | |
24057 | #[simd_test(enable = "avx512fp16" )] |
24058 | unsafe fn test_mm512_permutexvar_ph() { |
24059 | let a = _mm512_set_ph( |
24060 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24061 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
24062 | 31.0, 32.0, |
24063 | ); |
24064 | let idx = _mm512_set_epi16( |
24065 | 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 1, 3, 5, 7, 9, 11, 13, 15, |
24066 | 17, 19, 21, 23, 25, 27, 29, 31, |
24067 | ); |
24068 | let r = _mm512_permutexvar_ph(idx, a); |
24069 | let e = _mm512_setr_ph( |
24070 | 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0, |
24071 | 31.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 22.0, 24.0, 26.0, 28.0, |
24072 | 30.0, 32.0, |
24073 | ); |
24074 | assert_eq_m512h(r, e); |
24075 | } |
24076 | |
24077 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24078 | unsafe fn test_mm_cvtepi16_ph() { |
24079 | let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
24080 | let r = _mm_cvtepi16_ph(a); |
24081 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24082 | assert_eq_m128h(r, e); |
24083 | } |
24084 | |
24085 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24086 | unsafe fn test_mm_mask_cvtepi16_ph() { |
24087 | let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
24088 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
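// Where the mask bit is clear, the lane is copied from src instead of being converted.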
24089 | let r = _mm_mask_cvtepi16_ph(src, 0b01010101, a); |
24090 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24091 | assert_eq_m128h(r, e); |
24092 | } |
24093 | |
24094 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24095 | unsafe fn test_mm_maskz_cvtepi16_ph() { |
24096 | let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
24097 | let r = _mm_maskz_cvtepi16_ph(0b01010101, a); |
24098 | let e = _mm_set_ph(0., 2., 0., 4., 0., 6., 0., 8.); |
24099 | assert_eq_m128h(r, e); |
24100 | } |
24101 | |
24102 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24103 | unsafe fn test_mm256_cvtepi16_ph() { |
24104 | let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24105 | let r = _mm256_cvtepi16_ph(a); |
24106 | let e = _mm256_set_ph( |
24107 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24108 | ); |
24109 | assert_eq_m256h(r, e); |
24110 | } |
24111 | |
24112 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24113 | unsafe fn test_mm256_mask_cvtepi16_ph() { |
24114 | let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24115 | let src = _mm256_set_ph( |
24116 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., |
24117 | ); |
24118 | let r = _mm256_mask_cvtepi16_ph(src, 0b0101010101010101, a); |
24119 | let e = _mm256_set_ph( |
24120 | 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., |
24121 | ); |
24122 | assert_eq_m256h(r, e); |
24123 | } |
24124 | |
24125 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24126 | unsafe fn test_mm256_maskz_cvtepi16_ph() { |
24127 | let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24128 | let r = _mm256_maskz_cvtepi16_ph(0b0101010101010101, a); |
24129 | let e = _mm256_set_ph( |
24130 | 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., |
24131 | ); |
24132 | assert_eq_m256h(r, e); |
24133 | } |
24134 | |
24135 | #[simd_test(enable = "avx512fp16" )] |
24136 | unsafe fn test_mm512_cvtepi16_ph() { |
24137 | let a = _mm512_set_epi16( |
24138 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24139 | 25, 26, 27, 28, 29, 30, 31, 32, |
24140 | ); |
24141 | let r = _mm512_cvtepi16_ph(a); |
24142 | let e = _mm512_set_ph( |
24143 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24144 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
24145 | 31.0, 32.0, |
24146 | ); |
24147 | assert_eq_m512h(r, e); |
24148 | } |
24149 | |
24150 | #[simd_test(enable = "avx512fp16" )] |
24151 | unsafe fn test_mm512_mask_cvtepi16_ph() { |
24152 | let a = _mm512_set_epi16( |
24153 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24154 | 25, 26, 27, 28, 29, 30, 31, 32, |
24155 | ); |
24156 | let src = _mm512_set_ph( |
24157 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., |
24158 | 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41., |
24159 | ); |
24160 | let r = _mm512_mask_cvtepi16_ph(src, 0b01010101010101010101010101010101, a); |
24161 | let e = _mm512_set_ph( |
24162 | 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18., |
24163 | 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32., |
24164 | ); |
24165 | assert_eq_m512h(r, e); |
24166 | } |
24167 | |
24168 | #[simd_test(enable = "avx512fp16" )] |
24169 | unsafe fn test_mm512_maskz_cvtepi16_ph() { |
24170 | let a = _mm512_set_epi16( |
24171 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24172 | 25, 26, 27, 28, 29, 30, 31, 32, |
24173 | ); |
24174 | let r = _mm512_maskz_cvtepi16_ph(0b01010101010101010101010101010101, a); |
24175 | let e = _mm512_set_ph( |
24176 | 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20., |
24177 | 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32., |
24178 | ); |
24179 | assert_eq_m512h(r, e); |
24180 | } |
24181 | |
24182 | #[simd_test(enable = "avx512fp16" )] |
24183 | unsafe fn test_mm512_cvt_roundepi16_ph() { |
24184 | let a = _mm512_set_epi16( |
24185 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24186 | 25, 26, 27, 28, 29, 30, 31, 32, |
24187 | ); |
24188 | let r = _mm512_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
24189 | let e = _mm512_set_ph( |
24190 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24191 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
24192 | 31.0, 32.0, |
24193 | ); |
24194 | assert_eq_m512h(r, e); |
24195 | } |
24196 | |
24197 | #[simd_test(enable = "avx512fp16" )] |
24198 | unsafe fn test_mm512_mask_cvt_roundepi16_ph() { |
24199 | let a = _mm512_set_epi16( |
24200 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24201 | 25, 26, 27, 28, 29, 30, 31, 32, |
24202 | ); |
24203 | let src = _mm512_set_ph( |
24204 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., |
24205 | 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41., |
24206 | ); |
24207 | let r = _mm512_mask_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24208 | src, |
24209 | 0b01010101010101010101010101010101, |
24210 | a, |
24211 | ); |
24212 | let e = _mm512_set_ph( |
24213 | 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18., |
24214 | 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32., |
24215 | ); |
24216 | assert_eq_m512h(r, e); |
24217 | } |
24218 | |
24219 | #[simd_test(enable = "avx512fp16" )] |
24220 | unsafe fn test_mm512_maskz_cvt_roundepi16_ph() { |
24221 | let a = _mm512_set_epi16( |
24222 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24223 | 25, 26, 27, 28, 29, 30, 31, 32, |
24224 | ); |
24225 | let r = _mm512_maskz_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24226 | 0b01010101010101010101010101010101, |
24227 | a, |
24228 | ); |
24229 | let e = _mm512_set_ph( |
24230 | 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20., |
24231 | 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32., |
24232 | ); |
24233 | assert_eq_m512h(r, e); |
24234 | } |
24235 | |
24236 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24237 | unsafe fn test_mm_cvtepu16_ph() { |
24238 | let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
24239 | let r = _mm_cvtepu16_ph(a); |
24240 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24241 | assert_eq_m128h(r, e); |
24242 | } |
24243 | |
24244 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24245 | unsafe fn test_mm_mask_cvtepu16_ph() { |
24246 | let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
24247 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24248 | let r = _mm_mask_cvtepu16_ph(src, 0b01010101, a); |
24249 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24250 | assert_eq_m128h(r, e); |
24251 | } |
24252 | |
24253 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24254 | unsafe fn test_mm_maskz_cvtepu16_ph() { |
24255 | let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
24256 | let r = _mm_maskz_cvtepu16_ph(0b01010101, a); |
24257 | let e = _mm_set_ph(0., 2., 0., 4., 0., 6., 0., 8.); |
24258 | assert_eq_m128h(r, e); |
24259 | } |
24260 | |
24261 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24262 | unsafe fn test_mm256_cvtepu16_ph() { |
24263 | let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24264 | let r = _mm256_cvtepu16_ph(a); |
24265 | let e = _mm256_set_ph( |
24266 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24267 | ); |
24268 | assert_eq_m256h(r, e); |
24269 | } |
24270 | |
24271 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24272 | unsafe fn test_mm256_mask_cvtepu16_ph() { |
24273 | let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24274 | let src = _mm256_set_ph( |
24275 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., |
24276 | ); |
24277 | let r = _mm256_mask_cvtepu16_ph(src, 0b0101010101010101, a); |
24278 | let e = _mm256_set_ph( |
24279 | 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., |
24280 | ); |
24281 | assert_eq_m256h(r, e); |
24282 | } |
24283 | |
24284 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24285 | unsafe fn test_mm256_maskz_cvtepu16_ph() { |
24286 | let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24287 | let r = _mm256_maskz_cvtepu16_ph(0b0101010101010101, a); |
24288 | let e = _mm256_set_ph( |
24289 | 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., |
24290 | ); |
24291 | assert_eq_m256h(r, e); |
24292 | } |
24293 | |
24294 | #[simd_test(enable = "avx512fp16" )] |
24295 | unsafe fn test_mm512_cvtepu16_ph() { |
24296 | let a = _mm512_set_epi16( |
24297 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24298 | 25, 26, 27, 28, 29, 30, 31, 32, |
24299 | ); |
24300 | let r = _mm512_cvtepu16_ph(a); |
24301 | let e = _mm512_set_ph( |
24302 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24303 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
24304 | 31.0, 32.0, |
24305 | ); |
24306 | assert_eq_m512h(r, e); |
24307 | } |
24308 | |
24309 | #[simd_test(enable = "avx512fp16" )] |
24310 | unsafe fn test_mm512_mask_cvtepu16_ph() { |
24311 | let a = _mm512_set_epi16( |
24312 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24313 | 25, 26, 27, 28, 29, 30, 31, 32, |
24314 | ); |
24315 | let src = _mm512_set_ph( |
24316 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., |
24317 | 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41., |
24318 | ); |
24319 | let r = _mm512_mask_cvtepu16_ph(src, 0b01010101010101010101010101010101, a); |
24320 | let e = _mm512_set_ph( |
24321 | 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18., |
24322 | 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32., |
24323 | ); |
24324 | assert_eq_m512h(r, e); |
24325 | } |
24326 | |
24327 | #[simd_test(enable = "avx512fp16" )] |
24328 | unsafe fn test_mm512_maskz_cvtepu16_ph() { |
24329 | let a = _mm512_set_epi16( |
24330 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24331 | 25, 26, 27, 28, 29, 30, 31, 32, |
24332 | ); |
24333 | let r = _mm512_maskz_cvtepu16_ph(0b01010101010101010101010101010101, a); |
24334 | let e = _mm512_set_ph( |
24335 | 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20., |
24336 | 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32., |
24337 | ); |
24338 | assert_eq_m512h(r, e); |
24339 | } |
24340 | |
24341 | #[simd_test(enable = "avx512fp16" )] |
24342 | unsafe fn test_mm512_cvt_roundepu16_ph() { |
24343 | let a = _mm512_set_epi16( |
24344 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24345 | 25, 26, 27, 28, 29, 30, 31, 32, |
24346 | ); |
24347 | let r = _mm512_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
24348 | let e = _mm512_set_ph( |
24349 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24350 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
24351 | 31.0, 32.0, |
24352 | ); |
24353 | assert_eq_m512h(r, e); |
24354 | } |
24355 | |
24356 | #[simd_test(enable = "avx512fp16" )] |
24357 | unsafe fn test_mm512_mask_cvt_roundepu16_ph() { |
24358 | let a = _mm512_set_epi16( |
24359 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24360 | 25, 26, 27, 28, 29, 30, 31, 32, |
24361 | ); |
24362 | let src = _mm512_set_ph( |
24363 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., |
24364 | 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41., |
24365 | ); |
24366 | let r = _mm512_mask_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24367 | src, |
24368 | 0b01010101010101010101010101010101, |
24369 | a, |
24370 | ); |
24371 | let e = _mm512_set_ph( |
24372 | 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18., |
24373 | 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32., |
24374 | ); |
24375 | assert_eq_m512h(r, e); |
24376 | } |
24377 | |
24378 | #[simd_test(enable = "avx512fp16" )] |
24379 | unsafe fn test_mm512_maskz_cvt_roundepu16_ph() { |
24380 | let a = _mm512_set_epi16( |
24381 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24382 | 25, 26, 27, 28, 29, 30, 31, 32, |
24383 | ); |
24384 | let r = _mm512_maskz_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24385 | 0b01010101010101010101010101010101, |
24386 | a, |
24387 | ); |
24388 | let e = _mm512_set_ph( |
24389 | 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20., |
24390 | 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32., |
24391 | ); |
24392 | assert_eq_m512h(r, e); |
24393 | } |
24394 | |
24395 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24396 | unsafe fn test_mm_cvtepi32_ph() { |
24397 | let a = _mm_set_epi32(1, 2, 3, 4); |
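// Only four f16 lanes are produced; the upper four lanes of the 128-bit result are zeroed.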
24398 | let r = _mm_cvtepi32_ph(a); |
24399 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
24400 | assert_eq_m128h(r, e); |
24401 | } |
24402 | |
24403 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24404 | unsafe fn test_mm_mask_cvtepi32_ph() { |
24405 | let a = _mm_set_epi32(1, 2, 3, 4); |
24406 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24407 | let r = _mm_mask_cvtepi32_ph(src, 0b0101, a); |
24408 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2., 16., 4.); |
24409 | assert_eq_m128h(r, e); |
24410 | } |
24411 | |
24412 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24413 | unsafe fn test_mm_maskz_cvtepi32_ph() { |
24414 | let a = _mm_set_epi32(1, 2, 3, 4); |
24415 | let r = _mm_maskz_cvtepi32_ph(0b0101, a); |
24416 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2., 0.0, 4.); |
24417 | assert_eq_m128h(r, e); |
24418 | } |
24419 | |
24420 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24421 | unsafe fn test_mm256_cvtepi32_ph() { |
24422 | let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
24423 | let r = _mm256_cvtepi32_ph(a); |
24424 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24425 | assert_eq_m128h(r, e); |
24426 | } |
24427 | |
24428 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24429 | unsafe fn test_mm256_mask_cvtepi32_ph() { |
24430 | let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
24431 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24432 | let r = _mm256_mask_cvtepi32_ph(src, 0b01010101, a); |
24433 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24434 | assert_eq_m128h(r, e); |
24435 | } |
24436 | |
24437 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24438 | unsafe fn test_mm256_maskz_cvtepi32_ph() { |
24439 | let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
24440 | let r = _mm256_maskz_cvtepi32_ph(0b01010101, a); |
24441 | let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0); |
24442 | assert_eq_m128h(r, e); |
24443 | } |
24444 | |
24445 | #[simd_test(enable = "avx512fp16" )] |
24446 | unsafe fn test_mm512_cvtepi32_ph() { |
24447 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24448 | let r = _mm512_cvtepi32_ph(a); |
24449 | let e = _mm256_set_ph( |
24450 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24451 | ); |
24452 | assert_eq_m256h(r, e); |
24453 | } |
24454 | |
24455 | #[simd_test(enable = "avx512fp16" )] |
24456 | unsafe fn test_mm512_mask_cvtepi32_ph() { |
24457 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24458 | let src = _mm256_set_ph( |
24459 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., |
24460 | ); |
24461 | let r = _mm512_mask_cvtepi32_ph(src, 0b0101010101010101, a); |
24462 | let e = _mm256_set_ph( |
24463 | 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., |
24464 | ); |
24465 | assert_eq_m256h(r, e); |
24466 | } |
24467 | |
24468 | #[simd_test(enable = "avx512fp16" )] |
24469 | unsafe fn test_mm512_maskz_cvtepi32_ph() { |
24470 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24471 | let r = _mm512_maskz_cvtepi32_ph(0b0101010101010101, a); |
24472 | let e = _mm256_set_ph( |
24473 | 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, |
24474 | ); |
24475 | assert_eq_m256h(r, e); |
24476 | } |
24477 | |
24478 | #[simd_test(enable = "avx512fp16" )] |
24479 | unsafe fn test_mm512_cvt_roundepi32_ph() { |
24480 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24481 | let r = _mm512_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
24482 | let e = _mm256_set_ph( |
24483 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24484 | ); |
24485 | assert_eq_m256h(r, e); |
24486 | } |
24487 | |
24488 | #[simd_test(enable = "avx512fp16" )] |
24489 | unsafe fn test_mm512_mask_cvt_roundepi32_ph() { |
24490 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24491 | let src = _mm256_set_ph( |
24492 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., |
24493 | ); |
24494 | let r = _mm512_mask_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24495 | src, |
24496 | 0b0101010101010101, |
24497 | a, |
24498 | ); |
24499 | let e = _mm256_set_ph( |
24500 | 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., |
24501 | ); |
24502 | assert_eq_m256h(r, e); |
24503 | } |
24504 | |
24505 | #[simd_test(enable = "avx512fp16" )] |
24506 | unsafe fn test_mm512_maskz_cvt_roundepi32_ph() { |
24507 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24508 | let r = _mm512_maskz_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24509 | 0b0101010101010101, |
24510 | a, |
24511 | ); |
24512 | let e = _mm256_set_ph( |
24513 | 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, |
24514 | ); |
24515 | assert_eq_m256h(r, e); |
24516 | } |
24517 | |
24518 | #[simd_test(enable = "avx512fp16" )] |
24519 | unsafe fn test_mm_cvti32_sh() { |
24520 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
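// The integer argument is converted into lane 0; lanes 1..=7 are copied from a.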
24521 | let r = _mm_cvti32_sh(a, 10); |
24522 | let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24523 | assert_eq_m128h(r, e); |
24524 | } |
24525 | |
24526 | #[simd_test(enable = "avx512fp16" )] |
24527 | unsafe fn test_mm_cvt_roundi32_sh() { |
24528 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24529 | let r = _mm_cvt_roundi32_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10); |
24530 | let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24531 | assert_eq_m128h(r, e); |
24532 | } |
24533 | |
24534 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24535 | unsafe fn test_mm_cvtepu32_ph() { |
24536 | let a = _mm_set_epi32(1, 2, 3, 4); |
24537 | let r = _mm_cvtepu32_ph(a); |
24538 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
24539 | assert_eq_m128h(r, e); |
24540 | } |
24541 | |
24542 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24543 | unsafe fn test_mm_mask_cvtepu32_ph() { |
24544 | let a = _mm_set_epi32(1, 2, 3, 4); |
24545 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24546 | let r = _mm_mask_cvtepu32_ph(src, 0b0101, a); |
24547 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2., 16., 4.); |
24548 | assert_eq_m128h(r, e); |
24549 | } |
24550 | |
24551 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24552 | unsafe fn test_mm_maskz_cvtepu32_ph() { |
24553 | let a = _mm_set_epi32(1, 2, 3, 4); |
24554 | let r = _mm_maskz_cvtepu32_ph(0b0101, a); |
24555 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2., 0.0, 4.); |
24556 | assert_eq_m128h(r, e); |
24557 | } |
24558 | |
24559 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24560 | unsafe fn test_mm256_cvtepu32_ph() { |
24561 | let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
24562 | let r = _mm256_cvtepu32_ph(a); |
24563 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24564 | assert_eq_m128h(r, e); |
24565 | } |
24566 | |
24567 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24568 | unsafe fn test_mm256_mask_cvtepu32_ph() { |
24569 | let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
24570 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24571 | let r = _mm256_mask_cvtepu32_ph(src, 0b01010101, a); |
24572 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24573 | assert_eq_m128h(r, e); |
24574 | } |
24575 | |
24576 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24577 | unsafe fn test_mm256_maskz_cvtepu32_ph() { |
24578 | let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
24579 | let r = _mm256_maskz_cvtepu32_ph(0b01010101, a); |
24580 | let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0); |
24581 | assert_eq_m128h(r, e); |
24582 | } |
24583 | |
24584 | #[simd_test(enable = "avx512fp16" )] |
24585 | unsafe fn test_mm512_cvtepu32_ph() { |
24586 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24587 | let r = _mm512_cvtepu32_ph(a); |
24588 | let e = _mm256_set_ph( |
24589 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24590 | ); |
24591 | assert_eq_m256h(r, e); |
24592 | } |
24593 | |
24594 | #[simd_test(enable = "avx512fp16" )] |
24595 | unsafe fn test_mm512_mask_cvtepu32_ph() { |
24596 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24597 | let src = _mm256_set_ph( |
24598 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., |
24599 | ); |
24600 | let r = _mm512_mask_cvtepu32_ph(src, 0b0101010101010101, a); |
24601 | let e = _mm256_set_ph( |
24602 | 10., 2.0, 12., 4.0, 14., 6.0, 16., 8.0, 18., 10.0, 20., 12.0, 22., 14.0, 24., 16.0, |
24603 | ); |
24604 | assert_eq_m256h(r, e); |
24605 | } |
24606 | |
24607 | #[simd_test(enable = "avx512fp16" )] |
24608 | unsafe fn test_mm512_maskz_cvtepu32_ph() { |
24609 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24610 | let r = _mm512_maskz_cvtepu32_ph(0b0101010101010101, a); |
24611 | let e = _mm256_set_ph( |
24612 | 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, |
24613 | ); |
24614 | assert_eq_m256h(r, e); |
24615 | } |
24616 | |
24617 | #[simd_test(enable = "avx512fp16" )] |
24618 | unsafe fn test_mm512_cvt_roundepu32_ph() { |
24619 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24620 | let r = _mm512_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
24621 | let e = _mm256_set_ph( |
24622 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24623 | ); |
24624 | assert_eq_m256h(r, e); |
24625 | } |
24626 | |
24627 | #[simd_test(enable = "avx512fp16" )] |
24628 | unsafe fn test_mm512_mask_cvt_roundepu32_ph() { |
24629 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24630 | let src = _mm256_set_ph( |
24631 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., |
24632 | ); |
24633 | let r = _mm512_mask_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24634 | src, |
24635 | 0b0101010101010101, |
24636 | a, |
24637 | ); |
24638 | let e = _mm256_set_ph( |
24639 | 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0, |
24640 | 16.0, |
24641 | ); |
24642 | assert_eq_m256h(r, e); |
24643 | } |
24644 | |
24645 | #[simd_test(enable = "avx512fp16" )] |
24646 | unsafe fn test_mm512_maskz_cvt_roundepu32_ph() { |
24647 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24648 | let r = _mm512_maskz_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24649 | 0b0101010101010101, |
24650 | a, |
24651 | ); |
24652 | let e = _mm256_set_ph( |
24653 | 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, |
24654 | ); |
24655 | assert_eq_m256h(r, e); |
24656 | } |
24657 | |
24658 | #[simd_test(enable = "avx512fp16" )] |
24659 | unsafe fn test_mm_cvtu32_sh() { |
24660 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24661 | let r = _mm_cvtu32_sh(a, 10); |
24662 | let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24663 | assert_eq_m128h(r, e); |
24664 | } |
24665 | |
24666 | #[simd_test(enable = "avx512fp16" )] |
24667 | unsafe fn test_mm_cvt_roundu32_sh() { |
24668 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24669 | let r = _mm_cvt_roundu32_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10); |
24670 | let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24671 | assert_eq_m128h(r, e); |
24672 | } |
24673 | |
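// 64-bit integer sources yield one f16 per element, so the 128-bit and 256-bit forms fill
// only the low two and four lanes of the `__m128h` result respectively; the remaining
// lanes are zeroed, as the expected vectors below show.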
24674 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24675 | unsafe fn test_mm_cvtepi64_ph() { |
24676 | let a = _mm_set_epi64x(1, 2); |
24677 | let r = _mm_cvtepi64_ph(a); |
24678 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
24679 | assert_eq_m128h(r, e); |
24680 | } |
24681 | |
24682 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24683 | unsafe fn test_mm_mask_cvtepi64_ph() { |
24684 | let a = _mm_set_epi64x(1, 2); |
24685 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24686 | let r = _mm_mask_cvtepi64_ph(src, 0b01, a); |
24687 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.); |
24688 | assert_eq_m128h(r, e); |
24689 | } |
24690 | |
24691 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24692 | unsafe fn test_mm_maskz_cvtepi64_ph() { |
24693 | let a = _mm_set_epi64x(1, 2); |
24694 | let r = _mm_maskz_cvtepi64_ph(0b01, a); |
24695 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.); |
24696 | assert_eq_m128h(r, e); |
24697 | } |
24698 | |
24699 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24700 | unsafe fn test_mm256_cvtepi64_ph() { |
24701 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
24702 | let r = _mm256_cvtepi64_ph(a); |
24703 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
24704 | assert_eq_m128h(r, e); |
24705 | } |
24706 | |
24707 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24708 | unsafe fn test_mm256_mask_cvtepi64_ph() { |
24709 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
24710 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24711 | let r = _mm256_mask_cvtepi64_ph(src, 0b0101, a); |
24712 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0); |
24713 | assert_eq_m128h(r, e); |
24714 | } |
24715 | |
24716 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24717 | unsafe fn test_mm256_maskz_cvtepi64_ph() { |
24718 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
24719 | let r = _mm256_maskz_cvtepi64_ph(0b0101, a); |
24720 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0); |
24721 | assert_eq_m128h(r, e); |
24722 | } |
24723 | |
24724 | #[simd_test(enable = "avx512fp16" )] |
24725 | unsafe fn test_mm512_cvtepi64_ph() { |
24726 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24727 | let r = _mm512_cvtepi64_ph(a); |
24728 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24729 | assert_eq_m128h(r, e); |
24730 | } |
24731 | |
24732 | #[simd_test(enable = "avx512fp16" )] |
24733 | unsafe fn test_mm512_mask_cvtepi64_ph() { |
24734 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24735 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24736 | let r = _mm512_mask_cvtepi64_ph(src, 0b01010101, a); |
24737 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24738 | assert_eq_m128h(r, e); |
24739 | } |
24740 | |
24741 | #[simd_test(enable = "avx512fp16" )] |
24742 | unsafe fn test_mm512_maskz_cvtepi64_ph() { |
24743 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24744 | let r = _mm512_maskz_cvtepi64_ph(0b01010101, a); |
24745 | let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.); |
24746 | assert_eq_m128h(r, e); |
24747 | } |
24748 | |
24749 | #[simd_test(enable = "avx512fp16" )] |
24750 | unsafe fn test_mm512_cvt_roundepi64_ph() { |
24751 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24752 | let r = _mm512_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
24753 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24754 | assert_eq_m128h(r, e); |
24755 | } |
24756 | |
24757 | #[simd_test(enable = "avx512fp16" )] |
24758 | unsafe fn test_mm512_mask_cvt_roundepi64_ph() { |
24759 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24760 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24761 | let r = _mm512_mask_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24762 | src, 0b01010101, a, |
24763 | ); |
24764 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24765 | assert_eq_m128h(r, e); |
24766 | } |
24767 | |
24768 | #[simd_test(enable = "avx512fp16" )] |
24769 | unsafe fn test_mm512_maskz_cvt_roundepi64_ph() { |
24770 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24771 | let r = _mm512_maskz_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24772 | 0b01010101, a, |
24773 | ); |
24774 | let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.); |
24775 | assert_eq_m128h(r, e); |
24776 | } |
24777 | |
24778 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24779 | unsafe fn test_mm_cvtepu64_ph() { |
24780 | let a = _mm_set_epi64x(1, 2); |
24781 | let r = _mm_cvtepu64_ph(a); |
24782 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
24783 | assert_eq_m128h(r, e); |
24784 | } |
24785 | |
24786 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24787 | unsafe fn test_mm_mask_cvtepu64_ph() { |
24788 | let a = _mm_set_epi64x(1, 2); |
24789 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24790 | let r = _mm_mask_cvtepu64_ph(src, 0b01, a); |
24791 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.); |
24792 | assert_eq_m128h(r, e); |
24793 | } |
24794 | |
24795 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24796 | unsafe fn test_mm_maskz_cvtepu64_ph() { |
24797 | let a = _mm_set_epi64x(1, 2); |
24798 | let r = _mm_maskz_cvtepu64_ph(0b01, a); |
24799 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0); |
24800 | assert_eq_m128h(r, e); |
24801 | } |
24802 | |
24803 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24804 | unsafe fn test_mm256_cvtepu64_ph() { |
24805 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
24806 | let r = _mm256_cvtepu64_ph(a); |
24807 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
24808 | assert_eq_m128h(r, e); |
24809 | } |
24810 | |
24811 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24812 | unsafe fn test_mm256_mask_cvtepu64_ph() { |
24813 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
24814 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24815 | let r = _mm256_mask_cvtepu64_ph(src, 0b0101, a); |
24816 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0); |
24817 | assert_eq_m128h(r, e); |
24818 | } |
24819 | |
24820 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24821 | unsafe fn test_mm256_maskz_cvtepu64_ph() { |
24822 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
24823 | let r = _mm256_maskz_cvtepu64_ph(0b0101, a); |
24824 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0); |
24825 | assert_eq_m128h(r, e); |
24826 | } |
24827 | |
24828 | #[simd_test(enable = "avx512fp16" )] |
24829 | unsafe fn test_mm512_cvtepu64_ph() { |
24830 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24831 | let r = _mm512_cvtepu64_ph(a); |
24832 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24833 | assert_eq_m128h(r, e); |
24834 | } |
24835 | |
24836 | #[simd_test(enable = "avx512fp16" )] |
24837 | unsafe fn test_mm512_mask_cvtepu64_ph() { |
24838 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24839 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24840 | let r = _mm512_mask_cvtepu64_ph(src, 0b01010101, a); |
24841 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24842 | assert_eq_m128h(r, e); |
24843 | } |
24844 | |
24845 | #[simd_test(enable = "avx512fp16" )] |
24846 | unsafe fn test_mm512_maskz_cvtepu64_ph() { |
24847 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24848 | let r = _mm512_maskz_cvtepu64_ph(0b01010101, a); |
24849 | let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.); |
24850 | assert_eq_m128h(r, e); |
24851 | } |
24852 | |
24853 | #[simd_test(enable = "avx512fp16" )] |
24854 | unsafe fn test_mm512_cvt_roundepu64_ph() { |
24855 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24856 | let r = _mm512_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
24857 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24858 | assert_eq_m128h(r, e); |
24859 | } |
24860 | |
24861 | #[simd_test(enable = "avx512fp16" )] |
24862 | unsafe fn test_mm512_mask_cvt_roundepu64_ph() { |
24863 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24864 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24865 | let r = _mm512_mask_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24866 | src, 0b01010101, a, |
24867 | ); |
24868 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24869 | assert_eq_m128h(r, e); |
24870 | } |
24871 | |
24872 | #[simd_test(enable = "avx512fp16" )] |
24873 | unsafe fn test_mm512_maskz_cvt_roundepu64_ph() { |
24874 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24875 | let r = _mm512_maskz_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24876 | 0b01010101, a, |
24877 | ); |
24878 | let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.); |
24879 | assert_eq_m128h(r, e); |
24880 | } |
24881 | |
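// The `x` in `_mm*_cvtxps_ph` marks the AVX512-FP16 f32 -> f16 conversions that return an
// `__m128h`/`__m256h` directly, as opposed to the older F16C `_mm*_cvtps_ph` intrinsics,
// which take a rounding immediate and return the raw f16 bit patterns in an integer vector.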
24882 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24883 | unsafe fn test_mm_cvtxps_ph() { |
24884 | let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0); |
24885 | let r = _mm_cvtxps_ph(a); |
24886 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
24887 | assert_eq_m128h(r, e); |
24888 | } |
24889 | |
24890 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24891 | unsafe fn test_mm_mask_cvtxps_ph() { |
24892 | let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0); |
24893 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24894 | let r = _mm_mask_cvtxps_ph(src, 0b0101, a); |
24895 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16., 4.0); |
24896 | assert_eq_m128h(r, e); |
24897 | } |
24898 | |
24899 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24900 | unsafe fn test_mm_maskz_cvtxps_ph() { |
24901 | let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0); |
24902 | let r = _mm_maskz_cvtxps_ph(0b0101, a); |
24903 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0); |
24904 | assert_eq_m128h(r, e); |
24905 | } |
24906 | |
24907 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24908 | unsafe fn test_mm256_cvtxps_ph() { |
24909 | let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24910 | let r = _mm256_cvtxps_ph(a); |
24911 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24912 | assert_eq_m128h(r, e); |
24913 | } |
24914 | |
24915 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24916 | unsafe fn test_mm256_mask_cvtxps_ph() { |
24917 | let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24918 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24919 | let r = _mm256_mask_cvtxps_ph(src, 0b01010101, a); |
24920 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24921 | assert_eq_m128h(r, e); |
24922 | } |
24923 | |
24924 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24925 | unsafe fn test_mm256_maskz_cvtxps_ph() { |
24926 | let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24927 | let r = _mm256_maskz_cvtxps_ph(0b01010101, a); |
24928 | let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0); |
24929 | assert_eq_m128h(r, e); |
24930 | } |
24931 | |
24932 | #[simd_test(enable = "avx512fp16" )] |
24933 | unsafe fn test_mm512_cvtxps_ph() { |
24934 | let a = _mm512_set_ps( |
24935 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24936 | ); |
24937 | let r = _mm512_cvtxps_ph(a); |
24938 | let e = _mm256_set_ph( |
24939 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24940 | ); |
24941 | assert_eq_m256h(r, e); |
24942 | } |
24943 | |
24944 | #[simd_test(enable = "avx512fp16" )] |
24945 | unsafe fn test_mm512_mask_cvtxps_ph() { |
24946 | let a = _mm512_set_ps( |
24947 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24948 | ); |
24949 | let src = _mm256_set_ph( |
24950 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., |
24951 | ); |
24952 | let r = _mm512_mask_cvtxps_ph(src, 0b0101010101010101, a); |
24953 | let e = _mm256_set_ph( |
24954 | 10., 2.0, 12., 4.0, 14., 6.0, 16., 8.0, 18., 10.0, 20., 12.0, 22., 14.0, 24., 16.0, |
24955 | ); |
24956 | assert_eq_m256h(r, e); |
24957 | } |
24958 | |
24959 | #[simd_test(enable = "avx512fp16" )] |
24960 | unsafe fn test_mm512_maskz_cvtxps_ph() { |
24961 | let a = _mm512_set_ps( |
24962 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24963 | ); |
24964 | let r = _mm512_maskz_cvtxps_ph(0b0101010101010101, a); |
24965 | let e = _mm256_set_ph( |
24966 | 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, |
24967 | ); |
24968 | assert_eq_m256h(r, e); |
24969 | } |
24970 | |
24971 | #[simd_test(enable = "avx512fp16" )] |
24972 | unsafe fn test_mm512_cvtx_roundps_ph() { |
24973 | let a = _mm512_set_ps( |
24974 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24975 | ); |
24976 | let r = _mm512_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
24977 | let e = _mm256_set_ph( |
24978 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24979 | ); |
24980 | assert_eq_m256h(r, e); |
24981 | } |
24982 | |
24983 | #[simd_test(enable = "avx512fp16" )] |
24984 | unsafe fn test_mm512_mask_cvtx_roundps_ph() { |
24985 | let a = _mm512_set_ps( |
24986 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24987 | ); |
24988 | let src = _mm256_set_ph( |
24989 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., |
24990 | ); |
24991 | let r = _mm512_mask_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24992 | src, |
24993 | 0b0101010101010101, |
24994 | a, |
24995 | ); |
24996 | let e = _mm256_set_ph( |
24997 | 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0, |
24998 | 16.0, |
24999 | ); |
25000 | assert_eq_m256h(r, e); |
25001 | } |
25002 | |
25003 | #[simd_test(enable = "avx512fp16" )] |
25004 | unsafe fn test_mm512_maskz_cvtx_roundps_ph() { |
25005 | let a = _mm512_set_ps( |
25006 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25007 | ); |
25008 | let r = _mm512_maskz_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25009 | 0b0101010101010101, |
25010 | a, |
25011 | ); |
25012 | let e = _mm256_set_ph( |
25013 | 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, |
25014 | ); |
25015 | assert_eq_m256h(r, e); |
25016 | } |
25017 | |
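// Scalar conversions only replace lane 0 of the destination; the upper seven f16 lanes are
// copied from `a`. The mask variants use a single mask bit to choose between the converted
// value, lane 0 of `src` (`mask`), or zero (`maskz`), which is why each test checks both
// mask values.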
25018 | #[simd_test(enable = "avx512fp16" )] |
25019 | unsafe fn test_mm_cvtss_sh() { |
25020 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25021 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
25022 | let r = _mm_cvtss_sh(a, b); |
25023 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25024 | assert_eq_m128h(r, e); |
25025 | } |
25026 | |
25027 | #[simd_test(enable = "avx512fp16" )] |
25028 | unsafe fn test_mm_mask_cvtss_sh() { |
25029 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25030 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
25031 | let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.); |
25032 | let r = _mm_mask_cvtss_sh(src, 0, a, b); |
25033 | let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.); |
25034 | assert_eq_m128h(r, e); |
25035 | let r = _mm_mask_cvtss_sh(src, 1, a, b); |
25036 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25037 | assert_eq_m128h(r, e); |
25038 | } |
25039 | |
25040 | #[simd_test(enable = "avx512fp16" )] |
25041 | unsafe fn test_mm_maskz_cvtss_sh() { |
25042 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25043 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
25044 | let r = _mm_maskz_cvtss_sh(0, a, b); |
25045 | let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.); |
25046 | assert_eq_m128h(r, e); |
25047 | let r = _mm_maskz_cvtss_sh(1, a, b); |
25048 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25049 | assert_eq_m128h(r, e); |
25050 | } |
25051 | |
25052 | #[simd_test(enable = "avx512fp16" )] |
25053 | unsafe fn test_mm_cvt_roundss_sh() { |
25054 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25055 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
25056 | let r = _mm_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
25057 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25058 | assert_eq_m128h(r, e); |
25059 | } |
25060 | |
25061 | #[simd_test(enable = "avx512fp16" )] |
25062 | unsafe fn test_mm_mask_cvt_roundss_sh() { |
25063 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25064 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
25065 | let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.); |
25066 | let r = _mm_mask_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25067 | src, 0, a, b, |
25068 | ); |
25069 | let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.); |
25070 | assert_eq_m128h(r, e); |
25071 | let r = _mm_mask_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25072 | src, 1, a, b, |
25073 | ); |
25074 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25075 | assert_eq_m128h(r, e); |
25076 | } |
25077 | |
25078 | #[simd_test(enable = "avx512fp16" )] |
25079 | unsafe fn test_mm_maskz_cvt_roundss_sh() { |
25080 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25081 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
25082 | let r = |
25083 | _mm_maskz_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
25084 | let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.); |
25085 | assert_eq_m128h(r, e); |
25086 | let r = |
25087 | _mm_maskz_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
25088 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25089 | assert_eq_m128h(r, e); |
25090 | } |
25091 | |
25092 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25093 | unsafe fn test_mm_cvtpd_ph() { |
25094 | let a = _mm_set_pd(1.0, 2.0); |
25095 | let r = _mm_cvtpd_ph(a); |
25096 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
25097 | assert_eq_m128h(r, e); |
25098 | } |
25099 | |
25100 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25101 | unsafe fn test_mm_mask_cvtpd_ph() { |
25102 | let a = _mm_set_pd(1.0, 2.0); |
25103 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25104 | let r = _mm_mask_cvtpd_ph(src, 0b01, a); |
25105 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.); |
25106 | assert_eq_m128h(r, e); |
25107 | } |
25108 | |
25109 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25110 | unsafe fn test_mm_maskz_cvtpd_ph() { |
25111 | let a = _mm_set_pd(1.0, 2.0); |
25112 | let r = _mm_maskz_cvtpd_ph(0b01, a); |
25113 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0); |
25114 | assert_eq_m128h(r, e); |
25115 | } |
25116 | |
25117 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25118 | unsafe fn test_mm256_cvtpd_ph() { |
25119 | let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0); |
25120 | let r = _mm256_cvtpd_ph(a); |
25121 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
25122 | assert_eq_m128h(r, e); |
25123 | } |
25124 | |
25125 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25126 | unsafe fn test_mm256_mask_cvtpd_ph() { |
25127 | let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0); |
25128 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25129 | let r = _mm256_mask_cvtpd_ph(src, 0b0101, a); |
25130 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0); |
25131 | assert_eq_m128h(r, e); |
25132 | } |
25133 | |
25134 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25135 | unsafe fn test_mm256_maskz_cvtpd_ph() { |
25136 | let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0); |
25137 | let r = _mm256_maskz_cvtpd_ph(0b0101, a); |
25138 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0); |
25139 | assert_eq_m128h(r, e); |
25140 | } |
25141 | |
25142 | #[simd_test(enable = "avx512fp16" )] |
25143 | unsafe fn test_mm512_cvtpd_ph() { |
25144 | let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25145 | let r = _mm512_cvtpd_ph(a); |
25146 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25147 | assert_eq_m128h(r, e); |
25148 | } |
25149 | |
25150 | #[simd_test(enable = "avx512fp16" )] |
25151 | unsafe fn test_mm512_mask_cvtpd_ph() { |
25152 | let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25153 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25154 | let r = _mm512_mask_cvtpd_ph(src, 0b01010101, a); |
25155 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
25156 | assert_eq_m128h(r, e); |
25157 | } |
25158 | |
25159 | #[simd_test(enable = "avx512fp16" )] |
25160 | unsafe fn test_mm512_maskz_cvtpd_ph() { |
25161 | let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25162 | let r = _mm512_maskz_cvtpd_ph(0b01010101, a); |
25163 | let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.); |
25164 | assert_eq_m128h(r, e); |
25165 | } |
25166 | |
25167 | #[simd_test(enable = "avx512fp16" )] |
25168 | unsafe fn test_mm512_cvt_roundpd_ph() { |
25169 | let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25170 | let r = _mm512_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
25171 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25172 | assert_eq_m128h(r, e); |
25173 | } |
25174 | |
25175 | #[simd_test(enable = "avx512fp16" )] |
25176 | unsafe fn test_mm512_mask_cvt_roundpd_ph() { |
25177 | let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25178 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25179 | let r = _mm512_mask_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25180 | src, 0b01010101, a, |
25181 | ); |
25182 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
25183 | assert_eq_m128h(r, e); |
25184 | } |
25185 | |
25186 | #[simd_test(enable = "avx512fp16" )] |
25187 | unsafe fn test_mm512_maskz_cvt_roundpd_ph() { |
25188 | let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25189 | let r = _mm512_maskz_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25190 | 0b01010101, a, |
25191 | ); |
25192 | let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.); |
25193 | assert_eq_m128h(r, e); |
25194 | } |
25195 | |
25196 | #[simd_test(enable = "avx512fp16" )] |
25197 | unsafe fn test_mm_cvtsd_sh() { |
25198 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25199 | let b = _mm_setr_pd(1.0, 2.0); |
25200 | let r = _mm_cvtsd_sh(a, b); |
25201 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25202 | assert_eq_m128h(r, e); |
25203 | } |
25204 | |
25205 | #[simd_test(enable = "avx512fp16" )] |
25206 | unsafe fn test_mm_mask_cvtsd_sh() { |
25207 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25208 | let b = _mm_setr_pd(1.0, 2.0); |
25209 | let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.); |
25210 | let r = _mm_mask_cvtsd_sh(src, 0, a, b); |
25211 | let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.); |
25212 | assert_eq_m128h(r, e); |
25213 | let r = _mm_mask_cvtsd_sh(src, 1, a, b); |
25214 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25215 | assert_eq_m128h(r, e); |
25216 | } |
25217 | |
25218 | #[simd_test(enable = "avx512fp16" )] |
25219 | unsafe fn test_mm_maskz_cvtsd_sh() { |
25220 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25221 | let b = _mm_setr_pd(1.0, 2.0); |
25222 | let r = _mm_maskz_cvtsd_sh(0, a, b); |
25223 | let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.); |
25224 | assert_eq_m128h(r, e); |
25225 | let r = _mm_maskz_cvtsd_sh(1, a, b); |
25226 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25227 | assert_eq_m128h(r, e); |
25228 | } |
25229 | |
25230 | #[simd_test(enable = "avx512fp16" )] |
25231 | unsafe fn test_mm_cvt_roundsd_sh() { |
25232 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25233 | let b = _mm_setr_pd(1.0, 2.0); |
25234 | let r = _mm_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
25235 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25236 | assert_eq_m128h(r, e); |
25237 | } |
25238 | |
25239 | #[simd_test(enable = "avx512fp16" )] |
25240 | unsafe fn test_mm_mask_cvt_roundsd_sh() { |
25241 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25242 | let b = _mm_setr_pd(1.0, 2.0); |
25243 | let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.); |
25244 | let r = _mm_mask_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25245 | src, 0, a, b, |
25246 | ); |
25247 | let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.); |
25248 | assert_eq_m128h(r, e); |
25249 | let r = _mm_mask_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25250 | src, 1, a, b, |
25251 | ); |
25252 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25253 | assert_eq_m128h(r, e); |
25254 | } |
25255 | |
25256 | #[simd_test(enable = "avx512fp16" )] |
25257 | unsafe fn test_mm_maskz_cvt_roundsd_sh() { |
25258 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25259 | let b = _mm_setr_pd(1.0, 2.0); |
25260 | let r = |
25261 | _mm_maskz_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
25262 | let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.); |
25263 | assert_eq_m128h(r, e); |
25264 | let r = |
25265 | _mm_maskz_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
25266 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25267 | assert_eq_m128h(r, e); |
25268 | } |
25269 | |
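// From here on the tests cover f16 -> integer conversions. The `cvtph` family rounds
// according to the current (or explicitly supplied) rounding mode, while the `cvttph`
// family further below truncates toward zero; the inputs are whole numbers, so both
// families are expected to produce identical integer vectors in these tests.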
25270 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25271 | unsafe fn test_mm_cvtph_epi16() { |
25272 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25273 | let r = _mm_cvtph_epi16(a); |
25274 | let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
25275 | assert_eq_m128i(r, e); |
25276 | } |
25277 | |
25278 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25279 | unsafe fn test_mm_mask_cvtph_epi16() { |
25280 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25281 | let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17); |
25282 | let r = _mm_mask_cvtph_epi16(src, 0b01010101, a); |
25283 | let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8); |
25284 | assert_eq_m128i(r, e); |
25285 | } |
25286 | |
25287 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25288 | unsafe fn test_mm_maskz_cvtph_epi16() { |
25289 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25290 | let r = _mm_maskz_cvtph_epi16(0b01010101, a); |
25291 | let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8); |
25292 | assert_eq_m128i(r, e); |
25293 | } |
25294 | |
25295 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25296 | unsafe fn test_mm256_cvtph_epi16() { |
25297 | let a = _mm256_set_ph( |
25298 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25299 | ); |
25300 | let r = _mm256_cvtph_epi16(a); |
25301 | let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
25302 | assert_eq_m256i(r, e); |
25303 | } |
25304 | |
25305 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25306 | unsafe fn test_mm256_mask_cvtph_epi16() { |
25307 | let a = _mm256_set_ph( |
25308 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25309 | ); |
25310 | let src = _mm256_set_epi16( |
25311 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
25312 | ); |
25313 | let r = _mm256_mask_cvtph_epi16(src, 0b0101010101010101, a); |
25314 | let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
25315 | assert_eq_m256i(r, e); |
25316 | } |
25317 | |
25318 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25319 | unsafe fn test_mm256_maskz_cvtph_epi16() { |
25320 | let a = _mm256_set_ph( |
25321 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25322 | ); |
25323 | let r = _mm256_maskz_cvtph_epi16(0b0101010101010101, a); |
25324 | let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
25325 | assert_eq_m256i(r, e); |
25326 | } |
25327 | |
25328 | #[simd_test(enable = "avx512fp16" )] |
25329 | unsafe fn test_mm512_cvtph_epi16() { |
25330 | let a = _mm512_set_ph( |
25331 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25332 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25333 | 31.0, 32.0, |
25334 | ); |
25335 | let r = _mm512_cvtph_epi16(a); |
25336 | let e = _mm512_set_epi16( |
25337 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
25338 | 25, 26, 27, 28, 29, 30, 31, 32, |
25339 | ); |
25340 | assert_eq_m512i(r, e); |
25341 | } |
25342 | |
25343 | #[simd_test(enable = "avx512fp16" )] |
25344 | unsafe fn test_mm512_mask_cvtph_epi16() { |
25345 | let a = _mm512_set_ph( |
25346 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25347 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25348 | 31.0, 32.0, |
25349 | ); |
25350 | let src = _mm512_set_epi16( |
25351 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
25352 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, |
25353 | ); |
25354 | let r = _mm512_mask_cvtph_epi16(src, 0b01010101010101010101010101010101, a); |
25355 | let e = _mm512_set_epi16( |
25356 | 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, |
25357 | 24, 34, 26, 36, 28, 38, 30, 40, 32, |
25358 | ); |
25359 | assert_eq_m512i(r, e); |
25360 | } |
25361 | |
25362 | #[simd_test(enable = "avx512fp16" )] |
25363 | unsafe fn test_mm512_maskz_cvtph_epi16() { |
25364 | let a = _mm512_set_ph( |
25365 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25366 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25367 | 31.0, 32.0, |
25368 | ); |
25369 | let r = _mm512_maskz_cvtph_epi16(0b01010101010101010101010101010101, a); |
25370 | let e = _mm512_set_epi16( |
25371 | 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, |
25372 | 0, 28, 0, 30, 0, 32, |
25373 | ); |
25374 | assert_eq_m512i(r, e); |
25375 | } |
25376 | |
25377 | #[simd_test(enable = "avx512fp16" )] |
25378 | unsafe fn test_mm512_cvt_roundph_epi16() { |
25379 | let a = _mm512_set_ph( |
25380 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25381 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25382 | 31.0, 32.0, |
25383 | ); |
25384 | let r = _mm512_cvt_roundph_epi16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
25385 | let e = _mm512_set_epi16( |
25386 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
25387 | 25, 26, 27, 28, 29, 30, 31, 32, |
25388 | ); |
25389 | assert_eq_m512i(r, e); |
25390 | } |
25391 | |
25392 | #[simd_test(enable = "avx512fp16" )] |
25393 | unsafe fn test_mm512_mask_cvt_roundph_epi16() { |
25394 | let a = _mm512_set_ph( |
25395 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25396 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25397 | 31.0, 32.0, |
25398 | ); |
25399 | let src = _mm512_set_epi16( |
25400 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
25401 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, |
25402 | ); |
25403 | let r = _mm512_mask_cvt_roundph_epi16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25404 | src, |
25405 | 0b01010101010101010101010101010101, |
25406 | a, |
25407 | ); |
25408 | let e = _mm512_set_epi16( |
25409 | 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, |
25410 | 24, 34, 26, 36, 28, 38, 30, 40, 32, |
25411 | ); |
25412 | assert_eq_m512i(r, e); |
25413 | } |
25414 | |
25415 | #[simd_test(enable = "avx512fp16" )] |
25416 | unsafe fn test_mm512_maskz_cvt_roundph_epi16() { |
25417 | let a = _mm512_set_ph( |
25418 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25419 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25420 | 31.0, 32.0, |
25421 | ); |
25422 | let r = _mm512_maskz_cvt_roundph_epi16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25423 | 0b01010101010101010101010101010101, |
25424 | a, |
25425 | ); |
25426 | let e = _mm512_set_epi16( |
25427 | 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, |
25428 | 0, 28, 0, 30, 0, 32, |
25429 | ); |
25430 | assert_eq_m512i(r, e); |
25431 | } |
25432 | |
25433 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25434 | unsafe fn test_mm_cvtph_epu16() { |
25435 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25436 | let r = _mm_cvtph_epu16(a); |
25437 | let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
25438 | assert_eq_m128i(r, e); |
25439 | } |
25440 | |
25441 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25442 | unsafe fn test_mm_mask_cvtph_epu16() { |
25443 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25444 | let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17); |
25445 | let r = _mm_mask_cvtph_epu16(src, 0b01010101, a); |
25446 | let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8); |
25447 | assert_eq_m128i(r, e); |
25448 | } |
25449 | |
25450 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25451 | unsafe fn test_mm_maskz_cvtph_epu16() { |
25452 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25453 | let r = _mm_maskz_cvtph_epu16(0b01010101, a); |
25454 | let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8); |
25455 | assert_eq_m128i(r, e); |
25456 | } |
25457 | |
25458 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25459 | unsafe fn test_mm256_cvtph_epu16() { |
25460 | let a = _mm256_set_ph( |
25461 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25462 | ); |
25463 | let r = _mm256_cvtph_epu16(a); |
25464 | let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
25465 | assert_eq_m256i(r, e); |
25466 | } |
25467 | |
25468 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25469 | unsafe fn test_mm256_mask_cvtph_epu16() { |
25470 | let a = _mm256_set_ph( |
25471 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25472 | ); |
25473 | let src = _mm256_set_epi16( |
25474 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
25475 | ); |
25476 | let r = _mm256_mask_cvtph_epu16(src, 0b0101010101010101, a); |
25477 | let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
25478 | assert_eq_m256i(r, e); |
25479 | } |
25480 | |
25481 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25482 | unsafe fn test_mm256_maskz_cvtph_epu16() { |
25483 | let a = _mm256_set_ph( |
25484 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25485 | ); |
25486 | let r = _mm256_maskz_cvtph_epu16(0b0101010101010101, a); |
25487 | let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
25488 | assert_eq_m256i(r, e); |
25489 | } |
25490 | |
25491 | #[simd_test(enable = "avx512fp16" )] |
25492 | unsafe fn test_mm512_cvtph_epu16() { |
25493 | let a = _mm512_set_ph( |
25494 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25495 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25496 | 31.0, 32.0, |
25497 | ); |
25498 | let r = _mm512_cvtph_epu16(a); |
25499 | let e = _mm512_set_epi16( |
25500 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
25501 | 25, 26, 27, 28, 29, 30, 31, 32, |
25502 | ); |
25503 | assert_eq_m512i(r, e); |
25504 | } |
25505 | |
25506 | #[simd_test(enable = "avx512fp16" )] |
25507 | unsafe fn test_mm512_mask_cvtph_epu16() { |
25508 | let a = _mm512_set_ph( |
25509 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25510 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25511 | 31.0, 32.0, |
25512 | ); |
25513 | let src = _mm512_set_epi16( |
25514 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
25515 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, |
25516 | ); |
25517 | let r = _mm512_mask_cvtph_epu16(src, 0b01010101010101010101010101010101, a); |
25518 | let e = _mm512_set_epi16( |
25519 | 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, |
25520 | 24, 34, 26, 36, 28, 38, 30, 40, 32, |
25521 | ); |
25522 | assert_eq_m512i(r, e); |
25523 | } |
25524 | |
25525 | #[simd_test(enable = "avx512fp16" )] |
25526 | unsafe fn test_mm512_maskz_cvtph_epu16() { |
25527 | let a = _mm512_set_ph( |
25528 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25529 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25530 | 31.0, 32.0, |
25531 | ); |
25532 | let r = _mm512_maskz_cvtph_epu16(0b01010101010101010101010101010101, a); |
25533 | let e = _mm512_set_epi16( |
25534 | 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, |
25535 | 0, 28, 0, 30, 0, 32, |
25536 | ); |
25537 | assert_eq_m512i(r, e); |
25538 | } |
25539 | |
25540 | #[simd_test(enable = "avx512fp16" )] |
25541 | unsafe fn test_mm512_cvt_roundph_epu16() { |
25542 | let a = _mm512_set_ph( |
25543 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25544 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25545 | 31.0, 32.0, |
25546 | ); |
25547 | let r = _mm512_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
25548 | let e = _mm512_set_epi16( |
25549 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
25550 | 25, 26, 27, 28, 29, 30, 31, 32, |
25551 | ); |
25552 | assert_eq_m512i(r, e); |
25553 | } |
25554 | |
25555 | #[simd_test(enable = "avx512fp16" )] |
25556 | unsafe fn test_mm512_mask_cvt_roundph_epu16() { |
25557 | let a = _mm512_set_ph( |
25558 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25559 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25560 | 31.0, 32.0, |
25561 | ); |
25562 | let src = _mm512_set_epi16( |
25563 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
25564 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, |
25565 | ); |
25566 | let r = _mm512_mask_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25567 | src, |
25568 | 0b01010101010101010101010101010101, |
25569 | a, |
25570 | ); |
25571 | let e = _mm512_set_epi16( |
25572 | 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, |
25573 | 24, 34, 26, 36, 28, 38, 30, 40, 32, |
25574 | ); |
25575 | assert_eq_m512i(r, e); |
25576 | } |
25577 | |
25578 | #[simd_test(enable = "avx512fp16" )] |
25579 | unsafe fn test_mm512_maskz_cvt_roundph_epu16() { |
25580 | let a = _mm512_set_ph( |
25581 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25582 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25583 | 31.0, 32.0, |
25584 | ); |
25585 | let r = _mm512_maskz_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25586 | 0b01010101010101010101010101010101, |
25587 | a, |
25588 | ); |
25589 | let e = _mm512_set_epi16( |
25590 | 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, |
25591 | 0, 28, 0, 30, 0, 32, |
25592 | ); |
25593 | assert_eq_m512i(r, e); |
25594 | } |
25595 | |
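// Truncating (`cvtt`) conversions always round toward zero, so their `_round` forms take
// only a SAE argument such as `_MM_FROUND_NO_EXC` instead of a rounding direction.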
25596 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25597 | unsafe fn test_mm_cvttph_epi16() { |
25598 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25599 | let r = _mm_cvttph_epi16(a); |
25600 | let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
25601 | assert_eq_m128i(r, e); |
25602 | } |
25603 | |
25604 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25605 | unsafe fn test_mm_mask_cvttph_epi16() { |
25606 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25607 | let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17); |
25608 | let r = _mm_mask_cvttph_epi16(src, 0b01010101, a); |
25609 | let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8); |
25610 | assert_eq_m128i(r, e); |
25611 | } |
25612 | |
25613 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25614 | unsafe fn test_mm_maskz_cvttph_epi16() { |
25615 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25616 | let r = _mm_maskz_cvttph_epi16(0b01010101, a); |
25617 | let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8); |
25618 | assert_eq_m128i(r, e); |
25619 | } |
25620 | |
25621 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25622 | unsafe fn test_mm256_cvttph_epi16() { |
25623 | let a = _mm256_set_ph( |
25624 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25625 | ); |
25626 | let r = _mm256_cvttph_epi16(a); |
25627 | let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
25628 | assert_eq_m256i(r, e); |
25629 | } |
25630 | |
25631 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25632 | unsafe fn test_mm256_mask_cvttph_epi16() { |
25633 | let a = _mm256_set_ph( |
25634 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25635 | ); |
25636 | let src = _mm256_set_epi16( |
25637 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
25638 | ); |
25639 | let r = _mm256_mask_cvttph_epi16(src, 0b0101010101010101, a); |
25640 | let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
25641 | assert_eq_m256i(r, e); |
25642 | } |
25643 | |
25644 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25645 | unsafe fn test_mm256_maskz_cvttph_epi16() { |
25646 | let a = _mm256_set_ph( |
25647 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25648 | ); |
25649 | let r = _mm256_maskz_cvttph_epi16(0b0101010101010101, a); |
25650 | let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
25651 | assert_eq_m256i(r, e); |
25652 | } |
25653 | |
25654 | #[simd_test(enable = "avx512fp16" )] |
25655 | unsafe fn test_mm512_cvttph_epi16() { |
25656 | let a = _mm512_set_ph( |
25657 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25658 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25659 | 31.0, 32.0, |
25660 | ); |
25661 | let r = _mm512_cvttph_epi16(a); |
25662 | let e = _mm512_set_epi16( |
25663 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
25664 | 25, 26, 27, 28, 29, 30, 31, 32, |
25665 | ); |
25666 | assert_eq_m512i(r, e); |
25667 | } |
25668 | |
25669 | #[simd_test(enable = "avx512fp16" )] |
25670 | unsafe fn test_mm512_mask_cvttph_epi16() { |
25671 | let a = _mm512_set_ph( |
25672 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25673 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25674 | 31.0, 32.0, |
25675 | ); |
25676 | let src = _mm512_set_epi16( |
25677 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
25678 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, |
25679 | ); |
25680 | let r = _mm512_mask_cvttph_epi16(src, 0b01010101010101010101010101010101, a); |
25681 | let e = _mm512_set_epi16( |
25682 | 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, |
25683 | 24, 34, 26, 36, 28, 38, 30, 40, 32, |
25684 | ); |
25685 | assert_eq_m512i(r, e); |
25686 | } |
25687 | |
25688 | #[simd_test(enable = "avx512fp16" )] |
25689 | unsafe fn test_mm512_maskz_cvttph_epi16() { |
25690 | let a = _mm512_set_ph( |
25691 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25692 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25693 | 31.0, 32.0, |
25694 | ); |
25695 | let r = _mm512_maskz_cvttph_epi16(0b01010101010101010101010101010101, a); |
25696 | let e = _mm512_set_epi16( |
25697 | 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, |
25698 | 0, 28, 0, 30, 0, 32, |
25699 | ); |
25700 | assert_eq_m512i(r, e); |
25701 | } |
25702 | |
25703 | #[simd_test(enable = "avx512fp16" )] |
25704 | unsafe fn test_mm512_cvtt_roundph_epi16() { |
25705 | let a = _mm512_set_ph( |
25706 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25707 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25708 | 31.0, 32.0, |
25709 | ); |
25710 | let r = _mm512_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(a); |
25711 | let e = _mm512_set_epi16( |
25712 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
25713 | 25, 26, 27, 28, 29, 30, 31, 32, |
25714 | ); |
25715 | assert_eq_m512i(r, e); |
25716 | } |
25717 | |
25718 | #[simd_test(enable = "avx512fp16" )] |
25719 | unsafe fn test_mm512_mask_cvtt_roundph_epi16() { |
25720 | let a = _mm512_set_ph( |
25721 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25722 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25723 | 31.0, 32.0, |
25724 | ); |
25725 | let src = _mm512_set_epi16( |
25726 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
25727 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, |
25728 | ); |
25729 | let r = _mm512_mask_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>( |
25730 | src, |
25731 | 0b01010101010101010101010101010101, |
25732 | a, |
25733 | ); |
25734 | let e = _mm512_set_epi16( |
25735 | 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, |
25736 | 24, 34, 26, 36, 28, 38, 30, 40, 32, |
25737 | ); |
25738 | assert_eq_m512i(r, e); |
25739 | } |
25740 | |
25741 | #[simd_test(enable = "avx512fp16" )] |
25742 | unsafe fn test_mm512_maskz_cvtt_roundph_epi16() { |
25743 | let a = _mm512_set_ph( |
25744 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25745 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25746 | 31.0, 32.0, |
25747 | ); |
25748 | let r = _mm512_maskz_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>( |
25749 | 0b01010101010101010101010101010101, |
25750 | a, |
25751 | ); |
25752 | let e = _mm512_set_epi16( |
25753 | 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, |
25754 | 0, 28, 0, 30, 0, 32, |
25755 | ); |
25756 | assert_eq_m512i(r, e); |
25757 | } |
25758 | |
25759 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25760 | unsafe fn test_mm_cvttph_epu16() { |
25761 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25762 | let r = _mm_cvttph_epu16(a); |
25763 | let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
25764 | assert_eq_m128i(r, e); |
25765 | } |
25766 | |
25767 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25768 | unsafe fn test_mm_mask_cvttph_epu16() { |
25769 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25770 | let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17); |
25771 | let r = _mm_mask_cvttph_epu16(src, 0b01010101, a); |
25772 | let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8); |
25773 | assert_eq_m128i(r, e); |
25774 | } |
25775 | |
25776 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25777 | unsafe fn test_mm_maskz_cvttph_epu16() { |
25778 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25779 | let r = _mm_maskz_cvttph_epu16(0b01010101, a); |
25780 | let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8); |
25781 | assert_eq_m128i(r, e); |
25782 | } |
25783 | |
25784 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25785 | unsafe fn test_mm256_cvttph_epu16() { |
25786 | let a = _mm256_set_ph( |
25787 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25788 | ); |
25789 | let r = _mm256_cvttph_epu16(a); |
25790 | let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
25791 | assert_eq_m256i(r, e); |
25792 | } |
25793 | |
25794 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25795 | unsafe fn test_mm256_mask_cvttph_epu16() { |
25796 | let a = _mm256_set_ph( |
25797 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25798 | ); |
25799 | let src = _mm256_set_epi16( |
25800 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
25801 | ); |
25802 | let r = _mm256_mask_cvttph_epu16(src, 0b0101010101010101, a); |
25803 | let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
25804 | assert_eq_m256i(r, e); |
25805 | } |
25806 | |
25807 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25808 | unsafe fn test_mm256_maskz_cvttph_epu16() { |
25809 | let a = _mm256_set_ph( |
25810 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25811 | ); |
25812 | let r = _mm256_maskz_cvttph_epu16(0b0101010101010101, a); |
25813 | let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
25814 | assert_eq_m256i(r, e); |
25815 | } |
25816 | |
25817 | #[simd_test(enable = "avx512fp16" )] |
25818 | unsafe fn test_mm512_cvttph_epu16() { |
25819 | let a = _mm512_set_ph( |
25820 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25821 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25822 | 31.0, 32.0, |
25823 | ); |
25824 | let r = _mm512_cvttph_epu16(a); |
25825 | let e = _mm512_set_epi16( |
25826 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
25827 | 25, 26, 27, 28, 29, 30, 31, 32, |
25828 | ); |
25829 | assert_eq_m512i(r, e); |
25830 | } |
25831 | |
25832 | #[simd_test(enable = "avx512fp16" )] |
25833 | unsafe fn test_mm512_mask_cvttph_epu16() { |
25834 | let a = _mm512_set_ph( |
25835 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25836 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25837 | 31.0, 32.0, |
25838 | ); |
25839 | let src = _mm512_set_epi16( |
25840 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
25841 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, |
25842 | ); |
25843 | let r = _mm512_mask_cvttph_epu16(src, 0b01010101010101010101010101010101, a); |
25844 | let e = _mm512_set_epi16( |
25845 | 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, |
25846 | 24, 34, 26, 36, 28, 38, 30, 40, 32, |
25847 | ); |
25848 | assert_eq_m512i(r, e); |
25849 | } |
25850 | |
25851 | #[simd_test(enable = "avx512fp16" )] |
25852 | unsafe fn test_mm512_maskz_cvttph_epu16() { |
25853 | let a = _mm512_set_ph( |
25854 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25855 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25856 | 31.0, 32.0, |
25857 | ); |
25858 | let r = _mm512_maskz_cvttph_epu16(0b01010101010101010101010101010101, a); |
25859 | let e = _mm512_set_epi16( |
25860 | 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, |
25861 | 0, 28, 0, 30, 0, 32, |
25862 | ); |
25863 | assert_eq_m512i(r, e); |
25864 | } |
25865 | |
25866 | #[simd_test(enable = "avx512fp16" )] |
25867 | unsafe fn test_mm512_cvtt_roundph_epu16() { |
25868 | let a = _mm512_set_ph( |
25869 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25870 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25871 | 31.0, 32.0, |
25872 | ); |
25873 | let r = _mm512_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>(a); |
25874 | let e = _mm512_set_epi16( |
25875 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
25876 | 25, 26, 27, 28, 29, 30, 31, 32, |
25877 | ); |
25878 | assert_eq_m512i(r, e); |
25879 | } |
25880 | |
25881 | #[simd_test(enable = "avx512fp16" )] |
25882 | unsafe fn test_mm512_mask_cvtt_roundph_epu16() { |
25883 | let a = _mm512_set_ph( |
25884 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25885 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25886 | 31.0, 32.0, |
25887 | ); |
25888 | let src = _mm512_set_epi16( |
25889 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
25890 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, |
25891 | ); |
25892 | let r = _mm512_mask_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>( |
25893 | src, |
25894 | 0b01010101010101010101010101010101, |
25895 | a, |
25896 | ); |
25897 | let e = _mm512_set_epi16( |
25898 | 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, |
25899 | 24, 34, 26, 36, 28, 38, 30, 40, 32, |
25900 | ); |
25901 | assert_eq_m512i(r, e); |
25902 | } |
25903 | |
25904 | #[simd_test(enable = "avx512fp16" )] |
25905 | unsafe fn test_mm512_maskz_cvtt_roundph_epu16() { |
25906 | let a = _mm512_set_ph( |
25907 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25908 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25909 | 31.0, 32.0, |
25910 | ); |
25911 | let r = _mm512_maskz_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>( |
25912 | 0b01010101010101010101010101010101, |
25913 | a, |
25914 | ); |
25915 | let e = _mm512_set_epi16( |
25916 | 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, |
25917 | 0, 28, 0, 30, 0, 32, |
25918 | ); |
25919 | assert_eq_m512i(r, e); |
25920 | } |
25921 | |
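// Layout reminder for the masked tests in this block: the `_mm*_set_*` constructors list
// elements from the highest index down to element 0, while mask bit 0 governs element 0.
// With the alternating masks used here, the first argument of every expected vector is
// therefore a `src` value (or zero in the `maskz_` forms) and the converted values follow
// in the even-numbered elements.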
25922 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25923 | unsafe fn test_mm_cvtph_epi32() { |
25924 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
25925 | let r = _mm_cvtph_epi32(a); |
25926 | let e = _mm_set_epi32(1, 2, 3, 4); |
25927 | assert_eq_m128i(r, e); |
25928 | } |
25929 | |
25930 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25931 | unsafe fn test_mm_mask_cvtph_epi32() { |
25932 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
25933 | let src = _mm_set_epi32(10, 11, 12, 13); |
25934 | let r = _mm_mask_cvtph_epi32(src, 0b0101, a); |
25935 | let e = _mm_set_epi32(10, 2, 12, 4); |
25936 | assert_eq_m128i(r, e); |
25937 | } |
25938 | |
25939 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25940 | unsafe fn test_mm_maskz_cvtph_epi32() { |
25941 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
25942 | let r = _mm_maskz_cvtph_epi32(0b0101, a); |
25943 | let e = _mm_set_epi32(0, 2, 0, 4); |
25944 | assert_eq_m128i(r, e); |
25945 | } |
25946 | |
25947 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25948 | unsafe fn test_mm256_cvtph_epi32() { |
25949 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25950 | let r = _mm256_cvtph_epi32(a); |
25951 | let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
25952 | assert_eq_m256i(r, e); |
25953 | } |
25954 | |
25955 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25956 | unsafe fn test_mm256_mask_cvtph_epi32() { |
25957 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25958 | let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17); |
25959 | let r = _mm256_mask_cvtph_epi32(src, 0b01010101, a); |
25960 | let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8); |
25961 | assert_eq_m256i(r, e); |
25962 | } |
25963 | |
25964 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25965 | unsafe fn test_mm256_maskz_cvtph_epi32() { |
25966 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25967 | let r = _mm256_maskz_cvtph_epi32(0b01010101, a); |
25968 | let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8); |
25969 | assert_eq_m256i(r, e); |
25970 | } |
25971 | |
25972 | #[simd_test(enable = "avx512fp16" )] |
25973 | unsafe fn test_mm512_cvtph_epi32() { |
25974 | let a = _mm256_set_ph( |
25975 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25976 | ); |
25977 | let r = _mm512_cvtph_epi32(a); |
25978 | let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
25979 | assert_eq_m512i(r, e); |
25980 | } |
25981 | |
25982 | #[simd_test(enable = "avx512fp16" )] |
25983 | unsafe fn test_mm512_mask_cvtph_epi32() { |
25984 | let a = _mm256_set_ph( |
25985 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25986 | ); |
25987 | let src = _mm512_set_epi32( |
25988 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
25989 | ); |
25990 | let r = _mm512_mask_cvtph_epi32(src, 0b0101010101010101, a); |
25991 | let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
25992 | assert_eq_m512i(r, e); |
25993 | } |
25994 | |
25995 | #[simd_test(enable = "avx512fp16" )] |
25996 | unsafe fn test_mm512_maskz_cvtph_epi32() { |
25997 | let a = _mm256_set_ph( |
25998 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25999 | ); |
26000 | let r = _mm512_maskz_cvtph_epi32(0b0101010101010101, a); |
26001 | let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
26002 | assert_eq_m512i(r, e); |
26003 | } |
26004 | |
26005 | #[simd_test(enable = "avx512fp16" )] |
26006 | unsafe fn test_mm512_cvt_roundph_epi32() { |
26007 | let a = _mm256_set_ph( |
26008 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26009 | ); |
26010 | let r = _mm512_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
26011 | let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
26012 | assert_eq_m512i(r, e); |
26013 | } |
26014 | |
26015 | #[simd_test(enable = "avx512fp16" )] |
26016 | unsafe fn test_mm512_mask_cvt_roundph_epi32() { |
26017 | let a = _mm256_set_ph( |
26018 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26019 | ); |
26020 | let src = _mm512_set_epi32( |
26021 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
26022 | ); |
26023 | let r = _mm512_mask_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
26024 | src, |
26025 | 0b0101010101010101, |
26026 | a, |
26027 | ); |
26028 | let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
26029 | assert_eq_m512i(r, e); |
26030 | } |
26031 | |
26032 | #[simd_test(enable = "avx512fp16" )] |
26033 | unsafe fn test_mm512_maskz_cvt_roundph_epi32() { |
26034 | let a = _mm256_set_ph( |
26035 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26036 | ); |
26037 | let r = _mm512_maskz_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
26038 | 0b0101010101010101, |
26039 | a, |
26040 | ); |
26041 | let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
26042 | assert_eq_m512i(r, e); |
26043 | } |
26044 | |
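// The scalar conversions below operate on element 0 only. `_mm_setr_ph` lists elements from
// element 0 upward, so the value being converted is the first argument.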
26045 | #[simd_test(enable = "avx512fp16" )] |
26046 | unsafe fn test_mm_cvtsh_i32() { |
26047 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26048 | let r = _mm_cvtsh_i32(a); |
26049 | assert_eq!(r, 1); |
26050 | } |
26051 | |
26052 | #[simd_test(enable = "avx512fp16" )] |
26053 | unsafe fn test_mm_cvt_roundsh_i32() { |
26054 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26055 | let r = _mm_cvt_roundsh_i32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
26056 | assert_eq!(r, 1); |
26057 | } |
26058 | |
26059 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26060 | unsafe fn test_mm_cvtph_epu32() { |
26061 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26062 | let r = _mm_cvtph_epu32(a); |
26063 | let e = _mm_set_epi32(1, 2, 3, 4); |
26064 | assert_eq_m128i(r, e); |
26065 | } |
26066 | |
26067 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26068 | unsafe fn test_mm_mask_cvtph_epu32() { |
26069 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26070 | let src = _mm_set_epi32(10, 11, 12, 13); |
26071 | let r = _mm_mask_cvtph_epu32(src, 0b0101, a); |
26072 | let e = _mm_set_epi32(10, 2, 12, 4); |
26073 | assert_eq_m128i(r, e); |
26074 | } |
26075 | |
26076 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26077 | unsafe fn test_mm_maskz_cvtph_epu32() { |
26078 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26079 | let r = _mm_maskz_cvtph_epu32(0b0101, a); |
26080 | let e = _mm_set_epi32(0, 2, 0, 4); |
26081 | assert_eq_m128i(r, e); |
26082 | } |
26083 | |
26084 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26085 | unsafe fn test_mm256_cvtph_epu32() { |
26086 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26087 | let r = _mm256_cvtph_epu32(a); |
26088 | let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
26089 | assert_eq_m256i(r, e); |
26090 | } |
26091 | |
26092 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26093 | unsafe fn test_mm256_mask_cvtph_epu32() { |
26094 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26095 | let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17); |
26096 | let r = _mm256_mask_cvtph_epu32(src, 0b01010101, a); |
26097 | let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8); |
26098 | assert_eq_m256i(r, e); |
26099 | } |
26100 | |
26101 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26102 | unsafe fn test_mm256_maskz_cvtph_epu32() { |
26103 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26104 | let r = _mm256_maskz_cvtph_epu32(0b01010101, a); |
26105 | let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8); |
26106 | assert_eq_m256i(r, e); |
26107 | } |
26108 | |
26109 | #[simd_test(enable = "avx512fp16" )] |
26110 | unsafe fn test_mm512_cvtph_epu32() { |
26111 | let a = _mm256_set_ph( |
26112 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26113 | ); |
26114 | let r = _mm512_cvtph_epu32(a); |
26115 | let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
26116 | assert_eq_m512i(r, e); |
26117 | } |
26118 | |
26119 | #[simd_test(enable = "avx512fp16" )] |
26120 | unsafe fn test_mm512_mask_cvtph_epu32() { |
26121 | let a = _mm256_set_ph( |
26122 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26123 | ); |
26124 | let src = _mm512_set_epi32( |
26125 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
26126 | ); |
26127 | let r = _mm512_mask_cvtph_epu32(src, 0b0101010101010101, a); |
26128 | let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
26129 | assert_eq_m512i(r, e); |
26130 | } |
26131 | |
26132 | #[simd_test(enable = "avx512fp16" )] |
26133 | unsafe fn test_mm512_maskz_cvtph_epu32() { |
26134 | let a = _mm256_set_ph( |
26135 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26136 | ); |
26137 | let r = _mm512_maskz_cvtph_epu32(0b0101010101010101, a); |
26138 | let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
26139 | assert_eq_m512i(r, e); |
26140 | } |
26141 | |
26142 | #[simd_test(enable = "avx512fp16" )] |
26143 | unsafe fn test_mm512_cvt_roundph_epu32() { |
26144 | let a = _mm256_set_ph( |
26145 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26146 | ); |
26147 | let r = _mm512_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
26148 | let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
26149 | assert_eq_m512i(r, e); |
26150 | } |
26151 | |
26152 | #[simd_test(enable = "avx512fp16" )] |
26153 | unsafe fn test_mm512_mask_cvt_roundph_epu32() { |
26154 | let a = _mm256_set_ph( |
26155 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26156 | ); |
26157 | let src = _mm512_set_epi32( |
26158 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
26159 | ); |
26160 | let r = _mm512_mask_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
26161 | src, |
26162 | 0b0101010101010101, |
26163 | a, |
26164 | ); |
26165 | let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
26166 | assert_eq_m512i(r, e); |
26167 | } |
26168 | |
26169 | #[simd_test(enable = "avx512fp16" )] |
26170 | unsafe fn test_mm512_maskz_cvt_roundph_epu32() { |
26171 | let a = _mm256_set_ph( |
26172 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26173 | ); |
26174 | let r = _mm512_maskz_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
26175 | 0b0101010101010101, |
26176 | a, |
26177 | ); |
26178 | let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
26179 | assert_eq_m512i(r, e); |
26180 | } |
26181 | |
26182 | #[simd_test(enable = "avx512fp16" )] |
26183 | unsafe fn test_mm_cvtsh_u32() { |
26184 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26185 | let r = _mm_cvtsh_u32(a); |
26186 | assert_eq!(r, 1); |
26187 | } |
26188 | |
26189 | #[simd_test(enable = "avx512fp16" )] |
26190 | unsafe fn test_mm_cvt_roundsh_u32() { |
26191 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26192 | let r = _mm_cvt_roundsh_u32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
26193 | assert_eq!(r, 1); |
26194 | } |
26195 | |
26196 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26197 | unsafe fn test_mm_cvttph_epi32() { |
26198 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26199 | let r = _mm_cvttph_epi32(a); |
26200 | let e = _mm_set_epi32(1, 2, 3, 4); |
26201 | assert_eq_m128i(r, e); |
26202 | } |
26203 | |
26204 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26205 | unsafe fn test_mm_mask_cvttph_epi32() { |
26206 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26207 | let src = _mm_set_epi32(10, 11, 12, 13); |
26208 | let r = _mm_mask_cvttph_epi32(src, 0b0101, a); |
26209 | let e = _mm_set_epi32(10, 2, 12, 4); |
26210 | assert_eq_m128i(r, e); |
26211 | } |
26212 | |
26213 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26214 | unsafe fn test_mm_maskz_cvttph_epi32() { |
26215 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26216 | let r = _mm_maskz_cvttph_epi32(0b0101, a); |
26217 | let e = _mm_set_epi32(0, 2, 0, 4); |
26218 | assert_eq_m128i(r, e); |
26219 | } |
26220 | |
26221 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26222 | unsafe fn test_mm256_cvttph_epi32() { |
26223 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26224 | let r = _mm256_cvttph_epi32(a); |
26225 | let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
26226 | assert_eq_m256i(r, e); |
26227 | } |
26228 | |
26229 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26230 | unsafe fn test_mm256_mask_cvttph_epi32() { |
26231 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26232 | let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17); |
26233 | let r = _mm256_mask_cvttph_epi32(src, 0b01010101, a); |
26234 | let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8); |
26235 | assert_eq_m256i(r, e); |
26236 | } |
26237 | |
26238 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26239 | unsafe fn test_mm256_maskz_cvttph_epi32() { |
26240 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26241 | let r = _mm256_maskz_cvttph_epi32(0b01010101, a); |
26242 | let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8); |
26243 | assert_eq_m256i(r, e); |
26244 | } |
26245 | |
26246 | #[simd_test(enable = "avx512fp16" )] |
26247 | unsafe fn test_mm512_cvttph_epi32() { |
26248 | let a = _mm256_set_ph( |
26249 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26250 | ); |
26251 | let r = _mm512_cvttph_epi32(a); |
26252 | let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
26253 | assert_eq_m512i(r, e); |
26254 | } |
26255 | |
26256 | #[simd_test(enable = "avx512fp16" )] |
26257 | unsafe fn test_mm512_mask_cvttph_epi32() { |
26258 | let a = _mm256_set_ph( |
26259 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26260 | ); |
26261 | let src = _mm512_set_epi32( |
26262 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
26263 | ); |
26264 | let r = _mm512_mask_cvttph_epi32(src, 0b0101010101010101, a); |
26265 | let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
26266 | assert_eq_m512i(r, e); |
26267 | } |
26268 | |
26269 | #[simd_test(enable = "avx512fp16" )] |
26270 | unsafe fn test_mm512_maskz_cvttph_epi32() { |
26271 | let a = _mm256_set_ph( |
26272 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26273 | ); |
26274 | let r = _mm512_maskz_cvttph_epi32(0b0101010101010101, a); |
26275 | let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
26276 | assert_eq_m512i(r, e); |
26277 | } |
26278 | |
26279 | #[simd_test(enable = "avx512fp16" )] |
26280 | unsafe fn test_mm512_cvtt_roundph_epi32() { |
26281 | let a = _mm256_set_ph( |
26282 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26283 | ); |
26284 | let r = _mm512_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(a); |
26285 | let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
26286 | assert_eq_m512i(r, e); |
26287 | } |
26288 | |
26289 | #[simd_test(enable = "avx512fp16" )] |
26290 | unsafe fn test_mm512_mask_cvtt_roundph_epi32() { |
26291 | let a = _mm256_set_ph( |
26292 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26293 | ); |
26294 | let src = _mm512_set_epi32( |
26295 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
26296 | ); |
26297 | let r = _mm512_mask_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a); |
26298 | let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
26299 | assert_eq_m512i(r, e); |
26300 | } |
26301 | |
26302 | #[simd_test(enable = "avx512fp16" )] |
26303 | unsafe fn test_mm512_maskz_cvtt_roundph_epi32() { |
26304 | let a = _mm256_set_ph( |
26305 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26306 | ); |
26307 | let r = _mm512_maskz_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(0b0101010101010101, a); |
26308 | let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
26309 | assert_eq_m512i(r, e); |
26310 | } |
26311 | |
26312 | #[simd_test(enable = "avx512fp16" )] |
26313 | unsafe fn test_mm_cvttsh_i32() { |
26314 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26315 | let r = _mm_cvttsh_i32(a); |
26316 | assert_eq!(r, 1); |
26317 | } |
26318 | |
26319 | #[simd_test(enable = "avx512fp16" )] |
26320 | unsafe fn test_mm_cvtt_roundsh_i32() { |
26321 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26322 | let r = _mm_cvtt_roundsh_i32::<_MM_FROUND_NO_EXC>(a); |
26323 | assert_eq!(r, 1); |
26324 | } |
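
// A minimal illustrative sketch, not part of the original suite (the test name is made up):
// on a fractional input the truncating scalar conversion rounds toward zero, while the
// plain conversion follows the default round-to-nearest-even mode.
#[simd_test(enable = "avx512fp16")]
unsafe fn test_cvtsh_vs_cvttsh_fraction_sketch() {
// 1.75 is exactly representable as an f16, so no precision is lost on the way in.
let a = _mm_setr_ph(1.75, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
assert_eq!(_mm_cvttsh_i32(a), 1); // truncation drops the fraction
assert_eq!(_mm_cvtsh_i32(a), 2); // round-to-nearest-even rounds 1.75 up to 2
}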
26325 | |
26326 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26327 | unsafe fn test_mm_cvttph_epu32() { |
26328 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26329 | let r = _mm_cvttph_epu32(a); |
26330 | let e = _mm_set_epi32(1, 2, 3, 4); |
26331 | assert_eq_m128i(r, e); |
26332 | } |
26333 | |
26334 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26335 | unsafe fn test_mm_mask_cvttph_epu32() { |
26336 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26337 | let src = _mm_set_epi32(10, 11, 12, 13); |
26338 | let r = _mm_mask_cvttph_epu32(src, 0b0101, a); |
26339 | let e = _mm_set_epi32(10, 2, 12, 4); |
26340 | assert_eq_m128i(r, e); |
26341 | } |
26342 | |
26343 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26344 | unsafe fn test_mm_maskz_cvttph_epu32() { |
26345 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26346 | let r = _mm_maskz_cvttph_epu32(0b0101, a); |
26347 | let e = _mm_set_epi32(0, 2, 0, 4); |
26348 | assert_eq_m128i(r, e); |
26349 | } |
26350 | |
26351 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26352 | unsafe fn test_mm256_cvttph_epu32() { |
26353 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26354 | let r = _mm256_cvttph_epu32(a); |
26355 | let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
26356 | assert_eq_m256i(r, e); |
26357 | } |
26358 | |
26359 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26360 | unsafe fn test_mm256_mask_cvttph_epu32() { |
26361 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26362 | let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17); |
26363 | let r = _mm256_mask_cvttph_epu32(src, 0b01010101, a); |
26364 | let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8); |
26365 | assert_eq_m256i(r, e); |
26366 | } |
26367 | |
26368 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26369 | unsafe fn test_mm256_maskz_cvttph_epu32() { |
26370 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26371 | let r = _mm256_maskz_cvttph_epu32(0b01010101, a); |
26372 | let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8); |
26373 | assert_eq_m256i(r, e); |
26374 | } |
26375 | |
26376 | #[simd_test(enable = "avx512fp16" )] |
26377 | unsafe fn test_mm512_cvttph_epu32() { |
26378 | let a = _mm256_set_ph( |
26379 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26380 | ); |
26381 | let r = _mm512_cvttph_epu32(a); |
26382 | let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
26383 | assert_eq_m512i(r, e); |
26384 | } |
26385 | |
26386 | #[simd_test(enable = "avx512fp16" )] |
26387 | unsafe fn test_mm512_mask_cvttph_epu32() { |
26388 | let a = _mm256_set_ph( |
26389 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26390 | ); |
26391 | let src = _mm512_set_epi32( |
26392 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
26393 | ); |
26394 | let r = _mm512_mask_cvttph_epu32(src, 0b0101010101010101, a); |
26395 | let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
26396 | assert_eq_m512i(r, e); |
26397 | } |
26398 | |
26399 | #[simd_test(enable = "avx512fp16" )] |
26400 | unsafe fn test_mm512_maskz_cvttph_epu32() { |
26401 | let a = _mm256_set_ph( |
26402 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26403 | ); |
26404 | let r = _mm512_maskz_cvttph_epu32(0b0101010101010101, a); |
26405 | let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
26406 | assert_eq_m512i(r, e); |
26407 | } |
26408 | |
26409 | #[simd_test(enable = "avx512fp16" )] |
26410 | unsafe fn test_mm512_cvtt_roundph_epu32() { |
26411 | let a = _mm256_set_ph( |
26412 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26413 | ); |
26414 | let r = _mm512_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(a); |
26415 | let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
26416 | assert_eq_m512i(r, e); |
26417 | } |
26418 | |
26419 | #[simd_test(enable = "avx512fp16" )] |
26420 | unsafe fn test_mm512_mask_cvtt_roundph_epu32() { |
26421 | let a = _mm256_set_ph( |
26422 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26423 | ); |
26424 | let src = _mm512_set_epi32( |
26425 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
26426 | ); |
26427 | let r = _mm512_mask_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a); |
26428 | let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
26429 | assert_eq_m512i(r, e); |
26430 | } |
26431 | |
26432 | #[simd_test(enable = "avx512fp16" )] |
26433 | unsafe fn test_mm512_maskz_cvtt_roundph_epu32() { |
26434 | let a = _mm256_set_ph( |
26435 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26436 | ); |
26437 | let r = _mm512_maskz_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(0b0101010101010101, a); |
26438 | let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
26439 | assert_eq_m512i(r, e); |
26440 | } |
26441 | |
26442 | #[simd_test(enable = "avx512fp16" )] |
26443 | unsafe fn test_mm_cvttsh_u32() { |
26444 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26445 | let r = _mm_cvttsh_u32(a); |
26446 | assert_eq!(r, 1); |
26447 | } |
26448 | |
26449 | #[simd_test(enable = "avx512fp16" )] |
26450 | unsafe fn test_mm_cvtt_roundsh_u32() { |
26451 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26452 | let r = _mm_cvtt_roundsh_u32::<_MM_FROUND_NO_EXC>(a); |
26453 | assert_eq!(r, 1); |
26454 | } |
26455 | |
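// The 64-bit integer conversions widen only the low 2, 4, or 8 half-precision lanes of the
// `__m128h` argument (for 128-, 256-, and 512-bit results respectively); the remaining input
// lanes are ignored, which is why the narrower tests set them to 0.0.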
26456 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26457 | unsafe fn test_mm_cvtph_epi64() { |
26458 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26459 | let r = _mm_cvtph_epi64(a); |
26460 | let e = _mm_set_epi64x(1, 2); |
26461 | assert_eq_m128i(r, e); |
26462 | } |
26463 | |
26464 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26465 | unsafe fn test_mm_mask_cvtph_epi64() { |
26466 | let src = _mm_set_epi64x(3, 4); |
26467 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26468 | let r = _mm_mask_cvtph_epi64(src, 0b01, a); |
26469 | let e = _mm_set_epi64x(3, 2); |
26470 | assert_eq_m128i(r, e); |
26471 | } |
26472 | |
26473 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26474 | unsafe fn test_mm_maskz_cvtph_epi64() { |
26475 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26476 | let r = _mm_maskz_cvtph_epi64(0b01, a); |
26477 | let e = _mm_set_epi64x(0, 2); |
26478 | assert_eq_m128i(r, e); |
26479 | } |
26480 | |
26481 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26482 | unsafe fn test_mm256_cvtph_epi64() { |
26483 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26484 | let r = _mm256_cvtph_epi64(a); |
26485 | let e = _mm256_set_epi64x(1, 2, 3, 4); |
26486 | assert_eq_m256i(r, e); |
26487 | } |
26488 | |
26489 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26490 | unsafe fn test_mm256_mask_cvtph_epi64() { |
26491 | let src = _mm256_set_epi64x(5, 6, 7, 8); |
26492 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26493 | let r = _mm256_mask_cvtph_epi64(src, 0b0101, a); |
26494 | let e = _mm256_set_epi64x(5, 2, 7, 4); |
26495 | assert_eq_m256i(r, e); |
26496 | } |
26497 | |
26498 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26499 | unsafe fn test_mm256_maskz_cvtph_epi64() { |
26500 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26501 | let r = _mm256_maskz_cvtph_epi64(0b0101, a); |
26502 | let e = _mm256_set_epi64x(0, 2, 0, 4); |
26503 | assert_eq_m256i(r, e); |
26504 | } |
26505 | |
26506 | #[simd_test(enable = "avx512fp16" )] |
26507 | unsafe fn test_mm512_cvtph_epi64() { |
26508 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26509 | let r = _mm512_cvtph_epi64(a); |
26510 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
26511 | assert_eq_m512i(r, e); |
26512 | } |
26513 | |
26514 | #[simd_test(enable = "avx512fp16" )] |
26515 | unsafe fn test_mm512_mask_cvtph_epi64() { |
26516 | let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
26517 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26518 | let r = _mm512_mask_cvtph_epi64(src, 0b01010101, a); |
26519 | let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); |
26520 | assert_eq_m512i(r, e); |
26521 | } |
26522 | |
26523 | #[simd_test(enable = "avx512fp16" )] |
26524 | unsafe fn test_mm512_maskz_cvtph_epi64() { |
26525 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26526 | let r = _mm512_maskz_cvtph_epi64(0b01010101, a); |
26527 | let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); |
26528 | assert_eq_m512i(r, e); |
26529 | } |
26530 | |
26531 | #[simd_test(enable = "avx512fp16" )] |
26532 | unsafe fn test_mm512_cvt_roundph_epi64() { |
26533 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26534 | let r = _mm512_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
26535 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
26536 | assert_eq_m512i(r, e); |
26537 | } |
26538 | |
26539 | #[simd_test(enable = "avx512fp16" )] |
26540 | unsafe fn test_mm512_mask_cvt_roundph_epi64() { |
26541 | let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
26542 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26543 | let r = _mm512_mask_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
26544 | src, 0b01010101, a, |
26545 | ); |
26546 | let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); |
26547 | assert_eq_m512i(r, e); |
26548 | } |
26549 | |
26550 | #[simd_test(enable = "avx512fp16" )] |
26551 | unsafe fn test_mm512_maskz_cvt_roundph_epi64() { |
26552 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26553 | let r = _mm512_maskz_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
26554 | 0b01010101, a, |
26555 | ); |
26556 | let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); |
26557 | assert_eq_m512i(r, e); |
26558 | } |
26559 | |
26560 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26561 | unsafe fn test_mm_cvtph_epu64() { |
26562 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26563 | let r = _mm_cvtph_epu64(a); |
26564 | let e = _mm_set_epi64x(1, 2); |
26565 | assert_eq_m128i(r, e); |
26566 | } |
26567 | |
26568 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26569 | unsafe fn test_mm_mask_cvtph_epu64() { |
26570 | let src = _mm_set_epi64x(3, 4); |
26571 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26572 | let r = _mm_mask_cvtph_epu64(src, 0b01, a); |
26573 | let e = _mm_set_epi64x(3, 2); |
26574 | assert_eq_m128i(r, e); |
26575 | } |
26576 | |
26577 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26578 | unsafe fn test_mm_maskz_cvtph_epu64() { |
26579 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26580 | let r = _mm_maskz_cvtph_epu64(0b01, a); |
26581 | let e = _mm_set_epi64x(0, 2); |
26582 | assert_eq_m128i(r, e); |
26583 | } |
26584 | |
26585 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26586 | unsafe fn test_mm256_cvtph_epu64() { |
26587 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26588 | let r = _mm256_cvtph_epu64(a); |
26589 | let e = _mm256_set_epi64x(1, 2, 3, 4); |
26590 | assert_eq_m256i(r, e); |
26591 | } |
26592 | |
26593 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26594 | unsafe fn test_mm256_mask_cvtph_epu64() { |
26595 | let src = _mm256_set_epi64x(5, 6, 7, 8); |
26596 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26597 | let r = _mm256_mask_cvtph_epu64(src, 0b0101, a); |
26598 | let e = _mm256_set_epi64x(5, 2, 7, 4); |
26599 | assert_eq_m256i(r, e); |
26600 | } |
26601 | |
26602 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26603 | unsafe fn test_mm256_maskz_cvtph_epu64() { |
26604 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26605 | let r = _mm256_maskz_cvtph_epu64(0b0101, a); |
26606 | let e = _mm256_set_epi64x(0, 2, 0, 4); |
26607 | assert_eq_m256i(r, e); |
26608 | } |
26609 | |
26610 | #[simd_test(enable = "avx512fp16" )] |
26611 | unsafe fn test_mm512_cvtph_epu64() { |
26612 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26613 | let r = _mm512_cvtph_epu64(a); |
26614 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
26615 | assert_eq_m512i(r, e); |
26616 | } |
26617 | |
26618 | #[simd_test(enable = "avx512fp16" )] |
26619 | unsafe fn test_mm512_mask_cvtph_epu64() { |
26620 | let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
26621 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26622 | let r = _mm512_mask_cvtph_epu64(src, 0b01010101, a); |
26623 | let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); |
26624 | assert_eq_m512i(r, e); |
26625 | } |
26626 | |
26627 | #[simd_test(enable = "avx512fp16" )] |
26628 | unsafe fn test_mm512_maskz_cvtph_epu64() { |
26629 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26630 | let r = _mm512_maskz_cvtph_epu64(0b01010101, a); |
26631 | let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); |
26632 | assert_eq_m512i(r, e); |
26633 | } |
26634 | |
26635 | #[simd_test(enable = "avx512fp16" )] |
26636 | unsafe fn test_mm512_cvt_roundph_epu64() { |
26637 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26638 | let r = _mm512_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
26639 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
26640 | assert_eq_m512i(r, e); |
26641 | } |
26642 | |
26643 | #[simd_test(enable = "avx512fp16" )] |
26644 | unsafe fn test_mm512_mask_cvt_roundph_epu64() { |
26645 | let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
26646 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26647 | let r = _mm512_mask_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
26648 | src, 0b01010101, a, |
26649 | ); |
26650 | let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); |
26651 | assert_eq_m512i(r, e); |
26652 | } |
26653 | |
26654 | #[simd_test(enable = "avx512fp16" )] |
26655 | unsafe fn test_mm512_maskz_cvt_roundph_epu64() { |
26656 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26657 | let r = _mm512_maskz_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
26658 | 0b01010101, a, |
26659 | ); |
26660 | let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); |
26661 | assert_eq_m512i(r, e); |
26662 | } |
26663 | |
26664 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26665 | unsafe fn test_mm_cvttph_epi64() { |
26666 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26667 | let r = _mm_cvttph_epi64(a); |
26668 | let e = _mm_set_epi64x(1, 2); |
26669 | assert_eq_m128i(r, e); |
26670 | } |
26671 | |
26672 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26673 | unsafe fn test_mm_mask_cvttph_epi64() { |
26674 | let src = _mm_set_epi64x(3, 4); |
26675 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26676 | let r = _mm_mask_cvttph_epi64(src, 0b01, a); |
26677 | let e = _mm_set_epi64x(3, 2); |
26678 | assert_eq_m128i(r, e); |
26679 | } |
26680 | |
26681 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26682 | unsafe fn test_mm_maskz_cvttph_epi64() { |
26683 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26684 | let r = _mm_maskz_cvttph_epi64(0b01, a); |
26685 | let e = _mm_set_epi64x(0, 2); |
26686 | assert_eq_m128i(r, e); |
26687 | } |
26688 | |
26689 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26690 | unsafe fn test_mm256_cvttph_epi64() { |
26691 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26692 | let r = _mm256_cvttph_epi64(a); |
26693 | let e = _mm256_set_epi64x(1, 2, 3, 4); |
26694 | assert_eq_m256i(r, e); |
26695 | } |
26696 | |
26697 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26698 | unsafe fn test_mm256_mask_cvttph_epi64() { |
26699 | let src = _mm256_set_epi64x(5, 6, 7, 8); |
26700 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26701 | let r = _mm256_mask_cvttph_epi64(src, 0b0101, a); |
26702 | let e = _mm256_set_epi64x(5, 2, 7, 4); |
26703 | assert_eq_m256i(r, e); |
26704 | } |
26705 | |
26706 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26707 | unsafe fn test_mm256_maskz_cvttph_epi64() { |
26708 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26709 | let r = _mm256_maskz_cvttph_epi64(0b0101, a); |
26710 | let e = _mm256_set_epi64x(0, 2, 0, 4); |
26711 | assert_eq_m256i(r, e); |
26712 | } |
26713 | |
26714 | #[simd_test(enable = "avx512fp16" )] |
26715 | unsafe fn test_mm512_cvttph_epi64() { |
26716 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26717 | let r = _mm512_cvttph_epi64(a); |
26718 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
26719 | assert_eq_m512i(r, e); |
26720 | } |
26721 | |
26722 | #[simd_test(enable = "avx512fp16" )] |
26723 | unsafe fn test_mm512_mask_cvttph_epi64() { |
26724 | let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
26725 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26726 | let r = _mm512_mask_cvttph_epi64(src, 0b01010101, a); |
26727 | let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); |
26728 | assert_eq_m512i(r, e); |
26729 | } |
26730 | |
26731 | #[simd_test(enable = "avx512fp16" )] |
26732 | unsafe fn test_mm512_maskz_cvttph_epi64() { |
26733 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26734 | let r = _mm512_maskz_cvttph_epi64(0b01010101, a); |
26735 | let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); |
26736 | assert_eq_m512i(r, e); |
26737 | } |
26738 | |
26739 | #[simd_test(enable = "avx512fp16" )] |
26740 | unsafe fn test_mm512_cvtt_roundph_epi64() { |
26741 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26742 | let r = _mm512_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(a); |
26743 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
26744 | assert_eq_m512i(r, e); |
26745 | } |
26746 | |
26747 | #[simd_test(enable = "avx512fp16" )] |
26748 | unsafe fn test_mm512_mask_cvtt_roundph_epi64() { |
26749 | let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
26750 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26751 | let r = _mm512_mask_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(src, 0b01010101, a); |
26752 | let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); |
26753 | assert_eq_m512i(r, e); |
26754 | } |
26755 | |
26756 | #[simd_test(enable = "avx512fp16" )] |
26757 | unsafe fn test_mm512_maskz_cvtt_roundph_epi64() { |
26758 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26759 | let r = _mm512_maskz_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(0b01010101, a); |
26760 | let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); |
26761 | assert_eq_m512i(r, e); |
26762 | } |
26763 | |
26764 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26765 | unsafe fn test_mm_cvttph_epu64() { |
26766 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26767 | let r = _mm_cvttph_epu64(a); |
26768 | let e = _mm_set_epi64x(1, 2); |
26769 | assert_eq_m128i(r, e); |
26770 | } |
26771 | |
26772 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26773 | unsafe fn test_mm_mask_cvttph_epu64() { |
26774 | let src = _mm_set_epi64x(3, 4); |
26775 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26776 | let r = _mm_mask_cvttph_epu64(src, 0b01, a); |
26777 | let e = _mm_set_epi64x(3, 2); |
26778 | assert_eq_m128i(r, e); |
26779 | } |
26780 | |
26781 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26782 | unsafe fn test_mm_maskz_cvttph_epu64() { |
26783 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26784 | let r = _mm_maskz_cvttph_epu64(0b01, a); |
26785 | let e = _mm_set_epi64x(0, 2); |
26786 | assert_eq_m128i(r, e); |
26787 | } |
26788 | |
26789 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26790 | unsafe fn test_mm256_cvttph_epu64() { |
26791 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26792 | let r = _mm256_cvttph_epu64(a); |
26793 | let e = _mm256_set_epi64x(1, 2, 3, 4); |
26794 | assert_eq_m256i(r, e); |
26795 | } |
26796 | |
26797 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26798 | unsafe fn test_mm256_mask_cvttph_epu64() { |
26799 | let src = _mm256_set_epi64x(5, 6, 7, 8); |
26800 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26801 | let r = _mm256_mask_cvttph_epu64(src, 0b0101, a); |
26802 | let e = _mm256_set_epi64x(5, 2, 7, 4); |
26803 | assert_eq_m256i(r, e); |
26804 | } |
26805 | |
26806 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26807 | unsafe fn test_mm256_maskz_cvttph_epu64() { |
26808 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26809 | let r = _mm256_maskz_cvttph_epu64(0b0101, a); |
26810 | let e = _mm256_set_epi64x(0, 2, 0, 4); |
26811 | assert_eq_m256i(r, e); |
26812 | } |
26813 | |
26814 | #[simd_test(enable = "avx512fp16" )] |
26815 | unsafe fn test_mm512_cvttph_epu64() { |
26816 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26817 | let r = _mm512_cvttph_epu64(a); |
26818 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
26819 | assert_eq_m512i(r, e); |
26820 | } |
26821 | |
26822 | #[simd_test(enable = "avx512fp16" )] |
26823 | unsafe fn test_mm512_mask_cvttph_epu64() { |
26824 | let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
26825 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26826 | let r = _mm512_mask_cvttph_epu64(src, 0b01010101, a); |
26827 | let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); |
26828 | assert_eq_m512i(r, e); |
26829 | } |
26830 | |
26831 | #[simd_test(enable = "avx512fp16" )] |
26832 | unsafe fn test_mm512_maskz_cvttph_epu64() { |
26833 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26834 | let r = _mm512_maskz_cvttph_epu64(0b01010101, a); |
26835 | let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); |
26836 | assert_eq_m512i(r, e); |
26837 | } |
26838 | |
26839 | #[simd_test(enable = "avx512fp16" )] |
26840 | unsafe fn test_mm512_cvtt_roundph_epu64() { |
26841 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26842 | let r = _mm512_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(a); |
26843 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
26844 | assert_eq_m512i(r, e); |
26845 | } |
26846 | |
26847 | #[simd_test(enable = "avx512fp16" )] |
26848 | unsafe fn test_mm512_mask_cvtt_roundph_epu64() { |
26849 | let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
26850 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26851 | let r = _mm512_mask_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(src, 0b01010101, a); |
26852 | let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); |
26853 | assert_eq_m512i(r, e); |
26854 | } |
26855 | |
26856 | #[simd_test(enable = "avx512fp16" )] |
26857 | unsafe fn test_mm512_maskz_cvtt_roundph_epu64() { |
26858 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26859 | let r = _mm512_maskz_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(0b01010101, a); |
26860 | let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); |
26861 | assert_eq_m512i(r, e); |
26862 | } |
26863 | |
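// `cvtxph_ps` widens half precision to single precision. The `x` in the name distinguishes
// these `__m128h`-based AVX512-FP16 intrinsics from the earlier F16C `_mm_cvtph_ps`, which
// takes the raw 16-bit patterns in a `__m128i`.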
26864 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26865 | unsafe fn test_mm_cvtxph_ps() { |
26866 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26867 | let r = _mm_cvtxph_ps(a); |
26868 | let e = _mm_set_ps(1.0, 2.0, 3.0, 4.0); |
26869 | assert_eq_m128(r, e); |
26870 | } |
26871 | |
26872 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26873 | unsafe fn test_mm_mask_cvtxph_ps() { |
26874 | let src = _mm_set_ps(10.0, 11.0, 12.0, 13.0); |
26875 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26876 | let r = _mm_mask_cvtxph_ps(src, 0b0101, a); |
26877 | let e = _mm_set_ps(10.0, 2.0, 12.0, 4.0); |
26878 | assert_eq_m128(r, e); |
26879 | } |
26880 | |
26881 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26882 | unsafe fn test_mm_maskz_cvtxph_ps() { |
26883 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26884 | let r = _mm_maskz_cvtxph_ps(0b0101, a); |
26885 | let e = _mm_set_ps(0.0, 2.0, 0.0, 4.0); |
26886 | assert_eq_m128(r, e); |
26887 | } |
26888 | |
26889 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26890 | unsafe fn test_mm256_cvtxph_ps() { |
26891 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26892 | let r = _mm256_cvtxph_ps(a); |
26893 | let e = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26894 | assert_eq_m256(r, e); |
26895 | } |
26896 | |
26897 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26898 | unsafe fn test_mm256_mask_cvtxph_ps() { |
26899 | let src = _mm256_set_ps(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0); |
26900 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26901 | let r = _mm256_mask_cvtxph_ps(src, 0b01010101, a); |
26902 | let e = _mm256_set_ps(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0); |
26903 | assert_eq_m256(r, e); |
26904 | } |
26905 | |
26906 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26907 | unsafe fn test_mm256_maskz_cvtxph_ps() { |
26908 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26909 | let r = _mm256_maskz_cvtxph_ps(0b01010101, a); |
26910 | let e = _mm256_set_ps(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0); |
26911 | assert_eq_m256(r, e); |
26912 | } |
26913 | |
26914 | #[simd_test(enable = "avx512fp16" )] |
26915 | unsafe fn test_mm512_cvtxph_ps() { |
26916 | let a = _mm256_set_ph( |
26917 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26918 | ); |
26919 | let r = _mm512_cvtxph_ps(a); |
26920 | let e = _mm512_set_ps( |
26921 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26922 | ); |
26923 | assert_eq_m512(r, e); |
26924 | } |
26925 | |
26926 | #[simd_test(enable = "avx512fp16" )] |
26927 | unsafe fn test_mm512_mask_cvtxph_ps() { |
26928 | let src = _mm512_set_ps( |
26929 | 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, |
26930 | 24.0, 25.0, |
26931 | ); |
26932 | let a = _mm256_set_ph( |
26933 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26934 | ); |
26935 | let r = _mm512_mask_cvtxph_ps(src, 0b0101010101010101, a); |
26936 | let e = _mm512_set_ps( |
26937 | 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0, |
26938 | 16.0, |
26939 | ); |
26940 | assert_eq_m512(r, e); |
26941 | } |
26942 | |
26943 | #[simd_test(enable = "avx512fp16" )] |
26944 | unsafe fn test_mm512_maskz_cvtxph_ps() { |
26945 | let a = _mm256_set_ph( |
26946 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26947 | ); |
26948 | let r = _mm512_maskz_cvtxph_ps(0b0101010101010101, a); |
26949 | let e = _mm512_set_ps( |
26950 | 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, |
26951 | ); |
26952 | assert_eq_m512(r, e); |
26953 | } |
26954 | |
26955 | #[simd_test(enable = "avx512fp16" )] |
26956 | unsafe fn test_mm512_cvtx_roundph_ps() { |
26957 | let a = _mm256_set_ph( |
26958 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26959 | ); |
26960 | let r = _mm512_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(a); |
26961 | let e = _mm512_set_ps( |
26962 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26963 | ); |
26964 | assert_eq_m512(r, e); |
26965 | } |
26966 | |
26967 | #[simd_test(enable = "avx512fp16" )] |
26968 | unsafe fn test_mm512_mask_cvtx_roundph_ps() { |
26969 | let src = _mm512_set_ps( |
26970 | 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, |
26971 | 24.0, 25.0, |
26972 | ); |
26973 | let a = _mm256_set_ph( |
26974 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26975 | ); |
26976 | let r = _mm512_mask_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a); |
26977 | let e = _mm512_set_ps( |
26978 | 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0, |
26979 | 16.0, |
26980 | ); |
26981 | assert_eq_m512(r, e); |
26982 | } |
26983 | |
26984 | #[simd_test(enable = "avx512fp16" )] |
26985 | unsafe fn test_mm512_maskz_cvtx_roundph_ps() { |
26986 | let a = _mm256_set_ph( |
26987 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26988 | ); |
26989 | let r = _mm512_maskz_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(0b0101010101010101, a); |
26990 | let e = _mm512_set_ps( |
26991 | 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, |
26992 | ); |
26993 | assert_eq_m512(r, e); |
26994 | } |
26995 | |
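// The scalar `cvtsh_ss` family converts the lowest f16 lane of `b` into lane 0 of the result
// and copies the upper three f32 lanes from `a`; in the masked forms lane 0 falls back to
// `src` (or zero) when the single mask bit is clear.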
26996 | #[simd_test(enable = "avx512fp16" )] |
26997 | unsafe fn test_mm_cvtsh_ss() { |
26998 | let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0); |
26999 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27000 | let r = _mm_cvtsh_ss(a, b); |
27001 | let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0); |
27002 | assert_eq_m128(r, e); |
27003 | } |
27004 | |
27005 | #[simd_test(enable = "avx512fp16" )] |
27006 | unsafe fn test_mm_mask_cvtsh_ss() { |
27007 | let src = _mm_setr_ps(3.0, 11.0, 12.0, 13.0); |
27008 | let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0); |
27009 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27010 | let r = _mm_mask_cvtsh_ss(src, 0, a, b); |
27011 | let e = _mm_setr_ps(3.0, 20.0, 21.0, 22.0); |
27012 | assert_eq_m128(r, e); |
27013 | let r = _mm_mask_cvtsh_ss(src, 1, a, b); |
27014 | let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0); |
27015 | assert_eq_m128(r, e); |
27016 | } |
27017 | |
27018 | #[simd_test(enable = "avx512fp16" )] |
27019 | unsafe fn test_mm_maskz_cvtsh_ss() { |
27020 | let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0); |
27021 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27022 | let r = _mm_maskz_cvtsh_ss(0, a, b); |
27023 | let e = _mm_setr_ps(0.0, 20.0, 21.0, 22.0); |
27024 | assert_eq_m128(r, e); |
27025 | let r = _mm_maskz_cvtsh_ss(1, a, b); |
27026 | let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0); |
27027 | assert_eq_m128(r, e); |
27028 | } |
27029 | |
27030 | #[simd_test(enable = "avx512fp16" )] |
27031 | unsafe fn test_mm_cvt_roundsh_ss() { |
27032 | let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0); |
27033 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27034 | let r = _mm_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(a, b); |
27035 | let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0); |
27036 | assert_eq_m128(r, e); |
27037 | } |
27038 | |
27039 | #[simd_test(enable = "avx512fp16" )] |
27040 | unsafe fn test_mm_mask_cvt_roundsh_ss() { |
27041 | let src = _mm_setr_ps(3.0, 11.0, 12.0, 13.0); |
27042 | let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0); |
27043 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27044 | let r = _mm_mask_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(src, 0, a, b); |
27045 | let e = _mm_setr_ps(3.0, 20.0, 21.0, 22.0); |
27046 | assert_eq_m128(r, e); |
27047 | let r = _mm_mask_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(src, 1, a, b); |
27048 | let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0); |
27049 | assert_eq_m128(r, e); |
27050 | } |
27051 | |
27052 | #[simd_test(enable = "avx512fp16" )] |
27053 | unsafe fn test_mm_maskz_cvt_roundsh_ss() { |
27054 | let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0); |
27055 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27056 | let r = _mm_maskz_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(0, a, b); |
27057 | let e = _mm_setr_ps(0.0, 20.0, 21.0, 22.0); |
27058 | assert_eq_m128(r, e); |
27059 | let r = _mm_maskz_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(1, a, b); |
27060 | let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0); |
27061 | assert_eq_m128(r, e); |
27062 | } |
27063 | |
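// As with the 64-bit integer conversions above, only the low 2, 4, or 8 half-precision lanes
// widen to f64 here; the unused input lanes are set to 0.0.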
27064 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
27065 | unsafe fn test_mm_cvtph_pd() { |
27066 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
27067 | let r = _mm_cvtph_pd(a); |
27068 | let e = _mm_set_pd(1.0, 2.0); |
27069 | assert_eq_m128d(r, e); |
27070 | } |
27071 | |
27072 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
27073 | unsafe fn test_mm_mask_cvtph_pd() { |
27074 | let src = _mm_set_pd(10.0, 11.0); |
27075 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
27076 | let r = _mm_mask_cvtph_pd(src, 0b01, a); |
27077 | let e = _mm_set_pd(10.0, 2.0); |
27078 | assert_eq_m128d(r, e); |
27079 | } |
27080 | |
27081 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
27082 | unsafe fn test_mm_maskz_cvtph_pd() { |
27083 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
27084 | let r = _mm_maskz_cvtph_pd(0b01, a); |
27085 | let e = _mm_set_pd(0.0, 2.0); |
27086 | assert_eq_m128d(r, e); |
27087 | } |
27088 | |
27089 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
27090 | unsafe fn test_mm256_cvtph_pd() { |
27091 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
27092 | let r = _mm256_cvtph_pd(a); |
27093 | let e = _mm256_set_pd(1.0, 2.0, 3.0, 4.0); |
27094 | assert_eq_m256d(r, e); |
27095 | } |
27096 | |
27097 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
27098 | unsafe fn test_mm256_mask_cvtph_pd() { |
27099 | let src = _mm256_set_pd(10.0, 11.0, 12.0, 13.0); |
27100 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
27101 | let r = _mm256_mask_cvtph_pd(src, 0b0101, a); |
27102 | let e = _mm256_set_pd(10.0, 2.0, 12.0, 4.0); |
27103 | assert_eq_m256d(r, e); |
27104 | } |
27105 | |
27106 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
27107 | unsafe fn test_mm256_maskz_cvtph_pd() { |
27108 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
27109 | let r = _mm256_maskz_cvtph_pd(0b0101, a); |
27110 | let e = _mm256_set_pd(0.0, 2.0, 0.0, 4.0); |
27111 | assert_eq_m256d(r, e); |
27112 | } |
27113 | |
27114 | #[simd_test(enable = "avx512fp16" )] |
27115 | unsafe fn test_mm512_cvtph_pd() { |
27116 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
27117 | let r = _mm512_cvtph_pd(a); |
27118 | let e = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
27119 | assert_eq_m512d(r, e); |
27120 | } |
27121 | |
27122 | #[simd_test(enable = "avx512fp16" )] |
27123 | unsafe fn test_mm512_mask_cvtph_pd() { |
27124 | let src = _mm512_set_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0); |
27125 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
27126 | let r = _mm512_mask_cvtph_pd(src, 0b01010101, a); |
27127 | let e = _mm512_set_pd(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0); |
27128 | assert_eq_m512d(r, e); |
27129 | } |
27130 | |
27131 | #[simd_test(enable = "avx512fp16" )] |
27132 | unsafe fn test_mm512_maskz_cvtph_pd() { |
27133 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
27134 | let r = _mm512_maskz_cvtph_pd(0b01010101, a); |
27135 | let e = _mm512_set_pd(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0); |
27136 | assert_eq_m512d(r, e); |
27137 | } |
27138 | |
27139 | #[simd_test(enable = "avx512fp16" )] |
27140 | unsafe fn test_mm512_cvt_roundph_pd() { |
27141 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
27142 | let r = _mm512_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(a); |
27143 | let e = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
27144 | assert_eq_m512d(r, e); |
27145 | } |
27146 | |
27147 | #[simd_test(enable = "avx512fp16" )] |
27148 | unsafe fn test_mm512_mask_cvt_roundph_pd() { |
27149 | let src = _mm512_set_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0); |
27150 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
27151 | let r = _mm512_mask_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(src, 0b01010101, a); |
27152 | let e = _mm512_set_pd(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0); |
27153 | assert_eq_m512d(r, e); |
27154 | } |
27155 | |
27156 | #[simd_test(enable = "avx512fp16" )] |
27157 | unsafe fn test_mm512_maskz_cvt_roundph_pd() { |
27158 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
27159 | let r = _mm512_maskz_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(0b01010101, a); |
27160 | let e = _mm512_set_pd(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0); |
27161 | assert_eq_m512d(r, e); |
27162 | } |
27163 | |
27164 | #[simd_test(enable = "avx512fp16" )] |
27165 | unsafe fn test_mm_cvtsh_sd() { |
27166 | let a = _mm_setr_pd(2.0, 20.0); |
27167 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27168 | let r = _mm_cvtsh_sd(a, b); |
27169 | let e = _mm_setr_pd(1.0, 20.0); |
27170 | assert_eq_m128d(r, e); |
27171 | } |
27172 | |
27173 | #[simd_test(enable = "avx512fp16" )] |
27174 | unsafe fn test_mm_mask_cvtsh_sd() { |
27175 | let src = _mm_setr_pd(3.0, 11.0); |
27176 | let a = _mm_setr_pd(2.0, 20.0); |
27177 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27178 | let r = _mm_mask_cvtsh_sd(src, 0, a, b); |
27179 | let e = _mm_setr_pd(3.0, 20.0); |
27180 | assert_eq_m128d(r, e); |
27181 | let r = _mm_mask_cvtsh_sd(src, 1, a, b); |
27182 | let e = _mm_setr_pd(1.0, 20.0); |
27183 | assert_eq_m128d(r, e); |
27184 | } |
27185 | |
27186 | #[simd_test(enable = "avx512fp16" )] |
27187 | unsafe fn test_mm_maskz_cvtsh_sd() { |
27188 | let a = _mm_setr_pd(2.0, 20.0); |
27189 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27190 | let r = _mm_maskz_cvtsh_sd(0, a, b); |
27191 | let e = _mm_setr_pd(0.0, 20.0); |
27192 | assert_eq_m128d(r, e); |
27193 | let r = _mm_maskz_cvtsh_sd(1, a, b); |
27194 | let e = _mm_setr_pd(1.0, 20.0); |
27195 | assert_eq_m128d(r, e); |
27196 | } |
27197 | |
27198 | #[simd_test(enable = "avx512fp16" )] |
27199 | unsafe fn test_mm_cvt_roundsh_sd() { |
27200 | let a = _mm_setr_pd(2.0, 20.0); |
27201 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27202 | let r = _mm_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(a, b); |
27203 | let e = _mm_setr_pd(1.0, 20.0); |
27204 | assert_eq_m128d(r, e); |
27205 | } |
27206 | |
27207 | #[simd_test(enable = "avx512fp16" )] |
27208 | unsafe fn test_mm_mask_cvt_roundsh_sd() { |
27209 | let src = _mm_setr_pd(3.0, 11.0); |
27210 | let a = _mm_setr_pd(2.0, 20.0); |
27211 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27212 | let r = _mm_mask_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(src, 0, a, b); |
27213 | let e = _mm_setr_pd(3.0, 20.0); |
27214 | assert_eq_m128d(r, e); |
27215 | let r = _mm_mask_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(src, 1, a, b); |
27216 | let e = _mm_setr_pd(1.0, 20.0); |
27217 | assert_eq_m128d(r, e); |
27218 | } |
27219 | |
27220 | #[simd_test(enable = "avx512fp16" )] |
27221 | unsafe fn test_mm_maskz_cvt_roundsh_sd() { |
27222 | let a = _mm_setr_pd(2.0, 20.0); |
27223 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27224 | let r = _mm_maskz_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(0, a, b); |
27225 | let e = _mm_setr_pd(0.0, 20.0); |
27226 | assert_eq_m128d(r, e); |
27227 | let r = _mm_maskz_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(1, a, b); |
27228 | let e = _mm_setr_pd(1.0, 20.0); |
27229 | assert_eq_m128d(r, e); |
27230 | } |
27231 | |
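// `_mm_cvtsh_h` and the wider variants simply return element 0 of the vector as an `f16`.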
27232 | #[simd_test(enable = "avx512fp16" )] |
27233 | unsafe fn test_mm_cvtsh_h() { |
27234 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 42.0, 5.0, 6.0, 7.0, 8.0); |
27235 | let r = _mm_cvtsh_h(a); |
27236 | assert_eq!(r, 1.0); |
27237 | } |
27238 | |
27239 | #[simd_test(enable = "avx512fp16" )] |
27240 | unsafe fn test_mm256_cvtsh_h() { |
27241 | let a = _mm256_setr_ph( |
27242 | 1.0, 2.0, 3.0, 42.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
27243 | ); |
27244 | let r = _mm256_cvtsh_h(a); |
27245 | assert_eq!(r, 1.0); |
27246 | } |
27247 | |
27248 | #[simd_test(enable = "avx512fp16" )] |
27249 | unsafe fn test_mm512_cvtsh_h() { |
27250 | let a = _mm512_setr_ph( |
27251 | 1.0, 2.0, 3.0, 42.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
27252 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
27253 | 31.0, 32.0, |
27254 | ); |
27255 | let r = _mm512_cvtsh_h(a); |
27256 | assert_eq!(r, 1.0); |
27257 | } |
27258 | |
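// These last two helpers move the lowest 16-bit integer lane between a scalar and a `__m128i`;
// they are tested here because they are gated on the same `avx512fp16` feature in this module.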
27259 | #[simd_test(enable = "avx512fp16" )] |
27260 | unsafe fn test_mm_cvtsi128_si16() { |
27261 | let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
27262 | let r = _mm_cvtsi128_si16(a); |
27263 | assert_eq!(r, 1); |
27264 | } |
27265 | |
27266 | #[simd_test(enable = "avx512fp16" )] |
27267 | unsafe fn test_mm_cvtsi16_si128() { |
27268 | let a = 1; |
27269 | let r = _mm_cvtsi16_si128(a); |
27270 | let e = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0); |
27271 | assert_eq_m128i(r, e); |
27272 | } |
27273 | } |
27274 | |