1 | //! [F16C intrinsics]. |
2 | //! |
3 | //! [F16C intrinsics]: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=fp16&expand=1769 |
4 | |
5 | use crate::core_arch::{simd::*, x86::*}; |
6 | |
7 | #[cfg (test)] |
8 | use stdarch_test::assert_instr; |
9 | |
10 | #[allow (improper_ctypes)] |
11 | extern "unadjusted" { |
12 | #[link_name = "llvm.x86.vcvtph2ps.128" ] |
13 | fn llvm_vcvtph2ps_128(a: i16x8) -> f32x4; |
14 | #[link_name = "llvm.x86.vcvtph2ps.256" ] |
15 | fn llvm_vcvtph2ps_256(a: i16x8) -> f32x8; |
16 | #[link_name = "llvm.x86.vcvtps2ph.128" ] |
17 | fn llvm_vcvtps2ph_128(a: f32x4, rounding: i32) -> i16x8; |
18 | #[link_name = "llvm.x86.vcvtps2ph.256" ] |
19 | fn llvm_vcvtps2ph_256(a: f32x8, rounding: i32) -> i16x8; |
20 | } |
21 | |
22 | /// Converts the 4 x 16-bit half-precision float values in the lowest 64-bit of |
23 | /// the 128-bit vector `a` into 4 x 32-bit float values stored in a 128-bit wide |
24 | /// vector. |
25 | #[inline ] |
26 | #[target_feature (enable = "f16c" )] |
27 | #[cfg_attr (test, assert_instr("vcvtph2ps" ))] |
28 | #[stable (feature = "x86_f16c_intrinsics" , since = "1.68.0" )] |
29 | pub unsafe fn _mm_cvtph_ps(a: __m128i) -> __m128 { |
30 | transmute(src:llvm_vcvtph2ps_128(transmute(src:a))) |
31 | } |
32 | |
33 | /// Converts the 8 x 16-bit half-precision float values in the 128-bit vector |
34 | /// `a` into 8 x 32-bit float values stored in a 256-bit wide vector. |
35 | #[inline ] |
36 | #[target_feature (enable = "f16c" )] |
37 | #[cfg_attr (test, assert_instr("vcvtph2ps" ))] |
38 | #[stable (feature = "x86_f16c_intrinsics" , since = "1.68.0" )] |
39 | pub unsafe fn _mm256_cvtph_ps(a: __m128i) -> __m256 { |
40 | transmute(src:llvm_vcvtph2ps_256(transmute(src:a))) |
41 | } |
42 | |
43 | /// Converts the 4 x 32-bit float values in the 128-bit vector `a` into 4 x |
44 | /// 16-bit half-precision float values stored in the lowest 64-bit of a 128-bit |
45 | /// vector. |
46 | /// |
47 | /// Rounding is done according to the `imm_rounding` parameter, which can be one of: |
48 | /// |
49 | /// * `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC`: round to nearest and suppress exceptions, |
50 | /// * `_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC`: round down and suppress exceptions, |
51 | /// * `_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC`: round up and suppress exceptions, |
52 | /// * `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC`: truncate and suppress exceptions, |
53 | /// * `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]. |
54 | #[inline ] |
55 | #[target_feature (enable = "f16c" )] |
56 | #[cfg_attr (test, assert_instr("vcvtps2ph" , IMM_ROUNDING = 0))] |
57 | #[rustc_legacy_const_generics (1)] |
58 | #[stable (feature = "x86_f16c_intrinsics" , since = "1.68.0" )] |
59 | pub unsafe fn _mm_cvtps_ph<const IMM_ROUNDING: i32>(a: __m128) -> __m128i { |
60 | static_assert_uimm_bits!(IMM_ROUNDING, 3); |
61 | let a: f32x4 = a.as_f32x4(); |
62 | let r: i16x8 = llvm_vcvtps2ph_128(a, IMM_ROUNDING); |
63 | transmute(src:r) |
64 | } |
65 | |
66 | /// Converts the 8 x 32-bit float values in the 256-bit vector `a` into 8 x |
67 | /// 16-bit half-precision float values stored in a 128-bit wide vector. |
68 | /// |
69 | /// Rounding is done according to the `imm_rounding` parameter, which can be one of: |
70 | /// |
71 | /// * `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC`: round to nearest and suppress exceptions, |
72 | /// * `_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC`: round down and suppress exceptions, |
73 | /// * `_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC`: round up and suppress exceptions, |
74 | /// * `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC`: truncate and suppress exceptions, |
75 | /// * `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]. |
76 | #[inline ] |
77 | #[target_feature (enable = "f16c" )] |
78 | #[cfg_attr (test, assert_instr("vcvtps2ph" , IMM_ROUNDING = 0))] |
79 | #[rustc_legacy_const_generics (1)] |
80 | #[stable (feature = "x86_f16c_intrinsics" , since = "1.68.0" )] |
81 | pub unsafe fn _mm256_cvtps_ph<const IMM_ROUNDING: i32>(a: __m256) -> __m128i { |
82 | static_assert_uimm_bits!(IMM_ROUNDING, 3); |
83 | let a: f32x8 = a.as_f32x8(); |
84 | let r: i16x8 = llvm_vcvtps2ph_256(a, IMM_ROUNDING); |
85 | transmute(src:r) |
86 | } |
87 | |
#[cfg(test)]
mod tests {
    use crate::{core_arch::x86::*, mem::transmute};
    use stdarch_test::simd_test;

    // Round-trips four `f32` values through half precision (rounding
    // immediate 0) and checks that they come back unchanged.
    #[simd_test(enable = "f16c")]
    unsafe fn test_mm_cvtph_ps() {
        let expected: [f32; 4] = [1.0, 2.0, 3.0, 4.0];
        let singles: __m128 = transmute(expected);
        let packed: __m128i = _mm_cvtps_ph::<0>(singles);
        let widened: [f32; 4] = transmute(_mm_cvtph_ps(packed));
        assert_eq!(widened, expected);
    }

    // Same round trip as above for the eight-lane, 256-bit variants.
    #[simd_test(enable = "f16c")]
    unsafe fn test_mm256_cvtph_ps() {
        let expected: [f32; 8] = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let singles: __m256 = transmute(expected);
        let packed: __m128i = _mm256_cvtps_ph::<0>(singles);
        let widened: [f32; 8] = transmute(_mm256_cvtph_ps(packed));
        assert_eq!(widened, expected);
    }
}
113 | |