1 | //! [F16C intrinsics]. |
2 | //! |
3 | //! [F16C intrinsics]: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=fp16&expand=1769 |
4 | |
5 | use crate::core_arch::{simd::*, x86::*}; |
6 | |
7 | #[cfg (test)] |
8 | use stdarch_test::assert_instr; |
9 | |
10 | #[allow (improper_ctypes)] |
11 | unsafe extern "unadjusted" { |
12 | #[link_name = "llvm.x86.vcvtph2ps.128" ] |
13 | unsafefn llvm_vcvtph2ps_128(a: i16x8) -> f32x4; |
14 | #[link_name = "llvm.x86.vcvtph2ps.256" ] |
15 | unsafefn llvm_vcvtph2ps_256(a: i16x8) -> f32x8; |
16 | #[link_name = "llvm.x86.vcvtps2ph.128" ] |
17 | unsafefn llvm_vcvtps2ph_128(a: f32x4, rounding: i32) -> i16x8; |
18 | #[link_name = "llvm.x86.vcvtps2ph.256" ] |
19 | unsafefn llvm_vcvtps2ph_256(a: f32x8, rounding: i32) -> i16x8; |
20 | } |
21 | |
22 | /// Converts the 4 x 16-bit half-precision float values in the lowest 64-bit of |
23 | /// the 128-bit vector `a` into 4 x 32-bit float values stored in a 128-bit wide |
24 | /// vector. |
25 | /// |
26 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_ps) |
27 | #[inline ] |
28 | #[target_feature (enable = "f16c" )] |
29 | #[cfg_attr (test, assert_instr("vcvtph2ps" ))] |
30 | #[stable (feature = "x86_f16c_intrinsics" , since = "1.68.0" )] |
31 | pub fn _mm_cvtph_ps(a: __m128i) -> __m128 { |
32 | unsafe { transmute(src:llvm_vcvtph2ps_128(transmute(src:a))) } |
33 | } |
34 | |
35 | /// Converts the 8 x 16-bit half-precision float values in the 128-bit vector |
36 | /// `a` into 8 x 32-bit float values stored in a 256-bit wide vector. |
37 | /// |
38 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_ps) |
39 | #[inline ] |
40 | #[target_feature (enable = "f16c" )] |
41 | #[cfg_attr (test, assert_instr("vcvtph2ps" ))] |
42 | #[stable (feature = "x86_f16c_intrinsics" , since = "1.68.0" )] |
43 | pub fn _mm256_cvtph_ps(a: __m128i) -> __m256 { |
44 | unsafe { transmute(src:llvm_vcvtph2ps_256(transmute(src:a))) } |
45 | } |
46 | |
47 | /// Converts the 4 x 32-bit float values in the 128-bit vector `a` into 4 x |
48 | /// 16-bit half-precision float values stored in the lowest 64-bit of a 128-bit |
49 | /// vector. |
50 | /// |
51 | /// Rounding is done according to the `imm_rounding` parameter, which can be one of: |
52 | /// |
53 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
54 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
55 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
56 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
57 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
58 | /// |
59 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_ph) |
60 | #[inline ] |
61 | #[target_feature (enable = "f16c" )] |
62 | #[cfg_attr (test, assert_instr("vcvtps2ph" , IMM_ROUNDING = 0))] |
63 | #[rustc_legacy_const_generics (1)] |
64 | #[stable (feature = "x86_f16c_intrinsics" , since = "1.68.0" )] |
65 | pub fn _mm_cvtps_ph<const IMM_ROUNDING: i32>(a: __m128) -> __m128i { |
66 | static_assert_uimm_bits!(IMM_ROUNDING, 3); |
67 | unsafe { |
68 | let a: f32x4 = a.as_f32x4(); |
69 | let r: i16x8 = llvm_vcvtps2ph_128(a, IMM_ROUNDING); |
70 | transmute(src:r) |
71 | } |
72 | } |
73 | |
74 | /// Converts the 8 x 32-bit float values in the 256-bit vector `a` into 8 x |
75 | /// 16-bit half-precision float values stored in a 128-bit wide vector. |
76 | /// |
77 | /// Rounding is done according to the `imm_rounding` parameter, which can be one of: |
78 | /// |
79 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
80 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
81 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
82 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
83 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
84 | /// |
85 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtps_ph) |
86 | #[inline ] |
87 | #[target_feature (enable = "f16c" )] |
88 | #[cfg_attr (test, assert_instr("vcvtps2ph" , IMM_ROUNDING = 0))] |
89 | #[rustc_legacy_const_generics (1)] |
90 | #[stable (feature = "x86_f16c_intrinsics" , since = "1.68.0" )] |
91 | pub fn _mm256_cvtps_ph<const IMM_ROUNDING: i32>(a: __m256) -> __m128i { |
92 | static_assert_uimm_bits!(IMM_ROUNDING, 3); |
93 | unsafe { |
94 | let a: f32x8 = a.as_f32x8(); |
95 | let r: i16x8 = llvm_vcvtps2ph_256(a, IMM_ROUNDING); |
96 | transmute(src:r) |
97 | } |
98 | } |
99 | |
#[cfg(test)]
mod tests {
    use crate::{core_arch::x86::*, mem::transmute};
    use stdarch_test::simd_test;

    // IEEE 754 binary16 bit patterns for the integers 1.0 through 8.0.
    const F16_ONE: i16 = 0x3c00;
    const F16_TWO: i16 = 0x4000;
    const F16_THREE: i16 = 0x4200;
    const F16_FOUR: i16 = 0x4400;
    const F16_FIVE: i16 = 0x4500;
    const F16_SIX: i16 = 0x4600;
    const F16_SEVEN: i16 = 0x4700;
    const F16_EIGHT: i16 = 0x4800;

    /// Half -> single, 128-bit: four packed halves widen to 1.0..=4.0.
    #[simd_test(enable = "f16c")]
    unsafe fn test_mm_cvtph_ps() {
        let halves = _mm_set_epi16(0, 0, 0, 0, F16_ONE, F16_TWO, F16_THREE, F16_FOUR);
        let expected = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
        let actual = _mm_cvtph_ps(halves);
        assert_eq_m128(actual, expected);
    }

    /// Half -> single, 256-bit: eight packed halves widen to 1.0..=8.0.
    #[simd_test(enable = "f16c")]
    unsafe fn test_mm256_cvtph_ps() {
        let halves = _mm_set_epi16(
            F16_ONE, F16_TWO, F16_THREE, F16_FOUR, F16_FIVE, F16_SIX, F16_SEVEN, F16_EIGHT,
        );
        let expected = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let actual = _mm256_cvtph_ps(halves);
        assert_eq_m256(actual, expected);
    }

    /// Single -> half, 128-bit, using the current MXCSR rounding direction.
    #[simd_test(enable = "f16c")]
    unsafe fn test_mm_cvtps_ph() {
        let singles = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
        let expected = _mm_set_epi16(0, 0, 0, 0, F16_ONE, F16_TWO, F16_THREE, F16_FOUR);
        let actual = _mm_cvtps_ph::<_MM_FROUND_CUR_DIRECTION>(singles);
        assert_eq_m128i(actual, expected);
    }

    /// Single -> half, 256-bit, using the current MXCSR rounding direction.
    #[simd_test(enable = "f16c")]
    unsafe fn test_mm256_cvtps_ph() {
        let singles = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let expected = _mm_set_epi16(
            F16_ONE, F16_TWO, F16_THREE, F16_FOUR, F16_FIVE, F16_SIX, F16_SEVEN, F16_EIGHT,
        );
        let actual = _mm256_cvtps_ph::<_MM_FROUND_CUR_DIRECTION>(singles);
        assert_eq_m128i(actual, expected);
    }
}
150 | |