//! [F16C intrinsics].
//!
//! [F16C intrinsics]: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=fp16&expand=1769

use crate::core_arch::{simd::*, x86::*};
use crate::intrinsics::simd::*;

#[cfg(test)]
use stdarch_test::assert_instr;

#[allow(improper_ctypes)]
unsafe extern "unadjusted" {
    #[link_name = "llvm.x86.vcvtps2ph.128"]
    unsafe fn llvm_vcvtps2ph_128(a: f32x4, rounding: i32) -> i16x8;
    #[link_name = "llvm.x86.vcvtps2ph.256"]
    unsafe fn llvm_vcvtps2ph_256(a: f32x8, rounding: i32) -> i16x8;
}

/// Converts the 4 x 16-bit half-precision float values in the lowest 64 bits of
/// the 128-bit vector `a` into 4 x 32-bit float values stored in a 128-bit wide
/// vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_ps)
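///
/// # Examples
///
/// An illustrative sketch, not taken from Intel's documentation: the helper
/// `widen_low_halves` exists only for this example, which assumes `f16c` is
/// detected at runtime with `is_x86_feature_detected!`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// // Example-only helper; it must only run on CPUs with `f16c`.
/// #[target_feature(enable = "f16c")]
/// unsafe fn widen_low_halves() -> __m128 {
///     // 0x3C00 and 0x4000 encode 1.0 and 2.0 in IEEE binary16; only the four
///     // lowest 16-bit lanes (the right-most arguments) are converted.
///     let halves = _mm_set_epi16(0, 0, 0, 0, 0x4000, 0x4000, 0x3C00, 0x3C00);
///     // Result lanes, lowest first: 1.0, 1.0, 2.0, 2.0.
///     _mm_cvtph_ps(halves)
/// }
///
/// # fn main() {
/// if is_x86_feature_detected!("f16c") {
///     // SAFETY: `f16c` was detected at runtime.
///     let _singles = unsafe { widen_low_halves() };
/// }
/// # }
/// ```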
#[inline]
#[target_feature(enable = "f16c")]
#[cfg_attr(test, assert_instr("vcvtph2ps"))]
#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtph_ps(a: __m128i) -> __m128 {
    unsafe {
        let a: f16x8 = transmute(a);
        let a: f16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        simd_cast(a)
    }
}

/// Converts the 8 x 16-bit half-precision float values in the 128-bit vector
/// `a` into 8 x 32-bit float values stored in a 256-bit wide vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_ps)
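///
/// # Examples
///
/// An illustrative sketch, not taken from Intel's documentation: the helper
/// `widen_all_eight` exists only for this example, which assumes `f16c` is
/// detected at runtime.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// // Example-only helper; it must only run on CPUs with `f16c`.
/// #[target_feature(enable = "f16c")]
/// unsafe fn widen_all_eight() -> __m256 {
///     // Eight copies of 1.0 encoded as IEEE binary16 (0x3C00).
///     let halves = _mm_set1_epi16(0x3C00);
///     // All eight 16-bit lanes are widened, giving eight f32 lanes of 1.0.
///     _mm256_cvtph_ps(halves)
/// }
///
/// # fn main() {
/// if is_x86_feature_detected!("f16c") {
///     // SAFETY: `f16c` was detected at runtime.
///     let _singles = unsafe { widen_all_eight() };
/// }
/// # }
/// ```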
#[inline]
#[target_feature(enable = "f16c")]
#[cfg_attr(test, assert_instr("vcvtph2ps"))]
#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtph_ps(a: __m128i) -> __m256 {
    unsafe {
        let a: f16x8 = transmute(a);
        simd_cast(a)
    }
}

/// Converts the 4 x 32-bit float values in the 128-bit vector `a` into 4 x
/// 16-bit half-precision float values stored in the lowest 64 bits of a 128-bit
/// vector.
///
/// Rounding is done according to the `IMM_ROUNDING` parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`]: round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]: round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]: round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]: truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`]: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_ph)
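///
/// # Examples
///
/// An illustrative sketch, not taken from Intel's documentation: the helper
/// `narrow_to_halves` exists only for this example, which assumes `f16c` is
/// detected at runtime and uses `_MM_FROUND_CUR_DIRECTION` as the rounding mode.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// // Example-only helper; it must only run on CPUs with `f16c`.
/// #[target_feature(enable = "f16c")]
/// unsafe fn narrow_to_halves() -> __m128i {
///     let singles = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
///     // Round with the current MXCSR mode; the four binary16 results land in
///     // the low 64 bits of the returned vector (1.0 is encoded as 0x3C00).
///     _mm_cvtps_ph::<_MM_FROUND_CUR_DIRECTION>(singles)
/// }
///
/// # fn main() {
/// if is_x86_feature_detected!("f16c") {
///     // SAFETY: `f16c` was detected at runtime.
///     let _halves = unsafe { narrow_to_halves() };
/// }
/// # }
/// ```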
#[inline]
#[target_feature(enable = "f16c")]
#[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
pub fn _mm_cvtps_ph<const IMM_ROUNDING: i32>(a: __m128) -> __m128i {
    static_assert_uimm_bits!(IMM_ROUNDING, 3);
    unsafe {
        let a = a.as_f32x4();
        let r = llvm_vcvtps2ph_128(a, IMM_ROUNDING);
        transmute(r)
    }
}

/// Converts the 8 x 32-bit float values in the 256-bit vector `a` into 8 x
/// 16-bit half-precision float values stored in a 128-bit wide vector.
///
/// Rounding is done according to the `IMM_ROUNDING` parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`]: round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]: round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]: round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]: truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`]: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtps_ph)
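///
/// # Examples
///
/// An illustrative sketch, not taken from Intel's documentation: the helper
/// `narrow_eight` exists only for this example, which assumes `f16c` is
/// detected at runtime and uses `_MM_FROUND_CUR_DIRECTION` as the rounding mode.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// // Example-only helper; it must only run on CPUs with `f16c`.
/// #[target_feature(enable = "f16c")]
/// unsafe fn narrow_eight() -> __m128i {
///     let singles = _mm256_set_ps(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
///     // All eight f32 lanes are narrowed into one 128-bit vector of binary16
///     // values, using the current MXCSR rounding mode.
///     _mm256_cvtps_ph::<_MM_FROUND_CUR_DIRECTION>(singles)
/// }
///
/// # fn main() {
/// if is_x86_feature_detected!("f16c") {
///     // SAFETY: `f16c` was detected at runtime.
///     let _halves = unsafe { narrow_eight() };
/// }
/// # }
/// ```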
#[inline]
#[target_feature(enable = "f16c")]
#[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
pub fn _mm256_cvtps_ph<const IMM_ROUNDING: i32>(a: __m256) -> __m128i {
    static_assert_uimm_bits!(IMM_ROUNDING, 3);
    unsafe {
        let a = a.as_f32x8();
        let r = llvm_vcvtps2ph_256(a, IMM_ROUNDING);
        transmute(r)
    }
}

#[cfg(test)]
mod tests {
    use crate::core_arch::assert_eq_const as assert_eq;
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    const F16_ONE: i16 = 0x3c00;
    const F16_TWO: i16 = 0x4000;
    const F16_THREE: i16 = 0x4200;
    const F16_FOUR: i16 = 0x4400;
    const F16_FIVE: i16 = 0x4500;
    const F16_SIX: i16 = 0x4600;
    const F16_SEVEN: i16 = 0x4700;
    const F16_EIGHT: i16 = 0x4800;

    #[simd_test(enable = "f16c")]
    const fn test_mm_cvtph_ps() {
        let a = _mm_set_epi16(0, 0, 0, 0, F16_ONE, F16_TWO, F16_THREE, F16_FOUR);
        let r = _mm_cvtph_ps(a);
        let e = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "f16c")]
    const fn test_mm256_cvtph_ps() {
        let a = _mm_set_epi16(
            F16_ONE, F16_TWO, F16_THREE, F16_FOUR, F16_FIVE, F16_SIX, F16_SEVEN, F16_EIGHT,
        );
        let r = _mm256_cvtph_ps(a);
        let e = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "f16c")]
    fn test_mm_cvtps_ph() {
        let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
        let r = _mm_cvtps_ph::<_MM_FROUND_CUR_DIRECTION>(a);
        let e = _mm_set_epi16(0, 0, 0, 0, F16_ONE, F16_TWO, F16_THREE, F16_FOUR);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "f16c")]
    fn test_mm256_cvtps_ph() {
        let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm256_cvtps_ph::<_MM_FROUND_CUR_DIRECTION>(a);
        let e = _mm_set_epi16(
            F16_ONE, F16_TWO, F16_THREE, F16_FOUR, F16_FIVE, F16_SIX, F16_SEVEN, F16_EIGHT,
        );
        assert_eq_m128i(r, e);
    }
}
| 157 | |