1//! [F16C intrinsics].
2//!
3//! [F16C intrinsics]: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=fp16&expand=1769
4
5use crate::core_arch::{simd::*, x86::*};
6use crate::intrinsics::simd::*;
7
8#[cfg(test)]
9use stdarch_test::assert_instr;
10
11#[allow(improper_ctypes)]
12unsafe extern "unadjusted" {
13 #[link_name = "llvm.x86.vcvtps2ph.128"]
14 unsafefn llvm_vcvtps2ph_128(a: f32x4, rounding: i32) -> i16x8;
15 #[link_name = "llvm.x86.vcvtps2ph.256"]
16 unsafefn llvm_vcvtps2ph_256(a: f32x8, rounding: i32) -> i16x8;
17}
18
19/// Converts the 4 x 16-bit half-precision float values in the lowest 64-bit of
20/// the 128-bit vector `a` into 4 x 32-bit float values stored in a 128-bit wide
21/// vector.
22///
23/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_ps)
24#[inline]
25#[target_feature(enable = "f16c")]
26#[cfg_attr(test, assert_instr("vcvtph2ps"))]
27#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
28#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29pub const fn _mm_cvtph_ps(a: __m128i) -> __m128 {
30 unsafe {
31 let a: f16x8 = transmute(src:a);
32 let a: f16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
33 simd_cast(a)
34 }
35}
36
37/// Converts the 8 x 16-bit half-precision float values in the 128-bit vector
38/// `a` into 8 x 32-bit float values stored in a 256-bit wide vector.
39///
40/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_ps)
41#[inline]
42#[target_feature(enable = "f16c")]
43#[cfg_attr(test, assert_instr("vcvtph2ps"))]
44#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
45#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
46pub const fn _mm256_cvtph_ps(a: __m128i) -> __m256 {
47 unsafe {
48 let a: f16x8 = transmute(src:a);
49 simd_cast(a)
50 }
51}
52
53/// Converts the 4 x 32-bit float values in the 128-bit vector `a` into 4 x
54/// 16-bit half-precision float values stored in the lowest 64-bit of a 128-bit
55/// vector.
56///
57/// Rounding is done according to the `imm_rounding` parameter, which can be one of:
58///
59/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
60/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
61/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
62/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
63/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
64///
65/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_ph)
66#[inline]
67#[target_feature(enable = "f16c")]
68#[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = 0))]
69#[rustc_legacy_const_generics(1)]
70#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
71pub fn _mm_cvtps_ph<const IMM_ROUNDING: i32>(a: __m128) -> __m128i {
72 static_assert_uimm_bits!(IMM_ROUNDING, 3);
73 unsafe {
74 let a: Simd = a.as_f32x4();
75 let r: Simd = llvm_vcvtps2ph_128(a, IMM_ROUNDING);
76 transmute(src:r)
77 }
78}
79
80/// Converts the 8 x 32-bit float values in the 256-bit vector `a` into 8 x
81/// 16-bit half-precision float values stored in a 128-bit wide vector.
82///
83/// Rounding is done according to the `imm_rounding` parameter, which can be one of:
84///
85/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
86/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
87/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
88/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
89/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
90///
91/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtps_ph)
92#[inline]
93#[target_feature(enable = "f16c")]
94#[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = 0))]
95#[rustc_legacy_const_generics(1)]
96#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
97pub fn _mm256_cvtps_ph<const IMM_ROUNDING: i32>(a: __m256) -> __m128i {
98 static_assert_uimm_bits!(IMM_ROUNDING, 3);
99 unsafe {
100 let a: Simd = a.as_f32x8();
101 let r: Simd = llvm_vcvtps2ph_256(a, IMM_ROUNDING);
102 transmute(src:r)
103 }
104}
105
#[cfg(test)]
mod tests {
    use crate::core_arch::assert_eq_const as assert_eq;
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    // IEEE 754 binary16 bit patterns for the values 1.0 through 8.0.
    const F16_ONE: i16 = 0x3c00;
    const F16_TWO: i16 = 0x4000;
    const F16_THREE: i16 = 0x4200;
    const F16_FOUR: i16 = 0x4400;
    const F16_FIVE: i16 = 0x4500;
    const F16_SIX: i16 = 0x4600;
    const F16_SEVEN: i16 = 0x4700;
    const F16_EIGHT: i16 = 0x4800;

    /// Half -> single conversion of four values; the other four inputs are zero.
    #[simd_test(enable = "f16c")]
    const fn test_mm_cvtph_ps() {
        let halves = _mm_set_epi16(0, 0, 0, 0, F16_ONE, F16_TWO, F16_THREE, F16_FOUR);
        let expected = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
        let widened = _mm_cvtph_ps(halves);
        assert_eq_m128(widened, expected);
    }

    /// Half -> single conversion of all eight values.
    #[simd_test(enable = "f16c")]
    const fn test_mm256_cvtph_ps() {
        let halves = _mm_set_epi16(
            F16_ONE, F16_TWO, F16_THREE, F16_FOUR, F16_FIVE, F16_SIX, F16_SEVEN, F16_EIGHT,
        );
        let expected = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let widened = _mm256_cvtph_ps(halves);
        assert_eq_m256(widened, expected);
    }

    /// Single -> half conversion of four values using `_MM_FROUND_CUR_DIRECTION`.
    #[simd_test(enable = "f16c")]
    fn test_mm_cvtps_ph() {
        let singles = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
        let expected = _mm_set_epi16(0, 0, 0, 0, F16_ONE, F16_TWO, F16_THREE, F16_FOUR);
        let narrowed = _mm_cvtps_ph::<_MM_FROUND_CUR_DIRECTION>(singles);
        assert_eq_m128i(narrowed, expected);
    }

    /// Single -> half conversion of eight values using `_MM_FROUND_CUR_DIRECTION`.
    #[simd_test(enable = "f16c")]
    fn test_mm256_cvtps_ph() {
        let singles = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let expected = _mm_set_epi16(
            F16_ONE, F16_TWO, F16_THREE, F16_FOUR, F16_FIVE, F16_SIX, F16_SEVEN, F16_EIGHT,
        );
        let narrowed = _mm256_cvtps_ph::<_MM_FROUND_CUR_DIRECTION>(singles);
        assert_eq_m128i(narrowed, expected);
    }
}
157