1//! [F16C intrinsics].
2//!
3//! [F16C intrinsics]: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=fp16&expand=1769
4
5use crate::{
6 core_arch::{simd::*, x86::*},
7 // hint::unreachable_unchecked,
8 mem::transmute,
9};
10
11#[cfg(test)]
12use stdarch_test::assert_instr;
13
// Raw LLVM intrinsics that back the public F16C wrappers in this file; each
// `link_name` selects the LLVM intrinsic the declaration binds to.
// NOTE(review): `improper_ctypes` is presumably allowed because the SIMD vector
// types in these signatures are not considered FFI-safe by the lint — confirm.
#[allow(improper_ctypes)]
extern "unadjusted" {
    // Half -> single precision, 128-bit result: takes `i16x8`, yields `f32x4`.
    #[link_name = "llvm.x86.vcvtph2ps.128"]
    fn llvm_vcvtph2ps_128(a: i16x8) -> f32x4;
    // Half -> single precision, 256-bit result: takes `i16x8`, yields `f32x8`.
    #[link_name = "llvm.x86.vcvtph2ps.256"]
    fn llvm_vcvtph2ps_256(a: i16x8) -> f32x8;
    // Single -> half precision from a 128-bit input; `rounding` receives the
    // wrapper's `IMM_ROUNDING` immediate.
    #[link_name = "llvm.x86.vcvtps2ph.128"]
    fn llvm_vcvtps2ph_128(a: f32x4, rounding: i32) -> i16x8;
    // Single -> half precision from a 256-bit input; `rounding` receives the
    // wrapper's `IMM_ROUNDING` immediate.
    #[link_name = "llvm.x86.vcvtps2ph.256"]
    fn llvm_vcvtps2ph_256(a: f32x8, rounding: i32) -> i16x8;
}
25
26/// Converts the 4 x 16-bit half-precision float values in the lowest 64-bit of
27/// the 128-bit vector `a` into 4 x 32-bit float values stored in a 128-bit wide
28/// vector.
29#[inline]
30#[target_feature(enable = "f16c")]
31#[cfg_attr(test, assert_instr("vcvtph2ps"))]
32#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
33pub unsafe fn _mm_cvtph_ps(a: __m128i) -> __m128 {
34 transmute(src:llvm_vcvtph2ps_128(transmute(src:a)))
35}
36
37/// Converts the 8 x 16-bit half-precision float values in the 128-bit vector
38/// `a` into 8 x 32-bit float values stored in a 256-bit wide vector.
39#[inline]
40#[target_feature(enable = "f16c")]
41#[cfg_attr(test, assert_instr("vcvtph2ps"))]
42#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
43pub unsafe fn _mm256_cvtph_ps(a: __m128i) -> __m256 {
44 transmute(src:llvm_vcvtph2ps_256(transmute(src:a)))
45}
46
47/// Converts the 4 x 32-bit float values in the 128-bit vector `a` into 4 x
48/// 16-bit half-precision float values stored in the lowest 64-bit of a 128-bit
49/// vector.
50///
51/// Rounding is done according to the `imm_rounding` parameter, which can be one of:
52///
53/// * `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC`: round to nearest and suppress exceptions,
54/// * `_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC`: round down and suppress exceptions,
55/// * `_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC`: round up and suppress exceptions,
56/// * `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC`: truncate and suppress exceptions,
57/// * `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`].
58#[inline]
59#[target_feature(enable = "f16c")]
60#[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = 0))]
61#[rustc_legacy_const_generics(1)]
62#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
63pub unsafe fn _mm_cvtps_ph<const IMM_ROUNDING: i32>(a: __m128) -> __m128i {
64 static_assert_uimm_bits!(IMM_ROUNDING, 3);
65 let a: f32x4 = a.as_f32x4();
66 let r: i16x8 = llvm_vcvtps2ph_128(a, IMM_ROUNDING);
67 transmute(src:r)
68}
69
70/// Converts the 8 x 32-bit float values in the 256-bit vector `a` into 8 x
71/// 16-bit half-precision float values stored in a 128-bit wide vector.
72///
73/// Rounding is done according to the `imm_rounding` parameter, which can be one of:
74///
75/// * `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC`: round to nearest and suppress exceptions,
76/// * `_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC`: round down and suppress exceptions,
77/// * `_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC`: round up and suppress exceptions,
78/// * `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC`: truncate and suppress exceptions,
79/// * `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`].
80#[inline]
81#[target_feature(enable = "f16c")]
82#[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = 0))]
83#[rustc_legacy_const_generics(1)]
84#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
85pub unsafe fn _mm256_cvtps_ph<const IMM_ROUNDING: i32>(a: __m256) -> __m128i {
86 static_assert_uimm_bits!(IMM_ROUNDING, 3);
87 let a: f32x8 = a.as_f32x8();
88 let r: i16x8 = llvm_vcvtps2ph_256(a, IMM_ROUNDING);
89 transmute(src:r)
90}
91
#[cfg(test)]
mod tests {
    use crate::{core_arch::x86::*, mem::transmute};
    use stdarch_test::simd_test;

    // Round-trips four `f32` values through half precision (rounding
    // immediate 0) and checks that they come back unchanged.
    #[simd_test(enable = "f16c")]
    unsafe fn test_mm_cvtph_ps() {
        let expected = [1_f32, 2_f32, 3_f32, 4_f32];
        let packed: __m128i = _mm_cvtps_ph::<0>(transmute(expected));
        let actual: [f32; 4] = transmute(_mm_cvtph_ps(packed));
        assert_eq!(actual, expected);
    }

    // Same round trip for the 256-bit variants, using eight `f32` values.
    #[simd_test(enable = "f16c")]
    unsafe fn test_mm256_cvtph_ps() {
        let expected = [1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32];
        let packed: __m128i = _mm256_cvtps_ph::<0>(transmute(expected));
        let actual: [f32; 8] = transmute(_mm256_cvtph_ps(packed));
        assert_eq!(actual, expected);
    }
}
117