f16c.rs source code [crates/core_arch/src/x86/f16c.rs]

1	//! [F16C intrinsics].
2	//!
3	//! [F16C intrinsics]: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=fp16&expand=1769
4
use crate::core_arch::{simd::*, x86::*};
6
7	#[cfg(test)]
8	use stdarch_test::assert_instr;
9
10	#[allow(improper_ctypes)]
11	extern "unadjusted" {
12	#[link_name = "llvm.x86.vcvtph2ps.128"]
13	fn llvm_vcvtph2ps_128(a: i16x8) -> f32x4;
14	#[link_name = "llvm.x86.vcvtph2ps.256"]
15	fn llvm_vcvtph2ps_256(a: i16x8) -> f32x8;
16	#[link_name = "llvm.x86.vcvtps2ph.128"]
17	fn llvm_vcvtps2ph_128(a: f32x4, rounding: i32) -> i16x8;
18	#[link_name = "llvm.x86.vcvtps2ph.256"]
19	fn llvm_vcvtps2ph_256(a: f32x8, rounding: i32) -> i16x8;
20	}
21
22	/// Converts the 4 x 16-bit half-precision float values in the lowest 64-bit of
23	/// the 128-bit vector `a` into 4 x 32-bit float values stored in a 128-bit wide
24	/// vector.
25	#[inline]
26	#[target_feature(enable = "f16c")]
27	#[cfg_attr(test, assert_instr("vcvtph2ps"))]
28	#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
29	pub unsafe fn _mm_cvtph_ps(a: __m128i) -> __m128 {
30	transmute(src:llvm_vcvtph2ps_128(transmute(src:a)))
31	}
32
33	/// Converts the 8 x 16-bit half-precision float values in the 128-bit vector
34	/// `a` into 8 x 32-bit float values stored in a 256-bit wide vector.
35	#[inline]
36	#[target_feature(enable = "f16c")]
37	#[cfg_attr(test, assert_instr("vcvtph2ps"))]
38	#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
39	pub unsafe fn _mm256_cvtph_ps(a: __m128i) -> __m256 {
40	transmute(src:llvm_vcvtph2ps_256(transmute(src:a)))
41	}
42
43	/// Converts the 4 x 32-bit float values in the 128-bit vector `a` into 4 x
44	/// 16-bit half-precision float values stored in the lowest 64-bit of a 128-bit
45	/// vector.
46	///
47	/// Rounding is done according to the `imm_rounding` parameter, which can be one of:
48	///
49	/// `_MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC`: round to nearest and suppress exceptions,*
50	/// `_MM_FROUND_TO_NEG_INF \| _MM_FROUND_NO_EXC`: round down and suppress exceptions,*
51	/// `_MM_FROUND_TO_POS_INF \| _MM_FROUND_NO_EXC`: round up and suppress exceptions,*
52	/// `_MM_FROUND_TO_ZERO \| _MM_FROUND_NO_EXC`: truncate and suppress exceptions,*
53	/// `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see* [`_MM_SET_ROUNDING_MODE`].
54	#[inline]
55	#[target_feature(enable = "f16c")]
56	#[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = `0`))]
57	#[rustc_legacy_const_generics(`1`)]
58	#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
59	pub unsafe fn _mm_cvtps_ph<const IMM_ROUNDING: i32>(a: __m128) -> __m128i {
60	static_assert_uimm_bits!(IMM_ROUNDING, `3`);
61	let a: f32x4 = a.as_f32x4();
62	let r: i16x8 = llvm_vcvtps2ph_128(a, IMM_ROUNDING);
63	transmute(src:r)
64	}
65
66	/// Converts the 8 x 32-bit float values in the 256-bit vector `a` into 8 x
67	/// 16-bit half-precision float values stored in a 128-bit wide vector.
68	///
69	/// Rounding is done according to the `imm_rounding` parameter, which can be one of:
70	///
71	/// `_MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC`: round to nearest and suppress exceptions,*
72	/// `_MM_FROUND_TO_NEG_INF \| _MM_FROUND_NO_EXC`: round down and suppress exceptions,*
73	/// `_MM_FROUND_TO_POS_INF \| _MM_FROUND_NO_EXC`: round up and suppress exceptions,*
74	/// `_MM_FROUND_TO_ZERO \| _MM_FROUND_NO_EXC`: truncate and suppress exceptions,*
75	/// `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see* [`_MM_SET_ROUNDING_MODE`].
76	#[inline]
77	#[target_feature(enable = "f16c")]
78	#[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = `0`))]
79	#[rustc_legacy_const_generics(`1`)]
80	#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
81	pub unsafe fn _mm256_cvtps_ph<const IMM_ROUNDING: i32>(a: __m256) -> __m128i {
82	static_assert_uimm_bits!(IMM_ROUNDING, `3`);
83	let a: f32x8 = a.as_f32x8();
84	let r: i16x8 = llvm_vcvtps2ph_256(a, IMM_ROUNDING);
85	transmute(src:r)
86	}
87
#[cfg(test)]
mod tests {
    use crate::{core_arch::x86::*, mem::transmute};
    use stdarch_test::simd_test;

    // Round-trips four floats through half precision and back. The inputs are
    // small integers, which are exactly representable as 16-bit floats, so the
    // result is expected to compare equal to the input.
    #[simd_test(enable = "f16c")]
    unsafe fn test_mm_cvtph_ps() {
        let array = [1_f32, 2_f32, 3_f32, 4_f32];
        let float_vec: __m128 = transmute(array);
        let halfs: __m128i = _mm_cvtps_ph::<0>(float_vec);
        let floats: __m128 = _mm_cvtph_ps(halfs);
        let result: [f32; 4] = transmute(floats);
        assert_eq!(result, array);
    }

    // Same round trip as above for the 256-bit variants, using eight floats.
    #[simd_test(enable = "f16c")]
    unsafe fn test_mm256_cvtph_ps() {
        let array = [1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32];
        let float_vec: __m256 = transmute(array);
        let halfs: __m128i = _mm256_cvtps_ph::<0>(float_vec);
        let floats: __m256 = _mm256_cvtph_ps(halfs);
        let result: [f32; 8] = transmute(floats);
        assert_eq!(result, array);
    }
}
113