f16c.rs source code [crates/core_arch/src/x86/f16c.rs]

1	//! [F16C intrinsics].
2	//!
3	//! [F16C intrinsics]: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=fp16&expand=1769
4
use crate::core_arch::{simd::*, x86::*};

#[cfg(test)]
use stdarch_test::assert_instr;
9
10	#[allow(improper_ctypes)]
11	unsafe extern "unadjusted" {
12	#[link_name = "llvm.x86.vcvtph2ps.128"]
13	unsafefn llvm_vcvtph2ps_128(a: i16x8) -> f32x4;
14	#[link_name = "llvm.x86.vcvtph2ps.256"]
15	unsafefn llvm_vcvtph2ps_256(a: i16x8) -> f32x8;
16	#[link_name = "llvm.x86.vcvtps2ph.128"]
17	unsafefn llvm_vcvtps2ph_128(a: f32x4, rounding: i32) -> i16x8;
18	#[link_name = "llvm.x86.vcvtps2ph.256"]
19	unsafefn llvm_vcvtps2ph_256(a: f32x8, rounding: i32) -> i16x8;
20	}
21
22	/// Converts the 4 x 16-bit half-precision float values in the lowest 64-bit of
23	/// the 128-bit vector `a` into 4 x 32-bit float values stored in a 128-bit wide
24	/// vector.
25	///
26	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_ps)
27	#[inline]
28	#[target_feature(enable = "f16c")]
29	#[cfg_attr(test, assert_instr("vcvtph2ps"))]
30	#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
31	pub fn _mm_cvtph_ps(a: __m128i) -> __m128 {
32	unsafe { transmute(src:llvm_vcvtph2ps_128(transmute(src:a))) }
33	}
34
35	/// Converts the 8 x 16-bit half-precision float values in the 128-bit vector
36	/// `a` into 8 x 32-bit float values stored in a 256-bit wide vector.
37	///
38	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_ps)
39	#[inline]
40	#[target_feature(enable = "f16c")]
41	#[cfg_attr(test, assert_instr("vcvtph2ps"))]
42	#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
43	pub fn _mm256_cvtph_ps(a: __m128i) -> __m256 {
44	unsafe { transmute(src:llvm_vcvtph2ps_256(transmute(src:a))) }
45	}
46
47	/// Converts the 4 x 32-bit float values in the 128-bit vector `a` into 4 x
48	/// 16-bit half-precision float values stored in the lowest 64-bit of a 128-bit
49	/// vector.
50	///
51	/// Rounding is done according to the `imm_rounding` parameter, which can be one of:
52	///
53	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
54	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
55	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
56	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
57	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
58	///
59	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_ph)
60	#[inline]
61	#[target_feature(enable = "f16c")]
62	#[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = `0`))]
63	#[rustc_legacy_const_generics(`1`)]
64	#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
65	pub fn _mm_cvtps_ph<const IMM_ROUNDING: i32>(a: __m128) -> __m128i {
66	static_assert_uimm_bits!(IMM_ROUNDING, `3`);
67	unsafe {
68	let a: f32x4 = a.as_f32x4();
69	let r: i16x8 = llvm_vcvtps2ph_128(a, IMM_ROUNDING);
70	transmute(src:r)
71	}
72	}
73
74	/// Converts the 8 x 32-bit float values in the 256-bit vector `a` into 8 x
75	/// 16-bit half-precision float values stored in a 128-bit wide vector.
76	///
77	/// Rounding is done according to the `imm_rounding` parameter, which can be one of:
78	///
79	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
80	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
81	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
82	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
83	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
84	///
85	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtps_ph)
86	#[inline]
87	#[target_feature(enable = "f16c")]
88	#[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = `0`))]
89	#[rustc_legacy_const_generics(`1`)]
90	#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
91	pub fn _mm256_cvtps_ph<const IMM_ROUNDING: i32>(a: __m256) -> __m128i {
92	static_assert_uimm_bits!(IMM_ROUNDING, `3`);
93	unsafe {
94	let a: f32x8 = a.as_f32x8();
95	let r: i16x8 = llvm_vcvtps2ph_256(a, IMM_ROUNDING);
96	transmute(src:r)
97	}
98	}
99
#[cfg(test)]
mod tests {
    use crate::{core_arch::x86::*, mem::transmute};
    use stdarch_test::simd_test;

    // IEEE 754 binary16 bit patterns for the integers 1.0 through 8.0, stored
    // as `i16` so they can be fed straight to `_mm_set_epi16`.
    const F16_ONE: i16 = 0x3c00;
    const F16_TWO: i16 = 0x4000;
    const F16_THREE: i16 = 0x4200;
    const F16_FOUR: i16 = 0x4400;
    const F16_FIVE: i16 = 0x4500;
    const F16_SIX: i16 = 0x4600;
    const F16_SEVEN: i16 = 0x4700;
    const F16_EIGHT: i16 = 0x4800;

    /// Four half-precision inputs widen to the matching `f32` lanes.
    #[simd_test(enable = "f16c")]
    unsafe fn test_mm_cvtph_ps() {
        let a = _mm_set_epi16(0, 0, 0, 0, F16_ONE, F16_TWO, F16_THREE, F16_FOUR);
        let r = _mm_cvtph_ps(a);
        let e = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
        assert_eq_m128(r, e);
    }

    /// Eight half-precision inputs widen to the matching `f32` lanes.
    #[simd_test(enable = "f16c")]
    unsafe fn test_mm256_cvtph_ps() {
        let a = _mm_set_epi16(
            F16_ONE, F16_TWO, F16_THREE, F16_FOUR, F16_FIVE, F16_SIX, F16_SEVEN, F16_EIGHT,
        );
        let r = _mm256_cvtph_ps(a);
        let e = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m256(r, e);
    }

    /// Four `f32` inputs narrow to the expected half-precision bit patterns;
    /// the remaining four 16-bit lanes of the result are expected to be zero.
    #[simd_test(enable = "f16c")]
    unsafe fn test_mm_cvtps_ph() {
        let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
        let r = _mm_cvtps_ph::<_MM_FROUND_CUR_DIRECTION>(a);
        let e = _mm_set_epi16(0, 0, 0, 0, F16_ONE, F16_TWO, F16_THREE, F16_FOUR);
        assert_eq_m128i(r, e);
    }

    /// Eight `f32` inputs narrow to the expected half-precision bit patterns.
    #[simd_test(enable = "f16c")]
    unsafe fn test_mm256_cvtps_ph() {
        let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm256_cvtps_ph::<_MM_FROUND_CUR_DIRECTION>(a);
        let e = _mm_set_epi16(
            F16_ONE, F16_TWO, F16_THREE, F16_FOUR, F16_FIVE, F16_SIX, F16_SEVEN, F16_EIGHT,
        );
        assert_eq_m128i(r, e);
    }
}
150