avx512fp16.rs source code [crates/core_arch/src/x86_64/avx512fp16.rs]

1	use crate::core_arch::x86::*;
2	#[cfg(test)]
3	use stdarch_test::assert_instr;
4
5	/// Convert the signed 64-bit integer b to a half-precision (16-bit) floating-point element, store the
6	/// result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements
7	/// of dst.
8	///
9	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti64_sh)
10	#[inline]
11	#[target_feature(enable = "avx512fp16")]
12	#[cfg_attr(test, assert_instr(vcvtsi2sh))]
13	#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
14	pub fn _mm_cvti64_sh(a: __m128h, b: i64) -> __m128h {
15	unsafe { vcvtsi642sh(a, b, _MM_FROUND_CUR_DIRECTION) }
16	}
17
18	/// Convert the signed 64-bit integer b to a half-precision (16-bit) floating-point element, store the
19	/// result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements
20	/// of dst.
21	///
22	/// Rounding is done according to the rounding parameter, which can be one of:
23	///
24	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
25	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
26	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
27	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
28	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
29	///
30	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_sh)
31	#[inline]
32	#[target_feature(enable = "avx512fp16")]
33	#[cfg_attr(test, assert_instr(vcvtsi2sh, ROUNDING = `8`))]
34	#[rustc_legacy_const_generics(`2`)]
35	#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
36	pub fn _mm_cvt_roundi64_sh<const ROUNDING: i32>(a: __m128h, b: i64) -> __m128h {
37	unsafe {
38	static_assert_rounding!(ROUNDING);
39	vcvtsi642sh(a, b, ROUNDING)
40	}
41	}
42
43	/// Convert the unsigned 64-bit integer b to a half-precision (16-bit) floating-point element, store the
44	/// result in the lower element of dst, and copy the upper 1 packed elements from a to the upper elements
45	/// of dst.
46	///
47	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_sh)
48	#[inline]
49	#[target_feature(enable = "avx512fp16")]
50	#[cfg_attr(test, assert_instr(vcvtusi2sh))]
51	#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
52	pub fn _mm_cvtu64_sh(a: __m128h, b: u64) -> __m128h {
53	unsafe { vcvtusi642sh(a, b, _MM_FROUND_CUR_DIRECTION) }
54	}
55
56	/// Convert the unsigned 64-bit integer b to a half-precision (16-bit) floating-point element, store the
57	/// result in the lower element of dst, and copy the upper 1 packed elements from a to the upper elements
58	/// of dst.
59	///
60	/// Rounding is done according to the rounding parameter, which can be one of:
61	///
62	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
63	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
64	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
65	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
66	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
67	///
68	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu64_sh)
69	#[inline]
70	#[target_feature(enable = "avx512fp16")]
71	#[cfg_attr(test, assert_instr(vcvtusi2sh, ROUNDING = `8`))]
72	#[rustc_legacy_const_generics(`2`)]
73	#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
74	pub fn _mm_cvt_roundu64_sh<const ROUNDING: i32>(a: __m128h, b: u64) -> __m128h {
75	unsafe {
76	static_assert_rounding!(ROUNDING);
77	vcvtusi642sh(a, b, ROUNDING)
78	}
79	}
80
81	/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer, and store
82	/// the result in dst.
83	///
84	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_i64)
85	#[inline]
86	#[target_feature(enable = "avx512fp16")]
87	#[cfg_attr(test, assert_instr(vcvtsh2si))]
88	#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
89	pub fn _mm_cvtsh_i64(a: __m128h) -> i64 {
90	unsafe { vcvtsh2si64(a, _MM_FROUND_CUR_DIRECTION) }
91	}
92
93	/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer, and store
94	/// the result in dst.
95	///
96	/// Rounding is done according to the rounding parameter, which can be one of:
97	///
98	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
99	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
100	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
101	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
102	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
103	///
104	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_i64)
105	#[inline]
106	#[target_feature(enable = "avx512fp16")]
107	#[cfg_attr(test, assert_instr(vcvtsh2si, ROUNDING = `8`))]
108	#[rustc_legacy_const_generics(`1`)]
109	#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
110	pub fn _mm_cvt_roundsh_i64<const ROUNDING: i32>(a: __m128h) -> i64 {
111	unsafe {
112	static_assert_rounding!(ROUNDING);
113	vcvtsh2si64(a, ROUNDING)
114	}
115	}
116
117	/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer, and store
118	/// the result in dst.
119	///
120	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_u64)
121	#[inline]
122	#[target_feature(enable = "avx512fp16")]
123	#[cfg_attr(test, assert_instr(vcvtsh2usi))]
124	#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
125	pub fn _mm_cvtsh_u64(a: __m128h) -> u64 {
126	unsafe { vcvtsh2usi64(a, _MM_FROUND_CUR_DIRECTION) }
127	}
128
129	/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer, and store
130	/// the result in dst.
131	///
132	/// Rounding is done according to the rounding parameter, which can be one of:
133	///
134	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
135	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
136	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
137	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
138	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
139	///
140	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_u64)
141	#[inline]
142	#[target_feature(enable = "avx512fp16")]
143	#[cfg_attr(test, assert_instr(vcvtsh2usi, ROUNDING = `8`))]
144	#[rustc_legacy_const_generics(`1`)]
145	#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
146	pub fn _mm_cvt_roundsh_u64<const ROUNDING: i32>(a: __m128h) -> u64 {
147	unsafe {
148	static_assert_rounding!(ROUNDING);
149	vcvtsh2usi64(a, ROUNDING)
150	}
151	}
152
153	/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer with truncation,
154	/// and store the result in dst.
155	///
156	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_i64)
157	#[inline]
158	#[target_feature(enable = "avx512fp16")]
159	#[cfg_attr(test, assert_instr(vcvttsh2si))]
160	#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
161	pub fn _mm_cvttsh_i64(a: __m128h) -> i64 {
162	unsafe { vcvttsh2si64(a, _MM_FROUND_CUR_DIRECTION) }
163	}
164
165	/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer with truncation,
166	/// and store the result in dst.
167	///
168	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
169	///
170	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_i64)
171	#[inline]
172	#[target_feature(enable = "avx512fp16")]
173	#[cfg_attr(test, assert_instr(vcvttsh2si, SAE = `8`))]
174	#[rustc_legacy_const_generics(`1`)]
175	#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
176	pub fn _mm_cvtt_roundsh_i64<const SAE: i32>(a: __m128h) -> i64 {
177	unsafe {
178	static_assert_sae!(SAE);
179	vcvttsh2si64(a, SAE)
180	}
181	}
182
183	/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer with truncation,
184	/// and store the result in dst.
185	///
186	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_u64)
187	#[inline]
188	#[target_feature(enable = "avx512fp16")]
189	#[cfg_attr(test, assert_instr(vcvttsh2usi))]
190	#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
191	pub fn _mm_cvttsh_u64(a: __m128h) -> u64 {
192	unsafe { vcvttsh2usi64(a, _MM_FROUND_CUR_DIRECTION) }
193	}
194
195	/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer with truncation,
196	/// and store the result in dst.
197	///
198	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
199	///
200	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_u64)
201	#[inline]
202	#[target_feature(enable = "avx512fp16")]
203	#[cfg_attr(test, assert_instr(vcvttsh2usi, SAE = `8`))]
204	#[rustc_legacy_const_generics(`1`)]
205	#[stable(feature = "stdarch_x86_avx512fp16", since = "CURRENT_RUSTC_VERSION")]
206	pub fn _mm_cvtt_roundsh_u64<const SAE: i32>(a: __m128h) -> u64 {
207	unsafe {
208	static_assert_sae!(SAE);
209	vcvttsh2usi64(a, SAE)
210	}
211	}
212
213	#[allow(improper_ctypes)]
214	unsafe extern "C" {
215	#[link_name = "llvm.x86.avx512fp16.vcvtsi642sh"]
216	unsafefn vcvtsi642sh(a: __m128h, b: i64, rounding: i32) -> __m128h;
217	#[link_name = "llvm.x86.avx512fp16.vcvtusi642sh"]
218	unsafefn vcvtusi642sh(a: __m128h, b: u64, rounding: i32) -> __m128h;
219	#[link_name = "llvm.x86.avx512fp16.vcvtsh2si64"]
220	unsafefn vcvtsh2si64(a: __m128h, rounding: i32) -> i64;
221	#[link_name = "llvm.x86.avx512fp16.vcvtsh2usi64"]
222	unsafefn vcvtsh2usi64(a: __m128h, rounding: i32) -> u64;
223	#[link_name = "llvm.x86.avx512fp16.vcvttsh2si64"]
224	unsafefn vcvttsh2si64(a: __m128h, sae: i32) -> i64;
225	#[link_name = "llvm.x86.avx512fp16.vcvttsh2usi64"]
226	unsafefn vcvttsh2usi64(a: __m128h, sae: i32) -> u64;
227	}
228
// Unit tests for the 64-bit scalar <-> half-precision conversion intrinsics.
// Every test builds the same 8-lane input (1.0 ..= 8.0), so integer -> f16 tests expect only
// lane 0 to change and f16 -> integer tests expect the value of lane 0 (1).
#[cfg(test)]
mod tests {
    use crate::core_arch::{x86::*, x86_64::*};
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512fp16,avx512vl")]
    fn test_mm_cvti64_sh() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvti64_sh(a, 10);
        let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    fn test_mm_cvt_roundi64_sh() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvt_roundi64_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10);
        let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    fn test_mm_cvtu64_sh() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvtu64_sh(a, 10);
        let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    fn test_mm_cvt_roundu64_sh() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvt_roundu64_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10);
        let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm_cvtsh_i64() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvtsh_i64(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm_cvt_roundsh_i64() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvt_roundsh_i64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm_cvtsh_u64() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvtsh_u64(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm_cvt_roundsh_u64() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvt_roundsh_u64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm_cvttsh_i64() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvttsh_i64(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm_cvtt_roundsh_i64() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        // The const parameter is an SAE flag, not a rounding mode, so pass the flag alone
        // (same form as `test_mm_cvtt_roundsh_u64`).
        let r = _mm_cvtt_roundsh_i64::<_MM_FROUND_NO_EXC>(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm_cvttsh_u64() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvttsh_u64(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    fn test_mm_cvtt_roundsh_u64() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvtt_roundsh_u64::<_MM_FROUND_NO_EXC>(a);
        assert_eq!(r, 1);
    }
}
322