// avxneconvert.rs source code [crates/core_arch/src/x86/avxneconvert.rs]

1	use crate::arch::asm;
2	use crate::core_arch::x86::*;
3
4	#[cfg(test)]
5	use stdarch_test::assert_instr;
6
7	/// Convert scalar BF16 (16-bit) floating point element stored at memory locations starting at location
8	/// a to single precision (32-bit) floating-point, broadcast it to packed single precision (32-bit)
9	/// floating-point elements, and store the results in dst.
10	///
11	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bcstnebf16_ps)
12	#[inline]
13	#[target_feature(enable = "avxneconvert")]
14	#[cfg_attr(
15	all(test, any(target_os = "linux", target_env = "msvc")),
16	assert_instr(vbcstnebf162ps)
17	)]
18	#[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")]
19	pub unsafe fn _mm_bcstnebf16_ps(a: *const bf16) -> __m128 {
20	bcstnebf162ps_128(a)
21	}
22
23	/// Convert scalar BF16 (16-bit) floating point element stored at memory locations starting at location
24	/// a to single precision (32-bit) floating-point, broadcast it to packed single precision (32-bit) floating-point
25	/// elements, and store the results in dst.
26	///
27	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bcstnebf16_ps)
28	#[inline]
29	#[target_feature(enable = "avxneconvert")]
30	#[cfg_attr(
31	all(test, any(target_os = "linux", target_env = "msvc")),
32	assert_instr(vbcstnebf162ps)
33	)]
34	#[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")]
35	pub unsafe fn _mm256_bcstnebf16_ps(a: *const bf16) -> __m256 {
36	bcstnebf162ps_256(a)
37	}
38
39	/// Convert scalar half-precision (16-bit) floating-point element stored at memory locations starting
40	/// at location a to a single-precision (32-bit) floating-point, broadcast it to packed single-precision
41	/// (32-bit) floating-point elements, and store the results in dst.
42	///
43	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bcstnesh_ps)
44	#[inline]
45	#[target_feature(enable = "avxneconvert")]
46	#[cfg_attr(
47	all(test, any(target_os = "linux", target_env = "msvc")),
48	assert_instr(vbcstnesh2ps)
49	)]
50	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
51	pub unsafe fn _mm_bcstnesh_ps(a: *const f16) -> __m128 {
52	bcstnesh2ps_128(a)
53	}
54
55	/// Convert scalar half-precision (16-bit) floating-point element stored at memory locations starting
56	/// at location a to a single-precision (32-bit) floating-point, broadcast it to packed single-precision
57	/// (32-bit) floating-point elements, and store the results in dst.
58	///
59	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bcstnesh_ps)
60	#[inline]
61	#[target_feature(enable = "avxneconvert")]
62	#[cfg_attr(
63	all(test, any(target_os = "linux", target_env = "msvc")),
64	assert_instr(vbcstnesh2ps)
65	)]
66	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
67	pub unsafe fn _mm256_bcstnesh_ps(a: *const f16) -> __m256 {
68	bcstnesh2ps_256(a)
69	}
70
71	/// Convert packed BF16 (16-bit) floating-point even-indexed elements stored at memory locations starting at
72	/// location a to single precision (32-bit) floating-point elements, and store the results in dst.
73	///
74	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneebf16_ps)
75	#[inline]
76	#[target_feature(enable = "avxneconvert")]
77	#[cfg_attr(
78	all(test, any(target_os = "linux", target_env = "msvc")),
79	assert_instr(vcvtneebf162ps)
80	)]
81	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
82	pub unsafe fn _mm_cvtneebf16_ps(a: *const __m128bh) -> __m128 {
83	transmute(src:cvtneebf162ps_128(a))
84	}
85
86	/// Convert packed BF16 (16-bit) floating-point even-indexed elements stored at memory locations starting at
87	/// location a to single precision (32-bit) floating-point elements, and store the results in dst.
88	///
89	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneebf16_ps)
90	#[inline]
91	#[target_feature(enable = "avxneconvert")]
92	#[cfg_attr(
93	all(test, any(target_os = "linux", target_env = "msvc")),
94	assert_instr(vcvtneebf162ps)
95	)]
96	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
97	pub unsafe fn _mm256_cvtneebf16_ps(a: *const __m256bh) -> __m256 {
98	transmute(src:cvtneebf162ps_256(a))
99	}
100
101	/// Convert packed half-precision (16-bit) floating-point even-indexed elements stored at memory locations starting at
102	/// location a to single precision (32-bit) floating-point elements, and store the results in dst.
103	///
104	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneeph_ps)
105	#[inline]
106	#[target_feature(enable = "avxneconvert")]
107	#[cfg_attr(
108	all(test, any(target_os = "linux", target_env = "msvc")),
109	assert_instr(vcvtneeph2ps)
110	)]
111	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
112	pub unsafe fn _mm_cvtneeph_ps(a: *const __m128h) -> __m128 {
113	transmute(src:cvtneeph2ps_128(a))
114	}
115
116	/// Convert packed half-precision (16-bit) floating-point even-indexed elements stored at memory locations starting at
117	/// location a to single precision (32-bit) floating-point elements, and store the results in dst.
118	///
119	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneeph_ps)
120	#[inline]
121	#[target_feature(enable = "avxneconvert")]
122	#[cfg_attr(
123	all(test, any(target_os = "linux", target_env = "msvc")),
124	assert_instr(vcvtneeph2ps)
125	)]
126	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
127	pub unsafe fn _mm256_cvtneeph_ps(a: *const __m256h) -> __m256 {
128	transmute(src:cvtneeph2ps_256(a))
129	}
130
131	/// Convert packed BF16 (16-bit) floating-point odd-indexed elements stored at memory locations starting at
132	/// location a to single precision (32-bit) floating-point elements, and store the results in dst.
133	///
134	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneobf16_ps)
135	#[inline]
136	#[target_feature(enable = "avxneconvert")]
137	#[cfg_attr(
138	all(test, any(target_os = "linux", target_env = "msvc")),
139	assert_instr(vcvtneobf162ps)
140	)]
141	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
142	pub unsafe fn _mm_cvtneobf16_ps(a: *const __m128bh) -> __m128 {
143	transmute(src:cvtneobf162ps_128(a))
144	}
145
146	/// Convert packed BF16 (16-bit) floating-point odd-indexed elements stored at memory locations starting at
147	/// location a to single precision (32-bit) floating-point elements, and store the results in dst.
148	///
149	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneobf16_ps)
150	#[inline]
151	#[target_feature(enable = "avxneconvert")]
152	#[cfg_attr(
153	all(test, any(target_os = "linux", target_env = "msvc")),
154	assert_instr(vcvtneobf162ps)
155	)]
156	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
157	pub unsafe fn _mm256_cvtneobf16_ps(a: *const __m256bh) -> __m256 {
158	transmute(src:cvtneobf162ps_256(a))
159	}
160
161	/// Convert packed half-precision (16-bit) floating-point odd-indexed elements stored at memory locations starting at
162	/// location a to single precision (32-bit) floating-point elements, and store the results in dst.
163	///
164	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneoph_ps)
165	#[inline]
166	#[target_feature(enable = "avxneconvert")]
167	#[cfg_attr(
168	all(test, any(target_os = "linux", target_env = "msvc")),
169	assert_instr(vcvtneoph2ps)
170	)]
171	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
172	pub unsafe fn _mm_cvtneoph_ps(a: *const __m128h) -> __m128 {
173	transmute(src:cvtneoph2ps_128(a))
174	}
175
176	/// Convert packed half-precision (16-bit) floating-point odd-indexed elements stored at memory locations starting at
177	/// location a to single precision (32-bit) floating-point elements, and store the results in dst.
178	///
179	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneoph_ps)
180	#[inline]
181	#[target_feature(enable = "avxneconvert")]
182	#[cfg_attr(
183	all(test, any(target_os = "linux", target_env = "msvc")),
184	assert_instr(vcvtneoph2ps)
185	)]
186	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
187	pub unsafe fn _mm256_cvtneoph_ps(a: *const __m256h) -> __m256 {
188	transmute(src:cvtneoph2ps_256(a))
189	}
190
191	/// Convert packed single precision (32-bit) floating-point elements in a to packed BF16 (16-bit) floating-point
192	/// elements, and store the results in dst.
193	///
194	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneps_avx_pbh)
195	#[inline]
196	#[target_feature(enable = "avxneconvert")]
197	#[cfg_attr(
198	all(test, any(target_os = "linux", target_env = "msvc")),
199	assert_instr(vcvtneps2bf16)
200	)]
201	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
202	pub fn _mm_cvtneps_avx_pbh(a: __m128) -> __m128bh {
203	unsafe {
204	let mut dst: __m128bh;
205	asm!(
206	"{{vex}}vcvtneps2bf16 {dst},{src}",
207	dst = lateout(xmm_reg) dst,
208	src = in(xmm_reg) a,
209	options(pure, nomem, nostack, preserves_flags)
210	);
211	dst
212	}
213	}
214
215	/// Convert packed single precision (32-bit) floating-point elements in a to packed BF16 (16-bit) floating-point
216	/// elements, and store the results in dst.
217	///
218	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneps_avx_pbh)
219	#[inline]
220	#[target_feature(enable = "avxneconvert")]
221	#[cfg_attr(
222	all(test, any(target_os = "linux", target_env = "msvc")),
223	assert_instr(vcvtneps2bf16)
224	)]
225	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
226	pub fn _mm256_cvtneps_avx_pbh(a: __m256) -> __m128bh {
227	unsafe {
228	let mut dst: __m128bh;
229	asm!(
230	"{{vex}}vcvtneps2bf16 {dst},{src}",
231	dst = lateout(xmm_reg) dst,
232	src = in(ymm_reg) a,
233	options(pure, nomem, nostack, preserves_flags)
234	);
235	dst
236	}
237	}
238
239	#[allow(improper_ctypes)]
240	unsafe extern "C" {
241	#[link_name = "llvm.x86.vbcstnebf162ps128"]
242	unsafefn bcstnebf162ps_128(a: *const bf16) -> __m128;
243	#[link_name = "llvm.x86.vbcstnebf162ps256"]
244	unsafefn bcstnebf162ps_256(a: *const bf16) -> __m256;
245	#[link_name = "llvm.x86.vbcstnesh2ps128"]
246	unsafefn bcstnesh2ps_128(a: *const f16) -> __m128;
247	#[link_name = "llvm.x86.vbcstnesh2ps256"]
248	unsafefn bcstnesh2ps_256(a: *const f16) -> __m256;
249
250	#[link_name = "llvm.x86.vcvtneebf162ps128"]
251	unsafefn cvtneebf162ps_128(a: *const __m128bh) -> __m128;
252	#[link_name = "llvm.x86.vcvtneebf162ps256"]
253	unsafefn cvtneebf162ps_256(a: *const __m256bh) -> __m256;
254	#[link_name = "llvm.x86.vcvtneeph2ps128"]
255	unsafefn cvtneeph2ps_128(a: *const __m128h) -> __m128;
256	#[link_name = "llvm.x86.vcvtneeph2ps256"]
257	unsafefn cvtneeph2ps_256(a: *const __m256h) -> __m256;
258
259	#[link_name = "llvm.x86.vcvtneobf162ps128"]
260	unsafefn cvtneobf162ps_128(a: *const __m128bh) -> __m128;
261	#[link_name = "llvm.x86.vcvtneobf162ps256"]
262	unsafefn cvtneobf162ps_256(a: *const __m256bh) -> __m256;
263	#[link_name = "llvm.x86.vcvtneoph2ps128"]
264	unsafefn cvtneoph2ps_128(a: *const __m128h) -> __m128;
265	#[link_name = "llvm.x86.vcvtneoph2ps256"]
266	unsafefn cvtneoph2ps_256(a: *const __m256h) -> __m256;
267	}
268
// Unit tests for the AVX-NE-CONVERT intrinsics. Each test feeds small exactly-representable
// values (1.0 ..= 16.0) through one intrinsic and compares against the expected lanes.
#[cfg(test)]
mod tests {
    use crate::core_arch::simd::{u16x4, u16x8};
    use crate::core_arch::x86::*;
    use crate::mem::transmute_copy;
    use std::ptr::addr_of;
    use stdarch_test::simd_test;

    // Raw BF16 bit patterns, written as sign (1 bit) _ exponent (8 bits) _ mantissa (7 bits).
    const BF16_ONE: u16 = 0b0_01111111_0000000;
    const BF16_TWO: u16 = 0b0_10000000_0000000;
    const BF16_THREE: u16 = 0b0_10000000_1000000;
    const BF16_FOUR: u16 = 0b0_10000001_0000000;
    const BF16_FIVE: u16 = 0b0_10000001_0100000;
    const BF16_SIX: u16 = 0b0_10000001_1000000;
    const BF16_SEVEN: u16 = 0b0_10000001_1100000;
    const BF16_EIGHT: u16 = 0b0_10000010_0000000;

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_bcstnebf16_ps() {
        let a = bf16::from_bits(BF16_ONE);
        let r = _mm_bcstnebf16_ps(addr_of!(a));
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_bcstnebf16_ps() {
        let a = bf16::from_bits(BF16_ONE);
        let r = _mm256_bcstnebf16_ps(addr_of!(a));
        let e = _mm256_set_ps(1., 1., 1., 1., 1., 1., 1., 1.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_bcstnesh_ps() {
        let a = 1.0_f16;
        let r = _mm_bcstnesh_ps(addr_of!(a));
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_bcstnesh_ps() {
        let a = 1.0_f16;
        let r = _mm256_bcstnesh_ps(addr_of!(a));
        let e = _mm256_set_ps(1., 1., 1., 1., 1., 1., 1., 1.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_cvtneebf16_ps() {
        let a = __m128bh([
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        ]);
        let r = _mm_cvtneebf16_ps(addr_of!(a));
        // Even source indices 0, 2, 4, 6.
        let e = _mm_setr_ps(1., 3., 5., 7.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_cvtneebf16_ps() {
        let a = __m256bh([
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        ]);
        let r = _mm256_cvtneebf16_ps(addr_of!(a));
        let e = _mm256_setr_ps(1., 3., 5., 7., 1., 3., 5., 7.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_cvtneeph_ps() {
        let a = __m128h([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
        let r = _mm_cvtneeph_ps(addr_of!(a));
        let e = _mm_setr_ps(1., 3., 5., 7.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_cvtneeph_ps() {
        let a = __m256h([
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        ]);
        let r = _mm256_cvtneeph_ps(addr_of!(a));
        let e = _mm256_setr_ps(1., 3., 5., 7., 9., 11., 13., 15.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_cvtneobf16_ps() {
        let a = __m128bh([
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        ]);
        let r = _mm_cvtneobf16_ps(addr_of!(a));
        // Odd source indices 1, 3, 5, 7.
        let e = _mm_setr_ps(2., 4., 6., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_cvtneobf16_ps() {
        let a = __m256bh([
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        ]);
        let r = _mm256_cvtneobf16_ps(addr_of!(a));
        let e = _mm256_setr_ps(2., 4., 6., 8., 2., 4., 6., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_cvtneoph_ps() {
        let a = __m128h([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
        let r = _mm_cvtneoph_ps(addr_of!(a));
        let e = _mm_setr_ps(2., 4., 6., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_cvtneoph_ps() {
        let a = __m256h([
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        ]);
        let r = _mm256_cvtneoph_ps(addr_of!(a));
        let e = _mm256_setr_ps(2., 4., 6., 8., 10., 12., 14., 16.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_cvtneps_avx_pbh() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        // `transmute_copy` into `u16x4` compares only the first four 16-bit lanes of the
        // result, which is where the four converted inputs are expected.
        let r: u16x4 = transmute_copy(&_mm_cvtneps_avx_pbh(a));
        let e = u16x4::new(BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR);
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_cvtneps_avx_pbh() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r: u16x8 = transmute(_mm256_cvtneps_avx_pbh(a));
        let e = u16x8::new(
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        );
        assert_eq!(r, e);
    }
}
414