use core::{mem::MaybeUninit, ptr};

#[cfg(target_arch = "x86")]
use core::arch::x86::{
    __m128, __m128i, __m256, _mm256_cvtph_ps, _mm256_cvtps_ph, _mm_cvtph_ps, _mm_cvtps_ph,
    _MM_FROUND_TO_NEAREST_INT,
};
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::{
    __m128, __m128i, __m256, _mm256_cvtph_ps, _mm256_cvtps_ph, _mm_cvtph_ps, _mm_cvtps_ph,
    _MM_FROUND_TO_NEAREST_INT,
};

use super::convert_chunked_slice_8;

/////////////// x86/x86_64 f16c ////////////////
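
// NOTE: every function below is compiled with the `f16c` target feature enabled, so the
// caller is responsible for verifying F16C support first (e.g. with
// `is_x86_feature_detected!("f16c")` where `std` is available); calling these on a CPU
// without F16C is undefined behavior.

/// Converts a single half-precision bit pattern to `f32` using the `_mm_cvtph_ps`
/// intrinsic; only the low 16 bits of the zeroed source vector are written.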
#[target_feature(enable = "f16c")]
#[inline]
pub(super) unsafe fn f16_to_f32_x86_f16c(i: u16) -> f32 {
    let mut vec = MaybeUninit::<__m128i>::zeroed();
    vec.as_mut_ptr().cast::<u16>().write(i);
    let retval = _mm_cvtph_ps(vec.assume_init());
    *(&retval as *const __m128).cast()
}
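
/// Converts a single `f32` to a half-precision bit pattern using `_mm_cvtps_ph` with
/// round-to-nearest rounding; only the low 16 bits of the result vector are read back.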
#[target_feature(enable = "f16c")]
#[inline]
pub(super) unsafe fn f32_to_f16_x86_f16c(f: f32) -> u16 {
    let mut vec = MaybeUninit::<__m128>::zeroed();
    vec.as_mut_ptr().cast::<f32>().write(f);
    let retval = _mm_cvtps_ph(vec.assume_init(), _MM_FROUND_TO_NEAREST_INT);
    *(&retval as *const __m128i).cast()
}
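
/// Converts four half-precision bit patterns to four `f32` values with a single
/// `_mm_cvtph_ps`; the unused upper half of the source vector is zeroed.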
#[target_feature(enable = "f16c")]
#[inline]
pub(super) unsafe fn f16x4_to_f32x4_x86_f16c(v: &[u16; 4]) -> [f32; 4] {
    let mut vec = MaybeUninit::<__m128i>::zeroed();
    ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 4);
    let retval = _mm_cvtph_ps(vec.assume_init());
    *(&retval as *const __m128).cast()
}
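
/// Converts four `f32` values to four half-precision bit patterns with a single
/// `_mm_cvtps_ph`; only the low 64 bits of the result vector are read back.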
#[target_feature(enable = "f16c")]
#[inline]
pub(super) unsafe fn f32x4_to_f16x4_x86_f16c(v: &[f32; 4]) -> [u16; 4] {
    let mut vec = MaybeUninit::<__m128>::uninit();
    ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 4);
    let retval = _mm_cvtps_ph(vec.assume_init(), _MM_FROUND_TO_NEAREST_INT);
    *(&retval as *const __m128i).cast()
}
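
/// Converts four half-precision bit patterns to four `f64` values by widening to `f32`
/// with F16C and letting the compiler handle the `f32` to `f64` casts.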
#[target_feature(enable = "f16c")]
#[inline]
pub(super) unsafe fn f16x4_to_f64x4_x86_f16c(v: &[u16; 4]) -> [f64; 4] {
    let array = f16x4_to_f32x4_x86_f16c(v);
    // Let compiler vectorize this regular cast for now.
    // TODO: investigate auto-detecting sse2/avx convert features
    [
        array[0] as f64,
        array[1] as f64,
        array[2] as f64,
        array[3] as f64,
    ]
}
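
/// Converts four `f64` values to four half-precision bit patterns by narrowing to `f32`
/// first and reusing the four-wide `f32` conversion above.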
#[target_feature(enable = "f16c")]
#[inline]
pub(super) unsafe fn f64x4_to_f16x4_x86_f16c(v: &[f64; 4]) -> [u16; 4] {
    // Let compiler vectorize this regular cast for now.
    // TODO: investigate auto-detecting sse2/avx convert features
    let v = [v[0] as f32, v[1] as f32, v[2] as f32, v[3] as f32];
    f32x4_to_f16x4_x86_f16c(&v)
}
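
/// Converts eight half-precision bit patterns to eight `f32` values with a single
/// 256-bit `_mm256_cvtph_ps`.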
#[target_feature(enable = "f16c")]
#[inline]
pub(super) unsafe fn f16x8_to_f32x8_x86_f16c(v: &[u16; 8]) -> [f32; 8] {
    let mut vec = MaybeUninit::<__m128i>::zeroed();
    ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 8);
    let retval = _mm256_cvtph_ps(vec.assume_init());
    *(&retval as *const __m256).cast()
}
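
/// Converts eight `f32` values to eight half-precision bit patterns with a single
/// 256-bit `_mm256_cvtps_ph` using round-to-nearest rounding.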
#[target_feature(enable = "f16c")]
#[inline]
pub(super) unsafe fn f32x8_to_f16x8_x86_f16c(v: &[f32; 8]) -> [u16; 8] {
    let mut vec = MaybeUninit::<__m256>::uninit();
    ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 8);
    let retval = _mm256_cvtps_ph(vec.assume_init(), _MM_FROUND_TO_NEAREST_INT);
    *(&retval as *const __m128i).cast()
}
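
/// Converts eight half-precision bit patterns to eight `f64` values by widening to `f32`
/// with F16C and letting the compiler handle the `f32` to `f64` casts.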
#[target_feature(enable = "f16c")]
#[inline]
pub(super) unsafe fn f16x8_to_f64x8_x86_f16c(v: &[u16; 8]) -> [f64; 8] {
    let array = f16x8_to_f32x8_x86_f16c(v);
    // Let compiler vectorize this regular cast for now.
    // TODO: investigate auto-detecting sse2/avx convert features
    [
        array[0] as f64,
        array[1] as f64,
        array[2] as f64,
        array[3] as f64,
        array[4] as f64,
        array[5] as f64,
        array[6] as f64,
        array[7] as f64,
    ]
}
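
/// Converts eight `f64` values to eight half-precision bit patterns by narrowing to `f32`
/// first and reusing the eight-wide `f32` conversion above.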
#[target_feature(enable = "f16c")]
#[inline]
pub(super) unsafe fn f64x8_to_f16x8_x86_f16c(v: &[f64; 8]) -> [u16; 8] {
    // Let compiler vectorize this regular cast for now.
    // TODO: investigate auto-detecting sse2/avx convert features
    let v = [
        v[0] as f32,
        v[1] as f32,
        v[2] as f32,
        v[3] as f32,
        v[4] as f32,
        v[5] as f32,
        v[6] as f32,
        v[7] as f32,
    ];
    f32x8_to_f16x8_x86_f16c(&v)
}