1 | #![allow (non_camel_case_types)] |
2 | |
3 | use core::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Sub, SubAssign}; |
4 | |
5 | #[cfg (target_arch = "x86" )] |
6 | use core::arch::x86::*; |
7 | #[cfg (target_arch = "x86_64" )] |
8 | use core::arch::x86_64::*; |
9 | |
/// Four packed `f32` lanes backed by one SSE `__m128` register.
///
/// `#[repr(transparent)]` guarantees this wrapper has exactly the size,
/// alignment, and ABI of the underlying `__m128`.
#[derive(Copy, Clone)]
#[repr(transparent)]
pub struct f32x4(__m128);

impl f32x4 {
    /// Builds a vector from four lanes; `x0` is lane 0 (the lowest lane).
    ///
    /// `_mm_set_ps` takes its arguments highest-lane-first, so the
    /// argument order is deliberately reversed here.
    #[inline(always)]
    pub fn new(x0: f32, x1: f32, x2: f32, x3: f32) -> Self {
        // SAFETY: `_mm_set_ps` has no preconditions; SSE is part of the
        // x86_64 baseline (and this module is only compiled for x86/x86_64).
        f32x4(unsafe { _mm_set_ps(x3, x2, x1, x0) })
    }

    /// Builds a vector whose lanes hold the given raw bit patterns,
    /// reinterpreted as `f32` (a bit cast, not a numeric conversion).
    #[inline(always)]
    pub fn new_u32(x0: u32, x1: u32, x2: u32, x3: u32) -> Self {
        // `f32::from_bits` is the safe, idiomatic equivalent of
        // `transmute::<u32, f32>` and compiles to the same no-op.
        f32x4(unsafe {
            _mm_set_ps(
                f32::from_bits(x3),
                f32::from_bits(x2),
                f32::from_bits(x1),
                f32::from_bits(x0),
            )
        })
    }

    /// Broadcasts `value` into all four lanes.
    #[inline(always)]
    pub fn splat(value: f32) -> Self {
        // SAFETY: no preconditions beyond SSE availability (see `new`).
        f32x4(unsafe { _mm_set1_ps(value) })
    }

    /// Lane-wise wrapping subtraction of the lanes' *integer* bit
    /// patterns: each lane is reinterpreted as `i32`, subtracted, and the
    /// result reinterpreted back as `f32`.
    #[inline(always)]
    pub fn sub_integer(&self, other: f32x4) -> f32x4 {
        // The two casts are pure bit reinterpretations (zero-cost);
        // only `_mm_sub_epi32` performs actual work.
        // SAFETY: no preconditions beyond SSE2 availability.
        f32x4(unsafe { _mm_castsi128_ps(_mm_sub_epi32(_mm_castps_si128(self.0), _mm_castps_si128(other.0))) })
    }

    /// Returns the all-zero vector.
    #[inline(always)]
    pub fn zero() -> Self {
        // SAFETY: no preconditions beyond SSE availability.
        f32x4(unsafe { _mm_setzero_ps() })
    }

    /// Extracts the four lanes as a tuple, lane 0 first.
    #[inline(always)]
    pub fn copied(self) -> (f32, f32, f32, f32) {
        // SAFETY: `__m128` and `(f32, f32, f32, f32)` are both 16 bytes of
        // four packed little-endian f32 lanes, so the transmute is sound.
        unsafe { core::mem::transmute::<__m128, (f32, f32, f32, f32)>(self.0) }
    }

    /// Lane-wise truncation toward zero.
    ///
    /// NOTE(review): lanes that are NaN or outside the `i32` range go
    /// through `_mm_cvttps_epi32`'s "integer indefinite" result
    /// (0x8000_0000); callers presumably only pass in-range values —
    /// confirm at the call sites.
    #[inline(always)]
    pub fn trunc(self) -> Self {
        // SAFETY: no preconditions beyond SSE2 availability.
        unsafe { f32x4(_mm_cvtepi32_ps(_mm_cvttps_epi32(self.0))) }
    }

    /// Lane-wise square root.
    #[inline(always)]
    pub fn sqrt(self) -> Self {
        // SAFETY: no preconditions beyond SSE availability.
        unsafe { f32x4(_mm_sqrt_ps(self.0)) }
    }
}
61 | |
62 | impl Add for f32x4 { |
63 | type Output = f32x4; |
64 | #[inline (always)] |
65 | fn add(self, other: f32x4) -> f32x4 { |
66 | unsafe { f32x4(_mm_add_ps(self.0, b:other.0)) } |
67 | } |
68 | } |
69 | |
70 | impl AddAssign for f32x4 { |
71 | #[inline (always)] |
72 | fn add_assign(&mut self, other: f32x4) { |
73 | self.0 = unsafe { _mm_add_ps(self.0, b:other.0) }; |
74 | } |
75 | } |
76 | |
77 | impl Sub for f32x4 { |
78 | type Output = f32x4; |
79 | #[inline (always)] |
80 | fn sub(self, other: f32x4) -> f32x4 { |
81 | unsafe { f32x4(_mm_sub_ps(self.0, b:other.0)) } |
82 | } |
83 | } |
84 | |
85 | impl SubAssign for f32x4 { |
86 | #[inline (always)] |
87 | fn sub_assign(&mut self, other: f32x4) { |
88 | self.0 = unsafe { _mm_sub_ps(self.0, b:other.0) }; |
89 | } |
90 | } |
91 | |
92 | impl Mul for f32x4 { |
93 | type Output = f32x4; |
94 | #[inline (always)] |
95 | fn mul(self, other: f32x4) -> f32x4 { |
96 | unsafe { f32x4(_mm_mul_ps(self.0, b:other.0)) } |
97 | } |
98 | } |
99 | |
100 | impl MulAssign for f32x4 { |
101 | #[inline (always)] |
102 | fn mul_assign(&mut self, other: f32x4) { |
103 | self.0 = unsafe { _mm_mul_ps(self.0, b:other.0) }; |
104 | } |
105 | } |
106 | |
107 | impl Div for f32x4 { |
108 | type Output = f32x4; |
109 | #[inline (always)] |
110 | fn div(self, other: f32x4) -> f32x4 { |
111 | unsafe { f32x4(_mm_div_ps(self.0, b:other.0)) } |
112 | } |
113 | } |
114 | |
115 | impl DivAssign for f32x4 { |
116 | #[inline (always)] |
117 | fn div_assign(&mut self, other: f32x4) { |
118 | self.0 = unsafe { _mm_div_ps(self.0, b:other.0) }; |
119 | } |
120 | } |
121 | |