| 1 | #![allow (non_camel_case_types)] |
| 2 | |
| 3 | use core::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Sub, SubAssign}; |
| 4 | |
| 5 | #[cfg (target_arch = "x86" )] |
| 6 | use core::arch::x86::*; |
| 7 | #[cfg (target_arch = "x86_64" )] |
| 8 | use core::arch::x86_64::*; |
| 9 | |
/// A 4-lane `f32` SIMD vector backed by an x86/x86_64 `__m128` register.
///
/// `#[repr(transparent)]` guarantees this wrapper has exactly the layout and
/// ABI of the underlying `__m128`, so it can be bitcast/passed freely.
#[derive (Copy, Clone)]
#[repr (transparent)]
pub struct f32x4(__m128);
| 13 | |
| 14 | impl f32x4 { |
| 15 | #[inline (always)] |
| 16 | pub fn new(x0: f32, x1: f32, x2: f32, x3: f32) -> Self { |
| 17 | f32x4(unsafe { _mm_set_ps(x3, x2, x1, x0) }) |
| 18 | } |
| 19 | |
| 20 | #[inline (always)] |
| 21 | pub fn new_u32(x0: u32, x1: u32, x2: u32, x3: u32) -> Self { |
| 22 | f32x4(unsafe { |
| 23 | _mm_set_ps( |
| 24 | core::mem::transmute::<u32, f32>(x3), |
| 25 | core::mem::transmute::<u32, f32>(x2), |
| 26 | core::mem::transmute::<u32, f32>(x1), |
| 27 | core::mem::transmute::<u32, f32>(x0), |
| 28 | ) |
| 29 | }) |
| 30 | } |
| 31 | |
| 32 | #[inline (always)] |
| 33 | pub fn splat(value: f32) -> Self { |
| 34 | f32x4(unsafe { _mm_set1_ps(value) }) |
| 35 | } |
| 36 | |
| 37 | pub fn sub_integer(&self, other: f32x4) -> f32x4 { |
| 38 | f32x4(unsafe { _mm_castsi128_ps(_mm_sub_epi32(_mm_castps_si128(self.0), _mm_castps_si128(other.0))) }) |
| 39 | } |
| 40 | |
| 41 | #[inline (always)] |
| 42 | pub fn zero() -> Self { |
| 43 | f32x4(unsafe { _mm_setzero_ps() }) |
| 44 | } |
| 45 | |
| 46 | #[inline (always)] |
| 47 | pub fn copied(self) -> (f32, f32, f32, f32) { |
| 48 | unsafe { core::mem::transmute::<__m128, (f32, f32, f32, f32)>(self.0) } |
| 49 | } |
| 50 | |
| 51 | #[inline (always)] |
| 52 | pub fn trunc(self) -> Self { |
| 53 | unsafe { f32x4(_mm_cvtepi32_ps(_mm_cvttps_epi32(self.0))) } |
| 54 | } |
| 55 | |
| 56 | #[inline (always)] |
| 57 | pub fn sqrt(self) -> Self { |
| 58 | unsafe { f32x4(_mm_sqrt_ps(self.0)) } |
| 59 | } |
| 60 | } |
| 61 | |
| 62 | impl Add for f32x4 { |
| 63 | type Output = f32x4; |
| 64 | #[inline (always)] |
| 65 | fn add(self, other: f32x4) -> f32x4 { |
| 66 | unsafe { f32x4(_mm_add_ps(self.0, b:other.0)) } |
| 67 | } |
| 68 | } |
| 69 | |
| 70 | impl AddAssign for f32x4 { |
| 71 | #[inline (always)] |
| 72 | fn add_assign(&mut self, other: f32x4) { |
| 73 | self.0 = unsafe { _mm_add_ps(self.0, b:other.0) }; |
| 74 | } |
| 75 | } |
| 76 | |
| 77 | impl Sub for f32x4 { |
| 78 | type Output = f32x4; |
| 79 | #[inline (always)] |
| 80 | fn sub(self, other: f32x4) -> f32x4 { |
| 81 | unsafe { f32x4(_mm_sub_ps(self.0, b:other.0)) } |
| 82 | } |
| 83 | } |
| 84 | |
| 85 | impl SubAssign for f32x4 { |
| 86 | #[inline (always)] |
| 87 | fn sub_assign(&mut self, other: f32x4) { |
| 88 | self.0 = unsafe { _mm_sub_ps(self.0, b:other.0) }; |
| 89 | } |
| 90 | } |
| 91 | |
| 92 | impl Mul for f32x4 { |
| 93 | type Output = f32x4; |
| 94 | #[inline (always)] |
| 95 | fn mul(self, other: f32x4) -> f32x4 { |
| 96 | unsafe { f32x4(_mm_mul_ps(self.0, b:other.0)) } |
| 97 | } |
| 98 | } |
| 99 | |
| 100 | impl MulAssign for f32x4 { |
| 101 | #[inline (always)] |
| 102 | fn mul_assign(&mut self, other: f32x4) { |
| 103 | self.0 = unsafe { _mm_mul_ps(self.0, b:other.0) }; |
| 104 | } |
| 105 | } |
| 106 | |
| 107 | impl Div for f32x4 { |
| 108 | type Output = f32x4; |
| 109 | #[inline (always)] |
| 110 | fn div(self, other: f32x4) -> f32x4 { |
| 111 | unsafe { f32x4(_mm_div_ps(self.0, b:other.0)) } |
| 112 | } |
| 113 | } |
| 114 | |
| 115 | impl DivAssign for f32x4 { |
| 116 | #[inline (always)] |
| 117 | fn div_assign(&mut self, other: f32x4) { |
| 118 | self.0 = unsafe { _mm_div_ps(self.0, b:other.0) }; |
| 119 | } |
| 120 | } |
| 121 | |