// Copyright 2020 Yevhenii Reizner
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Based on https://github.com/Lokathor/wide (Zlib)

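// A portable 4-lane i32 vector. `cfg_if` picks a backend at compile time:
// SSE2 on x86/x86_64, simd128 on wasm32, NEON on AArch64, and a plain
// `[i32; 4]` scalar fallback otherwise. All backends expose the same API.
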
use bytemuck::cast;

use super::f32x4;

cfg_if::cfg_if! {
    if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct i32x4(pub __m128i);
    } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
        use core::arch::wasm32::*;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct i32x4(pub v128);
    } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
        use core::arch::aarch64::*;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct i32x4(pub int32x4_t);
    } else {
        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct i32x4([i32; 4]);
    }
}

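// SAFETY: regardless of the selected backend, `i32x4` is a 16-byte,
// 16-byte-aligned value with no padding and no invalid bit patterns,
// so `Zeroable` and `Pod` hold.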
unsafe impl bytemuck::Zeroable for i32x4 {}
unsafe impl bytemuck::Pod for i32x4 {}

impl Default for i32x4 {
    fn default() -> Self {
        Self::splat(0)
    }
}

impl i32x4 {
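    /// Creates a vector with all four lanes set to `n`.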
    pub fn splat(n: i32) -> Self {
        cast([n, n, n, n])
    }

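    /// Selects lanes from `t` where `self` is all ones and from `f` where it
    /// is all zeros. `self` is expected to be a comparison-style mask (each
    /// lane either all ones or all zeros); the backends do not agree on
    /// arbitrary bit patterns.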
    pub fn blend(self, t: Self, f: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] {
                Self(unsafe { _mm_blendv_epi8(f.0, t.0, self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_bitselect(t.0, f.0, self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vbslq_s32(cast(self.0), t.0, f.0) })
            } else {
                super::generic_bit_blend(self, t, f)
            }
        }
    }

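    /// Lane-wise `==`: each result lane is all ones (`-1`) if the lanes are
    /// equal and all zeros otherwise.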
    pub fn cmp_eq(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmpeq_epi32(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(i32x4_eq(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { cast(vceqq_s32(self.0, rhs.0)) })
            } else {
                Self([
                    if self.0[0] == rhs.0[0] { -1 } else { 0 },
                    if self.0[1] == rhs.0[1] { -1 } else { 0 },
                    if self.0[2] == rhs.0[2] { -1 } else { 0 },
                    if self.0[3] == rhs.0[3] { -1 } else { 0 },
                ])
            }
        }
    }

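    /// Lane-wise signed `>`, producing an all-ones/all-zeros mask per lane.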
    pub fn cmp_gt(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmpgt_epi32(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(i32x4_gt(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { cast(vcgtq_s32(self.0, rhs.0)) })
            } else {
                Self([
                    if self.0[0] > rhs.0[0] { -1 } else { 0 },
                    if self.0[1] > rhs.0[1] { -1 } else { 0 },
                    if self.0[2] > rhs.0[2] { -1 } else { 0 },
                    if self.0[3] > rhs.0[3] { -1 } else { 0 },
                ])
            }
        }
    }

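    /// Lane-wise signed `<`, producing an all-ones/all-zeros mask per lane.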
    pub fn cmp_lt(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmplt_epi32(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(i32x4_lt(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { cast(vcltq_s32(self.0, rhs.0)) })
            } else {
                Self([
                    if self.0[0] < rhs.0[0] { -1 } else { 0 },
                    if self.0[1] < rhs.0[1] { -1 } else { 0 },
                    if self.0[2] < rhs.0[2] { -1 } else { 0 },
                    if self.0[3] < rhs.0[3] { -1 } else { 0 },
                ])
            }
        }
    }

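    /// Numerically converts each lane to `f32` (`1` becomes `1.0`).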
    pub fn to_f32x4(self) -> f32x4 {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                cast(Self(cast(unsafe { _mm_cvtepi32_ps(self.0) })))
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                cast(Self(f32x4_convert_i32x4(self.0)))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                cast(Self(unsafe { cast(vcvtq_f32_s32(self.0)) }))
            } else {
                let arr: [i32; 4] = cast(self);
                cast([
                    arr[0] as f32,
                    arr[1] as f32,
                    arr[2] as f32,
                    arr[3] as f32,
                ])
            }
        }
    }

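    /// Reinterprets the raw bits as `f32x4` without numeric conversion.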
    pub fn to_f32x4_bitcast(self) -> f32x4 {
        bytemuck::cast(self)
    }
}

impl From<[i32; 4]> for i32x4 {
    fn from(v: [i32; 4]) -> Self {
        cast(v)
    }
}

impl From<i32x4> for [i32; 4] {
    fn from(v: i32x4) -> Self {
        cast(v)
    }
}

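// The operator impls below dispatch exactly like the methods above.
// Addition wraps on overflow in every backend, matching SIMD semantics.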
impl core::ops::Add for i32x4 {
    type Output = Self;

    fn add(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_add_epi32(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(i32x4_add(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vaddq_s32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0].wrapping_add(rhs.0[0]),
                    self.0[1].wrapping_add(rhs.0[1]),
                    self.0[2].wrapping_add(rhs.0[2]),
                    self.0[3].wrapping_add(rhs.0[3]),
                ])
            }
        }
    }
}

impl core::ops::BitAnd for i32x4 {
    type Output = Self;

    fn bitand(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_and_si128(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_and(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vandq_s32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] & rhs.0[0],
                    self.0[1] & rhs.0[1],
                    self.0[2] & rhs.0[2],
                    self.0[3] & rhs.0[3],
                ])
            }
        }
    }
}

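// `_mm_mullo_epi32` is an SSE4.1 instruction; plain SSE2 builds (and builds
// without the `simd` feature) take the scalar path below.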
impl core::ops::Mul for i32x4 {
    type Output = Self;

    fn mul(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] {
                Self(unsafe { _mm_mullo_epi32(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(i32x4_mul(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vmulq_s32(self.0, rhs.0) })
            } else {
                // The casts are required because on plain SSE2 the backing type
                // is still `__m128i`, yet the lanes must be multiplied in scalar code.
                let a: [i32; 4] = cast(self);
                let b: [i32; 4] = cast(rhs);
                Self(cast([
                    a[0].wrapping_mul(b[0]),
                    a[1].wrapping_mul(b[1]),
                    a[2].wrapping_mul(b[2]),
                    a[3].wrapping_mul(b[3]),
                ]))
            }
        }
    }
}

impl core::ops::BitOr for i32x4 {
    type Output = Self;

    #[inline]
    fn bitor(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_or_si128(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_or(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vorrq_s32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] | rhs.0[0],
                    self.0[1] | rhs.0[1],
                    self.0[2] | rhs.0[2],
                    self.0[3] | rhs.0[3],
                ])
            }
        }
    }
}

impl core::ops::BitXor for i32x4 {
    type Output = Self;

    #[inline]
    fn bitxor(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_xor_si128(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_xor(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { veorq_s32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] ^ rhs.0[0],
                    self.0[1] ^ rhs.0[1],
                    self.0[2] ^ rhs.0[2],
                    self.0[3] ^ rhs.0[3],
                ])
            }
        }
    }
}

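// A minimal usage sketch, added for illustration (not part of the original
// file). It exercises only the public API defined above; the results should
// be identical across all backends, including the scalar fallback.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn blend_selects_by_lane_mask() {
        let a = i32x4::from([1, 2, 3, 4]);
        let b = i32x4::splat(2);
        // `cmp_gt` yields all-ones lanes where `a > b`, all-zeros elsewhere.
        let mask = a.cmp_gt(b);
        let r: [i32; 4] = mask.blend(i32x4::splat(1), i32x4::splat(0)).into();
        assert_eq!(r, [0, 0, 1, 1]);
    }

    #[test]
    fn add_and_mul_wrap_lane_wise() {
        let a = i32x4::from([1, 2, 3, i32::MAX]);
        let b = i32x4::splat(2);
        assert_eq!(<[i32; 4]>::from(a + b), [3, 4, 5, i32::MAX.wrapping_add(2)]);
        assert_eq!(<[i32; 4]>::from(a * b), [2, 4, 6, i32::MAX.wrapping_mul(2)]);
    }
}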