f32x4_t.rs source code [crates/tiny_skia/src/wide/f32x4_t.rs]

1	// Copyright 2020 Yevhenii Reizner
2	//
3	// Use of this source code is governed by a BSD-style license that can be
4	// found in the LICENSE file.
5
6	// Based on https://github.com/Lokathor/wide (Zlib)
7
8	use bytemuck::cast;
9
10	#[cfg(all(not(feature = "std"), feature = "no-std-float"))]
11	use tiny_skia_path::NoStdFloat;
12
13	use super::i32x4;
14
15	cfg_if::cfg_if! {
16	if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
17	#[cfg(target_arch = "x86")]
18	use core::arch::x86::*;
19	#[cfg(target_arch = "x86_64")]
20	use core::arch::x86_64::*;
21
22	#[derive(Clone, Copy, Debug)]
23	#[repr(C, align(`16`))]
24	pub struct f32x4(__m128);
25	} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
26	use core::arch::wasm32::*;
27
28	// repr(transparent) allows for directly passing the v128 on the WASM stack.
29	#[derive(Clone, Copy, Debug)]
30	#[repr(transparent)]
31	pub struct f32x4(v128);
32	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
33	use core::arch::aarch64::*;
34
35	#[derive(Clone, Copy, Debug)]
36	#[repr(C, align(`16`))]
37	pub struct f32x4(float32x4_t);
38	} else {
39	use super::FasterMinMax;
40
41	#[derive(Clone, Copy, Debug)]
42	#[repr(C, align(`16`))]
43	pub struct f32x4([f32; `4`]);
44	}
45	}
46
// SAFETY: in every configuration `f32x4` is a `Copy` wrapper around a single
// 16-byte value holding four `f32` lanes (`__m128`, `v128`, `float32x4_t` or
// a 16-byte aligned `[f32; 4]`), so it has no padding and the all-zero bit
// pattern is a valid value (four `0.0` lanes).
unsafe impl bytemuck::Zeroable for f32x4 {}
// SAFETY: same layout argument as above; every 128-bit pattern is some
// combination of four `f32` values, which is what lets `bytemuck::cast`
// convert to and from `[f32; 4]` and the integer vector types.
unsafe impl bytemuck::Pod for f32x4 {}
49
50	impl Default for f32x4 {
51	fn default() -> Self {
52	Self::splat(`0.0`)
53	}
54	}
55
56	impl f32x4 {
57	pub fn splat(n: f32) -> Self {
58	Self::from([n, n, n, n])
59	}
60
61	pub fn floor(self) -> Self {
62	cfg_if::cfg_if! {
63	if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
64	Self(f32x4_floor(self.0))
65	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
66	Self(unsafe { vrndmq_f32(self.0) })
67	} else {
68	let roundtrip: f32x4 = cast(self.trunc_int().to_f32x4());
69	roundtrip - roundtrip.cmp_gt(self).blend(f32x4::splat(`1.0`), f32x4::default())
70	}
71	}
72	}
73
74	pub fn abs(self) -> Self {
75	cfg_if::cfg_if! {
76	if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
77	Self(f32x4_abs(self.0))
78	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
79	Self(unsafe { vabsq_f32(self.0) })
80	} else {
81	let non_sign_bits = f32x4::splat(f32::from_bits(i32::MAX as u32));
82	self & non_sign_bits
83	}
84	}
85	}
86
87	pub fn max(self, rhs: Self) -> Self {
88	// These technically don't have the same semantics for NaN and 0, but it
89	// doesn't seem to matter as Skia does it the same way.
90	cfg_if::cfg_if! {
91	if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
92	Self(unsafe { _mm_max_ps(self.0, rhs.0) })
93	} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
94	Self(f32x4_pmax(self.0, rhs.0))
95	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
96	Self(unsafe { vmaxq_f32(self.0, rhs.0) })
97	} else {
98	Self([
99	self.0[`0`].faster_max(rhs.0[`0`]),
100	self.0[`1`].faster_max(rhs.0[`1`]),
101	self.0[`2`].faster_max(rhs.0[`2`]),
102	self.0[`3`].faster_max(rhs.0[`3`]),
103	])
104	}
105	}
106	}
107
108	pub fn min(self, rhs: Self) -> Self {
109	// These technically don't have the same semantics for NaN and 0, but it
110	// doesn't seem to matter as Skia does it the same way.
111	cfg_if::cfg_if! {
112	if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
113	Self(unsafe { _mm_min_ps(self.0, rhs.0) })
114	} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
115	Self(f32x4_pmin(self.0, rhs.0))
116	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
117	Self(unsafe { vminq_f32(self.0, rhs.0) })
118	} else {
119	Self([
120	self.0[`0`].faster_min(rhs.0[`0`]),
121	self.0[`1`].faster_min(rhs.0[`1`]),
122	self.0[`2`].faster_min(rhs.0[`2`]),
123	self.0[`3`].faster_min(rhs.0[`3`]),
124	])
125	}
126	}
127	}
128
129	pub fn cmp_eq(self, rhs: Self) -> Self {
130	cfg_if::cfg_if! {
131	if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
132	Self(unsafe { _mm_cmpeq_ps(self.0, rhs.0) })
133	} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
134	Self(f32x4_eq(self.0, rhs.0))
135	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
136	Self(cast(unsafe { vceqq_f32(self.0, rhs.0) }))
137	} else {
138	Self([
139	if self.0[`0`] == rhs.0[`0`] { f32::from_bits(u32::MAX) } else { `0.0` },
140	if self.0[`1`] == rhs.0[`1`] { f32::from_bits(u32::MAX) } else { `0.0` },
141	if self.0[`2`] == rhs.0[`2`] { f32::from_bits(u32::MAX) } else { `0.0` },
142	if self.0[`3`] == rhs.0[`3`] { f32::from_bits(u32::MAX) } else { `0.0` },
143	])
144	}
145	}
146	}
147
148	pub fn cmp_ne(self, rhs: Self) -> Self {
149	cfg_if::cfg_if! {
150	if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
151	Self(unsafe { _mm_cmpneq_ps(self.0, rhs.0) })
152	} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
153	Self(f32x4_ne(self.0, rhs.0))
154	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
155	Self(cast(unsafe { vmvnq_u32(vceqq_f32(self.0, rhs.0)) }))
156	} else {
157	Self([
158	if self.0[`0`] != rhs.0[`0`] { f32::from_bits(u32::MAX) } else { `0.0` },
159	if self.0[`1`] != rhs.0[`1`] { f32::from_bits(u32::MAX) } else { `0.0` },
160	if self.0[`2`] != rhs.0[`2`] { f32::from_bits(u32::MAX) } else { `0.0` },
161	if self.0[`3`] != rhs.0[`3`] { f32::from_bits(u32::MAX) } else { `0.0` },
162	])
163	}
164	}
165	}
166
167	pub fn cmp_ge(self, rhs: Self) -> Self {
168	cfg_if::cfg_if! {
169	if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
170	Self(unsafe { _mm_cmpge_ps(self.0, rhs.0) })
171	} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
172	Self(f32x4_ge(self.0, rhs.0))
173	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
174	Self(cast(unsafe { vcgeq_f32(self.0, rhs.0) }))
175	} else {
176	Self([
177	if self.0[`0`] >= rhs.0[`0`] { f32::from_bits(u32::MAX) } else { `0.0` },
178	if self.0[`1`] >= rhs.0[`1`] { f32::from_bits(u32::MAX) } else { `0.0` },
179	if self.0[`2`] >= rhs.0[`2`] { f32::from_bits(u32::MAX) } else { `0.0` },
180	if self.0[`3`] >= rhs.0[`3`] { f32::from_bits(u32::MAX) } else { `0.0` },
181	])
182	}
183	}
184	}
185
186	pub fn cmp_gt(self, rhs: Self) -> Self {
187	cfg_if::cfg_if! {
188	if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
189	Self(unsafe { _mm_cmpgt_ps(self.0, rhs.0) })
190	} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
191	Self(f32x4_gt(self.0, rhs.0))
192	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
193	Self(cast(unsafe { vcgtq_f32(self.0, rhs.0) }))
194	} else {
195	Self([
196	if self.0[`0`] > rhs.0[`0`] { f32::from_bits(u32::MAX) } else { `0.0` },
197	if self.0[`1`] > rhs.0[`1`] { f32::from_bits(u32::MAX) } else { `0.0` },
198	if self.0[`2`] > rhs.0[`2`] { f32::from_bits(u32::MAX) } else { `0.0` },
199	if self.0[`3`] > rhs.0[`3`] { f32::from_bits(u32::MAX) } else { `0.0` },
200	])
201	}
202	}
203	}
204
205	pub fn cmp_le(self, rhs: Self) -> Self {
206	cfg_if::cfg_if! {
207	if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
208	Self(unsafe { _mm_cmple_ps(self.0, rhs.0) })
209	} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
210	Self(f32x4_le(self.0, rhs.0))
211	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
212	Self(cast(unsafe { vcleq_f32(self.0, rhs.0) }))
213	} else {
214	Self([
215	if self.0[`0`] <= rhs.0[`0`] { f32::from_bits(u32::MAX) } else { `0.0` },
216	if self.0[`1`] <= rhs.0[`1`] { f32::from_bits(u32::MAX) } else { `0.0` },
217	if self.0[`2`] <= rhs.0[`2`] { f32::from_bits(u32::MAX) } else { `0.0` },
218	if self.0[`3`] <= rhs.0[`3`] { f32::from_bits(u32::MAX) } else { `0.0` },
219	])
220	}
221	}
222	}
223
224	pub fn cmp_lt(self, rhs: Self) -> Self {
225	cfg_if::cfg_if! {
226	if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
227	Self(unsafe { _mm_cmplt_ps(self.0, rhs.0) })
228	} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
229	Self(f32x4_lt(self.0, rhs.0))
230	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
231	Self(cast(unsafe { vcltq_f32(self.0, rhs.0) }))
232	} else {
233	Self([
234	if self.0[`0`] < rhs.0[`0`] { f32::from_bits(u32::MAX) } else { `0.0` },
235	if self.0[`1`] < rhs.0[`1`] { f32::from_bits(u32::MAX) } else { `0.0` },
236	if self.0[`2`] < rhs.0[`2`] { f32::from_bits(u32::MAX) } else { `0.0` },
237	if self.0[`3`] < rhs.0[`3`] { f32::from_bits(u32::MAX) } else { `0.0` },
238	])
239	}
240	}
241	}
242
243	#[inline]
244	pub fn blend(self, t: Self, f: Self) -> Self {
245	cfg_if::cfg_if! {
246	if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] {
247	Self(unsafe { _mm_blendv_ps(f.0, t.0, self.0) })
248	} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
249	Self(v128_bitselect(t.0, f.0, self.0))
250	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
251	Self(unsafe { cast(vbslq_u32( cast(self.0), cast(t.0), cast(f.0))) })
252	} else {
253	super::generic_bit_blend(self, t, f)
254	}
255	}
256	}
257
258	pub fn round(self) -> Self {
259	cfg_if::cfg_if! {
260	if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] {
261	Self(
262	unsafe { _mm_round_ps(self.0, _MM_FROUND_NO_EXC \| _MM_FROUND_TO_NEAREST_INT) },
263	)
264	} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
265	Self(f32x4_nearest(self.0))
266	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
267	Self(unsafe { vrndnq_f32(self.0) })
268	} else {
269	use super::u32x4;
270
271	let to_int = f32x4::splat(`1.0` / f32::EPSILON);
272	let u: u32x4 = cast(self);
273	let e: i32x4 = cast(u.shr::<`23`>() & u32x4::splat(`0xff`));
274	let mut y: f32x4;
275
276	let no_op_magic = i32x4::splat(`0x7f` + `23`);
277	let no_op_mask: f32x4 = cast(e.cmp_gt(no_op_magic) \| e.cmp_eq(no_op_magic));
278	let no_op_val: f32x4 = self;
279
280	let zero_magic = i32x4::splat(`0x7f` - `1`);
281	let zero_mask: f32x4 = cast(e.cmp_lt(zero_magic));
282	let zero_val: f32x4 = self * f32x4::splat(`0.0`);
283
284	let neg_bit: f32x4 = cast(cast::<u32x4, i32x4>(u).cmp_lt(i32x4::default()));
285	let x: f32x4 = neg_bit.blend(-self, self);
286	y = x + to_int - to_int - x;
287	y = y.cmp_gt(f32x4::splat(`0.5`)).blend(
288	y + x - f32x4::splat(`-1.0`),
289	y.cmp_lt(f32x4::splat(`-0.5`)).blend(y + x + f32x4::splat(`1.0`), y + x),
290	);
291	y = neg_bit.blend(-y, y);
292
293	no_op_mask.blend(no_op_val, zero_mask.blend(zero_val, y))
294	}
295	}
296	}
297
298	pub fn round_int(self) -> i32x4 {
299	// These technically don't have the same semantics for NaN and out of
300	// range values, but it doesn't seem to matter as Skia does it the same
301	// way.
302	cfg_if::cfg_if! {
303	if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
304	i32x4(unsafe { _mm_cvtps_epi32(self.0) })
305	} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
306	i32x4(i32x4_trunc_sat_f32x4(self.round().0))
307	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
308	i32x4(unsafe { vcvtnq_s32_f32(self.0) } )
309	} else {
310	let rounded: [f32; `4`] = cast(self.round());
311	cast([
312	rounded[`0`] as i32,
313	rounded[`1`] as i32,
314	rounded[`2`] as i32,
315	rounded[`3`] as i32,
316	])
317	}
318	}
319	}
320
321	pub fn trunc_int(self) -> i32x4 {
322	// These technically don't have the same semantics for NaN and out of
323	// range values, but it doesn't seem to matter as Skia does it the same
324	// way.
325	cfg_if::cfg_if! {
326	if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
327	i32x4(unsafe { _mm_cvttps_epi32(self.0) })
328	} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
329	i32x4(i32x4_trunc_sat_f32x4(self.0))
330	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
331	i32x4(unsafe { vcvtq_s32_f32(self.0) })
332	} else {
333	cast([
334	self.0[`0`] as i32,
335	self.0[`1`] as i32,
336	self.0[`2`] as i32,
337	self.0[`3`] as i32,
338	])
339	}
340	}
341	}
342
343	pub fn recip_fast(self) -> Self {
344	cfg_if::cfg_if! {
345	if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
346	Self(unsafe { _mm_rcp_ps(self.0) })
347	} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
348	Self(f32x4_div(f32x4_splat(`1.0`), self.0))
349	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
350	unsafe {
351	let a = vrecpeq_f32(self.0);
352	let a = vmulq_f32(vrecpsq_f32(self.0, a), a);
353	Self(a)
354	}
355	} else {
356	Self::from([
357	`1.0` / self.0[`0`],
358	`1.0` / self.0[`1`],
359	`1.0` / self.0[`2`],
360	`1.0` / self.0[`3`],
361	])
362	}
363	}
364	}
365
366	pub fn recip_sqrt(self) -> Self {
367	cfg_if::cfg_if! {
368	if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
369	Self(unsafe { _mm_rsqrt_ps(self.0) })
370	} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
371	Self(f32x4_div(f32x4_splat(`1.0`), f32x4_sqrt(self.0)))
372	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
373	unsafe {
374	let a = vrsqrteq_f32(self.0);
375	let a = vmulq_f32(vrsqrtsq_f32(self.0, vmulq_f32(a, a)), a);
376	Self(a)
377	}
378	} else {
379	Self::from([
380	`1.0` / self.0[`0`].sqrt(),
381	`1.0` / self.0[`1`].sqrt(),
382	`1.0` / self.0[`2`].sqrt(),
383	`1.0` / self.0[`3`].sqrt(),
384	])
385	}
386	}
387	}
388
389	pub fn sqrt(self) -> Self {
390	cfg_if::cfg_if! {
391	if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
392	Self(unsafe { _mm_sqrt_ps(self.0) })
393	} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
394	Self(f32x4_sqrt(self.0))
395	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
396	Self(unsafe { vsqrtq_f32(self.0) })
397	} else {
398	Self::from([
399	self.0[`0`].sqrt(),
400	self.0[`1`].sqrt(),
401	self.0[`2`].sqrt(),
402	self.0[`3`].sqrt(),
403	])
404	}
405	}
406	}
407	}
408
409	impl From<[f32; `4`]> for f32x4 {
410	fn from(v: [f32; `4`]) -> Self {
411	cast(v)
412	}
413	}
414
415	impl From<f32x4> for [f32; `4`] {
416	fn from(v: f32x4) -> Self {
417	cast(v)
418	}
419	}
420
421	impl core::ops::Add for f32x4 {
422	type Output = Self;
423
424	fn add(self, rhs: Self) -> Self::Output {
425	cfg_if::cfg_if! {
426	if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
427	Self(unsafe { _mm_add_ps(self.0, rhs.0) })
428	} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
429	Self(f32x4_add(self.0, rhs.0))
430	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
431	Self(unsafe { vaddq_f32(self.0, rhs.0) })
432	} else {
433	Self([
434	self.0[`0`] + rhs.0[`0`],
435	self.0[`1`] + rhs.0[`1`],
436	self.0[`2`] + rhs.0[`2`],
437	self.0[`3`] + rhs.0[`3`],
438	])
439	}
440	}
441	}
442	}
443
444	impl core::ops::AddAssign for f32x4 {
445	fn add_assign(&mut self, rhs: f32x4) {
446	self = self + rhs;
447	}
448	}
449
450	impl core::ops::Sub for f32x4 {
451	type Output = Self;
452
453	fn sub(self, rhs: Self) -> Self::Output {
454	cfg_if::cfg_if! {
455	if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
456	Self(unsafe { _mm_sub_ps(self.0, rhs.0) })
457	} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
458	Self(f32x4_sub(self.0, rhs.0))
459	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
460	Self(unsafe { vsubq_f32(self.0, rhs.0) })
461	} else {
462	Self([
463	self.0[`0`] - rhs.0[`0`],
464	self.0[`1`] - rhs.0[`1`],
465	self.0[`2`] - rhs.0[`2`],
466	self.0[`3`] - rhs.0[`3`],
467	])
468	}
469	}
470	}
471	}
472
473	impl core::ops::Mul for f32x4 {
474	type Output = Self;
475
476	fn mul(self, rhs: Self) -> Self::Output {
477	cfg_if::cfg_if! {
478	if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
479	Self(unsafe { _mm_mul_ps(self.0, rhs.0) })
480	} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
481	Self(f32x4_mul(self.0, rhs.0))
482	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
483	Self(unsafe { vmulq_f32(self.0, rhs.0) })
484	} else {
485	Self([
486	self.0[`0`] * rhs.0[`0`],
487	self.0[`1`] * rhs.0[`1`],
488	self.0[`2`] * rhs.0[`2`],
489	self.0[`3`] * rhs.0[`3`],
490	])
491	}
492	}
493	}
494	}
495
496	impl core::ops::MulAssign for f32x4 {
497	fn mul_assign(&mut self, rhs: f32x4) {
498	self = self * rhs;
499	}
500	}
501
502	impl core::ops::Div for f32x4 {
503	type Output = Self;
504
505	fn div(self, rhs: Self) -> Self::Output {
506	cfg_if::cfg_if! {
507	if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
508	Self(unsafe { _mm_div_ps(self.0, rhs.0) })
509	} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
510	Self(f32x4_div(self.0, rhs.0))
511	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
512	Self(unsafe { vdivq_f32(self.0, rhs.0) })
513	} else {
514	Self([
515	self.0[`0`] / rhs.0[`0`],
516	self.0[`1`] / rhs.0[`1`],
517	self.0[`2`] / rhs.0[`2`],
518	self.0[`3`] / rhs.0[`3`],
519	])
520	}
521	}
522	}
523	}
524
525	impl core::ops::BitAnd for f32x4 {
526	type Output = Self;
527
528	#[inline(always)]
529	fn bitand(self, rhs: Self) -> Self::Output {
530	cfg_if::cfg_if! {
531	if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
532	Self(unsafe { _mm_and_ps(self.0, rhs.0) })
533	} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
534	Self(v128_and(self.0, rhs.0))
535	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
536	Self(cast(unsafe { vandq_u32(cast(self.0), cast(rhs.0)) }))
537	} else {
538	Self([
539	f32::from_bits(self.0[`0`].to_bits() & rhs.0[`0`].to_bits()),
540	f32::from_bits(self.0[`1`].to_bits() & rhs.0[`1`].to_bits()),
541	f32::from_bits(self.0[`2`].to_bits() & rhs.0[`2`].to_bits()),
542	f32::from_bits(self.0[`3`].to_bits() & rhs.0[`3`].to_bits()),
543	])
544	}
545	}
546	}
547	}
548
549	impl core::ops::BitOr for f32x4 {
550	type Output = Self;
551
552	#[inline(always)]
553	fn bitor(self, rhs: Self) -> Self::Output {
554	cfg_if::cfg_if! {
555	if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
556	Self(unsafe { _mm_or_ps(self.0, rhs.0) })
557	} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
558	Self(v128_or(self.0, rhs.0))
559	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
560	Self(cast(unsafe { vorrq_u32(cast(self.0), cast(rhs.0)) }))
561	} else {
562	Self([
563	f32::from_bits(self.0[`0`].to_bits() \| rhs.0[`0`].to_bits()),
564	f32::from_bits(self.0[`1`].to_bits() \| rhs.0[`1`].to_bits()),
565	f32::from_bits(self.0[`2`].to_bits() \| rhs.0[`2`].to_bits()),
566	f32::from_bits(self.0[`3`].to_bits() \| rhs.0[`3`].to_bits()),
567	])
568	}
569	}
570	}
571	}
572
573	impl core::ops::BitXor for f32x4 {
574	type Output = Self;
575
576	#[inline(always)]
577	fn bitxor(self, rhs: Self) -> Self::Output {
578	cfg_if::cfg_if! {
579	if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
580	Self(unsafe { _mm_xor_ps(self.0, rhs.0) })
581	} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
582	Self(v128_xor(self.0, rhs.0))
583	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
584	Self(cast(unsafe { veorq_u32(cast(self.0), cast(rhs.0)) }))
585	} else {
586	Self([
587	f32::from_bits(self.0[`0`].to_bits() ^ rhs.0[`0`].to_bits()),
588	f32::from_bits(self.0[`1`].to_bits() ^ rhs.0[`1`].to_bits()),
589	f32::from_bits(self.0[`2`].to_bits() ^ rhs.0[`2`].to_bits()),
590	f32::from_bits(self.0[`3`].to_bits() ^ rhs.0[`3`].to_bits()),
591	])
592	}
593	}
594	}
595	}
596
597	impl core::ops::Neg for f32x4 {
598	type Output = Self;
599
600	fn neg(self) -> Self {
601	Self::default() - self
602	}
603	}
604
605	impl core::ops::Not for f32x4 {
606	type Output = Self;
607
608	fn not(self) -> Self {
609	cfg_if::cfg_if! {
610	if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
611	unsafe {
612	let all_bits = _mm_set1_ps(f32::from_bits(u32::MAX));
613	Self(_mm_xor_ps(self.0, all_bits))
614	}
615	} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
616	Self(v128_not(self.0))
617	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
618	Self(cast(unsafe { vmvnq_u32(cast(self.0)) }))
619	} else {
620	self ^ Self::splat(cast(u32::MAX))
621	}
622	}
623	}
624	}
625
626	impl core::cmp::PartialEq for f32x4 {
627	fn eq(&self, rhs: &Self) -> bool {
628	cfg_if::cfg_if! {
629	if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
630	unsafe { _mm_movemask_ps(_mm_cmpeq_ps(self.0, rhs.0)) == `0b1111` }
631	} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
632	unsafe { vminvq_u32(vceqq_f32(self.0, rhs.0)) != `0` }
633	} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
634	u32x4_all_true(f32x4_eq(self.0, rhs.0))
635	} else {
636	self.0 == rhs.0
637	}
638	}
639	}
640	}
641