// Copyright 2020 Yevhenii Reizner
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Based on https://github.com/Lokathor/wide (Zlib)

use bytemuck::cast;

#[cfg(all(not(feature = "std"), feature = "no-std-float"))]
use tiny_skia_path::NoStdFloat;

use super::i32x4;

cfg_if::cfg_if! {
    if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct f32x4(__m128);
    } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
        use core::arch::wasm32::*;

        // repr(transparent) allows for directly passing the v128 on the WASM stack.
        #[derive(Clone, Copy, Debug)]
        #[repr(transparent)]
        pub struct f32x4(v128);
    } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
        use core::arch::aarch64::*;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct f32x4(float32x4_t);
    } else {
        use super::FasterMinMax;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct f32x4([f32; 4]);
    }
}

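// SAFETY: in every backend `f32x4` is a plain 16-byte value (an SSE/WASM/NEON
// register or `[f32; 4]`) with no padding and no invalid bit patterns, so
// zeroed memory and arbitrary bytes are valid instances.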
unsafe impl bytemuck::Zeroable for f32x4 {}
unsafe impl bytemuck::Pod for f32x4 {}

impl Default for f32x4 {
    fn default() -> Self {
        Self::splat(0.0)
    }
}

impl f32x4 {
    pub fn splat(n: f32) -> Self {
        Self::from([n, n, n, n])
    }

    pub fn floor(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_floor(self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vrndmq_f32(self.0) })
            } else {
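                // Truncate toward zero, then subtract 1 from lanes where the
                // truncated value ended up above the input (negative non-integers).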
                let roundtrip: f32x4 = cast(self.trunc_int().to_f32x4());
                roundtrip - roundtrip.cmp_gt(self).blend(f32x4::splat(1.0), f32x4::default())
            }
        }
    }

    pub fn abs(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_abs(self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vabsq_f32(self.0) })
            } else {
                let non_sign_bits = f32x4::splat(f32::from_bits(i32::MAX as u32));
                self & non_sign_bits
            }
        }
    }

    pub fn max(self, rhs: Self) -> Self {
        // These technically don't have the same semantics for NaN and 0, but it
        // doesn't seem to matter as Skia does it the same way.
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_max_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_pmax(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vmaxq_f32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0].faster_max(rhs.0[0]),
                    self.0[1].faster_max(rhs.0[1]),
                    self.0[2].faster_max(rhs.0[2]),
                    self.0[3].faster_max(rhs.0[3]),
                ])
            }
        }
    }

    pub fn min(self, rhs: Self) -> Self {
        // These technically don't have the same semantics for NaN and 0, but it
        // doesn't seem to matter as Skia does it the same way.
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_min_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_pmin(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vminq_f32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0].faster_min(rhs.0[0]),
                    self.0[1].faster_min(rhs.0[1]),
                    self.0[2].faster_min(rhs.0[2]),
                    self.0[3].faster_min(rhs.0[3]),
                ])
            }
        }
    }

    pub fn cmp_eq(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmpeq_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_eq(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vceqq_f32(self.0, rhs.0) }))
            } else {
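                // The comparisons return per-lane masks: all bits set
                // (reinterpreted as an f32) for true, all zeros for false.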
                Self([
                    if self.0[0] == rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[1] == rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[2] == rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[3] == rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 },
                ])
            }
        }
    }

    pub fn cmp_ne(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmpneq_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_ne(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vmvnq_u32(vceqq_f32(self.0, rhs.0)) }))
            } else {
                Self([
                    if self.0[0] != rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[1] != rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[2] != rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[3] != rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 },
                ])
            }
        }
    }

    pub fn cmp_ge(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmpge_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_ge(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vcgeq_f32(self.0, rhs.0) }))
            } else {
                Self([
                    if self.0[0] >= rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[1] >= rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[2] >= rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[3] >= rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 },
                ])
            }
        }
    }

    pub fn cmp_gt(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmpgt_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_gt(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vcgtq_f32(self.0, rhs.0) }))
            } else {
                Self([
                    if self.0[0] > rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[1] > rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[2] > rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[3] > rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 },
                ])
            }
        }
    }

    pub fn cmp_le(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmple_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_le(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vcleq_f32(self.0, rhs.0) }))
            } else {
                Self([
                    if self.0[0] <= rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[1] <= rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[2] <= rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[3] <= rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 },
                ])
            }
        }
    }

    pub fn cmp_lt(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmplt_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_lt(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vcltq_f32(self.0, rhs.0) }))
            } else {
                Self([
                    if self.0[0] < rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[1] < rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[2] < rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[3] < rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 },
                ])
            }
        }
    }

    #[inline]
    pub fn blend(self, t: Self, f: Self) -> Self {
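        // `self` is a lane mask as produced by the `cmp_*` methods: lanes whose
        // bits are set take their value from `t`, the rest from `f`. The mask
        // should be all ones or all zeros per lane so that every backend agrees
        // (SSE4.1 `blendv` looks only at the sign bit, the others are bitwise
        // selects).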
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] {
                Self(unsafe { _mm_blendv_ps(f.0, t.0, self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_bitselect(t.0, f.0, self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { cast(vbslq_u32(cast(self.0), cast(t.0), cast(f.0))) })
            } else {
                super::generic_bit_blend(self, t, f)
            }
        }
    }

    pub fn round(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] {
                Self(
                    unsafe { _mm_round_ps(self.0, _MM_FROUND_NO_EXC | _MM_FROUND_TO_NEAREST_INT) },
                )
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_nearest(self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vrndnq_f32(self.0) })
            } else {
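                // Scalar fallback in the spirit of musl's roundf(): lanes with an
                // exponent of at least 23 are already integral (no-op), lanes with
                // magnitude below 0.5 collapse to a signed zero, and the rest are
                // rounded by adding and subtracting 2^23 (1.0 / f32::EPSILON).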
                use super::u32x4;

                let to_int = f32x4::splat(1.0 / f32::EPSILON);
                let u: u32x4 = cast(self);
                let e: i32x4 = cast(u.shr::<23>() & u32x4::splat(0xff));
                let mut y: f32x4;

                let no_op_magic = i32x4::splat(0x7f + 23);
                let no_op_mask: f32x4 = cast(e.cmp_gt(no_op_magic) | e.cmp_eq(no_op_magic));
                let no_op_val: f32x4 = self;

                let zero_magic = i32x4::splat(0x7f - 1);
                let zero_mask: f32x4 = cast(e.cmp_lt(zero_magic));
                let zero_val: f32x4 = self * f32x4::splat(0.0);

                let neg_bit: f32x4 = cast(cast::<u32x4, i32x4>(u).cmp_lt(i32x4::default()));
                let x: f32x4 = neg_bit.blend(-self, self);
                y = x + to_int - to_int - x;
                y = y.cmp_gt(f32x4::splat(0.5)).blend(
                    y + x - f32x4::splat(-1.0),
                    y.cmp_lt(f32x4::splat(-0.5)).blend(y + x + f32x4::splat(1.0), y + x),
                );
                y = neg_bit.blend(-y, y);

                no_op_mask.blend(no_op_val, zero_mask.blend(zero_val, y))
            }
        }
    }

    pub fn round_int(self) -> i32x4 {
        // These technically don't have the same semantics for NaN and out of
        // range values, but it doesn't seem to matter as Skia does it the same
        // way.
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                i32x4(unsafe { _mm_cvtps_epi32(self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                i32x4(i32x4_trunc_sat_f32x4(self.round().0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                i32x4(unsafe { vcvtnq_s32_f32(self.0) })
            } else {
                let rounded: [f32; 4] = cast(self.round());
                cast([
                    rounded[0] as i32,
                    rounded[1] as i32,
                    rounded[2] as i32,
                    rounded[3] as i32,
                ])
            }
        }
    }

    pub fn trunc_int(self) -> i32x4 {
        // These technically don't have the same semantics for NaN and out of
        // range values, but it doesn't seem to matter as Skia does it the same
        // way.
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                i32x4(unsafe { _mm_cvttps_epi32(self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                i32x4(i32x4_trunc_sat_f32x4(self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                i32x4(unsafe { vcvtq_s32_f32(self.0) })
            } else {
                cast([
                    self.0[0] as i32,
                    self.0[1] as i32,
                    self.0[2] as i32,
                    self.0[3] as i32,
                ])
            }
        }
    }

    pub fn recip_fast(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_rcp_ps(self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_div(f32x4_splat(1.0), self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                unsafe {
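                    // Reciprocal estimate refined by one Newton-Raphson step
                    // (vrecpsq_f32 computes 2 - self * a).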
                    let a = vrecpeq_f32(self.0);
                    let a = vmulq_f32(vrecpsq_f32(self.0, a), a);
                    Self(a)
                }
            } else {
                Self::from([
                    1.0 / self.0[0],
                    1.0 / self.0[1],
                    1.0 / self.0[2],
                    1.0 / self.0[3],
                ])
            }
        }
    }

    pub fn recip_sqrt(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_rsqrt_ps(self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_div(f32x4_splat(1.0), f32x4_sqrt(self.0)))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                unsafe {
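                    // Reciprocal square root estimate refined by one Newton-Raphson
                    // step (vrsqrtsq_f32 computes (3 - self * a * a) / 2).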
                    let a = vrsqrteq_f32(self.0);
                    let a = vmulq_f32(vrsqrtsq_f32(self.0, vmulq_f32(a, a)), a);
                    Self(a)
                }
            } else {
                Self::from([
                    1.0 / self.0[0].sqrt(),
                    1.0 / self.0[1].sqrt(),
                    1.0 / self.0[2].sqrt(),
                    1.0 / self.0[3].sqrt(),
                ])
            }
        }
    }

    pub fn sqrt(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_sqrt_ps(self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_sqrt(self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vsqrtq_f32(self.0) })
            } else {
                Self::from([
                    self.0[0].sqrt(),
                    self.0[1].sqrt(),
                    self.0[2].sqrt(),
                    self.0[3].sqrt(),
                ])
            }
        }
    }
}

impl From<[f32; 4]> for f32x4 {
    fn from(v: [f32; 4]) -> Self {
        cast(v)
    }
}

impl From<f32x4> for [f32; 4] {
    fn from(v: f32x4) -> Self {
        cast(v)
    }
}

impl core::ops::Add for f32x4 {
    type Output = Self;

    fn add(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_add_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_add(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vaddq_f32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] + rhs.0[0],
                    self.0[1] + rhs.0[1],
                    self.0[2] + rhs.0[2],
                    self.0[3] + rhs.0[3],
                ])
            }
        }
    }
}

impl core::ops::AddAssign for f32x4 {
    fn add_assign(&mut self, rhs: f32x4) {
        *self = *self + rhs;
    }
}

impl core::ops::Sub for f32x4 {
    type Output = Self;

    fn sub(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_sub_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_sub(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vsubq_f32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] - rhs.0[0],
                    self.0[1] - rhs.0[1],
                    self.0[2] - rhs.0[2],
                    self.0[3] - rhs.0[3],
                ])
            }
        }
    }
}

impl core::ops::Mul for f32x4 {
    type Output = Self;

    fn mul(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_mul_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_mul(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vmulq_f32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] * rhs.0[0],
                    self.0[1] * rhs.0[1],
                    self.0[2] * rhs.0[2],
                    self.0[3] * rhs.0[3],
                ])
            }
        }
    }
}

impl core::ops::MulAssign for f32x4 {
    fn mul_assign(&mut self, rhs: f32x4) {
        *self = *self * rhs;
    }
}

impl core::ops::Div for f32x4 {
    type Output = Self;

    fn div(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_div_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_div(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vdivq_f32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] / rhs.0[0],
                    self.0[1] / rhs.0[1],
                    self.0[2] / rhs.0[2],
                    self.0[3] / rhs.0[3],
                ])
            }
        }
    }
}

impl core::ops::BitAnd for f32x4 {
    type Output = Self;

    #[inline(always)]
    fn bitand(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_and_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_and(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vandq_u32(cast(self.0), cast(rhs.0)) }))
            } else {
                Self([
                    f32::from_bits(self.0[0].to_bits() & rhs.0[0].to_bits()),
                    f32::from_bits(self.0[1].to_bits() & rhs.0[1].to_bits()),
                    f32::from_bits(self.0[2].to_bits() & rhs.0[2].to_bits()),
                    f32::from_bits(self.0[3].to_bits() & rhs.0[3].to_bits()),
                ])
            }
        }
    }
}

impl core::ops::BitOr for f32x4 {
    type Output = Self;

    #[inline(always)]
    fn bitor(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_or_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_or(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vorrq_u32(cast(self.0), cast(rhs.0)) }))
            } else {
                Self([
                    f32::from_bits(self.0[0].to_bits() | rhs.0[0].to_bits()),
                    f32::from_bits(self.0[1].to_bits() | rhs.0[1].to_bits()),
                    f32::from_bits(self.0[2].to_bits() | rhs.0[2].to_bits()),
                    f32::from_bits(self.0[3].to_bits() | rhs.0[3].to_bits()),
                ])
            }
        }
    }
}

impl core::ops::BitXor for f32x4 {
    type Output = Self;

    #[inline(always)]
    fn bitxor(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_xor_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_xor(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { veorq_u32(cast(self.0), cast(rhs.0)) }))
            } else {
                Self([
                    f32::from_bits(self.0[0].to_bits() ^ rhs.0[0].to_bits()),
                    f32::from_bits(self.0[1].to_bits() ^ rhs.0[1].to_bits()),
                    f32::from_bits(self.0[2].to_bits() ^ rhs.0[2].to_bits()),
                    f32::from_bits(self.0[3].to_bits() ^ rhs.0[3].to_bits()),
                ])
            }
        }
    }
}

impl core::ops::Neg for f32x4 {
    type Output = Self;

    fn neg(self) -> Self {
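        // Negate by subtracting from a zero vector; note that on this path
        // `-(+0.0)` comes out as `+0.0` rather than `-0.0`.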
        Self::default() - self
    }
}

impl core::ops::Not for f32x4 {
    type Output = Self;

    fn not(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                unsafe {
                    let all_bits = _mm_set1_ps(f32::from_bits(u32::MAX));
                    Self(_mm_xor_ps(self.0, all_bits))
                }
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_not(self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vmvnq_u32(cast(self.0)) }))
            } else {
                self ^ Self::splat(cast(u32::MAX))
            }
        }
    }
}

impl core::cmp::PartialEq for f32x4 {
    fn eq(&self, rhs: &Self) -> bool {
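        // Whole-vector equality: true only when all four lanes compare equal,
        // so any NaN lane makes the vectors unequal.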
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                unsafe { _mm_movemask_ps(_mm_cmpeq_ps(self.0, rhs.0)) == 0b1111 }
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                unsafe { vminvq_u32(vceqq_f32(self.0, rhs.0)) != 0 }
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                u32x4_all_true(f32x4_eq(self.0, rhs.0))
            } else {
                self.0 == rhs.0
            }
        }
    }
}
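
// A small sanity-check sketch (not part of the upstream crate): it only calls
// the public lane-wise API defined above, so it should behave the same on
// whichever backend the cfg_if selection picked.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn basic_lane_ops() {
        let a = f32x4::from([1.5, -2.5, 0.0, 4.0]);
        let b = f32x4::splat(1.0);

        assert_eq!(<[f32; 4]>::from(a + b), [2.5, -1.5, 1.0, 5.0]);
        assert_eq!(<[f32; 4]>::from(a.max(b)), [1.5, 1.0, 1.0, 4.0]);
        assert_eq!(<[f32; 4]>::from(a.floor()), [1.0, -3.0, 0.0, 4.0]);

        // `cmp_*` produces per-lane masks; `blend` then picks from its first
        // argument where the mask is set and from the second elsewhere.
        let picked = a.cmp_gt(b).blend(f32x4::splat(1.0), f32x4::splat(0.0));
        assert_eq!(<[f32; 4]>::from(picked), [1.0, 0.0, 0.0, 1.0]);
    }
}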