// Copyright 2020 Yevhenii Reizner
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Based on https://github.com/Lokathor/wide (Zlib)

use bytemuck::cast;

use super::{i32x8, u32x8};

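// A 256-bit vector of eight `f32` lanes. With the `simd` feature and AVX
// available it wraps a single `__m256` register; otherwise it falls back to a
// pair of `f32x4` halves that are processed independently.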
cfg_if::cfg_if! {
    if #[cfg(all(feature = "simd", target_feature = "avx"))] {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(32))]
        pub struct f32x8(__m256);
    } else {
        use super::f32x4;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(32))]
        pub struct f32x8(pub f32x4, pub f32x4);
    }
}

unsafe impl bytemuck::Zeroable for f32x8 {}
unsafe impl bytemuck::Pod for f32x8 {}

impl Default for f32x8 {
    fn default() -> Self {
        Self::splat(0.0)
    }
}

impl f32x8 {
    pub fn splat(n: f32) -> Self {
        cast([n, n, n, n, n, n, n, n])
    }

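    // Rounds each lane down to the nearest integer. Implemented as a
    // truncation towards zero followed by a correction: if the truncated
    // value ended up greater than the input (which happens for negative
    // non-integers), one is subtracted from that lane.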
    pub fn floor(self) -> Self {
        let roundtrip: f32x8 = cast(self.trunc_int().to_f32x8());
        roundtrip
            - roundtrip
                .cmp_gt(self)
                .blend(f32x8::splat(1.0), f32x8::default())
    }

    pub fn fract(self) -> Self {
        self - self.floor()
    }

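    // Clamps each lane to the [0.0, 1.0] range (despite the name, no
    // vector-length normalization is performed).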
    pub fn normalize(self) -> Self {
        self.max(f32x8::default()).min(f32x8::splat(1.0))
    }

    pub fn to_i32x8_bitcast(self) -> i32x8 {
        bytemuck::cast(self)
    }

    pub fn to_u32x8_bitcast(self) -> u32x8 {
        bytemuck::cast(self)
    }

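    // Lane-wise comparisons. Each lane of the result is either all ones
    // (the comparison holds) or all zeros (it does not), so the result can
    // be fed directly into `blend`. The AVX paths use the ordered,
    // non-signaling predicates (`_CMP_*_OQ`).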
    pub fn cmp_eq(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_EQ_OQ) })
            } else {
                Self(self.0.cmp_eq(rhs.0), self.1.cmp_eq(rhs.1))
            }
        }
    }

    pub fn cmp_ne(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_NEQ_OQ) })
            } else {
                Self(self.0.cmp_ne(rhs.0), self.1.cmp_ne(rhs.1))
            }
        }
    }

    pub fn cmp_ge(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_GE_OQ) })
            } else {
                Self(self.0.cmp_ge(rhs.0), self.1.cmp_ge(rhs.1))
            }
        }
    }

    pub fn cmp_gt(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_GT_OQ) })
            } else {
                Self(self.0.cmp_gt(rhs.0), self.1.cmp_gt(rhs.1))
            }
        }
    }

    pub fn cmp_le(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_LE_OQ) })
            } else {
                Self(self.0.cmp_le(rhs.0), self.1.cmp_le(rhs.1))
            }
        }
    }

    pub fn cmp_lt(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_LT_OQ) })
            } else {
                Self(self.0.cmp_lt(rhs.0), self.1.cmp_lt(rhs.1))
            }
        }
    }

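    // Lane-wise select: for every lane where `self` (a comparison mask) is
    // set, the lane is taken from `t`, otherwise from `f`.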
    #[inline]
    pub fn blend(self, t: Self, f: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_blendv_ps(f.0, t.0, self.0) })
            } else {
                Self(self.0.blend(t.0, f.0), self.1.blend(t.1, f.1))
            }
        }
    }

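    // Absolute value of each lane, computed by masking off the sign bit
    // (`i32::MAX as u32` is 0x7FFF_FFFF, i.e. everything except the top bit).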
    pub fn abs(self) -> Self {
        let non_sign_bits = f32x8::splat(f32::from_bits(i32::MAX as u32));
        self & non_sign_bits
    }

    pub fn max(self, rhs: Self) -> Self {
        // These technically don't have the same semantics for NaN and 0, but it
        // doesn't seem to matter as Skia does it the same way.
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_max_ps(self.0, rhs.0) })
            } else {
                Self(self.0.max(rhs.0), self.1.max(rhs.1))
            }
        }
    }

    pub fn min(self, rhs: Self) -> Self {
        // These technically don't have the same semantics for NaN and 0, but it
        // doesn't seem to matter as Skia does it the same way.
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_min_ps(self.0, rhs.0) })
            } else {
                Self(self.0.min(rhs.0), self.1.min(rhs.1))
            }
        }
    }

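    // Returns a lane mask that is all ones for finite lanes and all zeros for
    // infinities and NaNs. A lane is non-finite exactly when its exponent bits
    // are all ones, which is what the shift-and-mask check detects.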
    pub fn is_finite(self) -> Self {
        let shifted_exp_mask = u32x8::splat(0xFF000000);
        let u: u32x8 = cast(self);
        let shift_u = u.shl::<1>();
        let out = !(shift_u & shifted_exp_mask).cmp_eq(shifted_exp_mask);
        cast(out)
    }

    pub fn round(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_round_ps(self.0, _MM_FROUND_NO_EXC | _MM_FROUND_TO_NEAREST_INT) })
            } else {
                Self(self.0.round(), self.1.round())
            }
        }
    }

    pub fn round_int(self) -> i32x8 {
        // These technically don't have the same semantics for NaN and out of
        // range values, but it doesn't seem to matter as Skia does it the same
        // way.
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                cast(unsafe { _mm256_cvtps_epi32(self.0) })
            } else {
                i32x8(self.0.round_int(), self.1.round_int())
            }
        }
    }

    pub fn trunc_int(self) -> i32x8 {
        // These technically don't have the same semantics for NaN and out of
        // range values, but it doesn't seem to matter as Skia does it the same
        // way.
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                cast(unsafe { _mm256_cvttps_epi32(self.0) })
            } else {
                i32x8(self.0.trunc_int(), self.1.trunc_int())
            }
        }
    }

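    // Fast approximate reciprocal (1/x) of each lane. The AVX path uses
    // `_mm256_rcp_ps`, which trades precision (roughly 12 bits) for speed
    // compared to a full division.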
    pub fn recip_fast(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_rcp_ps(self.0) })
            } else {
                Self(self.0.recip_fast(), self.1.recip_fast())
            }
        }
    }

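    // Fast approximate reciprocal square root (1/sqrt(x)) of each lane, with
    // the same precision trade-off as `recip_fast`.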
    pub fn recip_sqrt(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_rsqrt_ps(self.0) })
            } else {
                Self(self.0.recip_sqrt(), self.1.recip_sqrt())
            }
        }
    }

    pub fn sqrt(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_sqrt_ps(self.0) })
            } else {
                Self(self.0.sqrt(), self.1.sqrt())
            }
        }
    }
}

impl From<[f32; 8]> for f32x8 {
    fn from(v: [f32; 8]) -> Self {
        cast(v)
    }
}

impl From<f32x8> for [f32; 8] {
    fn from(v: f32x8) -> Self {
        cast(v)
    }
}

impl core::ops::Add for f32x8 {
    type Output = Self;

    fn add(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_add_ps(self.0, rhs.0) })
            } else {
                Self(self.0 + rhs.0, self.1 + rhs.1)
            }
        }
    }
}

impl core::ops::AddAssign for f32x8 {
    fn add_assign(&mut self, rhs: f32x8) {
        *self = *self + rhs;
    }
}

impl core::ops::Sub for f32x8 {
    type Output = Self;

    fn sub(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_sub_ps(self.0, rhs.0) })
            } else {
                Self(self.0 - rhs.0, self.1 - rhs.1)
            }
        }
    }
}

impl core::ops::Mul for f32x8 {
    type Output = Self;

    fn mul(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_mul_ps(self.0, rhs.0) })
            } else {
                Self(self.0 * rhs.0, self.1 * rhs.1)
            }
        }
    }
}

impl core::ops::MulAssign for f32x8 {
    fn mul_assign(&mut self, rhs: f32x8) {
        *self = *self * rhs;
    }
}

impl core::ops::Div for f32x8 {
    type Output = Self;

    fn div(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_div_ps(self.0, rhs.0) })
            } else {
                Self(self.0 / rhs.0, self.1 / rhs.1)
            }
        }
    }
}

impl core::ops::BitAnd for f32x8 {
    type Output = Self;

    #[inline(always)]
    fn bitand(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_and_ps(self.0, rhs.0) })
            } else {
                Self(self.0 & rhs.0, self.1 & rhs.1)
            }
        }
    }
}

impl core::ops::BitOr for f32x8 {
    type Output = Self;

    #[inline(always)]
    fn bitor(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_or_ps(self.0, rhs.0) })
            } else {
                Self(self.0 | rhs.0, self.1 | rhs.1)
            }
        }
    }
}

impl core::ops::BitXor for f32x8 {
    type Output = Self;

    #[inline(always)]
    fn bitxor(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_xor_ps(self.0, rhs.0) })
            } else {
                Self(self.0 ^ rhs.0, self.1 ^ rhs.1)
            }
        }
    }
}

impl core::ops::Neg for f32x8 {
    type Output = Self;

    fn neg(self) -> Self {
        Self::default() - self
    }
}

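// Bitwise complement of the raw lane bits; primarily useful for inverting
// comparison masks.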
impl core::ops::Not for f32x8 {
    type Output = Self;

    fn not(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                let all_bits = unsafe { _mm256_set1_ps(f32::from_bits(u32::MAX)) };
                Self(unsafe { _mm256_xor_ps(self.0, all_bits) })
            } else {
                Self(!self.0, !self.1)
            }
        }
    }
}

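// Equality requires every one of the eight lanes to compare equal. On AVX the
// per-lane mask is collapsed into an 8-bit value with `_mm256_movemask_ps`.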
impl core::cmp::PartialEq for f32x8 {
    fn eq(&self, rhs: &Self) -> bool {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                let mask = unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_EQ_OQ) };
                unsafe { _mm256_movemask_ps(mask) == 0b1111_1111 }
            } else {
                self.0 == rhs.0 && self.1 == rhs.1
            }
        }
    }
}
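
// A minimal usage sketch (not part of the upstream file): a couple of sanity
// tests that go through the public `[f32; 8]` conversions, so they exercise
// whichever backend (AVX or the `f32x4` fallback) the crate was built with.
// The module and test names are illustrative only.
#[cfg(test)]
mod f32x8_sanity {
    use super::f32x8;

    #[test]
    fn floor_handles_negative_lanes() {
        let v = f32x8::from([1.5, -1.5, 0.0, -0.25, 2.0, -2.0, 7.75, -7.75]);
        let expected = [1.0, -2.0, 0.0, -1.0, 2.0, -2.0, 7.0, -8.0];
        assert_eq!(<[f32; 8]>::from(v.floor()), expected);
    }

    #[test]
    fn blend_selects_per_lane() {
        let a = f32x8::from([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
        let b = f32x8::splat(0.0);
        // The mask is all ones only in the lanes of `a` that are > 4.0.
        let mask = a.cmp_gt(f32x8::splat(4.0));
        let blended = <[f32; 8]>::from(mask.blend(a, b));
        assert_eq!(blended, [0.0, 0.0, 0.0, 0.0, 5.0, 6.0, 7.0, 8.0]);
    }
}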