f32x8_t.rs source code [crates/tiny_skia/src/wide/f32x8_t.rs]

1	// Copyright 2020 Yevhenii Reizner
2	//
3	// Use of this source code is governed by a BSD-style license that can be
4	// found in the LICENSE file.
5
6	// Based on https://github.com/Lokathor/wide (Zlib)
7
8	use bytemuck::cast;
9
10	use super::{i32x8, u32x8};
11
12	cfg_if::cfg_if! {
13	if #[cfg(all(feature = "simd", target_feature = "avx"))] {
14	#[cfg(target_arch = "x86")]
15	use core::arch::x86::*;
16	#[cfg(target_arch = "x86_64")]
17	use core::arch::x86_64::*;
18
19	#[derive(Clone, Copy, Debug)]
20	#[repr(C, align(`32`))]
21	pub struct f32x8(__m256);
22	} else {
23	use super::f32x4;
24
25	#[derive(Clone, Copy, Debug)]
26	#[repr(C, align(`32`))]
27	pub struct f32x8(pub f32x4, pub f32x4);
28	}
29	}
30
31	unsafe impl bytemuck::Zeroable for f32x8 {}
32	unsafe impl bytemuck::Pod for f32x8 {}
33
34	impl Default for f32x8 {
35	fn default() -> Self {
36	Self::splat(`0.0`)
37	}
38	}
39
40	impl f32x8 {
41	pub fn splat(n: f32) -> Self {
42	cast([n, n, n, n, n, n, n, n])
43	}
44
45	pub fn floor(self) -> Self {
46	let roundtrip: f32x8 = cast(self.trunc_int().to_f32x8());
47	roundtrip
48	- roundtrip
49	.cmp_gt(self)
50	.blend(f32x8::splat(`1.0`), f32x8::default())
51	}
52
53	pub fn fract(self) -> Self {
54	self - self.floor()
55	}
56
57	pub fn normalize(self) -> Self {
58	self.max(f32x8::default()).min(f32x8::splat(`1.0`))
59	}
60
61	pub fn to_i32x8_bitcast(self) -> i32x8 {
62	bytemuck::cast(self)
63	}
64
65	pub fn to_u32x8_bitcast(self) -> u32x8 {
66	bytemuck::cast(self)
67	}
68
69	pub fn cmp_eq(self, rhs: Self) -> Self {
70	cfg_if::cfg_if! {
71	if #[cfg(all(feature = "simd", target_feature = "avx"))] {
72	Self(unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_EQ_OQ) })
73	} else {
74	Self(self.0.cmp_eq(rhs.0), self.1.cmp_eq(rhs.1))
75	}
76	}
77	}
78
79	pub fn cmp_ne(self, rhs: Self) -> Self {
80	cfg_if::cfg_if! {
81	if #[cfg(all(feature = "simd", target_feature = "avx"))] {
82	Self(unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_NEQ_OQ) })
83	} else {
84	Self(self.0.cmp_ne(rhs.0), self.1.cmp_ne(rhs.1))
85	}
86	}
87	}
88
89	pub fn cmp_ge(self, rhs: Self) -> Self {
90	cfg_if::cfg_if! {
91	if #[cfg(all(feature = "simd", target_feature = "avx"))] {
92	Self(unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_GE_OQ) })
93	} else {
94	Self(self.0.cmp_ge(rhs.0), self.1.cmp_ge(rhs.1))
95	}
96	}
97	}
98
99	pub fn cmp_gt(self, rhs: Self) -> Self {
100	cfg_if::cfg_if! {
101	if #[cfg(all(feature = "simd", target_feature = "avx"))] {
102	Self(unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_GT_OQ) })
103	} else {
104	Self(self.0.cmp_gt(rhs.0), self.1.cmp_gt(rhs.1))
105	}
106	}
107	}
108
109	pub fn cmp_le(self, rhs: Self) -> Self {
110	cfg_if::cfg_if! {
111	if #[cfg(all(feature = "simd", target_feature = "avx"))] {
112	Self(unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_LE_OQ) })
113	} else {
114	Self(self.0.cmp_le(rhs.0), self.1.cmp_le(rhs.1))
115	}
116	}
117	}
118
119	pub fn cmp_lt(self, rhs: Self) -> Self {
120	cfg_if::cfg_if! {
121	if #[cfg(all(feature = "simd", target_feature = "avx"))] {
122	Self(unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_LT_OQ) })
123	} else {
124	Self(self.0.cmp_lt(rhs.0), self.1.cmp_lt(rhs.1))
125	}
126	}
127	}
128
129	#[inline]
130	pub fn blend(self, t: Self, f: Self) -> Self {
131	cfg_if::cfg_if! {
132	if #[cfg(all(feature = "simd", target_feature = "avx"))] {
133	Self(unsafe { _mm256_blendv_ps(f.0, t.0, self.0) })
134	} else {
135	Self(self.0.blend(t.0, f.0), self.1.blend(t.1, f.1))
136	}
137	}
138	}
139
140	pub fn abs(self) -> Self {
141	let non_sign_bits = f32x8::splat(f32::from_bits(i32::MAX as u32));
142	self & non_sign_bits
143	}
144
145	pub fn max(self, rhs: Self) -> Self {
146	// These technically don't have the same semantics for NaN and 0, but it
147	// doesn't seem to matter as Skia does it the same way.
148	cfg_if::cfg_if! {
149	if #[cfg(all(feature = "simd", target_feature = "avx"))] {
150	Self(unsafe { _mm256_max_ps(self.0, rhs.0) })
151	} else {
152	Self(self.0.max(rhs.0), self.1.max(rhs.1))
153	}
154	}
155	}
156
157	pub fn min(self, rhs: Self) -> Self {
158	// These technically don't have the same semantics for NaN and 0, but it
159	// doesn't seem to matter as Skia does it the same way.
160	cfg_if::cfg_if! {
161	if #[cfg(all(feature = "simd", target_feature = "avx"))] {
162	Self(unsafe { _mm256_min_ps(self.0, rhs.0) })
163	} else {
164	Self(self.0.min(rhs.0), self.1.min(rhs.1))
165	}
166	}
167	}
168
169	pub fn is_finite(self) -> Self {
170	let shifted_exp_mask = u32x8::splat(`0xFF000000`);
171	let u: u32x8 = cast(self);
172	let shift_u = u.shl::<`1`>();
173	let out = !(shift_u & shifted_exp_mask).cmp_eq(shifted_exp_mask);
174	cast(out)
175	}
176
177	pub fn round(self) -> Self {
178	cfg_if::cfg_if! {
179	if #[cfg(all(feature = "simd", target_feature = "avx"))] {
180	Self(unsafe { _mm256_round_ps(self.0, _MM_FROUND_NO_EXC \| _MM_FROUND_TO_NEAREST_INT) })
181	} else {
182	Self(self.0.round(), self.1.round())
183	}
184	}
185	}
186
187	pub fn round_int(self) -> i32x8 {
188	// These technically don't have the same semantics for NaN and out of
189	// range values, but it doesn't seem to matter as Skia does it the same
190	// way.
191	cfg_if::cfg_if! {
192	if #[cfg(all(feature = "simd", target_feature = "avx"))] {
193	cast(unsafe { _mm256_cvtps_epi32(self.0) })
194	} else {
195	i32x8(self.0.round_int(), self.1.round_int())
196	}
197	}
198	}
199
200	pub fn trunc_int(self) -> i32x8 {
201	// These technically don't have the same semantics for NaN and out of
202	// range values, but it doesn't seem to matter as Skia does it the same
203	// way.
204	cfg_if::cfg_if! {
205	if #[cfg(all(feature = "simd", target_feature = "avx"))] {
206	cast(unsafe { _mm256_cvttps_epi32(self.0) })
207	} else {
208	i32x8(self.0.trunc_int(), self.1.trunc_int())
209	}
210	}
211	}
212
213	pub fn recip_fast(self) -> Self {
214	cfg_if::cfg_if! {
215	if #[cfg(all(feature = "simd", target_feature = "avx"))] {
216	Self(unsafe { _mm256_rcp_ps(self.0) })
217	} else {
218	Self(self.0.recip_fast(), self.1.recip_fast())
219	}
220	}
221	}
222
223	pub fn recip_sqrt(self) -> Self {
224	cfg_if::cfg_if! {
225	if #[cfg(all(feature = "simd", target_feature = "avx"))] {
226	Self(unsafe { _mm256_rsqrt_ps(self.0) })
227	} else {
228	Self(self.0.recip_sqrt(), self.1.recip_sqrt())
229	}
230	}
231	}
232
233	pub fn sqrt(self) -> Self {
234	cfg_if::cfg_if! {
235	if #[cfg(all(feature = "simd", target_feature = "avx"))] {
236	Self(unsafe { _mm256_sqrt_ps(self.0) })
237	} else {
238	Self(self.0.sqrt(), self.1.sqrt())
239	}
240	}
241	}
242	}
243
244	impl From<[f32; `8`]> for f32x8 {
245	fn from(v: [f32; `8`]) -> Self {
246	cast(v)
247	}
248	}
249
250	impl From<f32x8> for [f32; `8`] {
251	fn from(v: f32x8) -> Self {
252	cast(v)
253	}
254	}
255
256	impl core::ops::Add for f32x8 {
257	type Output = Self;
258
259	fn add(self, rhs: Self) -> Self::Output {
260	cfg_if::cfg_if! {
261	if #[cfg(all(feature = "simd", target_feature = "avx"))] {
262	Self(unsafe { _mm256_add_ps(self.0, rhs.0) })
263	} else {
264	Self(self.0 + rhs.0, self.1 + rhs.1)
265	}
266	}
267	}
268	}
269
270	impl core::ops::AddAssign for f32x8 {
271	fn add_assign(&mut self, rhs: f32x8) {
272	self = self + rhs;
273	}
274	}
275
276	impl core::ops::Sub for f32x8 {
277	type Output = Self;
278
279	fn sub(self, rhs: Self) -> Self::Output {
280	cfg_if::cfg_if! {
281	if #[cfg(all(feature = "simd", target_feature = "avx"))] {
282	Self(unsafe { _mm256_sub_ps(self.0, rhs.0) })
283	} else {
284	Self(self.0 - rhs.0, self.1 - rhs.1)
285	}
286	}
287	}
288	}
289
290	impl core::ops::Mul for f32x8 {
291	type Output = Self;
292
293	fn mul(self, rhs: Self) -> Self::Output {
294	cfg_if::cfg_if! {
295	if #[cfg(all(feature = "simd", target_feature = "avx"))] {
296	Self(unsafe { _mm256_mul_ps(self.0, rhs.0) })
297	} else {
298	Self(self.0 * rhs.0, self.1 * rhs.1)
299	}
300	}
301	}
302	}
303
304	impl core::ops::MulAssign for f32x8 {
305	fn mul_assign(&mut self, rhs: f32x8) {
306	self = self * rhs;
307	}
308	}
309
310	impl core::ops::Div for f32x8 {
311	type Output = Self;
312
313	fn div(self, rhs: Self) -> Self::Output {
314	cfg_if::cfg_if! {
315	if #[cfg(all(feature = "simd", target_feature = "avx"))] {
316	Self(unsafe { _mm256_div_ps(self.0, rhs.0) })
317	} else {
318	Self(self.0 / rhs.0, self.1 / rhs.1)
319	}
320	}
321	}
322	}
323
324	impl core::ops::BitAnd for f32x8 {
325	type Output = Self;
326
327	#[inline(always)]
328	fn bitand(self, rhs: Self) -> Self::Output {
329	cfg_if::cfg_if! {
330	if #[cfg(all(feature = "simd", target_feature = "avx"))] {
331	Self(unsafe { _mm256_and_ps(self.0, rhs.0) })
332	} else {
333	Self(self.0 & rhs.0, self.1 & rhs.1)
334	}
335	}
336	}
337	}
338
339	impl core::ops::BitOr for f32x8 {
340	type Output = Self;
341
342	#[inline(always)]
343	fn bitor(self, rhs: Self) -> Self::Output {
344	cfg_if::cfg_if! {
345	if #[cfg(all(feature = "simd", target_feature = "avx"))] {
346	Self(unsafe { _mm256_or_ps(self.0, rhs.0) })
347	} else {
348	Self(self.0 \| rhs.0, self.1 \| rhs.1)
349	}
350	}
351	}
352	}
353
354	impl core::ops::BitXor for f32x8 {
355	type Output = Self;
356
357	#[inline(always)]
358	fn bitxor(self, rhs: Self) -> Self::Output {
359	cfg_if::cfg_if! {
360	if #[cfg(all(feature = "simd", target_feature = "avx"))] {
361	Self(unsafe { _mm256_xor_ps(self.0, rhs.0) })
362	} else {
363	Self(self.0 ^ rhs.0, self.1 ^ rhs.1)
364	}
365	}
366	}
367	}
368
369	impl core::ops::Neg for f32x8 {
370	type Output = Self;
371
372	fn neg(self) -> Self {
373	Self::default() - self
374	}
375	}
376
377	impl core::ops::Not for f32x8 {
378	type Output = Self;
379
380	fn not(self) -> Self {
381	cfg_if::cfg_if! {
382	if #[cfg(all(feature = "simd", target_feature = "avx"))] {
383	let all_bits = unsafe { _mm256_set1_ps(f32::from_bits(u32::MAX)) };
384	Self(unsafe { _mm256_xor_ps(self.0, all_bits) })
385	} else {
386	Self(!self.0, !self.1)
387	}
388	}
389	}
390	}
391
392	impl core::cmp::PartialEq for f32x8 {
393	fn eq(&self, rhs: &Self) -> bool {
394	cfg_if::cfg_if! {
395	if #[cfg(all(feature = "simd", target_feature = "avx"))] {
396	let mask = unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_EQ_OQ) };
397	unsafe { _mm256_movemask_ps(mask) == `0b1111_1111` }
398	} else {
399	self.0 == rhs.0 && self.1 == rhs.1
400	}
401	}
402	}
403	}
404