// pathfinder/simd/src/x86.rs
//
// Copyright © 2019 The Pathfinder Project Developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use std::cmp::PartialEq;
use std::fmt::{self, Debug, Formatter};
use std::mem;
use std::ops::{Add, BitAnd, BitOr, BitXor, Div, Index, IndexMut, Mul, Not, Shr, Sub};

#[cfg(target_pointer_width = "32")]
use std::arch::x86::{__m128, __m128i};
#[cfg(target_pointer_width = "32")]
use std::arch::x86;
#[cfg(target_pointer_width = "64")]
use std::arch::x86_64::{__m128, __m128i};
#[cfg(target_pointer_width = "64")]
use std::arch::x86_64 as x86;

mod swizzle_f32x4;
mod swizzle_i32x4;
// Two 32-bit floats

/// Two packed 32-bit floats, stored bitwise in a `u64` (x in the low half, y in the high half).
#[derive(Clone, Copy)]
pub struct F32x2(pub u64);

impl F32x2 {
    // Constructors

    #[inline]
    pub fn new(a: f32, b: f32) -> F32x2 {
        unsafe {
            // Reinterpret the float bits as `u32`s, without a numeric cast.
            let a = mem::transmute::<*const f32, *const u32>(&a);
            let b = mem::transmute::<*const f32, *const u32>(&b);
            F32x2((*a as u64) | ((*b as u64) << 32))
        }
    }

    #[inline]
    pub fn splat(x: f32) -> F32x2 {
        F32x2::new(x, x)
    }

    // Basic operations

    #[inline]
    pub fn approx_recip(self) -> F32x2 {
        self.to_f32x4().approx_recip().xy()
    }

    #[inline]
    pub fn min(self, other: F32x2) -> F32x2 {
        self.to_f32x4().min(other.to_f32x4()).xy()
    }

    #[inline]
    pub fn max(self, other: F32x2) -> F32x2 {
        self.to_f32x4().max(other.to_f32x4()).xy()
    }

    #[inline]
    pub fn clamp(self, min: F32x2, max: F32x2) -> F32x2 {
        self.to_f32x4().clamp(min.to_f32x4(), max.to_f32x4()).xy()
    }

    #[inline]
    pub fn abs(self) -> F32x2 {
        self.to_f32x4().abs().xy()
    }

    #[inline]
    pub fn floor(self) -> F32x2 {
        self.to_f32x4().floor().xy()
    }

    #[inline]
    pub fn ceil(self) -> F32x2 {
        self.to_f32x4().ceil().xy()
    }

    #[inline]
    pub fn sqrt(self) -> F32x2 {
        self.to_f32x4().sqrt().xy()
    }

    // Packed comparisons

    #[inline]
    pub fn packed_eq(self, other: F32x2) -> U32x2 {
        self.to_f32x4().packed_eq(other.to_f32x4()).xy()
    }

    #[inline]
    pub fn packed_gt(self, other: F32x2) -> U32x2 {
        self.to_f32x4().packed_gt(other.to_f32x4()).xy()
    }

    #[inline]
    pub fn packed_lt(self, other: F32x2) -> U32x2 {
        self.to_f32x4().packed_lt(other.to_f32x4()).xy()
    }

    #[inline]
    pub fn packed_le(self, other: F32x2) -> U32x2 {
        self.to_f32x4().packed_le(other.to_f32x4()).xy()
    }

    // Conversions

    #[inline]
    pub fn to_f32x4(self) -> F32x4 {
        unsafe {
            // Overwrite the low 64 bits of a zeroed vector, leaving the upper
            // two lanes zero.
            let mut result = F32x4::default();
            *mem::transmute::<&mut __m128, &mut u64>(&mut result.0) = self.0;
            result
        }
    }

    #[inline]
    pub fn to_i32x2(self) -> I32x2 {
        self.to_i32x4().xy()
    }

    #[inline]
    pub fn to_i32x4(self) -> I32x4 {
        self.to_f32x4().to_i32x4()
    }

    // Swizzles

    #[inline]
    pub fn yx(self) -> F32x2 {
        self.to_f32x4().yx()
    }

    // Concatenations

    #[inline]
    pub fn concat_xy_xy(self, other: F32x2) -> F32x4 {
        self.to_f32x4().concat_xy_xy(other.to_f32x4())
    }
}

impl Default for F32x2 {
    #[inline]
    fn default() -> F32x2 {
        F32x2(0)
    }
}

impl Index<usize> for F32x2 {
    type Output = f32;
    #[inline]
    fn index(&self, index: usize) -> &f32 {
        unsafe { &mem::transmute::<&u64, &[f32; 2]>(&self.0)[index] }
    }
}

impl IndexMut<usize> for F32x2 {
    #[inline]
    fn index_mut(&mut self, index: usize) -> &mut f32 {
        unsafe { &mut mem::transmute::<&mut u64, &mut [f32; 2]>(&mut self.0)[index] }
    }
}

impl Debug for F32x2 {
    #[inline]
    fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
        write!(f, "<{}, {}>", self[0], self[1])
    }
}

impl PartialEq for F32x2 {
    #[inline]
    fn eq(&self, other: &F32x2) -> bool {
        self.packed_eq(*other).all_true()
    }
}

impl Add<F32x2> for F32x2 {
    type Output = F32x2;
    #[inline]
    fn add(self, other: F32x2) -> F32x2 {
        (self.to_f32x4() + other.to_f32x4()).xy()
    }
}

impl Div<F32x2> for F32x2 {
    type Output = F32x2;
    #[inline]
    fn div(self, other: F32x2) -> F32x2 {
        (self.to_f32x4() / other.to_f32x4()).xy()
    }
}

impl Mul<F32x2> for F32x2 {
    type Output = F32x2;
    #[inline]
    fn mul(self, other: F32x2) -> F32x2 {
        (self.to_f32x4() * other.to_f32x4()).xy()
    }
}

impl Sub<F32x2> for F32x2 {
    type Output = F32x2;
    #[inline]
    fn sub(self, other: F32x2) -> F32x2 {
        (self.to_f32x4() - other.to_f32x4()).xy()
    }
}
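
// The sketch below is editorial, not part of upstream Pathfinder: a minimal
// test module illustrating how the `F32x2` API above is meant to be used.
// It assumes an SSE4.1-capable x86 target, the same assumption the rest of
// this file makes.
#[cfg(test)]
mod f32x2_example_tests {
    use super::F32x2;

    #[test]
    fn f32x2_basic_usage() {
        let a = F32x2::new(1.0, -2.0);
        let b = F32x2::splat(2.0);
        // Arithmetic widens to 4-lane SSE operations and truncates back to 2 lanes.
        assert_eq!(a + b, F32x2::new(3.0, 0.0));
        assert_eq!(a.abs(), F32x2::new(1.0, 2.0));
        // Packed comparisons return all-ones/all-zeros lane masks.
        assert!(a.packed_lt(b).all_true());
    }
}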

// Four 32-bit floats

#[derive(Clone, Copy)]
pub struct F32x4(pub __m128);

impl F32x4 {
    // Constructors

    #[inline]
    pub fn new(a: f32, b: f32, c: f32, d: f32) -> F32x4 {
        unsafe {
            let vector = [a, b, c, d];
            F32x4(x86::_mm_loadu_ps(vector.as_ptr()))
        }
    }

    #[inline]
    pub fn splat(x: f32) -> F32x4 {
        unsafe { F32x4(x86::_mm_set1_ps(x)) }
    }

    // Basic operations

    /// Returns an approximate reciprocal of each lane, computed with `rcp_ps`
    /// (relative error at most 1.5 × 2⁻¹²).
    #[inline]
    pub fn approx_recip(self) -> F32x4 {
        unsafe { F32x4(x86::_mm_rcp_ps(self.0)) }
    }

    #[inline]
    pub fn min(self, other: F32x4) -> F32x4 {
        unsafe { F32x4(x86::_mm_min_ps(self.0, other.0)) }
    }

    #[inline]
    pub fn max(self, other: F32x4) -> F32x4 {
        unsafe { F32x4(x86::_mm_max_ps(self.0, other.0)) }
    }

    #[inline]
    pub fn clamp(self, min: F32x4, max: F32x4) -> F32x4 {
        self.max(min).min(max)
    }

    #[inline]
    pub fn abs(self) -> F32x4 {
        unsafe {
            // Shifting all-ones right by one yields 0x7fffffff in each lane, a
            // mask that clears the sign bit.
            let tmp = x86::_mm_srli_epi32(I32x4::splat(-1).0, 1);
            F32x4(x86::_mm_and_ps(x86::_mm_castsi128_ps(tmp), self.0))
        }
    }

    #[inline]
    pub fn floor(self) -> F32x4 {
        unsafe { F32x4(x86::_mm_floor_ps(self.0)) }
    }

    #[inline]
    pub fn ceil(self) -> F32x4 {
        unsafe { F32x4(x86::_mm_ceil_ps(self.0)) }
    }

    #[inline]
    pub fn sqrt(self) -> F32x4 {
        unsafe { F32x4(x86::_mm_sqrt_ps(self.0)) }
    }

    // Packed comparisons

    #[inline]
    pub fn packed_eq(self, other: F32x4) -> U32x4 {
        unsafe { U32x4(x86::_mm_castps_si128(x86::_mm_cmpeq_ps(self.0, other.0))) }
    }

    #[inline]
    pub fn packed_gt(self, other: F32x4) -> U32x4 {
        unsafe { U32x4(x86::_mm_castps_si128(x86::_mm_cmpgt_ps(self.0, other.0))) }
    }

    #[inline]
    pub fn packed_lt(self, other: F32x4) -> U32x4 {
        other.packed_gt(self)
    }

    #[inline]
    pub fn packed_le(self, other: F32x4) -> U32x4 {
        !self.packed_gt(other)
    }

    // Conversions

    /// Converts these packed floats to integers by rounding with the current
    /// SSE rounding mode (round-to-nearest-even by default).
    #[inline]
    pub fn to_i32x4(self) -> I32x4 {
        unsafe { I32x4(x86::_mm_cvtps_epi32(self.0)) }
    }

    // Extraction

    #[inline]
    pub fn xy(self) -> F32x2 {
        unsafe {
            let swizzled = self.0;
            F32x2(*mem::transmute::<&__m128, &u64>(&swizzled))
        }
    }

    #[inline]
    pub fn xw(self) -> F32x2 {
        self.xwyz().xy()
    }

    #[inline]
    pub fn yx(self) -> F32x2 {
        self.yxwz().xy()
    }

    #[inline]
    pub fn zy(self) -> F32x2 {
        self.zyxw().xy()
    }

    #[inline]
    pub fn zw(self) -> F32x2 {
        self.zwxy().xy()
    }

    // Concatenations

    #[inline]
    pub fn concat_xy_xy(self, other: F32x4) -> F32x4 {
        unsafe {
            let this = x86::_mm_castps_pd(self.0);
            let other = x86::_mm_castps_pd(other.0);
            let result = x86::_mm_unpacklo_pd(this, other);
            F32x4(x86::_mm_castpd_ps(result))
        }
    }

    #[inline]
    pub fn concat_xy_zw(self, other: F32x4) -> F32x4 {
        unsafe {
            let this = x86::_mm_castps_pd(self.0);
            let other = x86::_mm_castps_pd(other.0);
            let result = x86::_mm_shuffle_pd(this, other, 0b10);
            F32x4(x86::_mm_castpd_ps(result))
        }
    }

    #[inline]
    pub fn concat_zw_zw(self, other: F32x4) -> F32x4 {
        unsafe {
            let this = x86::_mm_castps_pd(self.0);
            let other = x86::_mm_castps_pd(other.0);
            let result = x86::_mm_unpackhi_pd(this, other);
            F32x4(x86::_mm_castpd_ps(result))
        }
    }

    #[inline]
    pub fn concat_wz_yx(self, other: F32x4) -> F32x4 {
        unsafe { F32x4(x86::_mm_shuffle_ps(self.0, other.0, 0b0001_1011)) }
    }
}

impl Default for F32x4 {
    #[inline]
    fn default() -> F32x4 {
        unsafe { F32x4(x86::_mm_setzero_ps()) }
    }
}

impl Index<usize> for F32x4 {
    type Output = f32;
    #[inline]
    fn index(&self, index: usize) -> &f32 {
        unsafe { &mem::transmute::<&__m128, &[f32; 4]>(&self.0)[index] }
    }
}

impl IndexMut<usize> for F32x4 {
    #[inline]
    fn index_mut(&mut self, index: usize) -> &mut f32 {
        unsafe { &mut mem::transmute::<&mut __m128, &mut [f32; 4]>(&mut self.0)[index] }
    }
}

impl Debug for F32x4 {
    #[inline]
    fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
        write!(f, "<{}, {}, {}, {}>", self[0], self[1], self[2], self[3])
    }
}

impl PartialEq for F32x4 {
    #[inline]
    fn eq(&self, other: &F32x4) -> bool {
        self.packed_eq(*other).all_true()
    }
}

impl Add<F32x4> for F32x4 {
    type Output = F32x4;
    #[inline]
    fn add(self, other: F32x4) -> F32x4 {
        unsafe { F32x4(x86::_mm_add_ps(self.0, other.0)) }
    }
}

impl Div<F32x4> for F32x4 {
    type Output = F32x4;
    #[inline]
    fn div(self, other: F32x4) -> F32x4 {
        unsafe { F32x4(x86::_mm_div_ps(self.0, other.0)) }
    }
}

impl Mul<F32x4> for F32x4 {
    type Output = F32x4;
    #[inline]
    fn mul(self, other: F32x4) -> F32x4 {
        unsafe { F32x4(x86::_mm_mul_ps(self.0, other.0)) }
    }
}

impl Sub<F32x4> for F32x4 {
    type Output = F32x4;
    #[inline]
    fn sub(self, other: F32x4) -> F32x4 {
        unsafe { F32x4(x86::_mm_sub_ps(self.0, other.0)) }
    }
}
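
// Editorial sketch, not part of upstream Pathfinder: a small test module
// showing `F32x4` arithmetic, clamping, comparisons, and float-to-int
// rounding. Assumes an SSE4.1-capable target running with the default MXCSR
// round-to-nearest-even rounding mode.
#[cfg(test)]
mod f32x4_example_tests {
    use super::{F32x4, I32x4};

    #[test]
    fn f32x4_basic_usage() {
        let a = F32x4::new(1.0, 2.0, 3.0, 4.0);
        let b = F32x4::splat(2.5);
        assert_eq!(a + b, F32x4::new(3.5, 4.5, 5.5, 6.5));
        assert_eq!(a.clamp(F32x4::splat(2.0), F32x4::splat(3.0)),
                   F32x4::new(2.0, 2.0, 3.0, 3.0));
        // Lane masks: every lane of `a` is less than or equal to 4.0.
        assert!(a.packed_le(F32x4::splat(4.0)).all_true());
        // `to_i32x4` rounds to nearest, ties to even, under the default mode.
        assert_eq!(F32x4::new(1.5, 2.5, -1.5, 1.4).to_i32x4(),
                   I32x4::new(2, 2, -2, 1));
    }
}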

// Two 32-bit signed integers

/// Two packed 32-bit signed integers, stored bitwise in a `u64` (x in the low
/// half, y in the high half).
#[derive(Clone, Copy)]
pub struct I32x2(pub u64);

impl I32x2 {
    // Constructors

    #[inline]
    pub fn new(a: i32, b: i32) -> I32x2 {
        unsafe {
            let a = mem::transmute::<*const i32, *const u32>(&a);
            let b = mem::transmute::<*const i32, *const u32>(&b);
            I32x2((*a as u64) | ((*b as u64) << 32))
        }
    }

    #[inline]
    pub fn splat(x: i32) -> I32x2 {
        I32x2::new(x, x)
    }

    // Accessors

    #[inline]
    pub fn x(self) -> i32 {
        self[0]
    }

    #[inline]
    pub fn y(self) -> i32 {
        self[1]
    }

    // Concatenations

    #[inline]
    pub fn concat_xy_xy(self, other: I32x2) -> I32x4 {
        self.to_i32x4().concat_xy_xy(other.to_i32x4())
    }

    // Conversions

    #[inline]
    pub fn to_i32x4(self) -> I32x4 {
        unsafe {
            // Overwrite the low 64 bits of a zeroed vector, leaving the upper
            // two lanes zero.
            let mut result = I32x4::default();
            *mem::transmute::<&mut __m128i, &mut u64>(&mut result.0) = self.0;
            result
        }
    }

    #[inline]
    pub fn to_f32x4(self) -> F32x4 {
        self.to_i32x4().to_f32x4()
    }

    /// Converts these packed integers to floats.
    #[inline]
    pub fn to_f32x2(self) -> F32x2 {
        self.to_f32x4().xy()
    }

    // Basic operations

    #[inline]
    pub fn max(self, other: I32x2) -> I32x2 {
        self.to_i32x4().max(other.to_i32x4()).xy()
    }

    #[inline]
    pub fn min(self, other: I32x2) -> I32x2 {
        self.to_i32x4().min(other.to_i32x4()).xy()
    }

    // Packed comparisons

    // TODO(pcwalton): Use the `U32x2` type!
    #[inline]
    pub fn packed_eq(self, other: I32x2) -> U32x4 {
        self.to_i32x4().packed_eq(other.to_i32x4())
    }

    #[inline]
    pub fn packed_gt(self, other: I32x2) -> U32x4 {
        self.to_i32x4().packed_gt(other.to_i32x4())
    }

    #[inline]
    pub fn packed_le(self, other: I32x2) -> U32x4 {
        self.to_i32x4().packed_le(other.to_i32x4())
    }
}

impl Default for I32x2 {
    #[inline]
    fn default() -> I32x2 {
        I32x2(0)
    }
}

impl Index<usize> for I32x2 {
    type Output = i32;
    #[inline]
    fn index(&self, index: usize) -> &i32 {
        unsafe { &mem::transmute::<&u64, &[i32; 2]>(&self.0)[index] }
    }
}

impl IndexMut<usize> for I32x2 {
    #[inline]
    fn index_mut(&mut self, index: usize) -> &mut i32 {
        unsafe { &mut mem::transmute::<&mut u64, &mut [i32; 2]>(&mut self.0)[index] }
    }
}

impl Add<I32x2> for I32x2 {
    type Output = I32x2;
    #[inline]
    fn add(self, other: I32x2) -> I32x2 {
        (self.to_i32x4() + other.to_i32x4()).xy()
    }
}

impl Sub<I32x2> for I32x2 {
    type Output = I32x2;
    #[inline]
    fn sub(self, other: I32x2) -> I32x2 {
        (self.to_i32x4() - other.to_i32x4()).xy()
    }
}

impl Mul<I32x2> for I32x2 {
    type Output = I32x2;
    #[inline]
    fn mul(self, other: I32x2) -> I32x2 {
        (self.to_i32x4() * other.to_i32x4()).xy()
    }
}

impl Debug for I32x2 {
    #[inline]
    fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
        write!(f, "<{}, {}>", self[0], self[1])
    }
}

impl PartialEq for I32x2 {
    #[inline]
    fn eq(&self, other: &I32x2) -> bool {
        self.packed_eq(*other).all_true()
    }
}
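
// Editorial sketch, not part of upstream Pathfinder: `I32x2` operations are
// implemented by widening to `I32x4`, operating on all four SSE lanes, and
// truncating back to two lanes, as the short test below illustrates.
#[cfg(test)]
mod i32x2_example_tests {
    use super::I32x2;

    #[test]
    fn i32x2_round_trips_through_i32x4() {
        let a = I32x2::new(3, -7);
        // Widen, operate, and narrow: the upper two lanes are simply dropped.
        assert_eq!(a.to_i32x4().xy(), a);
        assert_eq!(a + I32x2::splat(1), I32x2::new(4, -6));
        assert_eq!(a.min(I32x2::new(0, 0)), I32x2::new(0, -7));
    }
}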

// Four 32-bit signed integers

#[derive(Clone, Copy)]
pub struct I32x4(pub __m128i);

impl I32x4 {
    // Constructors

    #[inline]
    pub fn new(a: i32, b: i32, c: i32, d: i32) -> I32x4 {
        unsafe {
            let vector = [a, b, c, d];
            I32x4(x86::_mm_loadu_si128(vector.as_ptr() as *const __m128i))
        }
    }

    #[inline]
    pub fn splat(x: i32) -> I32x4 {
        unsafe { I32x4(x86::_mm_set1_epi32(x)) }
    }

    // Extraction

    #[inline]
    pub fn xy(self) -> I32x2 {
        unsafe {
            let swizzled = self.0;
            I32x2(*mem::transmute::<&__m128i, &u64>(&swizzled))
        }
    }

    #[inline]
    pub fn xw(self) -> I32x2 {
        self.xwyz().xy()
    }

    #[inline]
    pub fn yx(self) -> I32x2 {
        self.yxwz().xy()
    }

    #[inline]
    pub fn zy(self) -> I32x2 {
        self.zyxw().xy()
    }

    #[inline]
    pub fn zw(self) -> I32x2 {
        self.zwxy().xy()
    }

    // Concatenations

    #[inline]
    pub fn concat_xy_xy(self, other: I32x4) -> I32x4 {
        unsafe {
            let this = x86::_mm_castsi128_pd(self.0);
            let other = x86::_mm_castsi128_pd(other.0);
            let result = x86::_mm_unpacklo_pd(this, other);
            I32x4(x86::_mm_castpd_si128(result))
        }
    }

    #[inline]
    pub fn concat_zw_zw(self, other: I32x4) -> I32x4 {
        unsafe {
            let this = x86::_mm_castsi128_pd(self.0);
            let other = x86::_mm_castsi128_pd(other.0);
            let result = x86::_mm_unpackhi_pd(this, other);
            I32x4(x86::_mm_castpd_si128(result))
        }
    }

    // Conversions

    /// Converts these packed integers to floats.
    #[inline]
    pub fn to_f32x4(self) -> F32x4 {
        unsafe { F32x4(x86::_mm_cvtepi32_ps(self.0)) }
    }

    /// Converts these packed signed integers to unsigned integers.
    ///
    /// This is a bitwise reinterpretation, so negative values wrap around.
    #[inline]
    pub fn to_u32x4(self) -> U32x4 {
        U32x4(self.0)
    }

    // Basic operations

    #[inline]
    pub fn max(self, other: I32x4) -> I32x4 {
        unsafe { I32x4(x86::_mm_max_epi32(self.0, other.0)) }
    }

    #[inline]
    pub fn min(self, other: I32x4) -> I32x4 {
        unsafe { I32x4(x86::_mm_min_epi32(self.0, other.0)) }
    }

    // Packed comparisons

    #[inline]
    pub fn packed_eq(self, other: I32x4) -> U32x4 {
        unsafe { U32x4(x86::_mm_cmpeq_epi32(self.0, other.0)) }
    }

    #[inline]
    pub fn packed_gt(self, other: I32x4) -> U32x4 {
        unsafe { U32x4(x86::_mm_cmpgt_epi32(self.0, other.0)) }
    }

    #[inline]
    pub fn packed_lt(self, other: I32x4) -> U32x4 {
        other.packed_gt(self)
    }

    #[inline]
    pub fn packed_le(self, other: I32x4) -> U32x4 {
        !self.packed_gt(other)
    }
}

impl Default for I32x4 {
    #[inline]
    fn default() -> I32x4 {
        unsafe { I32x4(x86::_mm_setzero_si128()) }
    }
}

impl Index<usize> for I32x4 {
    type Output = i32;
    #[inline]
    fn index(&self, index: usize) -> &i32 {
        unsafe { &mem::transmute::<&__m128i, &[i32; 4]>(&self.0)[index] }
    }
}

impl IndexMut<usize> for I32x4 {
    #[inline]
    fn index_mut(&mut self, index: usize) -> &mut i32 {
        unsafe { &mut mem::transmute::<&mut __m128i, &mut [i32; 4]>(&mut self.0)[index] }
    }
}

impl Add<I32x4> for I32x4 {
    type Output = I32x4;
    #[inline]
    fn add(self, other: I32x4) -> I32x4 {
        unsafe { I32x4(x86::_mm_add_epi32(self.0, other.0)) }
    }
}

impl Sub<I32x4> for I32x4 {
    type Output = I32x4;
    #[inline]
    fn sub(self, other: I32x4) -> I32x4 {
        unsafe { I32x4(x86::_mm_sub_epi32(self.0, other.0)) }
    }
}

impl Mul<I32x4> for I32x4 {
    type Output = I32x4;
    #[inline]
    fn mul(self, other: I32x4) -> I32x4 {
        unsafe { I32x4(x86::_mm_mullo_epi32(self.0, other.0)) }
    }
}

impl BitAnd<I32x4> for I32x4 {
    type Output = I32x4;
    #[inline]
    fn bitand(self, other: I32x4) -> I32x4 {
        unsafe { I32x4(x86::_mm_and_si128(self.0, other.0)) }
    }
}

impl BitOr<I32x4> for I32x4 {
    type Output = I32x4;
    #[inline]
    fn bitor(self, other: I32x4) -> I32x4 {
        unsafe { I32x4(x86::_mm_or_si128(self.0, other.0)) }
    }
}

impl Debug for I32x4 {
    #[inline]
    fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
        write!(f, "<{}, {}, {}, {}>", self[0], self[1], self[2], self[3])
    }
}

impl PartialEq for I32x4 {
    #[inline]
    fn eq(&self, other: &I32x4) -> bool {
        self.packed_eq(*other).all_true()
    }
}
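
// Editorial sketch, not part of upstream Pathfinder: `I32x4` comparisons
// produce `U32x4` lane masks, and `Mul` uses `_mm_mullo_epi32`, which keeps
// the low 32 bits of each 64-bit product. Assumes an SSE4.1-capable target.
#[cfg(test)]
mod i32x4_example_tests {
    use super::I32x4;

    #[test]
    fn i32x4_basic_usage() {
        let a = I32x4::new(1, 2, 3, 4);
        let b = I32x4::splat(3);
        assert_eq!(a * b, I32x4::new(3, 6, 9, 12));
        // Only the `x` and `y` lanes of `a` are strictly less than 3.
        let mask = a.packed_lt(b);
        assert!(!mask.all_true() && !mask.all_false());
        assert_eq!(a.min(b), I32x4::new(1, 2, 3, 3));
    }
}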

// Two 32-bit unsigned integers

#[derive(Clone, Copy)]
pub struct U32x2(pub u64);

impl U32x2 {
    #[inline]
    pub fn new(x: u32, y: u32) -> U32x2 {
        U32x2(x as u64 | ((y as u64) << 32))
    }

    #[inline]
    pub fn splat(x: u32) -> U32x2 {
        U32x2::new(x, x)
    }

    /// Returns true if both booleans in this vector are true.
    ///
    /// The result is *undefined* if either value in this vector is not a boolean. A boolean is a
    /// value with all bits set or all bits clear (i.e. !0 or 0).
    #[inline]
    pub fn all_true(self) -> bool {
        self.0 == !0
    }

    /// Returns true if both booleans in this vector are false.
    ///
    /// The result is *undefined* if either value in this vector is not a boolean. A boolean is a
    /// value with all bits set or all bits clear (i.e. !0 or 0).
    #[inline]
    pub fn all_false(self) -> bool {
        self.0 == 0
    }

    #[inline]
    pub fn to_i32x2(self) -> I32x2 {
        I32x2(self.0)
    }
}

impl Not for U32x2 {
    type Output = U32x2;
    #[inline]
    fn not(self) -> U32x2 {
        U32x2(!self.0)
    }
}

impl BitAnd<U32x2> for U32x2 {
    type Output = U32x2;
    #[inline]
    fn bitand(self, other: U32x2) -> U32x2 {
        U32x2(self.0 & other.0)
    }
}

impl BitOr<U32x2> for U32x2 {
    type Output = U32x2;
    #[inline]
    fn bitor(self, other: U32x2) -> U32x2 {
        U32x2(self.0 | other.0)
    }
}
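
// Editorial sketch, not part of upstream Pathfinder: a lane "boolean" is
// all-ones (!0) or all-zeros, so `all_true` on a `U32x2` reduces to comparing
// the packed `u64` against `!0`, as the test below demonstrates.
#[cfg(test)]
mod u32x2_example_tests {
    use super::U32x2;

    #[test]
    fn u32x2_boolean_masks() {
        let t = U32x2::splat(!0);
        let f = U32x2::splat(0);
        assert!(t.all_true());
        assert!(f.all_false());
        // Mixed masks are neither all-true nor all-false.
        let mixed = U32x2::new(!0, 0);
        assert!(!mixed.all_true() && !mixed.all_false());
        // Bitwise combinators behave like lanewise logical ops on masks.
        assert!((t & !f).all_true());
    }
}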

// Four 32-bit unsigned integers

#[derive(Clone, Copy)]
pub struct U32x4(pub __m128i);

impl U32x4 {
    // Constructors

    #[inline]
    pub fn new(a: u32, b: u32, c: u32, d: u32) -> U32x4 {
        unsafe {
            let vector = [a, b, c, d];
            U32x4(x86::_mm_loadu_si128(vector.as_ptr() as *const __m128i))
        }
    }

    #[inline]
    pub fn splat(x: u32) -> U32x4 {
        unsafe { U32x4(x86::_mm_set1_epi32(x as i32)) }
    }

    // Conversions

    /// Converts these packed unsigned integers to signed integers.
    ///
    /// This is a bitwise reinterpretation, so values above `i32::MAX` wrap around.
    #[inline]
    pub fn to_i32x4(self) -> I32x4 {
        I32x4(self.0)
    }

    // Basic operations

    /// Returns true if all four booleans in this vector are true.
    ///
    /// The result is *undefined* if any of the four values in this vector is not a boolean. A
    /// boolean is a value with all bits set or all bits clear (i.e. !0 or 0).
    #[inline]
    pub fn all_true(self) -> bool {
        unsafe { x86::_mm_movemask_ps(x86::_mm_castsi128_ps(self.0)) == 0x0f }
    }

    /// Returns true if all four booleans in this vector are false.
    ///
    /// The result is *undefined* if any of the four values in this vector is not a boolean. A
    /// boolean is a value with all bits set or all bits clear (i.e. !0 or 0).
    #[inline]
    pub fn all_false(self) -> bool {
        unsafe { x86::_mm_movemask_ps(x86::_mm_castsi128_ps(self.0)) == 0x00 }
    }

    // Extraction

    #[inline]
    pub fn xy(self) -> U32x2 {
        unsafe {
            let swizzled = self.0;
            U32x2(*mem::transmute::<&__m128i, &u64>(&swizzled))
        }
    }

    // Packed comparisons

    #[inline]
    pub fn packed_eq(self, other: U32x4) -> U32x4 {
        unsafe { U32x4(x86::_mm_cmpeq_epi32(self.0, other.0)) }
    }
}

impl Debug for U32x4 {
    #[inline]
    fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
        write!(f, "<{}, {}, {}, {}>", self[0], self[1], self[2], self[3])
    }
}

impl Index<usize> for U32x4 {
    type Output = u32;
    #[inline]
    fn index(&self, index: usize) -> &u32 {
        unsafe { &mem::transmute::<&__m128i, &[u32; 4]>(&self.0)[index] }
    }
}

impl PartialEq for U32x4 {
    #[inline]
    fn eq(&self, other: &U32x4) -> bool {
        self.packed_eq(*other).all_true()
    }
}

impl Not for U32x4 {
    type Output = U32x4;
    #[inline]
    fn not(self) -> U32x4 {
        self ^ U32x4::splat(!0)
    }
}

impl BitXor<U32x4> for U32x4 {
    type Output = U32x4;
    #[inline]
    fn bitxor(self, other: U32x4) -> U32x4 {
        unsafe { U32x4(x86::_mm_xor_si128(self.0, other.0)) }
    }
}

impl Shr<u32> for U32x4 {
    type Output = U32x4;
    #[inline]
    fn shr(self, amount: u32) -> U32x4 {
        unsafe { U32x4(x86::_mm_srl_epi32(self.0, U32x4::new(amount, 0, 0, 0).0)) }
    }
}
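
// Editorial sketch, not part of upstream Pathfinder: `U32x4` comparison masks
// compose with `Not`, `BitXor`, and `Shr`. Note that `Shr` shifts every lane
// by the same amount, taken from the low 64 bits of the count vector.
#[cfg(test)]
mod u32x4_example_tests {
    use super::U32x4;

    #[test]
    fn u32x4_masks_and_shifts() {
        let a = U32x4::new(1, 2, 3, 4);
        // `packed_eq` yields !0 in equal lanes and 0 elsewhere.
        let mask = a.packed_eq(U32x4::new(1, 0, 3, 0));
        assert!(!mask.all_true() && !mask.all_false());
        // `!` flips every lane of the mask.
        assert_eq!((!mask)[0], 0);
        assert_eq!((!mask)[1], !0);
        // Each lane is shifted right by the same scalar amount.
        assert_eq!(U32x4::splat(8) >> 2, U32x4::splat(2));
    }
}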