// pathfinder/simd/src/x86.rs
//
// Copyright © 2019 The Pathfinder Project Developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use std::cmp::PartialEq;
use std::fmt::{self, Debug, Formatter};
use std::mem;
use std::ops::{Add, BitAnd, BitOr, BitXor, Div, Index, IndexMut, Mul, Not, Shr, Sub};

#[cfg(target_pointer_width = "32")]
use std::arch::x86::{__m128, __m128i};
#[cfg(target_pointer_width = "32")]
use std::arch::x86;
#[cfg(target_pointer_width = "64")]
use std::arch::x86_64::{__m128, __m128i};
#[cfg(target_pointer_width = "64")]
use std::arch::x86_64 as x86;

mod swizzle_f32x4;
mod swizzle_i32x4;

// Two 32-bit floats

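/// Two packed 32-bit floats, stored as the bit pattern of a single `u64`
/// (lane 0 in the low 32 bits, lane 1 in the high 32 bits). Most operations
/// widen to `F32x4`, use the SSE implementation, and truncate back.
///
/// An illustrative sketch of typical use; the surrounding import path is an
/// assumption, so the example is marked `ignore` rather than run as a doctest:
///
/// ```ignore
/// let v = F32x2::new(1.0, 2.0);
/// assert_eq!(v + F32x2::splat(1.0), F32x2::new(2.0, 3.0));
/// ```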
#[derive(Clone, Copy)]
pub struct F32x2(pub u64);

impl F32x2 {
    // Constructors

    #[inline]
    pub fn new(a: f32, b: f32) -> F32x2 {
        unsafe {
            // Reinterpret each float's bit pattern as a `u32` and pack the
            // pair into a single `u64`, with `a` in the low half.
            let a = mem::transmute::<*const f32, *const u32>(&a);
            let b = mem::transmute::<*const f32, *const u32>(&b);
            F32x2((*a as u64) | ((*b as u64) << 32))
        }
    }

    #[inline]
    pub fn splat(x: f32) -> F32x2 {
        F32x2::new(x, x)
    }

    // Basic operations

    #[inline]
    pub fn approx_recip(self) -> F32x2 {
        self.to_f32x4().approx_recip().xy()
    }

    #[inline]
    pub fn min(self, other: F32x2) -> F32x2 {
        self.to_f32x4().min(other.to_f32x4()).xy()
    }

    #[inline]
    pub fn max(self, other: F32x2) -> F32x2 {
        self.to_f32x4().max(other.to_f32x4()).xy()
    }

    #[inline]
    pub fn clamp(self, min: F32x2, max: F32x2) -> F32x2 {
        self.to_f32x4().clamp(min.to_f32x4(), max.to_f32x4()).xy()
    }

    #[inline]
    pub fn abs(self) -> F32x2 {
        self.to_f32x4().abs().xy()
    }

    #[inline]
    pub fn floor(self) -> F32x2 {
        self.to_f32x4().floor().xy()
    }

    #[inline]
    pub fn ceil(self) -> F32x2 {
        self.to_f32x4().ceil().xy()
    }

    #[inline]
    pub fn sqrt(self) -> F32x2 {
        self.to_f32x4().sqrt().xy()
    }

    // Packed comparisons

    #[inline]
    pub fn packed_eq(self, other: F32x2) -> U32x2 {
        self.to_f32x4().packed_eq(other.to_f32x4()).xy()
    }

    #[inline]
    pub fn packed_gt(self, other: F32x2) -> U32x2 {
        self.to_f32x4().packed_gt(other.to_f32x4()).xy()
    }

    #[inline]
    pub fn packed_lt(self, other: F32x2) -> U32x2 {
        self.to_f32x4().packed_lt(other.to_f32x4()).xy()
    }

    #[inline]
    pub fn packed_le(self, other: F32x2) -> U32x2 {
        self.to_f32x4().packed_le(other.to_f32x4()).xy()
    }

    // Conversions

    #[inline]
    pub fn to_f32x4(self) -> F32x4 {
        unsafe {
            // Copy the packed pair into the low 64 bits of a zeroed vector.
            let mut result = F32x4::default();
            *mem::transmute::<&mut __m128, &mut u64>(&mut result.0) = self.0;
            result
        }
    }

    #[inline]
    pub fn to_i32x2(self) -> I32x2 {
        self.to_i32x4().xy()
    }

    #[inline]
    pub fn to_i32x4(self) -> I32x4 {
        self.to_f32x4().to_i32x4()
    }

    // Swizzles

    #[inline]
    pub fn yx(self) -> F32x2 {
        self.to_f32x4().yx()
    }

    // Concatenations

    #[inline]
    pub fn concat_xy_xy(self, other: F32x2) -> F32x4 {
        self.to_f32x4().concat_xy_xy(other.to_f32x4())
    }
}

impl Default for F32x2 {
    #[inline]
    fn default() -> F32x2 {
        F32x2(0)
    }
}

impl Index<usize> for F32x2 {
    type Output = f32;
    #[inline]
    fn index(&self, index: usize) -> &f32 {
        unsafe { &mem::transmute::<&u64, &[f32; 2]>(&self.0)[index] }
    }
}

impl IndexMut<usize> for F32x2 {
    #[inline]
    fn index_mut(&mut self, index: usize) -> &mut f32 {
        unsafe { &mut mem::transmute::<&mut u64, &mut [f32; 2]>(&mut self.0)[index] }
    }
}

impl Debug for F32x2 {
    #[inline]
    fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
        write!(f, "<{}, {}>", self[0], self[1])
    }
}

impl PartialEq for F32x2 {
    #[inline]
    fn eq(&self, other: &F32x2) -> bool {
        self.packed_eq(*other).all_true()
    }
}

impl Add<F32x2> for F32x2 {
    type Output = F32x2;
    #[inline]
    fn add(self, other: F32x2) -> F32x2 {
        (self.to_f32x4() + other.to_f32x4()).xy()
    }
}

impl Div<F32x2> for F32x2 {
    type Output = F32x2;
    #[inline]
    fn div(self, other: F32x2) -> F32x2 {
        (self.to_f32x4() / other.to_f32x4()).xy()
    }
}

impl Mul<F32x2> for F32x2 {
    type Output = F32x2;
    #[inline]
    fn mul(self, other: F32x2) -> F32x2 {
        (self.to_f32x4() * other.to_f32x4()).xy()
    }
}

impl Sub<F32x2> for F32x2 {
    type Output = F32x2;
    #[inline]
    fn sub(self, other: F32x2) -> F32x2 {
        (self.to_f32x4() - other.to_f32x4()).xy()
    }
}

// Four 32-bit floats

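/// Four packed 32-bit floats backed by a single SSE `__m128` register.
///
/// An illustrative sketch of typical use; the surrounding import path is an
/// assumption, so the example is marked `ignore` rather than run as a doctest:
///
/// ```ignore
/// let a = F32x4::new(1.0, 2.0, 3.0, 4.0);
/// let b = F32x4::splat(2.0);
/// assert_eq!(a * b, F32x4::new(2.0, 4.0, 6.0, 8.0));
/// assert_eq!(a.max(b), F32x4::new(2.0, 2.0, 3.0, 4.0));
/// ```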
#[derive(Clone, Copy)]
pub struct F32x4(pub __m128);

impl F32x4 {
    // Constructors

    #[inline]
    pub fn new(a: f32, b: f32, c: f32, d: f32) -> F32x4 {
        unsafe {
            let vector = [a, b, c, d];
            F32x4(x86::_mm_loadu_ps(vector.as_ptr()))
        }
    }

    #[inline]
    pub fn splat(x: f32) -> F32x4 {
        unsafe { F32x4(x86::_mm_set1_ps(x)) }
    }

    // Basic operations

    #[inline]
    pub fn approx_recip(self) -> F32x4 {
        unsafe { F32x4(x86::_mm_rcp_ps(self.0)) }
    }

    #[inline]
    pub fn min(self, other: F32x4) -> F32x4 {
        unsafe { F32x4(x86::_mm_min_ps(self.0, other.0)) }
    }

    #[inline]
    pub fn max(self, other: F32x4) -> F32x4 {
        unsafe { F32x4(x86::_mm_max_ps(self.0, other.0)) }
    }

    #[inline]
    pub fn clamp(self, min: F32x4, max: F32x4) -> F32x4 {
        self.max(min).min(max)
    }

    #[inline]
    pub fn abs(self) -> F32x4 {
        unsafe {
            // Shifting all-ones lanes right by one yields 0x7fffffff in
            // every lane; ANDing with it clears each float's sign bit.
            let tmp = x86::_mm_srli_epi32(I32x4::splat(-1).0, 1);
            F32x4(x86::_mm_and_ps(x86::_mm_castsi128_ps(tmp), self.0))
        }
    }

    #[inline]
    pub fn floor(self) -> F32x4 {
        unsafe { F32x4(x86::_mm_floor_ps(self.0)) }
    }

    #[inline]
    pub fn ceil(self) -> F32x4 {
        unsafe { F32x4(x86::_mm_ceil_ps(self.0)) }
    }

    #[inline]
    pub fn sqrt(self) -> F32x4 {
        unsafe { F32x4(x86::_mm_sqrt_ps(self.0)) }
    }

    // Packed comparisons

    #[inline]
    pub fn packed_eq(self, other: F32x4) -> U32x4 {
        unsafe { U32x4(x86::_mm_castps_si128(x86::_mm_cmpeq_ps(self.0, other.0))) }
    }

    #[inline]
    pub fn packed_gt(self, other: F32x4) -> U32x4 {
        unsafe { U32x4(x86::_mm_castps_si128(x86::_mm_cmpgt_ps(self.0, other.0))) }
    }

    #[inline]
    pub fn packed_lt(self, other: F32x4) -> U32x4 {
        other.packed_gt(self)
    }

    #[inline]
    pub fn packed_le(self, other: F32x4) -> U32x4 {
        !self.packed_gt(other)
    }

    // Conversions

    /// Converts these packed floats to integers via rounding.
    #[inline]
    pub fn to_i32x4(self) -> I32x4 {
        unsafe { I32x4(x86::_mm_cvtps_epi32(self.0)) }
    }

    // Extraction

    #[inline]
    pub fn xy(self) -> F32x2 {
        unsafe {
            // The low 64 bits of the vector hold lanes x and y.
            let swizzled = self.0;
            F32x2(*mem::transmute::<&__m128, &u64>(&swizzled))
        }
    }

    #[inline]
    pub fn xw(self) -> F32x2 {
        self.xwyz().xy()
    }

    #[inline]
    pub fn yx(self) -> F32x2 {
        self.yxwz().xy()
    }

    #[inline]
    pub fn zy(self) -> F32x2 {
        self.zyxw().xy()
    }

    #[inline]
    pub fn zw(self) -> F32x2 {
        self.zwxy().xy()
    }

    // Concatenations

    #[inline]
    pub fn concat_xy_xy(self, other: F32x4) -> F32x4 {
        unsafe {
            let this = x86::_mm_castps_pd(self.0);
            let other = x86::_mm_castps_pd(other.0);
            let result = x86::_mm_unpacklo_pd(this, other);
            F32x4(x86::_mm_castpd_ps(result))
        }
    }

    #[inline]
    pub fn concat_xy_zw(self, other: F32x4) -> F32x4 {
        unsafe {
            let this = x86::_mm_castps_pd(self.0);
            let other = x86::_mm_castps_pd(other.0);
            // Immediate 0b10: take the low half of `this` and the high
            // half of `other`.
            let result = x86::_mm_shuffle_pd(this, other, 0b10);
            F32x4(x86::_mm_castpd_ps(result))
        }
    }

    #[inline]
    pub fn concat_zw_zw(self, other: F32x4) -> F32x4 {
        unsafe {
            let this = x86::_mm_castps_pd(self.0);
            let other = x86::_mm_castps_pd(other.0);
            let result = x86::_mm_unpackhi_pd(this, other);
            F32x4(x86::_mm_castpd_ps(result))
        }
    }

    #[inline]
    pub fn concat_wz_yx(self, other: F32x4) -> F32x4 {
        unsafe { F32x4(x86::_mm_shuffle_ps(self.0, other.0, 0b0001_1011)) }
    }
}

impl Default for F32x4 {
    #[inline]
    fn default() -> F32x4 {
        unsafe { F32x4(x86::_mm_setzero_ps()) }
    }
}

impl Index<usize> for F32x4 {
    type Output = f32;
    #[inline]
    fn index(&self, index: usize) -> &f32 {
        unsafe { &mem::transmute::<&__m128, &[f32; 4]>(&self.0)[index] }
    }
}

impl IndexMut<usize> for F32x4 {
    #[inline]
    fn index_mut(&mut self, index: usize) -> &mut f32 {
        unsafe { &mut mem::transmute::<&mut __m128, &mut [f32; 4]>(&mut self.0)[index] }
    }
}

impl Debug for F32x4 {
    #[inline]
    fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
        write!(f, "<{}, {}, {}, {}>", self[0], self[1], self[2], self[3])
    }
}

impl PartialEq for F32x4 {
    #[inline]
    fn eq(&self, other: &F32x4) -> bool {
        self.packed_eq(*other).all_true()
    }
}

impl Add<F32x4> for F32x4 {
    type Output = F32x4;
    #[inline]
    fn add(self, other: F32x4) -> F32x4 {
        unsafe { F32x4(x86::_mm_add_ps(self.0, other.0)) }
    }
}

impl Div<F32x4> for F32x4 {
    type Output = F32x4;
    #[inline]
    fn div(self, other: F32x4) -> F32x4 {
        unsafe { F32x4(x86::_mm_div_ps(self.0, other.0)) }
    }
}

impl Mul<F32x4> for F32x4 {
    type Output = F32x4;
    #[inline]
    fn mul(self, other: F32x4) -> F32x4 {
        unsafe { F32x4(x86::_mm_mul_ps(self.0, other.0)) }
    }
}

impl Sub<F32x4> for F32x4 {
    type Output = F32x4;
    #[inline]
    fn sub(self, other: F32x4) -> F32x4 {
        unsafe { F32x4(x86::_mm_sub_ps(self.0, other.0)) }
    }
}

// Two 32-bit signed integers

#[derive(Clone, Copy)]
pub struct I32x2(pub u64);

impl I32x2 {
    // Constructors

    #[inline]
    pub fn new(a: i32, b: i32) -> I32x2 {
        unsafe {
            let a = mem::transmute::<*const i32, *const u32>(&a);
            let b = mem::transmute::<*const i32, *const u32>(&b);
            I32x2((*a as u64) | ((*b as u64) << 32))
        }
    }

    #[inline]
    pub fn splat(x: i32) -> I32x2 {
        I32x2::new(x, x)
    }

    // Accessors

    #[inline]
    pub fn x(self) -> i32 {
        self[0]
    }

    #[inline]
    pub fn y(self) -> i32 {
        self[1]
    }

    // Concatenations

    #[inline]
    pub fn concat_xy_xy(self, other: I32x2) -> I32x4 {
        self.to_i32x4().concat_xy_xy(other.to_i32x4())
    }

    // Conversions

    #[inline]
    pub fn to_i32x4(self) -> I32x4 {
        unsafe {
            let mut result = I32x4::default();
            *mem::transmute::<&mut __m128i, &mut u64>(&mut result.0) = self.0;
            result
        }
    }

    #[inline]
    pub fn to_f32x4(self) -> F32x4 {
        self.to_i32x4().to_f32x4()
    }

    /// Converts these packed integers to floats.
    #[inline]
    pub fn to_f32x2(self) -> F32x2 {
        self.to_f32x4().xy()
    }

    // Basic operations

    #[inline]
    pub fn max(self, other: I32x2) -> I32x2 {
        self.to_i32x4().max(other.to_i32x4()).xy()
    }

    #[inline]
    pub fn min(self, other: I32x2) -> I32x2 {
        self.to_i32x4().min(other.to_i32x4()).xy()
    }

    // Packed comparisons

    // TODO(pcwalton): Use the `U32x2` type!
    #[inline]
    pub fn packed_eq(self, other: I32x2) -> U32x4 {
        self.to_i32x4().packed_eq(other.to_i32x4())
    }

    #[inline]
    pub fn packed_gt(self, other: I32x2) -> U32x4 {
        self.to_i32x4().packed_gt(other.to_i32x4())
    }

    #[inline]
    pub fn packed_le(self, other: I32x2) -> U32x4 {
        self.to_i32x4().packed_le(other.to_i32x4())
    }
}

impl Default for I32x2 {
    #[inline]
    fn default() -> I32x2 {
        I32x2(0)
    }
}

impl Index<usize> for I32x2 {
    type Output = i32;
    #[inline]
    fn index(&self, index: usize) -> &i32 {
        unsafe { &mem::transmute::<&u64, &[i32; 2]>(&self.0)[index] }
    }
}

impl IndexMut<usize> for I32x2 {
    #[inline]
    fn index_mut(&mut self, index: usize) -> &mut i32 {
        unsafe { &mut mem::transmute::<&mut u64, &mut [i32; 2]>(&mut self.0)[index] }
    }
}

impl Add<I32x2> for I32x2 {
    type Output = I32x2;
    #[inline]
    fn add(self, other: I32x2) -> I32x2 {
        (self.to_i32x4() + other.to_i32x4()).xy()
    }
}

impl Sub<I32x2> for I32x2 {
    type Output = I32x2;
    #[inline]
    fn sub(self, other: I32x2) -> I32x2 {
        (self.to_i32x4() - other.to_i32x4()).xy()
    }
}

impl Mul<I32x2> for I32x2 {
    type Output = I32x2;
    #[inline]
    fn mul(self, other: I32x2) -> I32x2 {
        (self.to_i32x4() * other.to_i32x4()).xy()
    }
}

impl Debug for I32x2 {
    #[inline]
    fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
        write!(f, "<{}, {}>", self[0], self[1])
    }
}

impl PartialEq for I32x2 {
    #[inline]
    fn eq(&self, other: &I32x2) -> bool {
        self.packed_eq(*other).all_true()
    }
}

// Four 32-bit signed integers

#[derive(Clone, Copy)]
pub struct I32x4(pub __m128i);

impl I32x4 {
    // Constructors

    #[inline]
    pub fn new(a: i32, b: i32, c: i32, d: i32) -> I32x4 {
        unsafe {
            let vector = [a, b, c, d];
            I32x4(x86::_mm_loadu_si128(vector.as_ptr() as *const __m128i))
        }
    }

    #[inline]
    pub fn splat(x: i32) -> I32x4 {
        unsafe { I32x4(x86::_mm_set1_epi32(x)) }
    }

    // Extraction

    #[inline]
    pub fn xy(self) -> I32x2 {
        unsafe {
            // The low 64 bits of the vector hold lanes x and y.
            let swizzled = self.0;
            I32x2(*mem::transmute::<&__m128i, &u64>(&swizzled))
        }
    }

    #[inline]
    pub fn xw(self) -> I32x2 {
        self.xwyz().xy()
    }

    #[inline]
    pub fn yx(self) -> I32x2 {
        self.yxwz().xy()
    }

    #[inline]
    pub fn zy(self) -> I32x2 {
        self.zyxw().xy()
    }

    #[inline]
    pub fn zw(self) -> I32x2 {
        self.zwxy().xy()
    }

    // Concatenations

    #[inline]
    pub fn concat_xy_xy(self, other: I32x4) -> I32x4 {
        unsafe {
            let this = x86::_mm_castsi128_pd(self.0);
            let other = x86::_mm_castsi128_pd(other.0);
            let result = x86::_mm_unpacklo_pd(this, other);
            I32x4(x86::_mm_castpd_si128(result))
        }
    }

    #[inline]
    pub fn concat_zw_zw(self, other: I32x4) -> I32x4 {
        unsafe {
            let this = x86::_mm_castsi128_pd(self.0);
            let other = x86::_mm_castsi128_pd(other.0);
            let result = x86::_mm_unpackhi_pd(this, other);
            I32x4(x86::_mm_castpd_si128(result))
        }
    }

    // Conversions

    /// Converts these packed integers to floats.
    #[inline]
    pub fn to_f32x4(self) -> F32x4 {
        unsafe { F32x4(x86::_mm_cvtepi32_ps(self.0)) }
    }

    /// Converts these packed signed integers to unsigned integers.
    ///
    /// Overflowing values will wrap around.
    #[inline]
    pub fn to_u32x4(self) -> U32x4 {
        U32x4(self.0)
    }

    // Basic operations

    #[inline]
    pub fn max(self, other: I32x4) -> I32x4 {
        unsafe { I32x4(x86::_mm_max_epi32(self.0, other.0)) }
    }

    #[inline]
    pub fn min(self, other: I32x4) -> I32x4 {
        unsafe { I32x4(x86::_mm_min_epi32(self.0, other.0)) }
    }

    // Packed comparisons

    #[inline]
    pub fn packed_eq(self, other: I32x4) -> U32x4 {
        unsafe { U32x4(x86::_mm_cmpeq_epi32(self.0, other.0)) }
    }

    #[inline]
    pub fn packed_gt(self, other: I32x4) -> U32x4 {
        unsafe { U32x4(x86::_mm_cmpgt_epi32(self.0, other.0)) }
    }

    #[inline]
    pub fn packed_lt(self, other: I32x4) -> U32x4 {
        other.packed_gt(self)
    }

    #[inline]
    pub fn packed_le(self, other: I32x4) -> U32x4 {
        !self.packed_gt(other)
    }
}

impl Default for I32x4 {
    #[inline]
    fn default() -> I32x4 {
        unsafe { I32x4(x86::_mm_setzero_si128()) }
    }
}

impl Index<usize> for I32x4 {
    type Output = i32;
    #[inline]
    fn index(&self, index: usize) -> &i32 {
        unsafe { &mem::transmute::<&__m128i, &[i32; 4]>(&self.0)[index] }
    }
}

impl IndexMut<usize> for I32x4 {
    #[inline]
    fn index_mut(&mut self, index: usize) -> &mut i32 {
        unsafe { &mut mem::transmute::<&mut __m128i, &mut [i32; 4]>(&mut self.0)[index] }
    }
}

impl Add<I32x4> for I32x4 {
    type Output = I32x4;
    #[inline]
    fn add(self, other: I32x4) -> I32x4 {
        unsafe { I32x4(x86::_mm_add_epi32(self.0, other.0)) }
    }
}

impl Sub<I32x4> for I32x4 {
    type Output = I32x4;
    #[inline]
    fn sub(self, other: I32x4) -> I32x4 {
        unsafe { I32x4(x86::_mm_sub_epi32(self.0, other.0)) }
    }
}

impl Mul<I32x4> for I32x4 {
    type Output = I32x4;
    #[inline]
    fn mul(self, other: I32x4) -> I32x4 {
        unsafe { I32x4(x86::_mm_mullo_epi32(self.0, other.0)) }
    }
}

impl BitAnd<I32x4> for I32x4 {
    type Output = I32x4;
    #[inline]
    fn bitand(self, other: I32x4) -> I32x4 {
        unsafe { I32x4(x86::_mm_and_si128(self.0, other.0)) }
    }
}

impl BitOr<I32x4> for I32x4 {
    type Output = I32x4;
    #[inline]
    fn bitor(self, other: I32x4) -> I32x4 {
        unsafe { I32x4(x86::_mm_or_si128(self.0, other.0)) }
    }
}

impl Debug for I32x4 {
    #[inline]
    fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
        write!(f, "<{}, {}, {}, {}>", self[0], self[1], self[2], self[3])
    }
}

impl PartialEq for I32x4 {
    #[inline]
    fn eq(&self, other: &I32x4) -> bool {
        self.packed_eq(*other).all_true()
    }
}

// Two 32-bit unsigned integers

#[derive(Clone, Copy)]
pub struct U32x2(pub u64);

impl U32x2 {
    #[inline]
    pub fn new(x: u32, y: u32) -> U32x2 {
        U32x2(x as u64 | ((y as u64) << 32))
    }

    #[inline]
    pub fn splat(x: u32) -> U32x2 {
        U32x2::new(x, x)
    }

    /// Returns true if both booleans in this vector are true.
    ///
    /// The result is *undefined* if the two values in this vector are not
    /// booleans. A boolean is a value with all bits set or all bits clear
    /// (i.e. `!0` or `0`).
    #[inline]
    pub fn all_true(self) -> bool {
        self.0 == !0
    }

    /// Returns true if both booleans in this vector are false.
    ///
    /// The result is *undefined* if the two values in this vector are not
    /// booleans. A boolean is a value with all bits set or all bits clear
    /// (i.e. `!0` or `0`).
    #[inline]
    pub fn all_false(self) -> bool {
        self.0 == 0
    }

    #[inline]
    pub fn to_i32x2(self) -> I32x2 {
        I32x2(self.0)
    }
}

impl Not for U32x2 {
    type Output = U32x2;
    #[inline]
    fn not(self) -> U32x2 {
        U32x2(!self.0)
    }
}

impl BitAnd<U32x2> for U32x2 {
    type Output = U32x2;
    #[inline]
    fn bitand(self, other: U32x2) -> U32x2 {
        U32x2(self.0 & other.0)
    }
}

impl BitOr<U32x2> for U32x2 {
    type Output = U32x2;
    #[inline]
    fn bitor(self, other: U32x2) -> U32x2 {
        U32x2(self.0 | other.0)
    }
}

// Four 32-bit unsigned integers

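/// Four packed 32-bit unsigned integers backed by a single SSE `__m128i`
/// register. This type mainly appears as the result of the packed
/// comparisons above, where each lane is a "boolean": all ones (`!0`) if the
/// comparison held in that lane and all zeros otherwise.
///
/// An illustrative sketch; the surrounding import path is an assumption, so
/// the example is marked `ignore` rather than run as a doctest:
///
/// ```ignore
/// let mask = F32x4::splat(1.0).packed_lt(F32x4::splat(2.0));
/// assert!(mask.all_true());
/// ```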
#[derive(Clone, Copy)]
pub struct U32x4(pub __m128i);

impl U32x4 {
    // Constructors

    #[inline]
    pub fn new(a: u32, b: u32, c: u32, d: u32) -> U32x4 {
        unsafe {
            let vector = [a, b, c, d];
            U32x4(x86::_mm_loadu_si128(vector.as_ptr() as *const __m128i))
        }
    }

    #[inline]
    pub fn splat(x: u32) -> U32x4 {
        unsafe { U32x4(x86::_mm_set1_epi32(x as i32)) }
    }

    // Conversions

    /// Converts these packed unsigned integers to signed integers.
    ///
    /// Overflowing values will wrap around.
    #[inline]
    pub fn to_i32x4(self) -> I32x4 {
        I32x4(self.0)
    }

    // Basic operations

    /// Returns true if all four booleans in this vector are true.
    ///
    /// The result is *undefined* if the four values in this vector are not
    /// booleans. A boolean is a value with all bits set or all bits clear
    /// (i.e. `!0` or `0`).
    #[inline]
    pub fn all_true(self) -> bool {
        // `_mm_movemask_ps` gathers the high (sign) bit of each lane, so a
        // vector of all-ones lanes produces the mask 0b1111.
        unsafe { x86::_mm_movemask_ps(x86::_mm_castsi128_ps(self.0)) == 0x0f }
    }

    /// Returns true if all four booleans in this vector are false.
    ///
    /// The result is *undefined* if the four values in this vector are not
    /// booleans. A boolean is a value with all bits set or all bits clear
    /// (i.e. `!0` or `0`).
    #[inline]
    pub fn all_false(self) -> bool {
        unsafe { x86::_mm_movemask_ps(x86::_mm_castsi128_ps(self.0)) == 0x00 }
    }

    // Extraction

    #[inline]
    pub fn xy(self) -> U32x2 {
        unsafe {
            // The low 64 bits of the vector hold lanes x and y.
            let swizzled = self.0;
            U32x2(*mem::transmute::<&__m128i, &u64>(&swizzled))
        }
    }

    // Packed comparisons

    #[inline]
    pub fn packed_eq(self, other: U32x4) -> U32x4 {
        unsafe { U32x4(x86::_mm_cmpeq_epi32(self.0, other.0)) }
    }
}

impl Debug for U32x4 {
    #[inline]
    fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
        write!(f, "<{}, {}, {}, {}>", self[0], self[1], self[2], self[3])
    }
}

impl Index<usize> for U32x4 {
    type Output = u32;
    #[inline]
    fn index(&self, index: usize) -> &u32 {
        unsafe { &mem::transmute::<&__m128i, &[u32; 4]>(&self.0)[index] }
    }
}

impl PartialEq for U32x4 {
    #[inline]
    fn eq(&self, other: &U32x4) -> bool {
        self.packed_eq(*other).all_true()
    }
}

impl Not for U32x4 {
    type Output = U32x4;
    #[inline]
    fn not(self) -> U32x4 {
        self ^ U32x4::splat(!0)
    }
}

impl BitXor<U32x4> for U32x4 {
    type Output = U32x4;
    #[inline]
    fn bitxor(self, other: U32x4) -> U32x4 {
        unsafe { U32x4(x86::_mm_xor_si128(self.0, other.0)) }
    }
}

impl Shr<u32> for U32x4 {
    type Output = U32x4;
    #[inline]
    fn shr(self, amount: u32) -> U32x4 {
        // `_mm_srl_epi32` takes the shift count from the low 64 bits of its
        // second operand.
        unsafe { U32x4(x86::_mm_srl_epi32(self.0, U32x4::new(amount, 0, 0, 0).0)) }
    }
}

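// A minimal illustrative test sketch for the wrappers above, assuming an
// SSE4.1-capable target (required by `floor`, `ceil`, integer `min`/`max`,
// and `_mm_mullo_epi32`) and the `xwyz`-style methods from the swizzle
// submodules.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn f32x4_arithmetic_and_extraction() {
        let a = F32x4::new(1.0, 2.0, 3.0, 4.0);
        let b = F32x4::splat(2.0);
        assert_eq!(a + b, F32x4::new(3.0, 4.0, 5.0, 6.0));
        // `zw` extracts the high two lanes into an F32x2.
        assert_eq!(a.zw(), F32x2::new(3.0, 4.0));
        // `concat_xy_zw` takes the low half of `self` and the high half of
        // `other`.
        assert_eq!(a.concat_xy_zw(b), F32x4::new(1.0, 2.0, 2.0, 2.0));
    }

    #[test]
    fn packed_comparisons_yield_lane_masks() {
        let mask = I32x4::new(1, 5, 3, 7).packed_gt(I32x4::splat(4));
        // Each lane is all ones where the comparison held and zero elsewhere.
        assert_eq!(mask[0], 0);
        assert_eq!(mask[1], !0);
        assert!(!mask.all_true());
        assert!(!mask.all_false());
    }
}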