//! Streaming SIMD Extensions 2 (SSE2)

#[cfg(test)]
use stdarch_test::assert_instr;

use crate::{
    core_arch::{simd::*, simd_llvm::*, x86::*},
    intrinsics,
    mem::{self, transmute},
    ptr,
};

/// Provides a hint to the processor that the code sequence is a spin-wait loop.
///
/// This can help improve the performance and power consumption of spin-wait
/// loops.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_pause)
#[inline]
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_pause() {
    // note: `pause` is guaranteed to be interpreted as a `nop` by CPUs without
    // the SSE2 target-feature - therefore it does not require any target features
    pause()
}
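
// Illustrative sketch (not part of this module's API): a spin-wait loop that
// issues `_mm_pause` between polls of an atomic flag. The `ready` flag and the
// helper name are assumptions for the example.
//
//     use core::sync::atomic::{AtomicBool, Ordering};
//
//     unsafe fn spin_until(ready: &AtomicBool) {
//         while !ready.load(Ordering::Acquire) {
//             // Hint that this is a spin-wait loop: saves power and avoids
//             // memory-order mis-speculation penalties when the loop exits.
//             _mm_pause();
//         }
//     }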

/// Invalidates and flushes the cache line that contains `p` from all levels of
/// the cache hierarchy.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clflush)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(clflush))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_clflush(p: *const u8) {
    clflush(p)
}

/// Performs a serializing operation on all load-from-memory instructions
/// that were issued prior to this instruction.
///
/// Guarantees that every load instruction that precedes, in program order,
/// the load fence instruction is globally visible before any load instruction
/// which follows the fence in program order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(lfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_lfence() {
    lfence()
}

/// Performs a serializing operation on all load-from-memory and store-to-memory
/// instructions that were issued prior to this instruction.
///
/// Guarantees that every memory access that precedes, in program order, the
/// memory fence instruction is globally visible before any memory instruction
/// which follows the fence in program order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mfence() {
    mfence()
}

/// Adds packed 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i8x16(), b.as_i8x16()))
}

/// Adds packed 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i16x8(), b.as_i16x8()))
}

/// Adds packed 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i32x4(), b.as_i32x4()))
}

/// Adds packed 64-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i64x2(), b.as_i64x2()))
}

/// Adds packed 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16()))
}

/// Adds packed 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8()))
}

/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16()))
}

/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8()))
}
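
// Illustrative sketch: wrapping vs. saturating addition on 8-bit lanes. The
// helper name and lane values are assumptions for the example; the results
// follow from two's-complement wrapping vs. clamping to `i8::MAX`.
//
//     unsafe fn adds_demo() {
//         let a = _mm_set1_epi8(120);
//         let b = _mm_set1_epi8(20);
//         // `_mm_add_epi8` wraps: 120 + 20 = 140 -> -116 as i8.
//         let wrapped = _mm_add_epi8(a, b);
//         // `_mm_adds_epi8` saturates: 120 + 20 clamps to 127.
//         let clamped = _mm_adds_epi8(a, b);
//     }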

/// Averages packed unsigned 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
    let a = simd_cast::<_, u16x16>(a.as_u8x16());
    let b = simd_cast::<_, u16x16>(b.as_u8x16());
    let r = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1));
    transmute(simd_cast::<_, u8x16>(r))
}

/// Averages packed unsigned 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
    let a = simd_cast::<_, u32x8>(a.as_u16x8());
    let b = simd_cast::<_, u32x8>(b.as_u16x8());
    let r = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1));
    transmute(simd_cast::<_, u16x8>(r))
}
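
// The rounding average computed above is `(a + b + 1) >> 1` evaluated in a
// widened type, so the carry bit is never lost. A minimal scalar sketch of the
// same formula (the helper name is an assumption for the example):
//
//     fn avg_u8(a: u8, b: u8) -> u8 {
//         ((a as u16 + b as u16 + 1) >> 1) as u8
//     }
//
//     // avg_u8(1, 2) == 2: ties round up, matching `pavgb`.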

/// Multiplies and then horizontally adds signed 16-bit integers in `a` and `b`.
///
/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Horizontally adds adjacent pairs of
/// intermediate 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmaddwd(a.as_i16x8(), b.as_i16x8()))
}
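
// `pmaddwd` is the classic building block for 16-bit dot products: each 32-bit
// output lane is `a[2i]*b[2i] + a[2i+1]*b[2i+1]`. A sketch of a dot-product
// kernel built on it (the helper name and the horizontal reduction via
// `_mm_shuffle_epi32` are assumptions for the example):
//
//     unsafe fn dot_i16x8(a: __m128i, b: __m128i) -> i32 {
//         let prod = _mm_madd_epi16(a, b); // four partial i32 sums
//         let rev = _mm_shuffle_epi32::<0b00_01_10_11>(prod); // lanes reversed
//         let sum2 = _mm_add_epi32(prod, rev);
//         let swap = _mm_shuffle_epi32::<0b00_00_00_01>(sum2);
//         // Lane 0 now holds the total of all four partial sums.
//         _mm_cvtsi128_si32(_mm_add_epi32(sum2, swap))
//     }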

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// maximum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_i16x8();
    let b = b.as_i16x8();
    transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed maximum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_u8x16();
    let b = b.as_u8x16();
    transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
}

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// minimum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_i16x8();
    let b = b.as_i16x8();
    transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed minimum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_u8x16();
    let b = b.as_u8x16();
    transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
}

/// Multiplies the packed 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
    let a = simd_cast::<_, i32x8>(a.as_i16x8());
    let b = simd_cast::<_, i32x8>(b.as_i16x8());
    let r = simd_shr(simd_mul(a, b), i32x8::splat(16));
    transmute(simd_cast::<i32x8, i16x8>(r))
}
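
// `_mm_mulhi_epi16` and `_mm_mullo_epi16` (below) return the high and low
// halves of the same 32-bit products, so interleaving them reconstructs the
// full products. A sketch (the helper name is an assumption; it widens only
// the low four lanes):
//
//     unsafe fn widening_mul_lo4(a: __m128i, b: __m128i) -> __m128i {
//         let lo = _mm_mullo_epi16(a, b);
//         let hi = _mm_mulhi_epi16(a, b);
//         // Interleave low and high halves: four full i32 products.
//         _mm_unpacklo_epi16(lo, hi)
//     }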

/// Multiplies the packed unsigned 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
    let a = simd_cast::<_, u32x8>(a.as_u16x8());
    let b = simd_cast::<_, u32x8>(b.as_u16x8());
    let r = simd_shr(simd_mul(a, b), u32x8::splat(16));
    transmute(simd_cast::<u32x8, u16x8>(r))
}

/// Multiplies the packed 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// low 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmullw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_mul(a.as_i16x8(), b.as_i16x8()))
}

/// Multiplies the low unsigned 32-bit integers from each packed 64-bit element
/// in `a` and `b`.
///
/// Returns the unsigned 64-bit results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_epu32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmuludq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_u64x2();
    let b = b.as_u64x2();
    let mask = u64x2::splat(u32::MAX.into());
    transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
}
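
// Because `pmuludq` only reads the low 32 bits of each 64-bit lane, the full
// 64-bit product never overflows: both factors are at most 2^32 - 1. Sketch
// (the helper name and values are assumptions for the example):
//
//     unsafe fn widen_mul_demo() {
//         let a = _mm_set_epi32(0, 0xFFFF_FFFFu32 as i32, 0, 2);
//         let b = _mm_set_epi32(0, 0xFFFF_FFFFu32 as i32, 0, 3);
//         // Lane 0: 2 * 3 = 6; lane 1: (2^32 - 1)^2, exact in 64 bits.
//         let prod = _mm_mul_epu32(a, b);
//     }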

/// Sums the absolute differences of packed unsigned 8-bit integers.
///
/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
/// and `b`, then horizontally sums each consecutive 8 differences to produce
/// two unsigned 16-bit integers, and packs these unsigned 16-bit integers in
/// the low 16 bits of the 64-bit elements returned.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sad_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psadbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(psadbw(a.as_u8x16(), b.as_u8x16()))
}
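
// A common trick: `psadbw` against zero sums groups of eight bytes, which
// makes it useful well beyond motion-estimation SADs. Sketch (the helper name
// is an assumption for the example):
//
//     unsafe fn sum_bytes(v: __m128i) -> u32 {
//         let sums = _mm_sad_epu8(v, _mm_setzero_si128());
//         // The low qword holds the sum of the low eight bytes, the high
//         // qword the sum of the high eight.
//         let lo = _mm_cvtsi128_si32(sums) as u32;
//         let hi = _mm_cvtsi128_si32(_mm_srli_si128::<8>(sums)) as u32;
//         lo + hi
//     }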

/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i8x16(), b.as_i8x16()))
}

/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i16x8(), b.as_i16x8()))
}

/// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i32x4(), b.as_i32x4()))
}

/// Subtracts packed 64-bit integers in `b` from packed 64-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i64x2(), b.as_i64x2()))
}

/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`
/// using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16()))
}

/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`
/// using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8()))
}

/// Subtracts packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
/// integers in `a` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16()))
}

/// Subtracts packed unsigned 16-bit integers in `b` from packed unsigned 16-bit
/// integers in `a` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8()))
}

/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_slli_si128_impl::<IMM8>(a)
}

/// Implementation detail: converts the immediate argument of the
/// `_mm_slli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        if shift > 15 {
            i
        } else {
            16 - shift + i
        }
    }
    let zero = _mm_set1_epi8(0).as_i8x16();
    transmute::<i8x16, _>(simd_shuffle!(
        zero,
        a.as_i8x16(),
        [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    ))
}

/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bslli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_slli_si128_impl::<IMM8>(a)
}
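
// Byte shifts move whole lanes across the vector, which makes them useful for
// data alignment rather than arithmetic. Sketch (the helper name and values
// are assumptions for the example):
//
//     unsafe fn byte_shift_demo() {
//         let v = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//         // Shift left one byte: element i now holds i - 1; lane 0 becomes 0.
//         let left = _mm_slli_si128::<1>(v);
//         // Shift right one byte: element i now holds i + 1; lane 15 becomes 0.
//         let right = _mm_srli_si128::<1>(v);
//     }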

/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bsrli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_srli_si128_impl::<IMM8>(a)
}

/// Shifts packed 16-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 16 {
        _mm_setzero_si128()
    } else {
        transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
    }
}

/// Shifts packed 16-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psllw(a.as_i16x8(), count.as_i16x8()))
}

/// Shifts packed 32-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 32 {
        _mm_setzero_si128()
    } else {
        transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
    }
}

/// Shifts packed 32-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(pslld(a.as_i32x4(), count.as_i32x4()))
}

/// Shifts packed 64-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 64 {
        _mm_setzero_si128()
    } else {
        transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
    }
}

/// Shifts packed 64-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
    transmute(psllq(a.as_i64x2(), count.as_i64x2()))
}

/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16)))
}
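
// Arithmetic right shifts replicate the sign bit, logical ones shift in
// zeros; the difference only shows on negative lanes. Sketch (the helper name
// and values are assumptions for the example):
//
//     unsafe fn shift_demo() {
//         let v = _mm_set1_epi16(-2); // 0xFFFE in each lane
//         // Sign-extending: each lane becomes -1 (0xFFFF).
//         let arith = _mm_srai_epi16::<1>(v);
//         // Zero-filling: each lane becomes 0x7FFF (32767).
//         let logic = _mm_srli_epi16::<1>(v);
//     }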

/// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psraw(a.as_i16x8(), count.as_i16x8()))
}

/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31))))
}

/// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrad(a.as_i32x4(), count.as_i32x4()))
}

/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_srli_si128_impl::<IMM8>(a)
}

/// Implementation detail: converts the immediate argument of the
/// `_mm_srli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    const fn mask(shift: i32, i: u32) -> u32 {
        if (shift as u32) > 15 {
            i + 16
        } else {
            i + (shift as u32)
        }
    }
    let zero = _mm_set1_epi8(0).as_i8x16();
    let x: i8x16 = simd_shuffle!(
        a.as_i8x16(),
        zero,
        [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    );
    transmute(x)
}

/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 16 {
        _mm_setzero_si128()
    } else {
        transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
    }
}

/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrlw(a.as_i16x8(), count.as_i16x8()))
}

/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 32 {
        _mm_setzero_si128()
    } else {
        transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
    }
}

/// Shifts packed 32-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrld(a.as_i32x4(), count.as_i32x4()))
}

/// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 64 {
        _mm_setzero_si128()
    } else {
        transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
    }
}

/// Shifts packed 64-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrlq(a.as_i64x2(), count.as_i64x2()))
}

/// Computes the bitwise AND of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_and(a, b)
}

/// Computes the bitwise NOT of 128 bits (representing integer data) in `a` and
/// then AND with `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_and(simd_xor(_mm_set1_epi8(-1), a), b)
}
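
// `andnot` enables the classic branchless select: `(mask & b) | (!mask & c)`
// picks lanes from `b` where `mask` is all-ones and from `c` elsewhere.
// Sketch (the helper name is an assumption for the example):
//
//     unsafe fn blend_bits(mask: __m128i, b: __m128i, c: __m128i) -> __m128i {
//         _mm_or_si128(_mm_and_si128(mask, b), _mm_andnot_si128(mask, c))
//     }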

/// Computes the bitwise OR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_or(a, b)
}

/// Computes the bitwise XOR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_xor(a, b)
}

/// Compares packed 8-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16()))
}

/// Compares packed 16-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed 32-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4()))
}

/// Compares packed 8-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16()))
}

/// Compares packed 16-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed 32-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4()))
}

/// Compares packed 8-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16()))
}

/// Compares packed 16-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed 32-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4()))
}
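
// Comparisons return all-ones (-1) or all-zero lanes, which composes directly
// with the bitwise select idiom shown above. For example, a branchless
// per-lane maximum of signed 32-bit integers; a sketch, since SSE2 has no
// `pmaxsd` (that instruction arrived with SSE4.1):
//
//     unsafe fn max_epi32_sse2(a: __m128i, b: __m128i) -> __m128i {
//         let gt = _mm_cmpgt_epi32(a, b);
//         _mm_or_si128(_mm_and_si128(gt, a), _mm_andnot_si128(gt, b))
//     }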

/// Converts the lower two packed 32-bit integers in `a` to packed
/// double-precision (64-bit) floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
    let a = a.as_i32x4();
    simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1]))
}

/// Returns `a` with its lower element replaced by `b` after converting it to
/// an `f64`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsi2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
    simd_insert(a, 0, b as f64)
}

/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit)
/// floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
    transmute(simd_cast::<_, f32x4>(a.as_i32x4()))
}

/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtps_epi32(a: __m128) -> __m128i {
    transmute(cvtps2dq(a))
}

/// Returns a vector whose lowest element is `a` and all higher elements are
/// `0`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi32_si128(a: i32) -> __m128i {
    transmute(i32x4::new(a, 0, 0, 0))
}

/// Returns the lowest element of `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi128_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
    simd_extract(a.as_i32x4(), 0)
}

/// Sets packed 64-bit integers with the supplied values, from highest to
/// lowest.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi64x)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
    transmute(i64x2::new(e0, e1))
}

/// Sets packed 32-bit integers with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    transmute(i32x4::new(e0, e1, e2, e3))
}

/// Sets packed 16-bit integers with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
}

/// Sets packed 8-bit integers with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    #[rustfmt::skip]
    transmute(i8x16::new(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    ))
}

/// Broadcasts 64-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi64x)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi64x(a: i64) -> __m128i {
    _mm_set_epi64x(a, a)
}

/// Broadcasts 32-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi32(a: i32) -> __m128i {
    _mm_set_epi32(a, a, a, a)
}

/// Broadcasts 16-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi16(a: i16) -> __m128i {
    _mm_set_epi16(a, a, a, a, a, a, a, a)
}

/// Broadcasts 8-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi8(a: i8) -> __m128i {
    _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
}

/// Sets packed 32-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    _mm_set_epi32(e0, e1, e2, e3)
}

/// Sets packed 16-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
}

/// Sets packed 8-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    #[rustfmt::skip]
    _mm_set_epi8(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    )
}
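
// Note the argument order: `_mm_set_epi32(e3, e2, e1, e0)` places `e0` in the
// lowest lane, while `_mm_setr_epi32` lists lanes from lowest to highest. A
// quick sketch of the equivalence (the helper name is an assumption):
//
//     unsafe fn order_demo() {
//         let a = _mm_set_epi32(3, 2, 1, 0);
//         let b = _mm_setr_epi32(0, 1, 2, 3);
//         // Both vectors hold lane 0 == 0 ... lane 3 == 3.
//         let eq = _mm_cmpeq_epi32(a, b); // all lanes all-ones
//     }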

/// Returns a vector with all elements set to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setzero_si128() -> __m128i {
    _mm_set1_epi64x(0)
}

/// Loads a 64-bit integer from memory into the first element of the returned
/// vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// FIXME movsd on windows
#[cfg_attr(
    all(
        test,
        not(windows),
        not(all(target_os = "linux", target_arch = "x86_64")),
        target_arch = "x86_64"
    ),
    assert_instr(movq)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
    _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
}

/// Loads 128-bits of integer data from memory into a new vector.
///
/// `mem_addr` must be aligned on a 16-byte boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
    *mem_addr
}

/// Loads 128-bits of integer data from memory into a new vector.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
    let mut dst = _mm_undefined_si128();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        &mut dst as *mut __m128i as *mut u8,
        mem::size_of::<__m128i>(),
    );
    dst
}

/// Conditionally stores 8-bit integer elements from `a` into memory using
/// `mask`.
///
/// Elements are not stored when the highest bit is not set in the
/// corresponding element.
///
/// `mem_addr` should correspond to a 128-bit memory location and does not need
/// to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskmoveu_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maskmovdqu))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) {
    maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
}

/// Stores 128-bits of integer data from `a` into memory.
///
/// `mem_addr` must be aligned on a 16-byte boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
    *mem_addr = a;
}

/// Stores 128-bits of integer data from `a` into memory.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))] // FIXME movdqu expected
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
    mem_addr.write_unaligned(a);
}
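
// Unless an address is known to be 16-byte aligned, the unaligned variants
// are the safe default; the aligned forms fault on misaligned pointers.
// Round-trip sketch through a byte buffer (the helper name is an assumption):
//
//     unsafe fn roundtrip(bytes: &mut [u8; 16]) {
//         // `bytes` carries no 16-byte alignment guarantee: use loadu/storeu.
//         let v = _mm_loadu_si128(bytes.as_ptr() as *const __m128i);
//         let doubled = _mm_add_epi8(v, v);
//         _mm_storeu_si128(bytes.as_mut_ptr() as *mut __m128i, doubled);
//     }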

/// Stores the lower 64-bit integer of `a` to a memory location.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// FIXME mov on windows, movlps on i686
#[cfg_attr(
    all(
        test,
        not(windows),
        not(all(target_os = "linux", target_arch = "x86_64")),
        target_arch = "x86_64"
    ),
    assert_instr(movq)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
    ptr::copy_nonoverlapping(&a as *const _ as *const u8, mem_addr as *mut u8, 8);
}

/// Stores a 128-bit integer vector to a 128-bit aligned memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntps))] // FIXME movntdq
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
    intrinsics::nontemporal_store(mem_addr, a);
}

/// Stores a 32-bit integer value in the specified memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movnti))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
    intrinsics::nontemporal_store(mem_addr, a);
}

/// Returns a vector where the low element is extracted from `a` and its upper
/// element is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// FIXME movd on windows, movd on i686
#[cfg_attr(all(test, not(windows), target_arch = "x86_64"), assert_instr(movq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_move_epi64(a: __m128i) -> __m128i {
    let zero = _mm_setzero_si128();
    let r: i64x2 = simd_shuffle!(a.as_i64x2(), zero.as_i64x2(), [0, 2]);
    transmute(r)
}

/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using signed saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packsswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(packsswb(a.as_i16x8(), b.as_i16x8()))
}

/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using signed saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packssdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(packssdw(a.as_i32x4(), b.as_i32x4()))
}

/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using unsigned saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packuswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(packuswb(a.as_i16x8(), b.as_i16x8()))
}
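
// Packing narrows with saturation, so out-of-range lanes clamp rather than
// truncate. Sketch (the helper name and values are assumptions):
//
//     unsafe fn pack_demo() {
//         let a = _mm_setr_epi16(-1, 0, 200, 300, -200, 127, -128, 5);
//         // Signed saturation to i8: 200 -> 127, 300 -> 127, -200 -> -128.
//         let signed = _mm_packs_epi16(a, a);
//         // Unsigned saturation to u8: -1 -> 0, -200 -> 0, 300 -> 255.
//         let unsigned = _mm_packus_epi16(a, a);
//     }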
1391
1392/// Returns the `imm8` element of `a`.
1393///
1394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi16)
1395#[inline]
1396#[target_feature(enable = "sse2")]
1397#[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))]
1398#[rustc_legacy_const_generics(1)]
1399#[stable(feature = "simd_x86", since = "1.27.0")]
1400pub unsafe fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 {
1401 static_assert_uimm_bits!(IMM8, 3);
1402 simd_extract::<_, u16>(x:a.as_u16x8(), IMM8 as u32) as i32
1403}
1404
1405/// Returns a new vector where the `imm8` element of `a` is replaced with `i`.
1406///
1407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
    static_assert_uimm_bits!(IMM8, 3);
    transmute(simd_insert(a.as_i16x8(), IMM8 as u32, i as i16))
}

/// Returns a mask of the most significant bit of each element in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_epi8)
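///
/// For example (a sketch; assumes SSE2 is available at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             // bytes 0 and 15 have their sign bit set
///             let a = _mm_setr_epi8(-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128);
///             assert_eq!(_mm_movemask_epi8(a), 0b1000_0000_0000_0001);
///         }
///     }
/// }
/// ```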
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmovmskb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 {
    let z: i8x16 = i8x16::splat(0);
    let m: i8x16 = simd_lt(a.as_i8x16(), z);
    simd_bitmask::<_, u16>(m) as u32 as i32
}

/// Shuffles 32-bit integers in `a` using the control in `IMM8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi32)
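///
/// Each 2-bit field of `IMM8` selects one source lane; field `i` (counting
/// from the least significant bits) picks output lane `i`. For example (a
/// sketch; assumes SSE2 is available at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let a = _mm_setr_epi32(10, 20, 30, 40);
///             // fields (from low to high) select lanes 3, 2, 1, 0
///             let r = _mm_shuffle_epi32::<0b00_01_10_11>(a);
///             let e = _mm_setr_epi32(40, 30, 20, 10);
///             assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi32(r, e)), 0xFFFF);
///         }
///     }
/// }
/// ```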
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i32x4 = a.as_i32x4();
    let x: i32x4 = simd_shuffle!(
        a,
        a,
        [
            IMM8 as u32 & 0b11,
            (IMM8 as u32 >> 2) & 0b11,
            (IMM8 as u32 >> 4) & 0b11,
            (IMM8 as u32 >> 6) & 0b11,
        ],
    );
    transmute(x)
}

/// Shuffles 16-bit integers in the high 64 bits of `a` using the control in
/// `IMM8`.
///
/// Puts the results in the high 64 bits of the returned vector, with the low 64
/// bits being copied from `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflehi_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i16x8 = a.as_i16x8();
    let x: i16x8 = simd_shuffle!(
        a,
        a,
        [
            0,
            1,
            2,
            3,
            (IMM8 as u32 & 0b11) + 4,
            ((IMM8 as u32 >> 2) & 0b11) + 4,
            ((IMM8 as u32 >> 4) & 0b11) + 4,
            ((IMM8 as u32 >> 6) & 0b11) + 4,
        ],
    );
    transmute(x)
}

/// Shuffles 16-bit integers in the low 64 bits of `a` using the control in
/// `IMM8`.
///
/// Puts the results in the low 64 bits of the returned vector, with the high 64
/// bits being copied from `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflelo_epi16)
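///
/// For example (a sketch; assumes SSE2 is available at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
///             // reverse the low four words; the high four pass through
///             let r = _mm_shufflelo_epi16::<0b00_01_10_11>(a);
///             let e = _mm_setr_epi16(4, 3, 2, 1, 5, 6, 7, 8);
///             assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi16(r, e)), 0xFFFF);
///         }
///     }
/// }
/// ```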
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i16x8 = a.as_i16x8();
    let x: i16x8 = simd_shuffle!(
        a,
        a,
        [
            IMM8 as u32 & 0b11,
            (IMM8 as u32 >> 2) & 0b11,
            (IMM8 as u32 >> 4) & 0b11,
            (IMM8 as u32 >> 6) & 0b11,
            4,
            5,
            6,
            7,
        ],
    );
    transmute(x)
}

/// Unpacks and interleaves 8-bit integers from the high half of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpckhbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_shuffle!(
        a.as_i8x16(),
        b.as_i8x16(),
        [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
    ))
}

/// Unpacks and interleaves 16-bit integers from the high half of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpckhwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
    let x: i16x8 = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
    transmute::<i16x8, _>(x)
}

/// Unpacks and interleaves 32-bit integers from the high half of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7]))
}

/// Unpacks and interleaves 64-bit integers from the high half of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3]))
}

/// Unpacks and interleaves 8-bit integers from the low half of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpcklbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_shuffle!(
        a.as_i8x16(),
        b.as_i8x16(),
        [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
    ))
}

/// Unpacks and interleaves 16-bit integers from the low half of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi16)
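///
/// For example (a sketch; assumes SSE2 is available at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
///             let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
///             let r = _mm_unpacklo_epi16(a, b);
///             // the low halves of `a` and `b` are interleaved
///             let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
///             assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi16(r, e)), 0xFFFF);
///         }
///     }
/// }
/// ```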
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpcklwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
    let x: i16x8 = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
    transmute::<i16x8, _>(x)
}

/// Unpacks and interleaves 32-bit integers from the low half of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5]))
}

/// Unpacks and interleaves 64-bit integers from the low half of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2]))
}

/// Returns a new vector with the low element of `a` replaced by the sum of the
/// low elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_sd)
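///
/// Only the low lane is affected (a sketch; assumes SSE2 is available at
/// runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let a = _mm_setr_pd(1.0, 10.0);
///             let b = _mm_setr_pd(2.0, 20.0);
///             let r = _mm_add_sd(a, b);
///             assert_eq!(_mm_cvtsd_f64(r), 3.0); // low lane: 1.0 + 2.0
///             let mut hi = 0.0;
///             _mm_storeh_pd(&mut hi, r);
///             assert_eq!(hi, 10.0); // high lane is copied from `a`
///         }
///     }
/// }
/// ```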
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(addsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b))
}

/// Adds packed double-precision (64-bit) floating-point elements in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(addpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_add(a, b)
}

/// Returns a new vector with the low element of `a` replaced by the result of
/// dividing the lower element of `a` by the lower element of `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(divsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b))
}

/// Divides packed double-precision (64-bit) floating-point elements in `a` by
/// packed elements in `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(divpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_div(a, b)
}

/// Returns a new vector with the low element of `a` replaced by the maximum
/// of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d {
    maxsd(a, b)
}

/// Returns a new vector with the maximum values from corresponding elements in
/// `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d {
    maxpd(a, b)
}

/// Returns a new vector with the low element of `a` replaced by the minimum
/// of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d {
    minsd(a, b)
}

/// Returns a new vector with the minimum values from corresponding elements in
/// `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
    minpd(a, b)
}

/// Returns a new vector with the low element of `a` replaced by the product of
/// the low elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mulsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b))
}

/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
/// and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mulpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_mul(a, b)
}

/// Returns a new vector with the low element of `a` replaced by the square
/// root of the lower element of `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(sqrtsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(a, 0, _mm_cvtsd_f64(sqrtsd(b)))
}

/// Returns a new vector with the square root of each of the values in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(sqrtpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sqrt_pd(a: __m128d) -> __m128d {
    simd_fsqrt(a)
}

/// Returns a new vector with the low element of `a` replaced by the result of
/// subtracting the low element of `b` from the low element of `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(subsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b))
}

/// Subtracts packed double-precision (64-bit) floating-point elements in `b`
/// from `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(subpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_sub(a, b)
}

/// Computes the bitwise AND of packed double-precision (64-bit) floating-point
/// elements in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_and_si128(a, b))
}

/// Computes the bitwise NOT of `a` and then AND with `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_andnot_si128(a, b))
}

/// Computes the bitwise OR of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_or_si128(a, b))
}

/// Computes the bitwise XOR of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_xor_si128(a, b))
}

/// Returns a new vector with the low element of `a` replaced by the equality
/// comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpeqsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 0)
}

/// Returns a new vector with the low element of `a` replaced by the less-than
/// comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 1)
}

/// Returns a new vector with the low element of `a` replaced by the
/// less-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 2)
}

/// Returns a new vector with the low element of `a` replaced by the
/// greater-than comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(_mm_cmplt_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}

/// Returns a new vector with the low element of `a` replaced by the
/// greater-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(_mm_cmple_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}

/// Returns a new vector with the low element of `a` replaced by the result
/// of comparing both of the lower elements of `a` and `b` to `NaN`. If
/// neither is equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0`
/// otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpordsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 7)
}

/// Returns a new vector with the low element of `a` replaced by the result of
/// comparing both of the lower elements of `a` and `b` to `NaN`. If either is
/// equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpunordsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 3)
}

/// Returns a new vector with the low element of `a` replaced by the not-equal
/// comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpneqsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 4)
}

/// Returns a new vector with the low element of `a` replaced by the
/// not-less-than comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 5)
}

/// Returns a new vector with the low element of `a` replaced by the
/// not-less-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 6)
}

/// Returns a new vector with the low element of `a` replaced by the
/// not-greater-than comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(_mm_cmpnlt_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}

/// Returns a new vector with the low element of `a` replaced by the
/// not-greater-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(_mm_cmpnle_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}

/// Compares corresponding elements in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_pd)
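///
/// Matching lanes become all ones and the rest all zeros (a sketch; assumes
/// SSE2 is available at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let a = _mm_setr_pd(1.0, 2.0);
///             let b = _mm_setr_pd(1.0, 3.0);
///             let r = _mm_cmpeq_pd(a, b);
///             // only lane 0 compares equal
///             assert_eq!(_mm_movemask_pd(r), 0b01);
///         }
///     }
/// }
/// ```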
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpeqpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 0)
}

/// Compares corresponding elements in `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 1)
}

/// Compares corresponding elements in `a` and `b` for less-than-or-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 2)
}

/// Compares corresponding elements in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmplt_pd(b, a)
}

/// Compares corresponding elements in `a` and `b` for greater-than-or-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmple_pd(b, a)
}

/// Compares corresponding elements in `a` and `b` to see if neither is `NaN`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpordpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 7)
}

/// Compares corresponding elements in `a` and `b` to see if either is `NaN`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpunordpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 3)
}

/// Compares corresponding elements in `a` and `b` for not-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpneqpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 4)
}

/// Compares corresponding elements in `a` and `b` for not-less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 5)
}

/// Compares corresponding elements in `a` and `b` for not-less-than-or-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 6)
}

/// Compares corresponding elements in `a` and `b` for not-greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmpnlt_pd(b, a)
}

/// Compares corresponding elements in `a` and `b` for
/// not-greater-than-or-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmpnle_pd(b, a)
}

/// Compares the lower element of `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comieq_sd)
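///
/// The result is returned as an integer flag (a sketch; assumes SSE2 is
/// available at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let a = _mm_set_sd(2.0);
///             assert_eq!(_mm_comieq_sd(a, _mm_set_sd(2.0)), 1);
///             assert_eq!(_mm_comieq_sd(a, _mm_set_sd(3.0)), 0);
///         }
///     }
/// }
/// ```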
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 {
    comieqsd(a, b)
}

/// Compares the lower element of `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comilt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 {
    comiltsd(a, b)
}

/// Compares the lower element of `a` and `b` for less-than-or-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comile_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 {
    comilesd(a, b)
}

/// Compares the lower element of `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comigt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 {
    comigtsd(a, b)
}

/// Compares the lower element of `a` and `b` for greater-than-or-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comige_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 {
    comigesd(a, b)
}

/// Compares the lower element of `a` and `b` for not-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comineq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 {
    comineqsd(a, b)
}

/// Compares the lower element of `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomieq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 {
    ucomieqsd(a, b)
}

/// Compares the lower element of `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomilt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 {
    ucomiltsd(a, b)
}

/// Compares the lower element of `a` and `b` for less-than-or-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomile_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 {
    ucomilesd(a, b)
}

/// Compares the lower element of `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomigt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 {
    ucomigtsd(a, b)
}

/// Compares the lower element of `a` and `b` for greater-than-or-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomige_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 {
    ucomigesd(a, b)
}

/// Compares the lower element of `a` and `b` for not-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomineq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
    ucomineqsd(a, b)
}

/// Converts packed double-precision (64-bit) floating-point elements in `a` to
/// packed single-precision (32-bit) floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtpd2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
    let r: f32x2 = simd_cast::<_, f32x2>(a.as_f64x2());
    let zero: f32x2 = f32x2::new(0.0, 0.0);
    transmute::<f32x4, _>(simd_shuffle!(r, zero, [0, 1, 2, 3]))
}

/// Converts packed single-precision (32-bit) floating-point elements in `a` to
/// packed double-precision (64-bit) floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtps2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtps_pd(a: __m128) -> __m128d {
    let a: f32x4 = a.as_f32x4();
    transmute(simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
}

/// Converts packed double-precision (64-bit) floating-point elements in `a` to
/// packed 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtpd_epi32(a: __m128d) -> __m128i {
    transmute(cvtpd2dq(a))
}

/// Converts the lower double-precision (64-bit) floating-point element in `a`
/// to a 32-bit integer.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_si32)
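///
/// The conversion rounds rather than truncates; the results below assume the
/// default MXCSR rounding mode (round to nearest, ties to even):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             assert_eq!(_mm_cvtsd_si32(_mm_set_sd(2.5)), 2);
///             assert_eq!(_mm_cvtsd_si32(_mm_set_sd(-1.2)), -1);
///         }
///     }
/// }
/// ```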
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsd_si32(a: __m128d) -> i32 {
    cvtsd2si(a)
}

/// Converts the lower double-precision (64-bit) floating-point element in `b`
/// to a single-precision (32-bit) floating-point element, stores the result in
/// the lower element of the return value, and copies the upper element from `a`
/// to the upper element of the return value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_ss)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
    cvtsd2ss(a, b)
}

/// Returns the lower double-precision (64-bit) floating-point element of `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_f64)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsd_f64(a: __m128d) -> f64 {
    simd_extract(a, 0)
}

/// Converts the lower single-precision (32-bit) floating-point element in `b`
/// to a double-precision (64-bit) floating-point element, stores the result in
/// the lower element of the return value, and copies the upper element from `a`
/// to the upper element of the return value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtss2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
    cvtss2sd(a, b)
}

/// Converts packed double-precision (64-bit) floating-point elements in `a` to
/// packed 32-bit integers with truncation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvttpd_epi32(a: __m128d) -> __m128i {
    transmute(cvttpd2dq(a))
}

/// Converts the lower double-precision (64-bit) floating-point element in `a`
/// to a 32-bit integer with truncation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvttsd_si32(a: __m128d) -> i32 {
    cvttsd2si(a)
}

/// Converts packed single-precision (32-bit) floating-point elements in `a` to
/// packed 32-bit integers with truncation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvttps_epi32(a: __m128) -> __m128i {
    transmute(cvttps2dq(a))
}

/// Copies double-precision (64-bit) floating-point element `a` to the lower
/// element of the return value, and zeroes the upper element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_sd(a: f64) -> __m128d {
    _mm_set_pd(0.0, a)
}

/// Broadcasts double-precision (64-bit) floating-point value `a` to all
/// elements of the return value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_pd(a: f64) -> __m128d {
    _mm_set_pd(a, a)
}

/// Broadcasts double-precision (64-bit) floating-point value `a` to all
/// elements of the return value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd1)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_pd1(a: f64) -> __m128d {
    _mm_set_pd(a, a)
}

/// Sets packed double-precision (64-bit) floating-point elements in the return
/// value with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd)
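///
/// Note the argument order: the second argument becomes the low lane. For
/// example (a sketch; assumes SSE2 is available at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let v = _mm_set_pd(2.0, 1.0);
///             assert_eq!(_mm_cvtsd_f64(v), 1.0); // low lane
///             let mut hi = 0.0;
///             _mm_storeh_pd(&mut hi, v);
///             assert_eq!(hi, 2.0); // high lane
///         }
///     }
/// }
/// ```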
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_pd(a: f64, b: f64) -> __m128d {
    __m128d(b, a)
}

/// Sets packed double-precision (64-bit) floating-point elements in the return
/// value with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
    _mm_set_pd(b, a)
}

/// Returns packed double-precision (64-bit) floating-point elements with all
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))] // FIXME xorpd expected
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setzero_pd() -> __m128d {
    _mm_set_pd(0.0, 0.0)
}

/// Returns a mask of the most significant bit of each element in `a`.
///
/// The mask is stored in the 2 least significant bits of the return value.
/// All other bits are set to `0`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_pd)
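///
/// For example (a sketch; assumes SSE2 is available at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let v = _mm_setr_pd(-1.0, 1.0);
///             // only lane 0 has its sign bit set
///             assert_eq!(_mm_movemask_pd(v), 0b01);
///         }
///     }
/// }
/// ```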
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movmskpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 {
    // Propagate the highest bit to the rest, because simd_bitmask
    // requires all-1 or all-0.
    let mask: i64x2 = simd_lt(transmute(a), i64x2::splat(0));
    simd_bitmask::<i64x2, u8>(mask).into()
}

/// Loads 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from memory into the returned vector.
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
    *(mem_addr as *const __m128d)
}

/// Loads a 64-bit double-precision value to the low element of a 128-bit
/// vector of `[2 x double]` and clears the upper element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
    _mm_setr_pd(*mem_addr, 0.)
}

/// Loads a double-precision value into the high-order bits of a 128-bit
/// vector of `[2 x double]`. The low-order bits are copied from the low-order
/// bits of the first operand.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadh_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
    _mm_setr_pd(simd_extract(a, 0), *mem_addr)
}

/// Loads a double-precision value into the low-order bits of a 128-bit
/// vector of `[2 x double]`. The high-order bits are copied from the
/// high-order bits of the first operand.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
    _mm_setr_pd(*mem_addr, simd_extract(a, 1))
}

/// Stores a 128-bit floating point vector of `[2 x double]` to a 128-bit
/// aligned memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntps))] // FIXME movntpd
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
    intrinsics::nontemporal_store(mem_addr as *mut __m128d, a);
}

/// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
    *mem_addr = simd_extract(a, 0)
}

/// Stores 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from `a` into memory. `mem_addr` must be aligned
/// on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
    *(mem_addr as *mut __m128d) = a;
}

/// Stores 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from `a` into memory.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))] // FIXME movupd expected
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
    mem_addr.cast::<__m128d>().write_unaligned(a);
}

/// Stores the lower double-precision (64-bit) floating-point element from `a`
/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store1_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
    let b: __m128d = simd_shuffle!(a, a, [0, 0]);
    *(mem_addr as *mut __m128d) = b;
}

/// Stores the lower double-precision (64-bit) floating-point element from `a`
/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd1)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
    let b: __m128d = simd_shuffle!(a, a, [0, 0]);
    *(mem_addr as *mut __m128d) = b;
}

/// Stores 2 double-precision (64-bit) floating-point elements from `a` into
/// memory in reverse order.
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storer_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
    let b: __m128d = simd_shuffle!(a, a, [1, 0]);
    *(mem_addr as *mut __m128d) = b;
}

/// Stores the upper 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeh_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
    *mem_addr = simd_extract(a, 1);
}

/// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
    *mem_addr = simd_extract(a, 0);
}

/// Loads a double-precision (64-bit) floating-point element from memory
/// into both elements of the returned vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load1_pd)
#[inline]
#[target_feature(enable = "sse2")]
// #[cfg_attr(test, assert_instr(movapd))] // FIXME LLVM uses different codegen
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
    let d: f64 = *mem_addr;
    _mm_setr_pd(d, d)
}

/// Loads a double-precision (64-bit) floating-point element from memory
/// into both elements of the returned vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd1)
#[inline]
#[target_feature(enable = "sse2")]
// #[cfg_attr(test, assert_instr(movapd))] // FIXME same as _mm_load1_pd
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
    _mm_load1_pd(mem_addr)
}

/// Loads 2 double-precision (64-bit) floating-point elements from memory into
/// the returned vector in reverse order. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadr_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
    let a: __m128d = _mm_load_pd(mem_addr);
    simd_shuffle!(a, a, [1, 0])
}

/// Loads 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from memory into the returned vector.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
    let mut dst: __m128d = _mm_undefined_pd();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        &mut dst as *mut __m128d as *mut u8,
        mem::size_of::<__m128d>(),
    );
    dst
}

/// Constructs a 128-bit floating-point vector of `[2 x double]` from two
/// 128-bit vector parameters of `[2 x double]`, using the immediate-value
/// parameter as a specifier.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_pd)
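///
/// Bit 0 of the mask selects the lane taken from `a` and bit 1 the lane taken
/// from `b` (a sketch; assumes SSE2 is available at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let a = _mm_setr_pd(1.0, 2.0);
///             let b = _mm_setr_pd(3.0, 4.0);
///             let r = _mm_shuffle_pd::<0b10>(a, b);
///             assert_eq!(_mm_cvtsd_f64(r), 1.0); // lane 0 of `a`
///             let mut hi = 0.0;
///             _mm_storeh_pd(&mut hi, r);
///             assert_eq!(hi, 4.0); // lane 1 of `b`
///         }
///     }
/// }
/// ```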
2696#[inline]
2697#[target_feature(enable = "sse2")]
2698#[cfg_attr(test, assert_instr(shufps, MASK = 2))]
2699#[rustc_legacy_const_generics(2)]
2700#[stable(feature = "simd_x86", since = "1.27.0")]
2701pub unsafe fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
2702 static_assert_uimm_bits!(MASK, 8);
2703 simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2])
2704}
2705
2706/// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower
2707/// 64 bits are set to the lower 64 bits of the second parameter. The upper
2708/// 64 bits are set to the upper 64 bits of the first parameter.
2709///
2710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_sd)
2711#[inline]
2712#[target_feature(enable = "sse2")]
2713#[cfg_attr(test, assert_instr(movsd))]
2714#[stable(feature = "simd_x86", since = "1.27.0")]
2715pub unsafe fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
2716 _mm_setr_pd(a:simd_extract(b, 0), b:simd_extract(x:a, idx:1))
2717}
2718
2719/// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
2720/// floating-point vector of `[4 x float]`.
2721///
2722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_ps)
2723#[inline]
2724#[target_feature(enable = "sse2")]
2725#[stable(feature = "simd_x86", since = "1.27.0")]
2726pub unsafe fn _mm_castpd_ps(a: __m128d) -> __m128 {
    transmute(a)
2728}
2729
2730/// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
2731/// integer vector.
2732///
2733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_si128)
2734#[inline]
2735#[target_feature(enable = "sse2")]
2736#[stable(feature = "simd_x86", since = "1.27.0")]
2737pub unsafe fn _mm_castpd_si128(a: __m128d) -> __m128i {
    transmute(a)
2739}
2740
2741/// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
2742/// floating-point vector of `[2 x double]`.
2743///
2744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_pd)
2745#[inline]
2746#[target_feature(enable = "sse2")]
2747#[stable(feature = "simd_x86", since = "1.27.0")]
2748pub unsafe fn _mm_castps_pd(a: __m128) -> __m128d {
    transmute(a)
2750}
2751
2752/// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
2753/// integer vector.
2754///
2755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_si128)
2756#[inline]
2757#[target_feature(enable = "sse2")]
2758#[stable(feature = "simd_x86", since = "1.27.0")]
2759pub unsafe fn _mm_castps_si128(a: __m128) -> __m128i {
    transmute(a)
2761}
2762
2763/// Casts a 128-bit integer vector into a 128-bit floating-point vector
2764/// of `[2 x double]`.
2765///
2766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_pd)
2767#[inline]
2768#[target_feature(enable = "sse2")]
2769#[stable(feature = "simd_x86", since = "1.27.0")]
2770pub unsafe fn _mm_castsi128_pd(a: __m128i) -> __m128d {
    transmute(a)
2772}
2773
2774/// Casts a 128-bit integer vector into a 128-bit floating-point vector
2775/// of `[4 x float]`.
2776///
2777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_ps)
2778#[inline]
2779#[target_feature(enable = "sse2")]
2780#[stable(feature = "simd_x86", since = "1.27.0")]
2781pub unsafe fn _mm_castsi128_ps(a: __m128i) -> __m128 {
    transmute(a)
2783}
2784
/// Returns a vector of type `__m128d` with indeterminate elements.
2786/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
2787/// In practice, this is equivalent to [`mem::zeroed`].
2788///
2789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_pd)
2790#[inline]
2791#[target_feature(enable = "sse2")]
2792#[stable(feature = "simd_x86", since = "1.27.0")]
2793pub unsafe fn _mm_undefined_pd() -> __m128d {
2794 __m128d(0.0, 0.0)
2795}
2796
/// Returns a vector of type `__m128i` with indeterminate elements.
2798/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
2799/// In practice, this is equivalent to [`mem::zeroed`].
2800///
2801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_si128)
2802#[inline]
2803#[target_feature(enable = "sse2")]
2804#[stable(feature = "simd_x86", since = "1.27.0")]
2805pub unsafe fn _mm_undefined_si128() -> __m128i {
2806 __m128i(0, 0)
2807}
2808
/// The resulting `__m128d` element is composed of the high-order values of
/// the two `__m128d` interleaved input elements, i.e.:
///
/// * The `[127:64]` bits are copied from the `[127:64]` bits of the second
///   input
/// * The `[63:0]` bits are copied from the `[127:64]` bits of the first
///   input
2815///
2816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_pd)
2817#[inline]
2818#[target_feature(enable = "sse2")]
2819#[cfg_attr(test, assert_instr(unpckhpd))]
2820#[stable(feature = "simd_x86", since = "1.27.0")]
2821pub unsafe fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
2822 simd_shuffle!(a, b, [1, 3])
2823}
2824
/// The resulting `__m128d` element is composed of the low-order values of
/// the two `__m128d` interleaved input elements, i.e.:
2827///
2828/// * The `[127:64]` bits are copied from the `[63:0]` bits of the second input
2829/// * The `[63:0]` bits are copied from the `[63:0]` bits of the first input
2830///
2831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_pd)
2832#[inline]
2833#[target_feature(enable = "sse2")]
2834#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))]
2835#[stable(feature = "simd_x86", since = "1.27.0")]
2836pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
2837 simd_shuffle!(a, b, [0, 2])
2838}
2839
2840#[allow(improper_ctypes)]
2841extern "C" {
2842 #[link_name = "llvm.x86.sse2.pause"]
2843 fn pause();
2844 #[link_name = "llvm.x86.sse2.clflush"]
2845 fn clflush(p: *const u8);
2846 #[link_name = "llvm.x86.sse2.lfence"]
2847 fn lfence();
2848 #[link_name = "llvm.x86.sse2.mfence"]
2849 fn mfence();
2850 #[link_name = "llvm.x86.sse2.pmadd.wd"]
2851 fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
2852 #[link_name = "llvm.x86.sse2.psad.bw"]
2853 fn psadbw(a: u8x16, b: u8x16) -> u64x2;
2854 #[link_name = "llvm.x86.sse2.psll.w"]
2855 fn psllw(a: i16x8, count: i16x8) -> i16x8;
2856 #[link_name = "llvm.x86.sse2.psll.d"]
2857 fn pslld(a: i32x4, count: i32x4) -> i32x4;
2858 #[link_name = "llvm.x86.sse2.psll.q"]
2859 fn psllq(a: i64x2, count: i64x2) -> i64x2;
2860 #[link_name = "llvm.x86.sse2.psra.w"]
2861 fn psraw(a: i16x8, count: i16x8) -> i16x8;
2862 #[link_name = "llvm.x86.sse2.psra.d"]
2863 fn psrad(a: i32x4, count: i32x4) -> i32x4;
2864 #[link_name = "llvm.x86.sse2.psrl.w"]
2865 fn psrlw(a: i16x8, count: i16x8) -> i16x8;
2866 #[link_name = "llvm.x86.sse2.psrl.d"]
2867 fn psrld(a: i32x4, count: i32x4) -> i32x4;
2868 #[link_name = "llvm.x86.sse2.psrl.q"]
2869 fn psrlq(a: i64x2, count: i64x2) -> i64x2;
2870 #[link_name = "llvm.x86.sse2.cvtps2dq"]
2871 fn cvtps2dq(a: __m128) -> i32x4;
2872 #[link_name = "llvm.x86.sse2.maskmov.dqu"]
2873 fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
2874 #[link_name = "llvm.x86.sse2.packsswb.128"]
2875 fn packsswb(a: i16x8, b: i16x8) -> i8x16;
2876 #[link_name = "llvm.x86.sse2.packssdw.128"]
2877 fn packssdw(a: i32x4, b: i32x4) -> i16x8;
2878 #[link_name = "llvm.x86.sse2.packuswb.128"]
2879 fn packuswb(a: i16x8, b: i16x8) -> u8x16;
2880 #[link_name = "llvm.x86.sse2.max.sd"]
2881 fn maxsd(a: __m128d, b: __m128d) -> __m128d;
2882 #[link_name = "llvm.x86.sse2.max.pd"]
2883 fn maxpd(a: __m128d, b: __m128d) -> __m128d;
2884 #[link_name = "llvm.x86.sse2.min.sd"]
2885 fn minsd(a: __m128d, b: __m128d) -> __m128d;
2886 #[link_name = "llvm.x86.sse2.min.pd"]
2887 fn minpd(a: __m128d, b: __m128d) -> __m128d;
2888 #[link_name = "llvm.x86.sse2.sqrt.sd"]
2889 fn sqrtsd(a: __m128d) -> __m128d;
2890 #[link_name = "llvm.x86.sse2.sqrt.pd"]
2891 fn sqrtpd(a: __m128d) -> __m128d;
2892 #[link_name = "llvm.x86.sse2.cmp.sd"]
2893 fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
2894 #[link_name = "llvm.x86.sse2.cmp.pd"]
2895 fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
2896 #[link_name = "llvm.x86.sse2.comieq.sd"]
2897 fn comieqsd(a: __m128d, b: __m128d) -> i32;
2898 #[link_name = "llvm.x86.sse2.comilt.sd"]
2899 fn comiltsd(a: __m128d, b: __m128d) -> i32;
2900 #[link_name = "llvm.x86.sse2.comile.sd"]
2901 fn comilesd(a: __m128d, b: __m128d) -> i32;
2902 #[link_name = "llvm.x86.sse2.comigt.sd"]
2903 fn comigtsd(a: __m128d, b: __m128d) -> i32;
2904 #[link_name = "llvm.x86.sse2.comige.sd"]
2905 fn comigesd(a: __m128d, b: __m128d) -> i32;
2906 #[link_name = "llvm.x86.sse2.comineq.sd"]
2907 fn comineqsd(a: __m128d, b: __m128d) -> i32;
2908 #[link_name = "llvm.x86.sse2.ucomieq.sd"]
2909 fn ucomieqsd(a: __m128d, b: __m128d) -> i32;
2910 #[link_name = "llvm.x86.sse2.ucomilt.sd"]
2911 fn ucomiltsd(a: __m128d, b: __m128d) -> i32;
2912 #[link_name = "llvm.x86.sse2.ucomile.sd"]
2913 fn ucomilesd(a: __m128d, b: __m128d) -> i32;
2914 #[link_name = "llvm.x86.sse2.ucomigt.sd"]
2915 fn ucomigtsd(a: __m128d, b: __m128d) -> i32;
2916 #[link_name = "llvm.x86.sse2.ucomige.sd"]
2917 fn ucomigesd(a: __m128d, b: __m128d) -> i32;
2918 #[link_name = "llvm.x86.sse2.ucomineq.sd"]
2919 fn ucomineqsd(a: __m128d, b: __m128d) -> i32;
2920 #[link_name = "llvm.x86.sse2.cvtpd2dq"]
2921 fn cvtpd2dq(a: __m128d) -> i32x4;
2922 #[link_name = "llvm.x86.sse2.cvtsd2si"]
2923 fn cvtsd2si(a: __m128d) -> i32;
2924 #[link_name = "llvm.x86.sse2.cvtsd2ss"]
2925 fn cvtsd2ss(a: __m128, b: __m128d) -> __m128;
2926 #[link_name = "llvm.x86.sse2.cvtss2sd"]
2927 fn cvtss2sd(a: __m128d, b: __m128) -> __m128d;
2928 #[link_name = "llvm.x86.sse2.cvttpd2dq"]
2929 fn cvttpd2dq(a: __m128d) -> i32x4;
2930 #[link_name = "llvm.x86.sse2.cvttsd2si"]
2931 fn cvttsd2si(a: __m128d) -> i32;
2932 #[link_name = "llvm.x86.sse2.cvttps2dq"]
2933 fn cvttps2dq(a: __m128) -> i32x4;
2934}
2935
2936#[cfg(test)]
2937mod tests {
2938 use crate::{
2939 core_arch::{simd::*, x86::*},
2940 hint::black_box,
2941 };
2942 use std::{
2943 boxed, f32,
2944 f64::{self, NAN},
2945 i32,
2946 mem::{self, transmute},
2947 };
2948 use stdarch_test::simd_test;
2949
2950 #[test]
2951 fn test_mm_pause() {
2952 unsafe { _mm_pause() }
2953 }
2954
2955 #[simd_test(enable = "sse2")]
2956 unsafe fn test_mm_clflush() {
2957 let x = 0_u8;
2958 _mm_clflush(&x as *const _);
2959 }
2960
2961 #[simd_test(enable = "sse2")]
2962 // Miri cannot support this until it is clear how it fits in the Rust memory model
2963 #[cfg_attr(miri, ignore)]
2964 unsafe fn test_mm_lfence() {
2965 _mm_lfence();
2966 }
2967
2968 #[simd_test(enable = "sse2")]
2969 // Miri cannot support this until it is clear how it fits in the Rust memory model
2970 #[cfg_attr(miri, ignore)]
2971 unsafe fn test_mm_mfence() {
2972 _mm_mfence();
2973 }
2974
2975 #[simd_test(enable = "sse2")]
2976 unsafe fn test_mm_add_epi8() {
2977 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2978 #[rustfmt::skip]
2979 let b = _mm_setr_epi8(
2980 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2981 );
2982 let r = _mm_add_epi8(a, b);
2983 #[rustfmt::skip]
2984 let e = _mm_setr_epi8(
2985 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
2986 );
2987 assert_eq_m128i(r, e);
2988 }
2989
2990 #[simd_test(enable = "sse2")]
2991 unsafe fn test_mm_add_epi8_overflow() {
2992 let a = _mm_set1_epi8(0x7F);
2993 let b = _mm_set1_epi8(1);
2994 let r = _mm_add_epi8(a, b);
2995 assert_eq_m128i(r, _mm_set1_epi8(-128));
2996 }
2997
2998 #[simd_test(enable = "sse2")]
2999 unsafe fn test_mm_add_epi16() {
3000 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3001 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3002 let r = _mm_add_epi16(a, b);
3003 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3004 assert_eq_m128i(r, e);
3005 }
3006
3007 #[simd_test(enable = "sse2")]
3008 unsafe fn test_mm_add_epi32() {
3009 let a = _mm_setr_epi32(0, 1, 2, 3);
3010 let b = _mm_setr_epi32(4, 5, 6, 7);
3011 let r = _mm_add_epi32(a, b);
3012 let e = _mm_setr_epi32(4, 6, 8, 10);
3013 assert_eq_m128i(r, e);
3014 }
3015
3016 #[simd_test(enable = "sse2")]
3017 unsafe fn test_mm_add_epi64() {
3018 let a = _mm_setr_epi64x(0, 1);
3019 let b = _mm_setr_epi64x(2, 3);
3020 let r = _mm_add_epi64(a, b);
3021 let e = _mm_setr_epi64x(2, 4);
3022 assert_eq_m128i(r, e);
3023 }
3024
3025 #[simd_test(enable = "sse2")]
3026 unsafe fn test_mm_adds_epi8() {
3027 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3028 #[rustfmt::skip]
3029 let b = _mm_setr_epi8(
3030 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3031 );
3032 let r = _mm_adds_epi8(a, b);
3033 #[rustfmt::skip]
3034 let e = _mm_setr_epi8(
3035 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3036 );
3037 assert_eq_m128i(r, e);
3038 }
3039
3040 #[simd_test(enable = "sse2")]
3041 unsafe fn test_mm_adds_epi8_saturate_positive() {
3042 let a = _mm_set1_epi8(0x7F);
3043 let b = _mm_set1_epi8(1);
3044 let r = _mm_adds_epi8(a, b);
3045 assert_eq_m128i(r, a);
3046 }
3047
3048 #[simd_test(enable = "sse2")]
3049 unsafe fn test_mm_adds_epi8_saturate_negative() {
3050 let a = _mm_set1_epi8(-0x80);
3051 let b = _mm_set1_epi8(-1);
3052 let r = _mm_adds_epi8(a, b);
3053 assert_eq_m128i(r, a);
3054 }
3055
3056 #[simd_test(enable = "sse2")]
3057 unsafe fn test_mm_adds_epi16() {
3058 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3059 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3060 let r = _mm_adds_epi16(a, b);
3061 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3062 assert_eq_m128i(r, e);
3063 }
3064
3065 #[simd_test(enable = "sse2")]
3066 unsafe fn test_mm_adds_epi16_saturate_positive() {
3067 let a = _mm_set1_epi16(0x7FFF);
3068 let b = _mm_set1_epi16(1);
3069 let r = _mm_adds_epi16(a, b);
3070 assert_eq_m128i(r, a);
3071 }
3072
3073 #[simd_test(enable = "sse2")]
3074 unsafe fn test_mm_adds_epi16_saturate_negative() {
3075 let a = _mm_set1_epi16(-0x8000);
3076 let b = _mm_set1_epi16(-1);
3077 let r = _mm_adds_epi16(a, b);
3078 assert_eq_m128i(r, a);
3079 }
3080
3081 #[simd_test(enable = "sse2")]
3082 unsafe fn test_mm_adds_epu8() {
3083 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3084 #[rustfmt::skip]
3085 let b = _mm_setr_epi8(
3086 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3087 );
3088 let r = _mm_adds_epu8(a, b);
3089 #[rustfmt::skip]
3090 let e = _mm_setr_epi8(
3091 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3092 );
3093 assert_eq_m128i(r, e);
3094 }
3095
3096 #[simd_test(enable = "sse2")]
3097 unsafe fn test_mm_adds_epu8_saturate() {
3098 let a = _mm_set1_epi8(!0);
3099 let b = _mm_set1_epi8(1);
3100 let r = _mm_adds_epu8(a, b);
3101 assert_eq_m128i(r, a);
3102 }
3103
3104 #[simd_test(enable = "sse2")]
3105 unsafe fn test_mm_adds_epu16() {
3106 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3107 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3108 let r = _mm_adds_epu16(a, b);
3109 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3110 assert_eq_m128i(r, e);
3111 }
3112
3113 #[simd_test(enable = "sse2")]
3114 unsafe fn test_mm_adds_epu16_saturate() {
3115 let a = _mm_set1_epi16(!0);
3116 let b = _mm_set1_epi16(1);
3117 let r = _mm_adds_epu16(a, b);
3118 assert_eq_m128i(r, a);
3119 }
3120
3121 #[simd_test(enable = "sse2")]
3122 unsafe fn test_mm_avg_epu8() {
3123 let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
3124 let r = _mm_avg_epu8(a, b);
3125 assert_eq_m128i(r, _mm_set1_epi8(6));
3126 }
3127
3128 #[simd_test(enable = "sse2")]
3129 unsafe fn test_mm_avg_epu16() {
3130 let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
3131 let r = _mm_avg_epu16(a, b);
3132 assert_eq_m128i(r, _mm_set1_epi16(6));
3133 }
3134
3135 #[simd_test(enable = "sse2")]
3136 unsafe fn test_mm_madd_epi16() {
3137 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
3138 let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
3139 let r = _mm_madd_epi16(a, b);
3140 let e = _mm_setr_epi32(29, 81, 149, 233);
3141 assert_eq_m128i(r, e);
3142 }
3143
3144 #[simd_test(enable = "sse2")]
3145 unsafe fn test_mm_max_epi16() {
3146 let a = _mm_set1_epi16(1);
3147 let b = _mm_set1_epi16(-1);
3148 let r = _mm_max_epi16(a, b);
3149 assert_eq_m128i(r, a);
3150 }
3151
3152 #[simd_test(enable = "sse2")]
3153 unsafe fn test_mm_max_epu8() {
3154 let a = _mm_set1_epi8(1);
3155 let b = _mm_set1_epi8(!0);
3156 let r = _mm_max_epu8(a, b);
3157 assert_eq_m128i(r, b);
3158 }
3159
3160 #[simd_test(enable = "sse2")]
3161 unsafe fn test_mm_min_epi16() {
3162 let a = _mm_set1_epi16(1);
3163 let b = _mm_set1_epi16(-1);
3164 let r = _mm_min_epi16(a, b);
3165 assert_eq_m128i(r, b);
3166 }
3167
3168 #[simd_test(enable = "sse2")]
3169 unsafe fn test_mm_min_epu8() {
3170 let a = _mm_set1_epi8(1);
3171 let b = _mm_set1_epi8(!0);
3172 let r = _mm_min_epu8(a, b);
3173 assert_eq_m128i(r, a);
3174 }
3175
3176 #[simd_test(enable = "sse2")]
3177 unsafe fn test_mm_mulhi_epi16() {
3178 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3179 let r = _mm_mulhi_epi16(a, b);
3180 assert_eq_m128i(r, _mm_set1_epi16(-16));
3181 }
3182
3183 #[simd_test(enable = "sse2")]
3184 unsafe fn test_mm_mulhi_epu16() {
3185 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
3186 let r = _mm_mulhi_epu16(a, b);
3187 assert_eq_m128i(r, _mm_set1_epi16(15));
3188 }
3189
3190 #[simd_test(enable = "sse2")]
3191 unsafe fn test_mm_mullo_epi16() {
3192 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3193 let r = _mm_mullo_epi16(a, b);
3194 assert_eq_m128i(r, _mm_set1_epi16(-17960));
3195 }
3196
3197 #[simd_test(enable = "sse2")]
3198 unsafe fn test_mm_mul_epu32() {
3199 let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
3200 let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
3201 let r = _mm_mul_epu32(a, b);
3202 let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
3203 assert_eq_m128i(r, e);
3204 }
3205
3206 #[simd_test(enable = "sse2")]
3207 unsafe fn test_mm_sad_epu8() {
3208 #[rustfmt::skip]
3209 let a = _mm_setr_epi8(
3210 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
3211 1, 2, 3, 4,
3212 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
3213 1, 2, 3, 4,
3214 );
3215 let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
3216 let r = _mm_sad_epu8(a, b);
3217 let e = _mm_setr_epi64x(1020, 614);
3218 assert_eq_m128i(r, e);
3219 }
3220
3221 #[simd_test(enable = "sse2")]
3222 unsafe fn test_mm_sub_epi8() {
3223 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
3224 let r = _mm_sub_epi8(a, b);
3225 assert_eq_m128i(r, _mm_set1_epi8(-1));
3226 }
3227
3228 #[simd_test(enable = "sse2")]
3229 unsafe fn test_mm_sub_epi16() {
3230 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
3231 let r = _mm_sub_epi16(a, b);
3232 assert_eq_m128i(r, _mm_set1_epi16(-1));
3233 }
3234
3235 #[simd_test(enable = "sse2")]
3236 unsafe fn test_mm_sub_epi32() {
3237 let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
3238 let r = _mm_sub_epi32(a, b);
3239 assert_eq_m128i(r, _mm_set1_epi32(-1));
3240 }
3241
3242 #[simd_test(enable = "sse2")]
3243 unsafe fn test_mm_sub_epi64() {
3244 let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
3245 let r = _mm_sub_epi64(a, b);
3246 assert_eq_m128i(r, _mm_set1_epi64x(-1));
3247 }
3248
3249 #[simd_test(enable = "sse2")]
3250 unsafe fn test_mm_subs_epi8() {
3251 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3252 let r = _mm_subs_epi8(a, b);
3253 assert_eq_m128i(r, _mm_set1_epi8(3));
3254 }
3255
3256 #[simd_test(enable = "sse2")]
3257 unsafe fn test_mm_subs_epi8_saturate_positive() {
3258 let a = _mm_set1_epi8(0x7F);
3259 let b = _mm_set1_epi8(-1);
3260 let r = _mm_subs_epi8(a, b);
3261 assert_eq_m128i(r, a);
3262 }
3263
3264 #[simd_test(enable = "sse2")]
3265 unsafe fn test_mm_subs_epi8_saturate_negative() {
3266 let a = _mm_set1_epi8(-0x80);
3267 let b = _mm_set1_epi8(1);
3268 let r = _mm_subs_epi8(a, b);
3269 assert_eq_m128i(r, a);
3270 }
3271
3272 #[simd_test(enable = "sse2")]
3273 unsafe fn test_mm_subs_epi16() {
3274 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3275 let r = _mm_subs_epi16(a, b);
3276 assert_eq_m128i(r, _mm_set1_epi16(3));
3277 }
3278
3279 #[simd_test(enable = "sse2")]
3280 unsafe fn test_mm_subs_epi16_saturate_positive() {
3281 let a = _mm_set1_epi16(0x7FFF);
3282 let b = _mm_set1_epi16(-1);
3283 let r = _mm_subs_epi16(a, b);
3284 assert_eq_m128i(r, a);
3285 }
3286
3287 #[simd_test(enable = "sse2")]
3288 unsafe fn test_mm_subs_epi16_saturate_negative() {
3289 let a = _mm_set1_epi16(-0x8000);
3290 let b = _mm_set1_epi16(1);
3291 let r = _mm_subs_epi16(a, b);
3292 assert_eq_m128i(r, a);
3293 }
3294
3295 #[simd_test(enable = "sse2")]
3296 unsafe fn test_mm_subs_epu8() {
3297 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3298 let r = _mm_subs_epu8(a, b);
3299 assert_eq_m128i(r, _mm_set1_epi8(3));
3300 }
3301
3302 #[simd_test(enable = "sse2")]
3303 unsafe fn test_mm_subs_epu8_saturate() {
3304 let a = _mm_set1_epi8(0);
3305 let b = _mm_set1_epi8(1);
3306 let r = _mm_subs_epu8(a, b);
3307 assert_eq_m128i(r, a);
3308 }
3309
3310 #[simd_test(enable = "sse2")]
3311 unsafe fn test_mm_subs_epu16() {
3312 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3313 let r = _mm_subs_epu16(a, b);
3314 assert_eq_m128i(r, _mm_set1_epi16(3));
3315 }
3316
3317 #[simd_test(enable = "sse2")]
3318 unsafe fn test_mm_subs_epu16_saturate() {
3319 let a = _mm_set1_epi16(0);
3320 let b = _mm_set1_epi16(1);
3321 let r = _mm_subs_epu16(a, b);
3322 assert_eq_m128i(r, a);
3323 }
3324
3325 #[simd_test(enable = "sse2")]
3326 unsafe fn test_mm_slli_si128() {
3327 #[rustfmt::skip]
3328 let a = _mm_setr_epi8(
3329 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3330 );
3331 let r = _mm_slli_si128::<1>(a);
3332 let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3333 assert_eq_m128i(r, e);
3334
3335 #[rustfmt::skip]
3336 let a = _mm_setr_epi8(
3337 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3338 );
3339 let r = _mm_slli_si128::<15>(a);
3340 let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
3341 assert_eq_m128i(r, e);
3342
3343 #[rustfmt::skip]
3344 let a = _mm_setr_epi8(
3345 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3346 );
3347 let r = _mm_slli_si128::<16>(a);
3348 assert_eq_m128i(r, _mm_set1_epi8(0));
3349 }
3350
3351 #[simd_test(enable = "sse2")]
3352 unsafe fn test_mm_slli_epi16() {
3353 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3354 let r = _mm_slli_epi16::<4>(a);
3355 assert_eq_m128i(
3356 r,
3357 _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3358 );
3359 let r = _mm_slli_epi16::<16>(a);
3360 assert_eq_m128i(r, _mm_set1_epi16(0));
3361 }
3362
3363 #[simd_test(enable = "sse2")]
3364 unsafe fn test_mm_sll_epi16() {
3365 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3366 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
3367 assert_eq_m128i(
3368 r,
3369 _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3370 );
3371 let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0));
3372 assert_eq_m128i(r, a);
3373 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16));
3374 assert_eq_m128i(r, _mm_set1_epi16(0));
3375 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX));
3376 assert_eq_m128i(r, _mm_set1_epi16(0));
3377 }
3378
3379 #[simd_test(enable = "sse2")]
3380 unsafe fn test_mm_slli_epi32() {
3381 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3382 let r = _mm_slli_epi32::<4>(a);
3383 assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3384 let r = _mm_slli_epi32::<32>(a);
3385 assert_eq_m128i(r, _mm_set1_epi32(0));
3386 }
3387
3388 #[simd_test(enable = "sse2")]
3389 unsafe fn test_mm_sll_epi32() {
3390 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3391 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
3392 assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3393 let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0));
3394 assert_eq_m128i(r, a);
3395 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32));
3396 assert_eq_m128i(r, _mm_set1_epi32(0));
3397 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX));
3398 assert_eq_m128i(r, _mm_set1_epi32(0));
3399 }
3400
3401 #[simd_test(enable = "sse2")]
3402 unsafe fn test_mm_slli_epi64() {
3403 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3404 let r = _mm_slli_epi64::<4>(a);
3405 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3406 let r = _mm_slli_epi64::<64>(a);
3407 assert_eq_m128i(r, _mm_set1_epi64x(0));
3408 }
3409
3410 #[simd_test(enable = "sse2")]
3411 unsafe fn test_mm_sll_epi64() {
3412 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3413 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
3414 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3415 let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0));
3416 assert_eq_m128i(r, a);
3417 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64));
3418 assert_eq_m128i(r, _mm_set1_epi64x(0));
3419 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX));
3420 assert_eq_m128i(r, _mm_set1_epi64x(0));
3421 }
3422
3423 #[simd_test(enable = "sse2")]
3424 unsafe fn test_mm_srai_epi16() {
3425 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3426 let r = _mm_srai_epi16::<4>(a);
3427 assert_eq_m128i(
3428 r,
3429 _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3430 );
3431 let r = _mm_srai_epi16::<16>(a);
3432 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3433 }
3434
3435 #[simd_test(enable = "sse2")]
3436 unsafe fn test_mm_sra_epi16() {
3437 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3438 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
3439 assert_eq_m128i(
3440 r,
3441 _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3442 );
3443 let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0));
3444 assert_eq_m128i(r, a);
3445 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16));
3446 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3447 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX));
3448 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3449 }
3450
3451 #[simd_test(enable = "sse2")]
3452 unsafe fn test_mm_srai_epi32() {
3453 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3454 let r = _mm_srai_epi32::<4>(a);
3455 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3456 let r = _mm_srai_epi32::<32>(a);
3457 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3458 }
3459
3460 #[simd_test(enable = "sse2")]
3461 unsafe fn test_mm_sra_epi32() {
3462 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3463 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
3464 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3465 let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0));
3466 assert_eq_m128i(r, a);
3467 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32));
3468 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3469 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX));
3470 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3471 }
3472
3473 #[simd_test(enable = "sse2")]
3474 unsafe fn test_mm_srli_si128() {
3475 #[rustfmt::skip]
3476 let a = _mm_setr_epi8(
3477 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3478 );
3479 let r = _mm_srli_si128::<1>(a);
3480 #[rustfmt::skip]
3481 let e = _mm_setr_epi8(
3482 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
3483 );
3484 assert_eq_m128i(r, e);
3485
3486 #[rustfmt::skip]
3487 let a = _mm_setr_epi8(
3488 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3489 );
3490 let r = _mm_srli_si128::<15>(a);
3491 let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3492 assert_eq_m128i(r, e);
3493
3494 #[rustfmt::skip]
3495 let a = _mm_setr_epi8(
3496 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3497 );
3498 let r = _mm_srli_si128::<16>(a);
3499 assert_eq_m128i(r, _mm_set1_epi8(0));
3500 }
3501
3502 #[simd_test(enable = "sse2")]
3503 unsafe fn test_mm_srli_epi16() {
3504 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3505 let r = _mm_srli_epi16::<4>(a);
3506 assert_eq_m128i(
3507 r,
3508 _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3509 );
3510 let r = _mm_srli_epi16::<16>(a);
3511 assert_eq_m128i(r, _mm_set1_epi16(0));
3512 }
3513
3514 #[simd_test(enable = "sse2")]
3515 unsafe fn test_mm_srl_epi16() {
3516 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3517 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
3518 assert_eq_m128i(
3519 r,
3520 _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3521 );
3522 let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0));
3523 assert_eq_m128i(r, a);
3524 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16));
3525 assert_eq_m128i(r, _mm_set1_epi16(0));
3526 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX));
3527 assert_eq_m128i(r, _mm_set1_epi16(0));
3528 }
3529
3530 #[simd_test(enable = "sse2")]
3531 unsafe fn test_mm_srli_epi32() {
3532 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3533 let r = _mm_srli_epi32::<4>(a);
3534 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3535 let r = _mm_srli_epi32::<32>(a);
3536 assert_eq_m128i(r, _mm_set1_epi32(0));
3537 }
3538
3539 #[simd_test(enable = "sse2")]
3540 unsafe fn test_mm_srl_epi32() {
3541 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3542 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
3543 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3544 let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0));
3545 assert_eq_m128i(r, a);
3546 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32));
3547 assert_eq_m128i(r, _mm_set1_epi32(0));
3548 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX));
3549 assert_eq_m128i(r, _mm_set1_epi32(0));
3550 }
3551
3552 #[simd_test(enable = "sse2")]
3553 unsafe fn test_mm_srli_epi64() {
3554 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3555 let r = _mm_srli_epi64::<4>(a);
3556 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3557 let r = _mm_srli_epi64::<64>(a);
3558 assert_eq_m128i(r, _mm_set1_epi64x(0));
3559 }
3560
3561 #[simd_test(enable = "sse2")]
3562 unsafe fn test_mm_srl_epi64() {
3563 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3564 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
3565 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3566 let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0));
3567 assert_eq_m128i(r, a);
3568 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64));
3569 assert_eq_m128i(r, _mm_set1_epi64x(0));
3570 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX));
3571 assert_eq_m128i(r, _mm_set1_epi64x(0));
3572 }
3573
3574 #[simd_test(enable = "sse2")]
3575 unsafe fn test_mm_and_si128() {
3576 let a = _mm_set1_epi8(5);
3577 let b = _mm_set1_epi8(3);
3578 let r = _mm_and_si128(a, b);
3579 assert_eq_m128i(r, _mm_set1_epi8(1));
3580 }
3581
3582 #[simd_test(enable = "sse2")]
3583 unsafe fn test_mm_andnot_si128() {
3584 let a = _mm_set1_epi8(5);
3585 let b = _mm_set1_epi8(3);
3586 let r = _mm_andnot_si128(a, b);
3587 assert_eq_m128i(r, _mm_set1_epi8(2));
3588 }
3589
3590 #[simd_test(enable = "sse2")]
3591 unsafe fn test_mm_or_si128() {
3592 let a = _mm_set1_epi8(5);
3593 let b = _mm_set1_epi8(3);
3594 let r = _mm_or_si128(a, b);
3595 assert_eq_m128i(r, _mm_set1_epi8(7));
3596 }
3597
3598 #[simd_test(enable = "sse2")]
3599 unsafe fn test_mm_xor_si128() {
3600 let a = _mm_set1_epi8(5);
3601 let b = _mm_set1_epi8(3);
3602 let r = _mm_xor_si128(a, b);
3603 assert_eq_m128i(r, _mm_set1_epi8(6));
3604 }
3605
3606 #[simd_test(enable = "sse2")]
3607 unsafe fn test_mm_cmpeq_epi8() {
3608 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3609 let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
3610 let r = _mm_cmpeq_epi8(a, b);
3611 #[rustfmt::skip]
3612 assert_eq_m128i(
3613 r,
3614 _mm_setr_epi8(
3615 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3616 )
3617 );
3618 }
3619
3620 #[simd_test(enable = "sse2")]
3621 unsafe fn test_mm_cmpeq_epi16() {
3622 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3623 let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
3624 let r = _mm_cmpeq_epi16(a, b);
3625 assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
3626 }
3627
3628 #[simd_test(enable = "sse2")]
3629 unsafe fn test_mm_cmpeq_epi32() {
3630 let a = _mm_setr_epi32(0, 1, 2, 3);
3631 let b = _mm_setr_epi32(3, 2, 2, 0);
3632 let r = _mm_cmpeq_epi32(a, b);
3633 assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0));
3634 }
3635
3636 #[simd_test(enable = "sse2")]
3637 unsafe fn test_mm_cmpgt_epi8() {
3638 let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3639 let b = _mm_set1_epi8(0);
3640 let r = _mm_cmpgt_epi8(a, b);
3641 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3642 assert_eq_m128i(r, e);
3643 }
3644
3645 #[simd_test(enable = "sse2")]
3646 unsafe fn test_mm_cmpgt_epi16() {
3647 let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3648 let b = _mm_set1_epi16(0);
3649 let r = _mm_cmpgt_epi16(a, b);
3650 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3651 assert_eq_m128i(r, e);
3652 }
3653
3654 #[simd_test(enable = "sse2")]
3655 unsafe fn test_mm_cmpgt_epi32() {
3656 let a = _mm_set_epi32(5, 0, 0, 0);
3657 let b = _mm_set1_epi32(0);
3658 let r = _mm_cmpgt_epi32(a, b);
3659 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
3660 }
3661
3662 #[simd_test(enable = "sse2")]
3663 unsafe fn test_mm_cmplt_epi8() {
3664 let a = _mm_set1_epi8(0);
3665 let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3666 let r = _mm_cmplt_epi8(a, b);
3667 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3668 assert_eq_m128i(r, e);
3669 }
3670
3671 #[simd_test(enable = "sse2")]
3672 unsafe fn test_mm_cmplt_epi16() {
3673 let a = _mm_set1_epi16(0);
3674 let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3675 let r = _mm_cmplt_epi16(a, b);
3676 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3677 assert_eq_m128i(r, e);
3678 }
3679
3680 #[simd_test(enable = "sse2")]
3681 unsafe fn test_mm_cmplt_epi32() {
3682 let a = _mm_set1_epi32(0);
3683 let b = _mm_set_epi32(5, 0, 0, 0);
3684 let r = _mm_cmplt_epi32(a, b);
3685 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
3686 }
3687
3688 #[simd_test(enable = "sse2")]
3689 unsafe fn test_mm_cvtepi32_pd() {
3690 let a = _mm_set_epi32(35, 25, 15, 5);
3691 let r = _mm_cvtepi32_pd(a);
3692 assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0));
3693 }
3694
3695 #[simd_test(enable = "sse2")]
3696 unsafe fn test_mm_cvtsi32_sd() {
3697 let a = _mm_set1_pd(3.5);
3698 let r = _mm_cvtsi32_sd(a, 5);
3699 assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
3700 }
3701
3702 #[simd_test(enable = "sse2")]
3703 unsafe fn test_mm_cvtepi32_ps() {
3704 let a = _mm_setr_epi32(1, 2, 3, 4);
3705 let r = _mm_cvtepi32_ps(a);
3706 assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
3707 }
3708
3709 #[simd_test(enable = "sse2")]
3710 unsafe fn test_mm_cvtps_epi32() {
3711 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3712 let r = _mm_cvtps_epi32(a);
3713 assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
3714 }
3715
3716 #[simd_test(enable = "sse2")]
3717 unsafe fn test_mm_cvtsi32_si128() {
3718 let r = _mm_cvtsi32_si128(5);
3719 assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0));
3720 }
3721
3722 #[simd_test(enable = "sse2")]
3723 unsafe fn test_mm_cvtsi128_si32() {
3724 let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
3725 assert_eq!(r, 5);
3726 }
3727
3728 #[simd_test(enable = "sse2")]
3729 unsafe fn test_mm_set_epi64x() {
3730 let r = _mm_set_epi64x(0, 1);
3731 assert_eq_m128i(r, _mm_setr_epi64x(1, 0));
3732 }
3733
3734 #[simd_test(enable = "sse2")]
3735 unsafe fn test_mm_set_epi32() {
3736 let r = _mm_set_epi32(0, 1, 2, 3);
3737 assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0));
3738 }
3739
3740 #[simd_test(enable = "sse2")]
3741 unsafe fn test_mm_set_epi16() {
3742 let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3743 assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
3744 }
3745
3746 #[simd_test(enable = "sse2")]
3747 unsafe fn test_mm_set_epi8() {
3748 #[rustfmt::skip]
3749 let r = _mm_set_epi8(
3750 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3751 );
3752 #[rustfmt::skip]
3753 let e = _mm_setr_epi8(
3754 15, 14, 13, 12, 11, 10, 9, 8,
3755 7, 6, 5, 4, 3, 2, 1, 0,
3756 );
3757 assert_eq_m128i(r, e);
3758 }
3759
3760 #[simd_test(enable = "sse2")]
3761 unsafe fn test_mm_set1_epi64x() {
3762 let r = _mm_set1_epi64x(1);
3763 assert_eq_m128i(r, _mm_set1_epi64x(1));
3764 }
3765
3766 #[simd_test(enable = "sse2")]
3767 unsafe fn test_mm_set1_epi32() {
3768 let r = _mm_set1_epi32(1);
3769 assert_eq_m128i(r, _mm_set1_epi32(1));
3770 }
3771
3772 #[simd_test(enable = "sse2")]
3773 unsafe fn test_mm_set1_epi16() {
3774 let r = _mm_set1_epi16(1);
3775 assert_eq_m128i(r, _mm_set1_epi16(1));
3776 }
3777
3778 #[simd_test(enable = "sse2")]
3779 unsafe fn test_mm_set1_epi8() {
3780 let r = _mm_set1_epi8(1);
3781 assert_eq_m128i(r, _mm_set1_epi8(1));
3782 }
3783
3784 #[simd_test(enable = "sse2")]
3785 unsafe fn test_mm_setr_epi32() {
3786 let r = _mm_setr_epi32(0, 1, 2, 3);
3787 assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3));
3788 }
3789
3790 #[simd_test(enable = "sse2")]
3791 unsafe fn test_mm_setr_epi16() {
3792 let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3793 assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
3794 }
3795
3796 #[simd_test(enable = "sse2")]
3797 unsafe fn test_mm_setr_epi8() {
3798 #[rustfmt::skip]
3799 let r = _mm_setr_epi8(
3800 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3801 );
3802 #[rustfmt::skip]
3803 let e = _mm_setr_epi8(
3804 0, 1, 2, 3, 4, 5, 6, 7,
3805 8, 9, 10, 11, 12, 13, 14, 15,
3806 );
3807 assert_eq_m128i(r, e);
3808 }
3809
3810 #[simd_test(enable = "sse2")]
3811 unsafe fn test_mm_setzero_si128() {
3812 let r = _mm_setzero_si128();
3813 assert_eq_m128i(r, _mm_set1_epi64x(0));
3814 }
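
    // Illustrative sketch (test name is ours, not upstream's): the
    // "undefined" intrinsics return *some* valid value; in the current
    // implementation that value is all-zeros, as documented above.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_undefined_si128_zeroed_sketch() {
        let r = _mm_undefined_si128();
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }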
3815
3816 #[simd_test(enable = "sse2")]
3817 unsafe fn test_mm_loadl_epi64() {
3818 let a = _mm_setr_epi64x(6, 5);
3819 let r = _mm_loadl_epi64(&a as *const _);
3820 assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
3821 }
3822
3823 #[simd_test(enable = "sse2")]
3824 unsafe fn test_mm_load_si128() {
3825 let a = _mm_set_epi64x(5, 6);
3826 let r = _mm_load_si128(&a as *const _ as *const _);
3827 assert_eq_m128i(a, r);
3828 }
3829
3830 #[simd_test(enable = "sse2")]
3831 unsafe fn test_mm_loadu_si128() {
3832 let a = _mm_set_epi64x(5, 6);
3833 let r = _mm_loadu_si128(&a as *const _ as *const _);
3834 assert_eq_m128i(a, r);
3835 }
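
    // Illustrative sketch (test name is ours, not upstream's) covering the
    // `pd` load intrinsics defined earlier in this file.
    #[simd_test(enable = "sse2")]
    unsafe fn test_pd_loads_sketch() {
        let d = 2.5_f64;
        // `_mm_load1_pd` (and its alias `_mm_load_pd1`) broadcasts the
        // pointed-to value into both lanes.
        assert_eq_m128d(_mm_load1_pd(&d), _mm_setr_pd(2.5, 2.5));
        assert_eq_m128d(_mm_load_pd1(&d), _mm_setr_pd(2.5, 2.5));
        // `_mm_loadr_pd` loads two elements in reverse order; the source must
        // be 16-byte aligned, which an `__m128d` local guarantees.
        let a = _mm_setr_pd(1.0, 2.0);
        let p = &a as *const _ as *const f64;
        assert_eq_m128d(_mm_loadr_pd(p), _mm_setr_pd(2.0, 1.0));
        // `_mm_loadu_pd` has no alignment requirement.
        assert_eq_m128d(_mm_loadu_pd(p), a);
    }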
3836
3837 #[simd_test(enable = "sse2")]
3838 // Miri cannot support this until it is clear how it fits in the Rust memory model
3839 // (non-temporal store)
3840 #[cfg_attr(miri, ignore)]
3841 unsafe fn test_mm_maskmoveu_si128() {
3842 let a = _mm_set1_epi8(9);
3843 #[rustfmt::skip]
3844 let mask = _mm_set_epi8(
3845 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0,
3846 0, 0, 0, 0, 0, 0, 0, 0,
3847 );
3848 let mut r = _mm_set1_epi8(0);
3849 _mm_maskmoveu_si128(a, mask, &mut r as *mut _ as *mut i8);
3850 let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3851 assert_eq_m128i(r, e);
3852 }
3853
3854 #[simd_test(enable = "sse2")]
3855 unsafe fn test_mm_store_si128() {
3856 let a = _mm_set1_epi8(9);
3857 let mut r = _mm_set1_epi8(0);
3858 _mm_store_si128(&mut r as *mut _ as *mut __m128i, a);
3859 assert_eq_m128i(r, a);
3860 }
3861
3862 #[simd_test(enable = "sse2")]
3863 unsafe fn test_mm_storeu_si128() {
3864 let a = _mm_set1_epi8(9);
3865 let mut r = _mm_set1_epi8(0);
3866 _mm_storeu_si128(&mut r as *mut _ as *mut __m128i, a);
3867 assert_eq_m128i(r, a);
3868 }
3869
3870 #[simd_test(enable = "sse2")]
3871 unsafe fn test_mm_storel_epi64() {
3872 let a = _mm_setr_epi64x(2, 9);
3873 let mut r = _mm_set1_epi8(0);
3874 _mm_storel_epi64(&mut r as *mut _ as *mut __m128i, a);
3875 assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
3876 }
3877
3878 #[simd_test(enable = "sse2")]
3879 // Miri cannot support this until it is clear how it fits in the Rust memory model
3880 // (non-temporal store)
3881 #[cfg_attr(miri, ignore)]
3882 unsafe fn test_mm_stream_si128() {
3883 let a = _mm_setr_epi32(1, 2, 3, 4);
3884 let mut r = _mm_undefined_si128();
3885 _mm_stream_si128(&mut r as *mut _, a);
3886 assert_eq_m128i(r, a);
3887 }
3888
3889 #[simd_test(enable = "sse2")]
3890 // Miri cannot support this until it is clear how it fits in the Rust memory model
3891 // (non-temporal store)
3892 #[cfg_attr(miri, ignore)]
3893 unsafe fn test_mm_stream_si32() {
3894 let a: i32 = 7;
3895 let mut mem = boxed::Box::<i32>::new(-1);
3896 _mm_stream_si32(&mut *mem as *mut i32, a);
3897 assert_eq!(a, *mem);
3898 }
3899
3900 #[simd_test(enable = "sse2")]
3901 unsafe fn test_mm_move_epi64() {
3902 let a = _mm_setr_epi64x(5, 6);
3903 let r = _mm_move_epi64(a);
3904 assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
3905 }
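
    // Illustrative sketch (test name is ours, not upstream's): `_mm_move_sd`
    // takes its low lane from `b` and its high lane from `a`.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_move_sd_sketch() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(3.0, 4.0);
        assert_eq_m128d(_mm_move_sd(a, b), _mm_setr_pd(3.0, 2.0));
    }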
3906
3907 #[simd_test(enable = "sse2")]
3908 unsafe fn test_mm_packs_epi16() {
3909 let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
3910 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
3911 let r = _mm_packs_epi16(a, b);
3912 #[rustfmt::skip]
3913 assert_eq_m128i(
3914 r,
3915 _mm_setr_epi8(
3916 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
3917 )
3918 );
3919 }
3920
3921 #[simd_test(enable = "sse2")]
3922 unsafe fn test_mm_packs_epi32() {
3923 let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
3924 let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
3925 let r = _mm_packs_epi32(a, b);
3926 assert_eq_m128i(
3927 r,
3928 _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF),
3929 );
3930 }
3931
3932 #[simd_test(enable = "sse2")]
3933 unsafe fn test_mm_packus_epi16() {
3934 let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
3935 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
3936 let r = _mm_packus_epi16(a, b);
3937 assert_eq_m128i(
3938 r,
3939 _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0),
3940 );
3941 }
3942
3943 #[simd_test(enable = "sse2")]
3944 unsafe fn test_mm_extract_epi16() {
3945 let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
3946 let r1 = _mm_extract_epi16::<0>(a);
3947 let r2 = _mm_extract_epi16::<3>(a);
3948 assert_eq!(r1, 0xFFFF);
3949 assert_eq!(r2, 3);
3950 }
3951
3952 #[simd_test(enable = "sse2")]
3953 unsafe fn test_mm_insert_epi16() {
3954 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3955 let r = _mm_insert_epi16::<0>(a, 9);
3956 let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
3957 assert_eq_m128i(r, e);
3958 }
3959
3960 #[simd_test(enable = "sse2")]
3961 unsafe fn test_mm_movemask_epi8() {
3962 #[rustfmt::skip]
3963 let a = _mm_setr_epi8(
3964 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
3965 0b0101, 0b1111_0000u8 as i8, 0, 0,
3966 0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101,
3967 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
3968 );
3969 let r = _mm_movemask_epi8(a);
3970 assert_eq!(r, 0b10100110_00100101);
3971 }
3972
3973 #[simd_test(enable = "sse2")]
3974 unsafe fn test_mm_shuffle_epi32() {
3975 let a = _mm_setr_epi32(5, 10, 15, 20);
3976 let r = _mm_shuffle_epi32::<0b00_01_01_11>(a);
3977 let e = _mm_setr_epi32(20, 10, 10, 5);
3978 assert_eq_m128i(r, e);
3979 }
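
    // Illustrative sketch (test name is ours, not upstream's): only the two
    // low bits of MASK matter for `_mm_shuffle_pd`; bit 0 indexes `a` for the
    // low lane and bit 1 indexes `b` for the high lane.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shuffle_pd_sketch() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(3.0, 4.0);
        // MASK = 0b01: low lane = a[1], high lane = b[0].
        assert_eq_m128d(_mm_shuffle_pd::<0b01>(a, b), _mm_setr_pd(2.0, 3.0));
    }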
3980
3981 #[simd_test(enable = "sse2")]
3982 unsafe fn test_mm_shufflehi_epi16() {
3983 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
3984 let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a);
3985 let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
3986 assert_eq_m128i(r, e);
3987 }
3988
3989 #[simd_test(enable = "sse2")]
3990 unsafe fn test_mm_shufflelo_epi16() {
3991 let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
3992 let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a);
3993 let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
3994 assert_eq_m128i(r, e);
3995 }
3996
3997 #[simd_test(enable = "sse2")]
3998 unsafe fn test_mm_unpackhi_epi8() {
3999 #[rustfmt::skip]
4000 let a = _mm_setr_epi8(
4001 0, 1, 2, 3, 4, 5, 6, 7,
4002 8, 9, 10, 11, 12, 13, 14, 15,
4003 );
4004 #[rustfmt::skip]
4005 let b = _mm_setr_epi8(
4006 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4007 );
4008 let r = _mm_unpackhi_epi8(a, b);
4009 #[rustfmt::skip]
4010 let e = _mm_setr_epi8(
4011 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
4012 );
4013 assert_eq_m128i(r, e);
4014 }
4015
4016 #[simd_test(enable = "sse2")]
4017 unsafe fn test_mm_unpackhi_epi16() {
4018 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4019 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4020 let r = _mm_unpackhi_epi16(a, b);
4021 let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15);
4022 assert_eq_m128i(r, e);
4023 }
4024
4025 #[simd_test(enable = "sse2")]
4026 unsafe fn test_mm_unpackhi_epi32() {
4027 let a = _mm_setr_epi32(0, 1, 2, 3);
4028 let b = _mm_setr_epi32(4, 5, 6, 7);
4029 let r = _mm_unpackhi_epi32(a, b);
4030 let e = _mm_setr_epi32(2, 6, 3, 7);
4031 assert_eq_m128i(r, e);
4032 }
4033
4034 #[simd_test(enable = "sse2")]
4035 unsafe fn test_mm_unpackhi_epi64() {
4036 let a = _mm_setr_epi64x(0, 1);
4037 let b = _mm_setr_epi64x(2, 3);
4038 let r = _mm_unpackhi_epi64(a, b);
4039 let e = _mm_setr_epi64x(1, 3);
4040 assert_eq_m128i(r, e);
4041 }
4042
4043 #[simd_test(enable = "sse2")]
4044 unsafe fn test_mm_unpacklo_epi8() {
4045 #[rustfmt::skip]
4046 let a = _mm_setr_epi8(
4047 0, 1, 2, 3, 4, 5, 6, 7,
4048 8, 9, 10, 11, 12, 13, 14, 15,
4049 );
4050 #[rustfmt::skip]
4051 let b = _mm_setr_epi8(
4052 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4053 );
4054 let r = _mm_unpacklo_epi8(a, b);
4055 #[rustfmt::skip]
4056 let e = _mm_setr_epi8(
4057 0, 16, 1, 17, 2, 18, 3, 19,
4058 4, 20, 5, 21, 6, 22, 7, 23,
4059 );
4060 assert_eq_m128i(r, e);
4061 }
4062
4063 #[simd_test(enable = "sse2")]
4064 unsafe fn test_mm_unpacklo_epi16() {
4065 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4066 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4067 let r = _mm_unpacklo_epi16(a, b);
4068 let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
4069 assert_eq_m128i(r, e);
4070 }
4071
4072 #[simd_test(enable = "sse2")]
4073 unsafe fn test_mm_unpacklo_epi32() {
4074 let a = _mm_setr_epi32(0, 1, 2, 3);
4075 let b = _mm_setr_epi32(4, 5, 6, 7);
4076 let r = _mm_unpacklo_epi32(a, b);
4077 let e = _mm_setr_epi32(0, 4, 1, 5);
4078 assert_eq_m128i(r, e);
4079 }
4080
4081 #[simd_test(enable = "sse2")]
4082 unsafe fn test_mm_unpacklo_epi64() {
4083 let a = _mm_setr_epi64x(0, 1);
4084 let b = _mm_setr_epi64x(2, 3);
4085 let r = _mm_unpacklo_epi64(a, b);
4086 let e = _mm_setr_epi64x(0, 2);
4087 assert_eq_m128i(r, e);
4088 }
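
    // Illustrative sketch (test name is ours, not upstream's): the `pd`
    // unpacks interleave one lane from each input, high lanes for
    // `_mm_unpackhi_pd` and low lanes for `_mm_unpacklo_pd`.
    #[simd_test(enable = "sse2")]
    unsafe fn test_unpack_pd_sketch() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(3.0, 4.0);
        assert_eq_m128d(_mm_unpackhi_pd(a, b), _mm_setr_pd(2.0, 4.0));
        assert_eq_m128d(_mm_unpacklo_pd(a, b), _mm_setr_pd(1.0, 3.0));
    }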
4089
4090 #[simd_test(enable = "sse2")]
4091 unsafe fn test_mm_add_sd() {
4092 let a = _mm_setr_pd(1.0, 2.0);
4093 let b = _mm_setr_pd(5.0, 10.0);
4094 let r = _mm_add_sd(a, b);
4095 assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0));
4096 }
4097
4098 #[simd_test(enable = "sse2")]
4099 unsafe fn test_mm_add_pd() {
4100 let a = _mm_setr_pd(1.0, 2.0);
4101 let b = _mm_setr_pd(5.0, 10.0);
4102 let r = _mm_add_pd(a, b);
4103 assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
4104 }
4105
4106 #[simd_test(enable = "sse2")]
4107 unsafe fn test_mm_div_sd() {
4108 let a = _mm_setr_pd(1.0, 2.0);
4109 let b = _mm_setr_pd(5.0, 10.0);
4110 let r = _mm_div_sd(a, b);
4111 assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0));
4112 }
4113
4114 #[simd_test(enable = "sse2")]
4115 unsafe fn test_mm_div_pd() {
4116 let a = _mm_setr_pd(1.0, 2.0);
4117 let b = _mm_setr_pd(5.0, 10.0);
4118 let r = _mm_div_pd(a, b);
4119 assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2));
4120 }
4121
4122 #[simd_test(enable = "sse2")]
4123 unsafe fn test_mm_max_sd() {
4124 let a = _mm_setr_pd(1.0, 2.0);
4125 let b = _mm_setr_pd(5.0, 10.0);
4126 let r = _mm_max_sd(a, b);
4127 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4128 }
4129
4130 #[simd_test(enable = "sse2")]
4131 unsafe fn test_mm_max_pd() {
4132 let a = _mm_setr_pd(1.0, 2.0);
4133 let b = _mm_setr_pd(5.0, 10.0);
4134 let r = _mm_max_pd(a, b);
4135 assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0));
4136
4137 // Check SSE(2)-specific semantics for -0.0 handling.
4138 let a = _mm_setr_pd(-0.0, 0.0);
4139 let b = _mm_setr_pd(0.0, 0.0);
4140 let r1: [u8; 16] = transmute(_mm_max_pd(a, b));
4141 let r2: [u8; 16] = transmute(_mm_max_pd(b, a));
4142 let a: [u8; 16] = transmute(a);
4143 let b: [u8; 16] = transmute(b);
4144 assert_eq!(r1, b);
4145 assert_eq!(r2, a);
4146 assert_ne!(a, b); // sanity check that -0.0 is actually present
4147 }
4148
4149 #[simd_test(enable = "sse2")]
4150 unsafe fn test_mm_min_sd() {
4151 let a = _mm_setr_pd(1.0, 2.0);
4152 let b = _mm_setr_pd(5.0, 10.0);
4153 let r = _mm_min_sd(a, b);
4154 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4155 }
4156
4157 #[simd_test(enable = "sse2")]
4158 unsafe fn test_mm_min_pd() {
4159 let a = _mm_setr_pd(1.0, 2.0);
4160 let b = _mm_setr_pd(5.0, 10.0);
4161 let r = _mm_min_pd(a, b);
4162 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4163
4164 // Check SSE(2)-specific semantics for -0.0 handling.
4165 let a = _mm_setr_pd(-0.0, 0.0);
4166 let b = _mm_setr_pd(0.0, 0.0);
4167 let r1: [u8; 16] = transmute(_mm_min_pd(a, b));
4168 let r2: [u8; 16] = transmute(_mm_min_pd(b, a));
4169 let a: [u8; 16] = transmute(a);
4170 let b: [u8; 16] = transmute(b);
4171 assert_eq!(r1, b);
4172 assert_eq!(r2, a);
4173 assert_ne!(a, b); // sanity check that -0.0 is actually present
4174 }
4175
4176 #[simd_test(enable = "sse2")]
4177 unsafe fn test_mm_mul_sd() {
4178 let a = _mm_setr_pd(1.0, 2.0);
4179 let b = _mm_setr_pd(5.0, 10.0);
4180 let r = _mm_mul_sd(a, b);
4181 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4182 }
4183
4184 #[simd_test(enable = "sse2")]
4185 unsafe fn test_mm_mul_pd() {
4186 let a = _mm_setr_pd(1.0, 2.0);
4187 let b = _mm_setr_pd(5.0, 10.0);
4188 let r = _mm_mul_pd(a, b);
4189 assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0));
4190 }
4191
4192 #[simd_test(enable = "sse2")]
4193 unsafe fn test_mm_sqrt_sd() {
4194 let a = _mm_setr_pd(1.0, 2.0);
4195 let b = _mm_setr_pd(5.0, 10.0);
4196 let r = _mm_sqrt_sd(a, b);
4197 assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
4198 }
4199
4200 #[simd_test(enable = "sse2")]
4201 unsafe fn test_mm_sqrt_pd() {
4202 let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
4203 assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
4204 }
4205
4206 #[simd_test(enable = "sse2")]
4207 unsafe fn test_mm_sub_sd() {
4208 let a = _mm_setr_pd(1.0, 2.0);
4209 let b = _mm_setr_pd(5.0, 10.0);
4210 let r = _mm_sub_sd(a, b);
4211 assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0));
4212 }
4213
4214 #[simd_test(enable = "sse2")]
4215 unsafe fn test_mm_sub_pd() {
4216 let a = _mm_setr_pd(1.0, 2.0);
4217 let b = _mm_setr_pd(5.0, 10.0);
4218 let r = _mm_sub_pd(a, b);
4219 assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0));
4220 }
4221
4222 #[simd_test(enable = "sse2")]
4223 unsafe fn test_mm_and_pd() {
4224 let a = transmute(u64x2::splat(5));
4225 let b = transmute(u64x2::splat(3));
4226 let r = _mm_and_pd(a, b);
4227 let e = transmute(u64x2::splat(1));
4228 assert_eq_m128d(r, e);
4229 }
4230
4231 #[simd_test(enable = "sse2")]
4232 unsafe fn test_mm_andnot_pd() {
4233 let a = transmute(u64x2::splat(5));
4234 let b = transmute(u64x2::splat(3));
4235 let r = _mm_andnot_pd(a, b);
4236 let e = transmute(u64x2::splat(2));
4237 assert_eq_m128d(r, e);
4238 }
4239
4240 #[simd_test(enable = "sse2")]
4241 unsafe fn test_mm_or_pd() {
4242 let a = transmute(u64x2::splat(5));
4243 let b = transmute(u64x2::splat(3));
4244 let r = _mm_or_pd(a, b);
4245 let e = transmute(u64x2::splat(7));
4246 assert_eq_m128d(r, e);
4247 }
4248
4249 #[simd_test(enable = "sse2")]
4250 unsafe fn test_mm_xor_pd() {
4251 let a = transmute(u64x2::splat(5));
4252 let b = transmute(u64x2::splat(3));
4253 let r = _mm_xor_pd(a, b);
4254 let e = transmute(u64x2::splat(6));
4255 assert_eq_m128d(r, e);
4256 }
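
    // Illustrative sketch (test name is ours, not upstream's): the cast
    // intrinsics reinterpret bits without any conversion, so a round trip
    // through `__m128i` returns the original payload exactly.
    #[simd_test(enable = "sse2")]
    unsafe fn test_cast_pd_roundtrip_sketch() {
        let a = _mm_setr_pd(1.5, -2.5);
        assert_eq_m128d(_mm_castsi128_pd(_mm_castpd_si128(a)), a);
    }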
4257
4258 #[simd_test(enable = "sse2")]
4259 unsafe fn test_mm_cmpeq_sd() {
4260 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4261 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4262 let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
4263 assert_eq_m128i(r, e);
4264 }
4265
4266 #[simd_test(enable = "sse2")]
4267 unsafe fn test_mm_cmplt_sd() {
4268 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4269 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4270 let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
4271 assert_eq_m128i(r, e);
4272 }
4273
4274 #[simd_test(enable = "sse2")]
4275 unsafe fn test_mm_cmple_sd() {
4276 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4277 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4278 let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
4279 assert_eq_m128i(r, e);
4280 }
4281
4282 #[simd_test(enable = "sse2")]
4283 unsafe fn test_mm_cmpgt_sd() {
4284 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4285 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4286 let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
4287 assert_eq_m128i(r, e);
4288 }
4289
4290 #[simd_test(enable = "sse2")]
4291 unsafe fn test_mm_cmpge_sd() {
4292 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4293 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4294 let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
4295 assert_eq_m128i(r, e);
4296 }
4297
4298 #[simd_test(enable = "sse2")]
4299 unsafe fn test_mm_cmpord_sd() {
4300 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4301 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4302 let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
4303 assert_eq_m128i(r, e);
4304 }
4305
4306 #[simd_test(enable = "sse2")]
4307 unsafe fn test_mm_cmpunord_sd() {
4308 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4309 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4310 let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
4311 assert_eq_m128i(r, e);
4312 }
4313
4314 #[simd_test(enable = "sse2")]
4315 unsafe fn test_mm_cmpneq_sd() {
4316 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4317 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4318 let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
4319 assert_eq_m128i(r, e);
4320 }
4321
4322 #[simd_test(enable = "sse2")]
4323 unsafe fn test_mm_cmpnlt_sd() {
4324 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4325 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4326 let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
4327 assert_eq_m128i(r, e);
4328 }
4329
4330 #[simd_test(enable = "sse2")]
4331 unsafe fn test_mm_cmpnle_sd() {
4332 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4333 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4334 let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
4335 assert_eq_m128i(r, e);
4336 }
4337
4338 #[simd_test(enable = "sse2")]
4339 unsafe fn test_mm_cmpngt_sd() {
4340 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4341 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4342 let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
4343 assert_eq_m128i(r, e);
4344 }
4345
4346 #[simd_test(enable = "sse2")]
4347 unsafe fn test_mm_cmpnge_sd() {
4348 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4349 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4350 let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
4351 assert_eq_m128i(r, e);
4352 }
4353
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmple_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, !0);
        let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpge_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpord_pd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpunord_pd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpneq_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnlt_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnle_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpngt_pd() {
        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnge_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b));
        assert_eq_m128i(r, e);
    }

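    // comi* and ucomi* both compare the low lanes and return 0 or 1. They
    // differ only in exception behavior: comisd (COMISD) signals an invalid
    // operation for any NaN input, while ucomisd (UCOMISD) signals only for
    // signaling NaNs. An unordered comparison makes the eq/lt/le/gt/ge
    // predicates return 0, as the NaN cases below demonstrate.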
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comieq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comieq_sd(a, b) != 0);

        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comieq_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comilt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comilt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comile_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comile_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comigt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comigt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comige_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comige_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comineq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comineq_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomieq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomieq_sd(a, b) != 0);

        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
        assert!(_mm_ucomieq_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomilt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomilt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomile_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomile_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomigt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomigt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomige_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomige_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomineq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomineq_sd(a, b) == 0);
    }

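    // _mm_movemask_pd packs the sign bits of the two lanes into the low two
    // bits of the result: bit 0 comes from the low lane, bit 1 from the high
    // lane.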
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_movemask_pd() {
        let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
        assert_eq!(r, 0b01);

        let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
        assert_eq!(r, 0b11);
    }

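    // Backing storage for the aligned load/store tests below; _mm_load_pd and
    // _mm_store_pd require a 16-byte-aligned address.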
    #[repr(align(16))]
    struct Memory {
        data: [f64; 4],
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_pd() {
        let mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mem.data;
        let d = vals.as_ptr();

        let r = _mm_load_pd(d);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_sd() {
        let a = 1.;
        let expected = _mm_setr_pd(a, 0.);
        let r = _mm_load_sd(&a);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadh_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = 3.;
        let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
        let r = _mm_loadh_pd(a, &b);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadl_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = 3.;
        let expected = _mm_setr_pd(3., get_m128d(a, 1));
        let r = _mm_loadl_pd(a, &b);
        assert_eq_m128d(r, expected);
    }

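    // _mm_stream_pd is a non-temporal store: it bypasses the cache and is
    // weakly ordered, so real code typically pairs it with _mm_sfence before
    // another thread reads the data. A single-threaded test can read the
    // values back directly.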
    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    // (non-temporal store)
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_stream_pd() {
        #[repr(align(128))]
        struct Memory {
            pub data: [f64; 2],
        }
        let a = _mm_set1_pd(7.0);
        let mut mem = Memory { data: [-1.0; 2] };

        _mm_stream_pd(&mut mem.data[0] as *mut f64, a);
        for i in 0..2 {
            assert_eq!(mem.data[i], get_m128d(a, i));
        }
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_sd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_store_sd(&mut dest, a);
        assert_eq!(dest, _mm_cvtsd_f64(a));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store_pd(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 2.0);
    }

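    // Unlike _mm_store_pd, _mm_storeu_pd accepts any address, so this test
    // deliberately bumps the pointer off any 16-byte boundary before storing.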
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeu_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);

        let mut ofs = 0;
        let mut p = vals.as_mut_ptr();

        // Make sure p is **not** aligned to 16-byte boundary
        if (p as usize) & 0xf == 0 {
            ofs = 1;
            p = p.add(1);
        }

        _mm_storeu_pd(p, *black_box(&a));

        if ofs > 0 {
            assert_eq!(vals[ofs - 1], 0.0);
        }
        assert_eq!(vals[ofs + 0], 1.0);
        assert_eq!(vals[ofs + 1], 2.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store1_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store1_pd(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 1.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_pd1() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store_pd1(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 1.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storer_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_storer_pd(d, *black_box(&a));
        assert_eq!(vals[0], 2.0);
        assert_eq!(vals[1], 1.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeh_pd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_storeh_pd(&mut dest, a);
        assert_eq!(dest, get_m128d(a, 1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storel_pd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_storel_pd(&mut dest, a);
        assert_eq!(dest, _mm_cvtsd_f64(a));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadr_pd() {
        let mut mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mut mem.data;
        let d = vals.as_ptr();

        let r = _mm_loadr_pd(d);
        assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadu_pd() {
        let mut mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mut mem.data;
        let mut d = vals.as_ptr();

        // make sure d is not aligned to 16-byte boundary
        let mut offset = 0;
        if (d as usize) & 0xf == 0 {
            offset = 1;
            d = d.add(offset);
        }

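        // `data` holds the consecutive values 1.0..=4.0, so loading one
        // element further along simply adds `offset` to each expected lane.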
        let r = _mm_loadu_pd(d);
        let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64));
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtpd_ps() {
        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
        assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
        assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
        assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtps_pd() {
        let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
        assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));

        let r = _mm_cvtps_pd(_mm_setr_ps(
            f32::MAX,
            f32::INFINITY,
            f32::NEG_INFINITY,
            f32::MIN,
        ));
        assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
    }

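    // Conversions to packed 32-bit integers return the "integer indefinite"
    // value i32::MIN (0x8000_0000) for inputs that are NaN, infinite, or out
    // of range.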
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtpd_epi32() {
        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
        assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
        assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_si32() {
        let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
        assert_eq!(r, -2);

        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq!(r, i32::MIN);

        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
        assert_eq!(r, i32::MIN);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_ss() {
        let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
        let b = _mm_setr_pd(2.0, -5.0);

        let r = _mm_cvtsd_ss(a, b);

        assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));

        let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
        let b = _mm_setr_pd(f64::INFINITY, -5.0);

        let r = _mm_cvtsd_ss(a, b);

        assert_eq_m128(
            r,
            _mm_setr_ps(
                f32::INFINITY,
                f32::NEG_INFINITY,
                f32::MAX,
                f32::NEG_INFINITY,
            ),
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_f64() {
        let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
        assert_eq!(r, -1.1);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtss_sd() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);

        let r = _mm_cvtss_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));

        let a = _mm_setr_pd(-1.1, f64::INFINITY);
        let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);

        let r = _mm_cvtss_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
    }

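    // The cvtt* conversions truncate toward zero regardless of the MXCSR
    // rounding mode; the cvt* variants above use the current rounding mode
    // (round-to-nearest-even by default).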
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttpd_epi32() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let r = _mm_cvttpd_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));

        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let r = _mm_cvttpd_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttsd_si32() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let r = _mm_cvttsd_si32(a);
        assert_eq!(r, -1);

        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let r = _mm_cvttsd_si32(a);
        assert_eq!(r, i32::MIN);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttps_epi32() {
        let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
        let r = _mm_cvttps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));

        let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
        let r = _mm_cvttps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_sd() {
        let r = _mm_set_sd(-1.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_pd() {
        let r = _mm_set1_pd(-1.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_pd1() {
        let r = _mm_set_pd1(-2.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
    }

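    // _mm_set_pd takes its arguments in high-to-low lane order, the reverse of
    // _mm_setr_pd, so set_pd(1.0, 5.0) equals setr_pd(5.0, 1.0).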
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_pd() {
        let r = _mm_set_pd(1.0_f64, 5.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setr_pd() {
        let r = _mm_setr_pd(1.0_f64, -5.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setzero_pd() {
        let r = _mm_setzero_pd();
        assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load1_pd() {
        let d = -5.0;
        let r = _mm_load1_pd(&d);
        assert_eq_m128d(r, _mm_setr_pd(d, d));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_pd1() {
        let d = -5.0;
        let r = _mm_load_pd1(&d);
        assert_eq_m128d(r, _mm_setr_pd(d, d));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(3.0, 4.0);
        let r = _mm_unpackhi_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(3.0, 4.0);
        let r = _mm_unpacklo_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0));
    }

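    // For __m128d, only the low two bits of the shuffle immediate matter:
    // bit 0 selects the low result lane from `a`, bit 1 the high result lane
    // from `b`. An immediate of 0 therefore picks a[0] and b[0].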
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shuffle_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(3., 4.);
        let expected = _mm_setr_pd(1., 3.);
        let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b);
        assert_eq_m128d(r, expected);
    }

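    // _mm_move_sd returns (low lane of `b`, high lane of `a`).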
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_move_sd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(3., 4.);
        let expected = _mm_setr_pd(3., 2.);
        let r = _mm_move_sd(a, b);
        assert_eq_m128d(r, expected);
    }

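    // The cast intrinsics are bitwise reinterpretations that compile to no
    // instructions; these tests just check that an all-zero bit pattern
    // round-trips through each type.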
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castpd_ps() {
        let a = _mm_set1_pd(0.);
        let expected = _mm_set1_ps(0.);
        let r = _mm_castpd_ps(a);
        assert_eq_m128(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castpd_si128() {
        let a = _mm_set1_pd(0.);
        let expected = _mm_set1_epi64x(0);
        let r = _mm_castpd_si128(a);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castps_pd() {
        let a = _mm_set1_ps(0.);
        let expected = _mm_set1_pd(0.);
        let r = _mm_castps_pd(a);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castps_si128() {
        let a = _mm_set1_ps(0.);
        let expected = _mm_set1_epi32(0);
        let r = _mm_castps_si128(a);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castsi128_pd() {
        let a = _mm_set1_epi64x(0);
        let expected = _mm_set1_pd(0.);
        let r = _mm_castsi128_pd(a);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castsi128_ps() {
        let a = _mm_set1_epi32(0);
        let expected = _mm_set1_ps(0.);
        let r = _mm_castsi128_ps(a);
        assert_eq_m128(r, expected);
    }
}