//! Streaming SIMD Extensions 2 (SSE2)

#[cfg(test)]
use stdarch_test::assert_instr;

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    mem, ptr,
};

/// Provides a hint to the processor that the code sequence is a spin-wait loop.
///
/// This can help improve the performance and power consumption of spin-wait
/// loops.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_pause)
#[inline]
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_pause() {
    // note: `pause` is guaranteed to be interpreted as a `nop` by CPUs without
    // the SSE2 target-feature - therefore it does not require any target features
    pause()
}

/// Invalidates and flushes the cache line that contains `p` from all levels of
/// the cache hierarchy.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clflush)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(clflush))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_clflush(p: *const u8) {
    clflush(p)
}

/// Performs a serializing operation on all load-from-memory instructions
/// that were issued prior to this instruction.
///
/// Guarantees that every load instruction that precedes, in program order,
/// the load fence instruction is globally visible before any load instruction
/// which follows the fence in program order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(lfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_lfence() {
    lfence()
}

/// Performs a serializing operation on all load-from-memory and store-to-memory
/// instructions that were issued prior to this instruction.
///
/// Guarantees that every memory access that precedes, in program order, the
/// memory fence instruction is globally visible before any memory instruction
/// which follows the fence in program order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mfence() {
    mfence()
}

/// Adds packed 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i8x16(), b.as_i8x16()))
}

/// Adds packed 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i16x8(), b.as_i16x8()))
}

/// Adds packed 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i32x4(), b.as_i32x4()))
}

/// Adds packed 64-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i64x2(), b.as_i64x2()))
}

/// Adds packed 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16()))
}

/// Adds packed 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8()))
}

/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu8)
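///
/// # Examples
///
/// A minimal sketch of the saturating behavior, assuming an `x86_64` target
/// (where SSE2 is always available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_set1_epi8(-6); // every byte is 250 viewed as unsigned
///         let b = _mm_set1_epi8(10);
///         let r = _mm_adds_epu8(a, b); // 250 + 10 saturates to 255 (0xFF)
///         assert_eq!(_mm_cvtsi128_si32(r), -1); // low 32 bits are all ones
///     }
/// }
/// ```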
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16()))
}

/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8()))
}

/// Averages packed unsigned 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu8)
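///
/// # Examples
///
/// A minimal sketch of the rounding behavior, assuming an `x86_64` target
/// (where SSE2 is always available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_set1_epi8(1);
///         let b = _mm_set1_epi8(2);
///         // (1 + 2 + 1) >> 1 == 2: ties round up.
///         let r = _mm_avg_epu8(a, b);
///         assert_eq!(_mm_cvtsi128_si32(r), 0x0202_0202);
///     }
/// }
/// ```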
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
    let a = simd_cast::<_, u16x16>(a.as_u8x16());
    let b = simd_cast::<_, u16x16>(b.as_u8x16());
    let r = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1));
    transmute(simd_cast::<_, u8x16>(r))
}

/// Averages packed unsigned 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
    let a = simd_cast::<_, u32x8>(a.as_u16x8());
    let b = simd_cast::<_, u32x8>(b.as_u16x8());
    let r = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1));
    transmute(simd_cast::<_, u16x8>(r))
}

/// Multiplies and then horizontally adds signed 16-bit integers in `a` and `b`.
///
/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Horizontally adds adjacent pairs of
/// intermediate 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd_epi16)
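///
/// # Examples
///
/// A minimal sketch, assuming an `x86_64` target (where SSE2 is always
/// available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_set1_epi16(2);
///         let b = _mm_set1_epi16(3);
///         // Each 32-bit lane is 2 * 3 + 2 * 3 = 12.
///         let r = _mm_madd_epi16(a, b);
///         assert_eq!(_mm_cvtsi128_si32(r), 12);
///     }
/// }
/// ```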
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmaddwd(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// maximum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_i16x8();
    let b = b.as_i16x8();
    transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed maximum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_u8x16();
    let b = b.as_u8x16();
    transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
}

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// minimum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_i16x8();
    let b = b.as_i16x8();
    transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed minimum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_u8x16();
    let b = b.as_u8x16();
    transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
}

/// Multiplies the packed 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
    let a = simd_cast::<_, i32x8>(a.as_i16x8());
    let b = simd_cast::<_, i32x8>(b.as_i16x8());
    let r = simd_shr(simd_mul(a, b), i32x8::splat(16));
    transmute(simd_cast::<i32x8, i16x8>(r))
}

/// Multiplies the packed unsigned 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
    let a = simd_cast::<_, u32x8>(a.as_u16x8());
    let b = simd_cast::<_, u32x8>(b.as_u16x8());
    let r = simd_shr(simd_mul(a, b), u32x8::splat(16));
    transmute(simd_cast::<u32x8, u16x8>(r))
}

/// Multiplies the packed 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// low 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmullw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_mul(a.as_i16x8(), b.as_i16x8()))
}

/// Multiplies the low unsigned 32-bit integers from each packed 64-bit element
/// in `a` and `b`.
///
/// Returns the unsigned 64-bit results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_epu32)
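///
/// # Examples
///
/// A minimal sketch, assuming an `x86_64` target (where SSE2 is always
/// available): only the low 32 bits of each 64-bit lane participate.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_set1_epi32(5);
///         let b = _mm_set1_epi32(6);
///         let r = _mm_mul_epu32(a, b); // each 64-bit lane is 5 * 6 = 30
///         assert_eq!(_mm_cvtsi128_si32(r), 30);
///     }
/// }
/// ```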
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmuludq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_u64x2();
    let b = b.as_u64x2();
    let mask = u64x2::splat(u32::MAX.into());
    transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
}

/// Sums the absolute differences of packed unsigned 8-bit integers.
///
/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
/// and `b`, then horizontally sums each consecutive 8 differences to produce
/// two unsigned 16-bit integers, and packs these unsigned 16-bit integers in
/// the low 16 bits of the 64-bit elements returned.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sad_epu8)
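///
/// # Examples
///
/// A minimal sketch, assuming an `x86_64` target (where SSE2 is always
/// available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_set1_epi8(3);
///         let b = _mm_set1_epi8(1);
///         // |3 - 1| = 2 per byte; eight bytes sum to 16 in each 64-bit lane.
///         let r = _mm_sad_epu8(a, b);
///         assert_eq!(_mm_cvtsi128_si32(r), 16);
///     }
/// }
/// ```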
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psadbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(psadbw(a.as_u8x16(), b.as_u8x16()))
}

/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i8x16(), b.as_i8x16()))
}

/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i16x8(), b.as_i16x8()))
}

/// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i32x4(), b.as_i32x4()))
}

/// Subtracts packed 64-bit integers in `b` from packed 64-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i64x2(), b.as_i64x2()))
}

/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`
/// using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16()))
}

/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`
/// using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8()))
}

/// Subtracts packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
/// integers in `a` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16()))
}

/// Subtracts packed unsigned 16-bit integers in `b` from packed unsigned 16-bit
/// integers in `a` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8()))
}

/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_si128)
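///
/// # Examples
///
/// A minimal sketch of the byte-wise shift, assuming an `x86_64` target
/// (where SSE2 is always available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_cvtsi32_si128(0x1122_3344);
///         let r = _mm_slli_si128::<1>(a); // bytes move one position up
///         assert_eq!(_mm_cvtsi128_si32(r), 0x2233_4400);
///     }
/// }
/// ```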
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_slli_si128_impl::<IMM8>(a)
}

/// Implementation detail: converts the immediate argument of the
/// `_mm_slli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        if shift > 15 {
            i
        } else {
            16 - shift + i
        }
    }
    let zero = _mm_set1_epi8(0).as_i8x16();
    transmute::<i8x16, _>(simd_shuffle!(
        zero,
        a.as_i8x16(),
        [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    ))
}

/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bslli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_slli_si128_impl::<IMM8>(a)
}

/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bsrli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_srli_si128_impl::<IMM8>(a)
}

/// Shifts packed 16-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 16 {
        _mm_setzero_si128()
    } else {
        transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
    }
}

/// Shifts packed 16-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psllw(a.as_i16x8(), count.as_i16x8()))
}

/// Shifts packed 32-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 32 {
        _mm_setzero_si128()
    } else {
        transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
    }
}

/// Shifts packed 32-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(pslld(a.as_i32x4(), count.as_i32x4()))
}

/// Shifts packed 64-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 64 {
        _mm_setzero_si128()
    } else {
        transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
    }
}

/// Shifts packed 64-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
    transmute(psllq(a.as_i64x2(), count.as_i64x2()))
}

/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi16)
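///
/// # Examples
///
/// A minimal sketch of the sign-filling shift, assuming an `x86_64` target
/// (where SSE2 is always available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_set1_epi16(-16);
///         let r = _mm_srai_epi16::<2>(a); // -16 >> 2 == -4 in every lane
///         assert_eq!(_mm_extract_epi16::<0>(r), 0xFFFC); // -4, zero-extended
///     }
/// }
/// ```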
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16)))
}

/// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psraw(a.as_i16x8(), count.as_i16x8()))
}

/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31))))
}

/// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrad(a.as_i32x4(), count.as_i32x4()))
}

/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_srli_si128_impl::<IMM8>(a)
}

/// Implementation detail: converts the immediate argument of the
/// `_mm_srli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    const fn mask(shift: i32, i: u32) -> u32 {
        if (shift as u32) > 15 {
            i + 16
        } else {
            i + (shift as u32)
        }
    }
    let zero = _mm_set1_epi8(0).as_i8x16();
    let x: i8x16 = simd_shuffle!(
        a.as_i8x16(),
        zero,
        [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    );
    transmute(x)
}

/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 16 {
        _mm_setzero_si128()
    } else {
        transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
    }
}

/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrlw(a.as_i16x8(), count.as_i16x8()))
}

/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 32 {
        _mm_setzero_si128()
    } else {
        transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
    }
}

/// Shifts packed 32-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrld(a.as_i32x4(), count.as_i32x4()))
}

/// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 64 {
        _mm_setzero_si128()
    } else {
        transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
    }
}

/// Shifts packed 64-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrlq(a.as_i64x2(), count.as_i64x2()))
}

/// Computes the bitwise AND of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_and(a, b)
}

/// Computes the bitwise NOT of 128 bits (representing integer data) in `a` and
/// then AND with `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_si128)
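///
/// # Examples
///
/// A minimal sketch, assuming an `x86_64` target (where SSE2 is always
/// available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_set1_epi32(0b1100);
///         let b = _mm_set1_epi32(0b1010);
///         // (!a) & b keeps only the bits of `b` that are clear in `a`.
///         let r = _mm_andnot_si128(a, b);
///         assert_eq!(_mm_cvtsi128_si32(r), 0b0010);
///     }
/// }
/// ```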
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_and(simd_xor(_mm_set1_epi8(-1), a), b)
}

/// Computes the bitwise OR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_or(a, b)
}

/// Computes the bitwise XOR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_xor(a, b)
}

/// Compares packed 8-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8)
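///
/// # Examples
///
/// A minimal sketch, assuming an `x86_64` target (where SSE2 is always
/// available): equal lanes become `0xFF`, all others `0x00`.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_set1_epi8(5);
///         let b = _mm_cvtsi32_si128(5); // only the lowest byte is 5
///         let r = _mm_cmpeq_epi8(a, b);
///         assert_eq!(_mm_cvtsi128_si32(r), 0xFF);
///     }
/// }
/// ```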
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16()))
}

/// Compares packed 16-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed 32-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4()))
}

/// Compares packed 8-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16()))
}

/// Compares packed 16-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed 32-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4()))
}

/// Compares packed 8-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16()))
}

/// Compares packed 16-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed 32-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4()))
}

/// Converts the lower two packed 32-bit integers in `a` to packed
/// double-precision (64-bit) floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
    let a = a.as_i32x4();
    simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1]))
}

/// Returns `a` with its lower element replaced by `b` after converting it to
/// an `f64`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsi2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
    simd_insert!(a, 0, b as f64)
}

/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit)
/// floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
    transmute(simd_cast::<_, f32x4>(a.as_i32x4()))
}

/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtps_epi32(a: __m128) -> __m128i {
    transmute(cvtps2dq(a))
}

/// Returns a vector whose lowest element is `a` and all higher elements are
/// `0`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi32_si128(a: i32) -> __m128i {
    transmute(i32x4::new(a, 0, 0, 0))
}

/// Returns the lowest element of `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi128_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
    simd_extract!(a.as_i32x4(), 0)
}

/// Sets packed 64-bit integers with the supplied values, from highest to
/// lowest.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi64x)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
    transmute(i64x2::new(e0, e1))
}

/// Sets packed 32-bit integers with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi32)
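///
/// # Examples
///
/// A small ordering sketch, assuming an `x86_64` target (where SSE2 is always
/// available): the first argument becomes the highest element and `e0` the
/// lowest.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let v = _mm_set_epi32(3, 2, 1, 0);
///         assert_eq!(_mm_cvtsi128_si32(v), 0); // element 0 is the last argument
///     }
/// }
/// ```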
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    transmute(i32x4::new(e0, e1, e2, e3))
}

/// Sets packed 16-bit integers with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
}

/// Sets packed 8-bit integers with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    #[rustfmt::skip]
    transmute(i8x16::new(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    ))
}

/// Broadcasts 64-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi64x)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi64x(a: i64) -> __m128i {
    _mm_set_epi64x(a, a)
}

/// Broadcasts 32-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi32(a: i32) -> __m128i {
    _mm_set_epi32(a, a, a, a)
}

/// Broadcasts 16-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi16(a: i16) -> __m128i {
    _mm_set_epi16(a, a, a, a, a, a, a, a)
}

/// Broadcasts 8-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi8(a: i8) -> __m128i {
    _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
}

/// Sets packed 32-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    _mm_set_epi32(e0, e1, e2, e3)
}

/// Sets packed 16-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
}

/// Sets packed 8-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    #[rustfmt::skip]
    _mm_set_epi8(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    )
}

/// Returns a vector with all elements set to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setzero_si128() -> __m128i {
    _mm_set1_epi64x(0)
}

/// Loads a 64-bit integer from memory into the first element of the returned
/// vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// FIXME movsd on windows
#[cfg_attr(
    all(
        test,
        not(windows),
        not(all(target_os = "linux", target_arch = "x86_64")),
        target_arch = "x86_64"
    ),
    assert_instr(movq)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
    _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
}

/// Loads 128 bits of integer data from memory into a new vector.
///
/// `mem_addr` must be aligned on a 16-byte boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
    *mem_addr
}

/// Loads 128 bits of integer data from memory into a new vector.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si128)
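///
/// # Examples
///
/// A minimal sketch of an unaligned load, assuming an `x86_64` target (where
/// SSE2 is always available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     let data = [1i32, 2, 3, 4]; // only 4-byte aligned, which is fine here
///     let v = unsafe { _mm_loadu_si128(data.as_ptr() as *const __m128i) };
///     assert_eq!(unsafe { _mm_cvtsi128_si32(v) }, 1);
/// }
/// ```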
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
    let mut dst: __m128i = _mm_undefined_si128();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m128i>(),
    );
    dst
}

/// Conditionally stores 8-bit integer elements from `a` into memory using
/// `mask`.
///
/// Elements are not stored when the highest bit is not set in the
/// corresponding element.
///
/// `mem_addr` should correspond to a 128-bit memory location and does not need
/// to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskmoveu_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maskmovdqu))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) {
    maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
}

/// Stores 128 bits of integer data from `a` into memory.
///
/// `mem_addr` must be aligned on a 16-byte boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
    *mem_addr = a;
}

/// Stores 128 bits of integer data from `a` into memory.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))] // FIXME movdqu expected
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
    mem_addr.write_unaligned(a);
}

/// Stores the lower 64-bit integer `a` to a memory location.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// FIXME mov on windows, movlps on i686
#[cfg_attr(
    all(
        test,
        not(windows),
        not(all(target_os = "linux", target_arch = "x86_64")),
        target_arch = "x86_64"
    ),
    assert_instr(movq)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
    ptr::copy_nonoverlapping(ptr::addr_of!(a) as *const u8, mem_addr as *mut u8, 8);
}

/// Stores a 128-bit integer vector to a 128-bit aligned memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si128)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntps))] // FIXME movntdq
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
    crate::intrinsics::nontemporal_store(mem_addr, a);
}

/// Stores a 32-bit integer value in the specified memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si32)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
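///
/// # Examples
///
/// A sketch of the required store/fence pairing, assuming an `x86_64` target
/// (where SSE2 is always available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     let mut x = 0i32;
///     unsafe {
///         _mm_stream_si32(&mut x, 7);
///         _mm_sfence(); // fence before the stored memory is accessed again
///     }
///     assert_eq!(x, 7);
/// }
/// ```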
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movnti))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
    crate::intrinsics::nontemporal_store(mem_addr, a);
}

/// Returns a vector where the low element is extracted from `a` and its upper
/// element is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// FIXME movd on windows, movd on i686
#[cfg_attr(all(test, not(windows), target_arch = "x86_64"), assert_instr(movq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_move_epi64(a: __m128i) -> __m128i {
    let zero = _mm_setzero_si128();
    let r: i64x2 = simd_shuffle!(a.as_i64x2(), zero.as_i64x2(), [0, 2]);
    transmute(r)
}

/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using signed saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi16)
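///
/// # Examples
///
/// A minimal sketch of the signed saturation, assuming an `x86_64` target
/// (where SSE2 is always available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_set1_epi16(300); // clamps to i8::MAX (127)
///         let b = _mm_set1_epi16(-300); // clamps to i8::MIN (-128)
///         let r = _mm_packs_epi16(a, b); // low 8 bytes come from `a`
///         assert_eq!(_mm_cvtsi128_si32(r), 0x7F7F_7F7F);
///     }
/// }
/// ```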
1377#[inline]
1378#[target_feature(enable = "sse2")]
1379#[cfg_attr(test, assert_instr(packsswb))]
1380#[stable(feature = "simd_x86", since = "1.27.0")]
1381pub unsafe fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
1382 transmute(src:packsswb(a:a.as_i16x8(), b:b.as_i16x8()))
1383}
1384
1385/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
1386/// using signed saturation.
1387///
1388/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi32)
1389#[inline]
1390#[target_feature(enable = "sse2")]
1391#[cfg_attr(test, assert_instr(packssdw))]
1392#[stable(feature = "simd_x86", since = "1.27.0")]
1393pub unsafe fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
1394 transmute(src:packssdw(a:a.as_i32x4(), b:b.as_i32x4()))
1395}
1396
1397/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
1398/// using unsigned saturation.
1399///
1400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi16)
1401#[inline]
1402#[target_feature(enable = "sse2")]
1403#[cfg_attr(test, assert_instr(packuswb))]
1404#[stable(feature = "simd_x86", since = "1.27.0")]
1405pub unsafe fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(packuswb(a.as_i16x8(), b.as_i16x8()))
1407}
1408
1409/// Returns the `imm8` element of `a`.
1410///
1411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi16)
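///
/// # Examples
///
/// A small usage sketch (assuming an `x86_64` target, where SSE2 is
/// baseline):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use std::arch::x86_64::*;
/// let a = _mm_setr_epi16(10, 20, 30, 40, 50, 60, 70, 80);
/// assert_eq!(_mm_extract_epi16::<3>(a), 40);
/// // The extracted lane is zero-extended, so negative lanes come back
/// // as their `u16` bit pattern.
/// assert_eq!(_mm_extract_epi16::<0>(_mm_set1_epi16(-1)), 0xFFFF);
/// # }
/// ```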
1412#[inline]
1413#[target_feature(enable = "sse2")]
1414#[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))]
1415#[rustc_legacy_const_generics(1)]
1416#[stable(feature = "simd_x86", since = "1.27.0")]
1417pub unsafe fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 {
1418 static_assert_uimm_bits!(IMM8, 3);
1419 simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32
1420}
1421
1422/// Returns a new vector where the `imm8` element of `a` is replaced with `i`.
1423///
1424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi16)
1425#[inline]
1426#[target_feature(enable = "sse2")]
1427#[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))]
1428#[rustc_legacy_const_generics(2)]
1429#[stable(feature = "simd_x86", since = "1.27.0")]
1430pub unsafe fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
1431 static_assert_uimm_bits!(IMM8, 3);
    transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16))
1433}
1434
1435/// Returns a mask of the most significant bit of each element in `a`.
1436///
1437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_epi8)
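///
/// # Examples
///
/// A sketch of how sign bits map to result bits (assuming an `x86_64`
/// target):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use std::arch::x86_64::*;
/// let a = _mm_setr_epi8(-1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1);
/// // Bit `i` of the result is the sign bit of byte `i`.
/// assert_eq!(_mm_movemask_epi8(a), 0b1000_0000_0000_0101);
/// # }
/// ```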
1438#[inline]
1439#[target_feature(enable = "sse2")]
1440#[cfg_attr(test, assert_instr(pmovmskb))]
1441#[stable(feature = "simd_x86", since = "1.27.0")]
1442pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 {
1443 let z: i8x16 = i8x16::splat(0);
    let m: i8x16 = simd_lt(a.as_i8x16(), z);
1445 simd_bitmask::<_, u16>(m) as u32 as i32
1446}
1447
1448/// Shuffles 32-bit integers in `a` using the control in `IMM8`.
1449///
1450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi32)
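///
/// # Examples
///
/// An illustrative lane-reversal sketch (assuming an `x86_64` target, where
/// SSE2 is baseline):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use std::arch::x86_64::*;
/// // Each 2-bit field of `IMM8` picks a source lane; `0b00_01_10_11`
/// // reverses the four 32-bit lanes.
/// let a = _mm_setr_epi32(0, 1, 2, 3);
/// let r = _mm_shuffle_epi32::<0b00_01_10_11>(a);
/// let e = _mm_setr_epi32(3, 2, 1, 0);
/// assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi32(r, e)), 0xffff);
/// # }
/// ```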
1451#[inline]
1452#[target_feature(enable = "sse2")]
1453#[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))]
1454#[rustc_legacy_const_generics(1)]
1455#[stable(feature = "simd_x86", since = "1.27.0")]
1456pub unsafe fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
1457 static_assert_uimm_bits!(IMM8, 8);
1458 let a: i32x4 = a.as_i32x4();
1459 let x: i32x4 = simd_shuffle!(
1460 a,
1461 a,
1462 [
1463 IMM8 as u32 & 0b11,
1464 (IMM8 as u32 >> 2) & 0b11,
1465 (IMM8 as u32 >> 4) & 0b11,
1466 (IMM8 as u32 >> 6) & 0b11,
1467 ],
1468 );
    transmute(x)
1470}
1471
1472/// Shuffles 16-bit integers in the high 64 bits of `a` using the control in
1473/// `IMM8`.
1474///
/// Puts the results in the high 64 bits of the returned vector, with the low 64
1476/// bits being copied from `a`.
1477///
1478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflehi_epi16)
1479#[inline]
1480#[target_feature(enable = "sse2")]
1481#[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))]
1482#[rustc_legacy_const_generics(1)]
1483#[stable(feature = "simd_x86", since = "1.27.0")]
1484pub unsafe fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1485 static_assert_uimm_bits!(IMM8, 8);
1486 let a: i16x8 = a.as_i16x8();
1487 let x: i16x8 = simd_shuffle!(
1488 a,
1489 a,
1490 [
1491 0,
1492 1,
1493 2,
1494 3,
1495 (IMM8 as u32 & 0b11) + 4,
1496 ((IMM8 as u32 >> 2) & 0b11) + 4,
1497 ((IMM8 as u32 >> 4) & 0b11) + 4,
1498 ((IMM8 as u32 >> 6) & 0b11) + 4,
1499 ],
1500 );
    transmute(x)
1502}
1503
1504/// Shuffles 16-bit integers in the low 64 bits of `a` using the control in
1505/// `IMM8`.
1506///
/// Puts the results in the low 64 bits of the returned vector, with the high 64
1508/// bits being copied from `a`.
1509///
1510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflelo_epi16)
1511#[inline]
1512#[target_feature(enable = "sse2")]
1513#[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))]
1514#[rustc_legacy_const_generics(1)]
1515#[stable(feature = "simd_x86", since = "1.27.0")]
1516pub unsafe fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1517 static_assert_uimm_bits!(IMM8, 8);
1518 let a: i16x8 = a.as_i16x8();
1519 let x: i16x8 = simd_shuffle!(
1520 a,
1521 a,
1522 [
1523 IMM8 as u32 & 0b11,
1524 (IMM8 as u32 >> 2) & 0b11,
1525 (IMM8 as u32 >> 4) & 0b11,
1526 (IMM8 as u32 >> 6) & 0b11,
1527 4,
1528 5,
1529 6,
1530 7,
1531 ],
1532 );
    transmute(x)
1534}
1535
/// Unpacks and interleaves 8-bit integers from the high half of `a` and `b`.
1537///
1538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi8)
1539#[inline]
1540#[target_feature(enable = "sse2")]
1541#[cfg_attr(test, assert_instr(punpckhbw))]
1542#[stable(feature = "simd_x86", since = "1.27.0")]
1543pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_shuffle!(
1545 a.as_i8x16(),
1546 b.as_i8x16(),
1547 [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
1548 ))
1549}
1550
/// Unpacks and interleaves 16-bit integers from the high half of `a` and `b`.
1552///
1553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi16)
1554#[inline]
1555#[target_feature(enable = "sse2")]
1556#[cfg_attr(test, assert_instr(punpckhwd))]
1557#[stable(feature = "simd_x86", since = "1.27.0")]
1558pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
1559 let x: i16x8 = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
    transmute::<i16x8, _>(x)
1561}
1562
/// Unpacks and interleaves 32-bit integers from the high half of `a` and `b`.
1564///
1565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi32)
1566#[inline]
1567#[target_feature(enable = "sse2")]
1568#[cfg_attr(test, assert_instr(unpckhps))]
1569#[stable(feature = "simd_x86", since = "1.27.0")]
1570pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7]))
1572}
1573
/// Unpacks and interleaves 64-bit integers from the high half of `a` and `b`.
1575///
1576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi64)
1577#[inline]
1578#[target_feature(enable = "sse2")]
1579#[cfg_attr(test, assert_instr(unpckhpd))]
1580#[stable(feature = "simd_x86", since = "1.27.0")]
1581pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3]))
1583}
1584
/// Unpacks and interleaves 8-bit integers from the low half of `a` and `b`.
1586///
1587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi8)
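///
/// # Examples
///
/// A sketch of a common use: widening unsigned bytes to 16-bit lanes by
/// interleaving with zeros (assuming an `x86_64` target):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use std::arch::x86_64::*;
/// let a = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
/// let z = _mm_setzero_si128();
/// // Interleaving the low 8 bytes with zeros yields the same values as
/// // little-endian 16-bit lanes.
/// let r = _mm_unpacklo_epi8(a, z);
/// let e = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
/// assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi16(r, e)), 0xffff);
/// # }
/// ```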
1588#[inline]
1589#[target_feature(enable = "sse2")]
1590#[cfg_attr(test, assert_instr(punpcklbw))]
1591#[stable(feature = "simd_x86", since = "1.27.0")]
1592pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_shuffle!(
1594 a.as_i8x16(),
1595 b.as_i8x16(),
1596 [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
1597 ))
1598}
1599
/// Unpacks and interleaves 16-bit integers from the low half of `a` and `b`.
1601///
1602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi16)
1603#[inline]
1604#[target_feature(enable = "sse2")]
1605#[cfg_attr(test, assert_instr(punpcklwd))]
1606#[stable(feature = "simd_x86", since = "1.27.0")]
1607pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
1608 let x: i16x8 = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
    transmute::<i16x8, _>(x)
1610}
1611
/// Unpacks and interleaves 32-bit integers from the low half of `a` and `b`.
1613///
1614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi32)
1615#[inline]
1616#[target_feature(enable = "sse2")]
1617#[cfg_attr(test, assert_instr(unpcklps))]
1618#[stable(feature = "simd_x86", since = "1.27.0")]
1619pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5]))
1621}
1622
/// Unpacks and interleaves 64-bit integers from the low half of `a` and `b`.
1624///
1625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi64)
1626#[inline]
1627#[target_feature(enable = "sse2")]
1628#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))]
1629#[stable(feature = "simd_x86", since = "1.27.0")]
1630pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2]))
1632}
1633
1634/// Returns a new vector with the low element of `a` replaced by the sum of the
1635/// low elements of `a` and `b`.
1636///
1637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_sd)
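///
/// # Examples
///
/// A minimal sketch of the scalar (low-lane-only) semantics (assuming an
/// `x86_64` target):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use std::arch::x86_64::*;
/// let a = _mm_setr_pd(1.0, 10.0);
/// let b = _mm_setr_pd(2.0, 20.0);
/// let r = _mm_add_sd(a, b);
/// // Only the low lanes are added; the high lane is carried over from `a`.
/// assert_eq!(_mm_cvtsd_f64(r), 3.0);
/// assert_eq!(_mm_cvtsd_f64(_mm_unpackhi_pd(r, r)), 10.0);
/// # }
/// ```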
1638#[inline]
1639#[target_feature(enable = "sse2")]
1640#[cfg_attr(test, assert_instr(addsd))]
1641#[stable(feature = "simd_x86", since = "1.27.0")]
1642pub unsafe fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
1643 simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b))
1644}
1645
1646/// Adds packed double-precision (64-bit) floating-point elements in `a` and
1647/// `b`.
1648///
1649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_pd)
1650#[inline]
1651#[target_feature(enable = "sse2")]
1652#[cfg_attr(test, assert_instr(addpd))]
1653#[stable(feature = "simd_x86", since = "1.27.0")]
1654pub unsafe fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_add(a, b)
1656}
1657
/// Returns a new vector with the low element of `a` replaced by the result of
/// dividing the lower element of `a` by the lower element of `b`.
1660///
1661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_sd)
1662#[inline]
1663#[target_feature(enable = "sse2")]
1664#[cfg_attr(test, assert_instr(divsd))]
1665#[stable(feature = "simd_x86", since = "1.27.0")]
1666pub unsafe fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
1667 simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b))
1668}
1669
/// Divides packed double-precision (64-bit) floating-point elements in `a` by
1671/// packed elements in `b`.
1672///
1673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_pd)
1674#[inline]
1675#[target_feature(enable = "sse2")]
1676#[cfg_attr(test, assert_instr(divpd))]
1677#[stable(feature = "simd_x86", since = "1.27.0")]
1678pub unsafe fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_div(a, b)
1680}
1681
1682/// Returns a new vector with the low element of `a` replaced by the maximum
1683/// of the lower elements of `a` and `b`.
1684///
1685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_sd)
1686#[inline]
1687#[target_feature(enable = "sse2")]
1688#[cfg_attr(test, assert_instr(maxsd))]
1689#[stable(feature = "simd_x86", since = "1.27.0")]
1690pub unsafe fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d {
1691 maxsd(a, b)
1692}
1693
1694/// Returns a new vector with the maximum values from corresponding elements in
1695/// `a` and `b`.
1696///
1697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_pd)
1698#[inline]
1699#[target_feature(enable = "sse2")]
1700#[cfg_attr(test, assert_instr(maxpd))]
1701#[stable(feature = "simd_x86", since = "1.27.0")]
1702pub unsafe fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d {
1703 maxpd(a, b)
1704}
1705
1706/// Returns a new vector with the low element of `a` replaced by the minimum
1707/// of the lower elements of `a` and `b`.
1708///
1709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_sd)
1710#[inline]
1711#[target_feature(enable = "sse2")]
1712#[cfg_attr(test, assert_instr(minsd))]
1713#[stable(feature = "simd_x86", since = "1.27.0")]
1714pub unsafe fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d {
1715 minsd(a, b)
1716}
1717
1718/// Returns a new vector with the minimum values from corresponding elements in
1719/// `a` and `b`.
1720///
1721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_pd)
1722#[inline]
1723#[target_feature(enable = "sse2")]
1724#[cfg_attr(test, assert_instr(minpd))]
1725#[stable(feature = "simd_x86", since = "1.27.0")]
1726pub unsafe fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
1727 minpd(a, b)
1728}
1729
1730/// Returns a new vector with the low element of `a` replaced by multiplying the
1731/// low elements of `a` and `b`.
1732///
1733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_sd)
1734#[inline]
1735#[target_feature(enable = "sse2")]
1736#[cfg_attr(test, assert_instr(mulsd))]
1737#[stable(feature = "simd_x86", since = "1.27.0")]
1738pub unsafe fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
1739 simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b))
1740}
1741
1742/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
1743/// and `b`.
1744///
1745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_pd)
1746#[inline]
1747#[target_feature(enable = "sse2")]
1748#[cfg_attr(test, assert_instr(mulpd))]
1749#[stable(feature = "simd_x86", since = "1.27.0")]
1750pub unsafe fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_mul(a, b)
1752}
1753
/// Returns a new vector with the low element of `a` replaced by the square
/// root of the lower element of `b`.
1756///
1757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_sd)
1758#[inline]
1759#[target_feature(enable = "sse2")]
1760#[cfg_attr(test, assert_instr(sqrtsd))]
1761#[stable(feature = "simd_x86", since = "1.27.0")]
1762pub unsafe fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d {
1763 simd_insert!(a, 0, _mm_cvtsd_f64(sqrtsd(b)))
1764}
1765
1766/// Returns a new vector with the square root of each of the values in `a`.
1767///
1768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_pd)
1769#[inline]
1770#[target_feature(enable = "sse2")]
1771#[cfg_attr(test, assert_instr(sqrtpd))]
1772#[stable(feature = "simd_x86", since = "1.27.0")]
1773pub unsafe fn _mm_sqrt_pd(a: __m128d) -> __m128d {
1774 simd_fsqrt(a)
1775}
1776
/// Returns a new vector with the low element of `a` replaced by subtracting
/// the low element of `b` from the low element of `a`.
1779///
1780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_sd)
1781#[inline]
1782#[target_feature(enable = "sse2")]
1783#[cfg_attr(test, assert_instr(subsd))]
1784#[stable(feature = "simd_x86", since = "1.27.0")]
1785pub unsafe fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
1786 simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b))
1787}
1788
/// Subtracts packed double-precision (64-bit) floating-point elements in `b`
1790/// from `a`.
1791///
1792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_pd)
1793#[inline]
1794#[target_feature(enable = "sse2")]
1795#[cfg_attr(test, assert_instr(subpd))]
1796#[stable(feature = "simd_x86", since = "1.27.0")]
1797pub unsafe fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_sub(a, b)
1799}
1800
1801/// Computes the bitwise AND of packed double-precision (64-bit) floating-point
1802/// elements in `a` and `b`.
1803///
1804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_pd)
1805#[inline]
1806#[target_feature(enable = "sse2")]
1807#[cfg_attr(test, assert_instr(andps))]
1808#[stable(feature = "simd_x86", since = "1.27.0")]
1809pub unsafe fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_and_si128(a, b))
1813}
1814
1815/// Computes the bitwise NOT of `a` and then AND with `b`.
1816///
1817/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_pd)
1818#[inline]
1819#[target_feature(enable = "sse2")]
1820#[cfg_attr(test, assert_instr(andnps))]
1821#[stable(feature = "simd_x86", since = "1.27.0")]
1822pub unsafe fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_andnot_si128(a, b))
1826}
1827
1828/// Computes the bitwise OR of `a` and `b`.
1829///
1830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_pd)
1831#[inline]
1832#[target_feature(enable = "sse2")]
1833#[cfg_attr(test, assert_instr(orps))]
1834#[stable(feature = "simd_x86", since = "1.27.0")]
1835pub unsafe fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_or_si128(a, b))
1839}
1840
1841/// Computes the bitwise XOR of `a` and `b`.
1842///
1843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_pd)
1844#[inline]
1845#[target_feature(enable = "sse2")]
1846#[cfg_attr(test, assert_instr(xorps))]
1847#[stable(feature = "simd_x86", since = "1.27.0")]
1848pub unsafe fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_xor_si128(a, b))
1852}
1853
1854/// Returns a new vector with the low element of `a` replaced by the equality
1855/// comparison of the lower elements of `a` and `b`.
1856///
1857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_sd)
1858#[inline]
1859#[target_feature(enable = "sse2")]
1860#[cfg_attr(test, assert_instr(cmpeqsd))]
1861#[stable(feature = "simd_x86", since = "1.27.0")]
1862pub unsafe fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 0)
1864}
1865
1866/// Returns a new vector with the low element of `a` replaced by the less-than
1867/// comparison of the lower elements of `a` and `b`.
1868///
1869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_sd)
1870#[inline]
1871#[target_feature(enable = "sse2")]
1872#[cfg_attr(test, assert_instr(cmpltsd))]
1873#[stable(feature = "simd_x86", since = "1.27.0")]
1874pub unsafe fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 1)
1876}
1877
1878/// Returns a new vector with the low element of `a` replaced by the
1879/// less-than-or-equal comparison of the lower elements of `a` and `b`.
1880///
1881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_sd)
1882#[inline]
1883#[target_feature(enable = "sse2")]
1884#[cfg_attr(test, assert_instr(cmplesd))]
1885#[stable(feature = "simd_x86", since = "1.27.0")]
1886pub unsafe fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 2)
1888}
1889
1890/// Returns a new vector with the low element of `a` replaced by the
1891/// greater-than comparison of the lower elements of `a` and `b`.
1892///
1893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_sd)
1894#[inline]
1895#[target_feature(enable = "sse2")]
1896#[cfg_attr(test, assert_instr(cmpltsd))]
1897#[stable(feature = "simd_x86", since = "1.27.0")]
1898pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
1899 simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64))
1900}
1901
1902/// Returns a new vector with the low element of `a` replaced by the
1903/// greater-than-or-equal comparison of the lower elements of `a` and `b`.
1904///
1905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_sd)
1906#[inline]
1907#[target_feature(enable = "sse2")]
1908#[cfg_attr(test, assert_instr(cmplesd))]
1909#[stable(feature = "simd_x86", since = "1.27.0")]
1910pub unsafe fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
1911 simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64))
1912}
1913
/// Returns a new vector with the low element of `a` replaced by the result of
/// checking whether the lower elements of `a` and `b` are ordered. If neither
/// is `NaN`, the result is `0xFFFFFFFFFFFFFFFF`; otherwise it is `0`.
1918///
1919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_sd)
1920#[inline]
1921#[target_feature(enable = "sse2")]
1922#[cfg_attr(test, assert_instr(cmpordsd))]
1923#[stable(feature = "simd_x86", since = "1.27.0")]
1924pub unsafe fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 7)
1926}
1927
/// Returns a new vector with the low element of `a` replaced by the result of
/// checking whether the lower elements of `a` and `b` are unordered. If either
/// is `NaN`, the result is `0xFFFFFFFFFFFFFFFF`; otherwise it is `0`.
1931///
1932/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_sd)
1933#[inline]
1934#[target_feature(enable = "sse2")]
1935#[cfg_attr(test, assert_instr(cmpunordsd))]
1936#[stable(feature = "simd_x86", since = "1.27.0")]
1937pub unsafe fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 3)
1939}
1940
1941/// Returns a new vector with the low element of `a` replaced by the not-equal
1942/// comparison of the lower elements of `a` and `b`.
1943///
1944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_sd)
1945#[inline]
1946#[target_feature(enable = "sse2")]
1947#[cfg_attr(test, assert_instr(cmpneqsd))]
1948#[stable(feature = "simd_x86", since = "1.27.0")]
1949pub unsafe fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 4)
1951}
1952
1953/// Returns a new vector with the low element of `a` replaced by the
1954/// not-less-than comparison of the lower elements of `a` and `b`.
1955///
1956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_sd)
1957#[inline]
1958#[target_feature(enable = "sse2")]
1959#[cfg_attr(test, assert_instr(cmpnltsd))]
1960#[stable(feature = "simd_x86", since = "1.27.0")]
1961pub unsafe fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 5)
1963}
1964
1965/// Returns a new vector with the low element of `a` replaced by the
1966/// not-less-than-or-equal comparison of the lower elements of `a` and `b`.
1967///
1968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_sd)
1969#[inline]
1970#[target_feature(enable = "sse2")]
1971#[cfg_attr(test, assert_instr(cmpnlesd))]
1972#[stable(feature = "simd_x86", since = "1.27.0")]
1973pub unsafe fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 6)
1975}
1976
1977/// Returns a new vector with the low element of `a` replaced by the
1978/// not-greater-than comparison of the lower elements of `a` and `b`.
1979///
1980/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_sd)
1981#[inline]
1982#[target_feature(enable = "sse2")]
1983#[cfg_attr(test, assert_instr(cmpnltsd))]
1984#[stable(feature = "simd_x86", since = "1.27.0")]
1985pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
1986 simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64))
1987}
1988
1989/// Returns a new vector with the low element of `a` replaced by the
1990/// not-greater-than-or-equal comparison of the lower elements of `a` and `b`.
1991///
1992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_sd)
1993#[inline]
1994#[target_feature(enable = "sse2")]
1995#[cfg_attr(test, assert_instr(cmpnlesd))]
1996#[stable(feature = "simd_x86", since = "1.27.0")]
1997pub unsafe fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
1998 simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64))
1999}
2000
2001/// Compares corresponding elements in `a` and `b` for equality.
2002///
2003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_pd)
2004#[inline]
2005#[target_feature(enable = "sse2")]
2006#[cfg_attr(test, assert_instr(cmpeqpd))]
2007#[stable(feature = "simd_x86", since = "1.27.0")]
2008pub unsafe fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 0)
2010}
2011
2012/// Compares corresponding elements in `a` and `b` for less-than.
2013///
2014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_pd)
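///
/// # Examples
///
/// A sketch of the all-ones/all-zeros mask semantics (assuming an `x86_64`
/// target, where SSE2 is baseline):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use std::arch::x86_64::*;
/// let a = _mm_setr_pd(1.0, 4.0);
/// let b = _mm_setr_pd(2.0, 3.0);
/// // Each lane becomes all ones (true) or all zeros (false).
/// let m = _mm_cmplt_pd(a, b);
/// assert_eq!(_mm_movemask_pd(m), 0b01); // only the low lane compares less
/// # }
/// ```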
2015#[inline]
2016#[target_feature(enable = "sse2")]
2017#[cfg_attr(test, assert_instr(cmpltpd))]
2018#[stable(feature = "simd_x86", since = "1.27.0")]
2019pub unsafe fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 1)
2021}
2022
/// Compares corresponding elements in `a` and `b` for less-than-or-equal.
2024///
2025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_pd)
2026#[inline]
2027#[target_feature(enable = "sse2")]
2028#[cfg_attr(test, assert_instr(cmplepd))]
2029#[stable(feature = "simd_x86", since = "1.27.0")]
2030pub unsafe fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 2)
2032}
2033
2034/// Compares corresponding elements in `a` and `b` for greater-than.
2035///
2036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_pd)
2037#[inline]
2038#[target_feature(enable = "sse2")]
2039#[cfg_attr(test, assert_instr(cmpltpd))]
2040#[stable(feature = "simd_x86", since = "1.27.0")]
2041pub unsafe fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmplt_pd(b, a)
2043}
2044
2045/// Compares corresponding elements in `a` and `b` for greater-than-or-equal.
2046///
2047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_pd)
2048#[inline]
2049#[target_feature(enable = "sse2")]
2050#[cfg_attr(test, assert_instr(cmplepd))]
2051#[stable(feature = "simd_x86", since = "1.27.0")]
2052pub unsafe fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmple_pd(b, a)
2054}
2055
2056/// Compares corresponding elements in `a` and `b` to see if neither is `NaN`.
2057///
2058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_pd)
2059#[inline]
2060#[target_feature(enable = "sse2")]
2061#[cfg_attr(test, assert_instr(cmpordpd))]
2062#[stable(feature = "simd_x86", since = "1.27.0")]
2063pub unsafe fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 7)
2065}
2066
2067/// Compares corresponding elements in `a` and `b` to see if either is `NaN`.
2068///
2069/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_pd)
2070#[inline]
2071#[target_feature(enable = "sse2")]
2072#[cfg_attr(test, assert_instr(cmpunordpd))]
2073#[stable(feature = "simd_x86", since = "1.27.0")]
2074pub unsafe fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 3)
2076}
2077
2078/// Compares corresponding elements in `a` and `b` for not-equal.
2079///
2080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_pd)
2081#[inline]
2082#[target_feature(enable = "sse2")]
2083#[cfg_attr(test, assert_instr(cmpneqpd))]
2084#[stable(feature = "simd_x86", since = "1.27.0")]
2085pub unsafe fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 4)
2087}
2088
2089/// Compares corresponding elements in `a` and `b` for not-less-than.
2090///
2091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_pd)
2092#[inline]
2093#[target_feature(enable = "sse2")]
2094#[cfg_attr(test, assert_instr(cmpnltpd))]
2095#[stable(feature = "simd_x86", since = "1.27.0")]
2096pub unsafe fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 5)
2098}
2099
2100/// Compares corresponding elements in `a` and `b` for not-less-than-or-equal.
2101///
2102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_pd)
2103#[inline]
2104#[target_feature(enable = "sse2")]
2105#[cfg_attr(test, assert_instr(cmpnlepd))]
2106#[stable(feature = "simd_x86", since = "1.27.0")]
2107pub unsafe fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 6)
2109}
2110
2111/// Compares corresponding elements in `a` and `b` for not-greater-than.
2112///
2113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_pd)
2114#[inline]
2115#[target_feature(enable = "sse2")]
2116#[cfg_attr(test, assert_instr(cmpnltpd))]
2117#[stable(feature = "simd_x86", since = "1.27.0")]
2118pub unsafe fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmpnlt_pd(b, a)
2120}
2121
2122/// Compares corresponding elements in `a` and `b` for
2123/// not-greater-than-or-equal.
2124///
2125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_pd)
2126#[inline]
2127#[target_feature(enable = "sse2")]
2128#[cfg_attr(test, assert_instr(cmpnlepd))]
2129#[stable(feature = "simd_x86", since = "1.27.0")]
2130pub unsafe fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmpnle_pd(b, a)
2132}
2133
2134/// Compares the lower element of `a` and `b` for equality.
2135///
2136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comieq_sd)
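///
/// # Examples
///
/// A small sketch (assuming an `x86_64` target, where SSE2 is baseline):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use std::arch::x86_64::*;
/// let a = _mm_setr_pd(1.0, -5.0);
/// let b = _mm_setr_pd(1.0, 99.0);
/// // Only the low lanes take part; the result is 1 (true) or 0 (false).
/// assert_eq!(_mm_comieq_sd(a, b), 1);
/// # }
/// ```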
2137#[inline]
2138#[target_feature(enable = "sse2")]
2139#[cfg_attr(test, assert_instr(comisd))]
2140#[stable(feature = "simd_x86", since = "1.27.0")]
2141pub unsafe fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 {
2142 comieqsd(a, b)
2143}
2144
2145/// Compares the lower element of `a` and `b` for less-than.
2146///
2147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comilt_sd)
2148#[inline]
2149#[target_feature(enable = "sse2")]
2150#[cfg_attr(test, assert_instr(comisd))]
2151#[stable(feature = "simd_x86", since = "1.27.0")]
2152pub unsafe fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 {
2153 comiltsd(a, b)
2154}
2155
2156/// Compares the lower element of `a` and `b` for less-than-or-equal.
2157///
2158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comile_sd)
2159#[inline]
2160#[target_feature(enable = "sse2")]
2161#[cfg_attr(test, assert_instr(comisd))]
2162#[stable(feature = "simd_x86", since = "1.27.0")]
2163pub unsafe fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 {
2164 comilesd(a, b)
2165}
2166
2167/// Compares the lower element of `a` and `b` for greater-than.
2168///
2169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comigt_sd)
2170#[inline]
2171#[target_feature(enable = "sse2")]
2172#[cfg_attr(test, assert_instr(comisd))]
2173#[stable(feature = "simd_x86", since = "1.27.0")]
2174pub unsafe fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 {
2175 comigtsd(a, b)
2176}
2177
2178/// Compares the lower element of `a` and `b` for greater-than-or-equal.
2179///
2180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comige_sd)
2181#[inline]
2182#[target_feature(enable = "sse2")]
2183#[cfg_attr(test, assert_instr(comisd))]
2184#[stable(feature = "simd_x86", since = "1.27.0")]
2185pub unsafe fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 {
2186 comigesd(a, b)
2187}
2188
2189/// Compares the lower element of `a` and `b` for not-equal.
2190///
2191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comineq_sd)
2192#[inline]
2193#[target_feature(enable = "sse2")]
2194#[cfg_attr(test, assert_instr(comisd))]
2195#[stable(feature = "simd_x86", since = "1.27.0")]
2196pub unsafe fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 {
2197 comineqsd(a, b)
2198}
2199
2200/// Compares the lower element of `a` and `b` for equality.
2201///
2202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomieq_sd)
2203#[inline]
2204#[target_feature(enable = "sse2")]
2205#[cfg_attr(test, assert_instr(ucomisd))]
2206#[stable(feature = "simd_x86", since = "1.27.0")]
2207pub unsafe fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 {
2208 ucomieqsd(a, b)
2209}
2210
2211/// Compares the lower element of `a` and `b` for less-than.
2212///
2213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomilt_sd)
2214#[inline]
2215#[target_feature(enable = "sse2")]
2216#[cfg_attr(test, assert_instr(ucomisd))]
2217#[stable(feature = "simd_x86", since = "1.27.0")]
2218pub unsafe fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 {
2219 ucomiltsd(a, b)
2220}
2221
2222/// Compares the lower element of `a` and `b` for less-than-or-equal.
2223///
2224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomile_sd)
2225#[inline]
2226#[target_feature(enable = "sse2")]
2227#[cfg_attr(test, assert_instr(ucomisd))]
2228#[stable(feature = "simd_x86", since = "1.27.0")]
2229pub unsafe fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 {
2230 ucomilesd(a, b)
2231}
2232
2233/// Compares the lower element of `a` and `b` for greater-than.
2234///
2235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomigt_sd)
2236#[inline]
2237#[target_feature(enable = "sse2")]
2238#[cfg_attr(test, assert_instr(ucomisd))]
2239#[stable(feature = "simd_x86", since = "1.27.0")]
2240pub unsafe fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 {
2241 ucomigtsd(a, b)
2242}
2243
2244/// Compares the lower element of `a` and `b` for greater-than-or-equal.
2245///
2246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomige_sd)
2247#[inline]
2248#[target_feature(enable = "sse2")]
2249#[cfg_attr(test, assert_instr(ucomisd))]
2250#[stable(feature = "simd_x86", since = "1.27.0")]
2251pub unsafe fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 {
2252 ucomigesd(a, b)
2253}
2254
2255/// Compares the lower element of `a` and `b` for not-equal.
2256///
2257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomineq_sd)
2258#[inline]
2259#[target_feature(enable = "sse2")]
2260#[cfg_attr(test, assert_instr(ucomisd))]
2261#[stable(feature = "simd_x86", since = "1.27.0")]
2262pub unsafe fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
2263 ucomineqsd(a, b)
2264}
2265
2266/// Converts packed double-precision (64-bit) floating-point elements in `a` to
/// packed single-precision (32-bit) floating-point elements.
2268///
2269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_ps)
2270#[inline]
2271#[target_feature(enable = "sse2")]
2272#[cfg_attr(test, assert_instr(cvtpd2ps))]
2273#[stable(feature = "simd_x86", since = "1.27.0")]
2274pub unsafe fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
2275 let r: f32x2 = simd_cast::<_, f32x2>(a.as_f64x2());
    let zero: f32x2 = f32x2::new(0.0, 0.0);
    transmute::<f32x4, _>(simd_shuffle!(r, zero, [0, 1, 2, 3]))
2278}
2279
/// Converts packed single-precision (32-bit) floating-point elements in `a` to
/// packed double-precision (64-bit) floating-point elements.
2283///
2284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_pd)
2285#[inline]
2286#[target_feature(enable = "sse2")]
2287#[cfg_attr(test, assert_instr(cvtps2pd))]
2288#[stable(feature = "simd_x86", since = "1.27.0")]
2289pub unsafe fn _mm_cvtps_pd(a: __m128) -> __m128d {
2290 let a: f32x4 = a.as_f32x4();
    transmute(simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
2292}
2293
2294/// Converts packed double-precision (64-bit) floating-point elements in `a` to
2295/// packed 32-bit integers.
2296///
2297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epi32)
2298#[inline]
2299#[target_feature(enable = "sse2")]
2300#[cfg_attr(test, assert_instr(cvtpd2dq))]
2301#[stable(feature = "simd_x86", since = "1.27.0")]
2302pub unsafe fn _mm_cvtpd_epi32(a: __m128d) -> __m128i {
    transmute(cvtpd2dq(a))
2304}
2305
/// Converts the lower double-precision (64-bit) floating-point element in `a`
/// to a 32-bit integer.
2308///
2309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_si32)
2310#[inline]
2311#[target_feature(enable = "sse2")]
2312#[cfg_attr(test, assert_instr(cvtsd2si))]
2313#[stable(feature = "simd_x86", since = "1.27.0")]
2314pub unsafe fn _mm_cvtsd_si32(a: __m128d) -> i32 {
2315 cvtsd2si(a)
2316}
2317
/// Converts the lower double-precision (64-bit) floating-point element in `b`
/// to a single-precision (32-bit) floating-point element, stores the result in
/// the lower element of the return value, and copies the upper 3 elements from
/// `a` to the upper elements of the return value.
2322///
2323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_ss)
2324#[inline]
2325#[target_feature(enable = "sse2")]
2326#[cfg_attr(test, assert_instr(cvtsd2ss))]
2327#[stable(feature = "simd_x86", since = "1.27.0")]
2328pub unsafe fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
2329 cvtsd2ss(a, b)
2330}
2331
2332/// Returns the lower double-precision (64-bit) floating-point element of `a`.
2333///
2334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_f64)
2335#[inline]
2336#[target_feature(enable = "sse2")]
2337#[stable(feature = "simd_x86", since = "1.27.0")]
2338pub unsafe fn _mm_cvtsd_f64(a: __m128d) -> f64 {
2339 simd_extract!(a, 0)
2340}
2341
/// Converts the lower single-precision (32-bit) floating-point element in `b`
/// to a double-precision (64-bit) floating-point element, stores the result in
/// the lower element of the return value, and copies the upper element from
/// `a` to the upper element of the return value.
2346///
2347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_sd)
2348#[inline]
2349#[target_feature(enable = "sse2")]
2350#[cfg_attr(test, assert_instr(cvtss2sd))]
2351#[stable(feature = "simd_x86", since = "1.27.0")]
2352pub unsafe fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
2353 cvtss2sd(a, b)
2354}
2355
2356/// Converts packed double-precision (64-bit) floating-point elements in `a` to
2357/// packed 32-bit integers with truncation.
2358///
2359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epi32)
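///
/// # Examples
///
/// An illustrative sketch of the truncating behavior (assuming an `x86_64`
/// target); contrast [`_mm_cvtpd_epi32`], which uses the current rounding
/// mode (round-to-nearest-even by default):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use std::arch::x86_64::*;
/// let a = _mm_setr_pd(1.7, -1.7);
/// // Truncation rounds toward zero, so 1.7 -> 1 and -1.7 -> -1; the two
/// // upper lanes of the result are zeroed.
/// let r = _mm_cvttpd_epi32(a);
/// let e = _mm_setr_epi32(1, -1, 0, 0);
/// assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi32(r, e)), 0xffff);
/// # }
/// ```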
2360#[inline]
2361#[target_feature(enable = "sse2")]
2362#[cfg_attr(test, assert_instr(cvttpd2dq))]
2363#[stable(feature = "simd_x86", since = "1.27.0")]
2364pub unsafe fn _mm_cvttpd_epi32(a: __m128d) -> __m128i {
    transmute(cvttpd2dq(a))
2366}
2367
2368/// Converts the lower double-precision (64-bit) floating-point element in `a`
2369/// to a 32-bit integer with truncation.
2370///
2371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_si32)
2372#[inline]
2373#[target_feature(enable = "sse2")]
2374#[cfg_attr(test, assert_instr(cvttsd2si))]
2375#[stable(feature = "simd_x86", since = "1.27.0")]
2376pub unsafe fn _mm_cvttsd_si32(a: __m128d) -> i32 {
2377 cvttsd2si(a)
2378}
2379
2380/// Converts packed single-precision (32-bit) floating-point elements in `a` to
2381/// packed 32-bit integers with truncation.
2382///
2383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi32)
2384#[inline]
2385#[target_feature(enable = "sse2")]
2386#[cfg_attr(test, assert_instr(cvttps2dq))]
2387#[stable(feature = "simd_x86", since = "1.27.0")]
2388pub unsafe fn _mm_cvttps_epi32(a: __m128) -> __m128i {
    transmute(cvttps2dq(a))
2390}
2391
/// Copies double-precision (64-bit) floating-point element `a` to the lower
/// element of the return value, and zeroes the upper element.
2394///
2395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_sd)
2396#[inline]
2397#[target_feature(enable = "sse2")]
2398#[stable(feature = "simd_x86", since = "1.27.0")]
2399pub unsafe fn _mm_set_sd(a: f64) -> __m128d {
    _mm_set_pd(0.0, a)
2401}
2402
/// Broadcasts double-precision (64-bit) floating-point value `a` to all elements
2404/// of the return value.
2405///
2406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_pd)
2407#[inline]
2408#[target_feature(enable = "sse2")]
2409#[stable(feature = "simd_x86", since = "1.27.0")]
2410pub unsafe fn _mm_set1_pd(a: f64) -> __m128d {
    _mm_set_pd(a, a)
2412}
2413
/// Broadcasts double-precision (64-bit) floating-point value `a` to all elements
2415/// of the return value.
2416///
2417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd1)
2418#[inline]
2419#[target_feature(enable = "sse2")]
2420#[stable(feature = "simd_x86", since = "1.27.0")]
2421pub unsafe fn _mm_set_pd1(a: f64) -> __m128d {
    _mm_set_pd(a, a)
2423}
2424
2425/// Sets packed double-precision (64-bit) floating-point elements in the return
2426/// value with the supplied values.
2427///
2428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd)
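///
/// # Examples
///
/// A sketch of the argument order, which is easy to get backwards (assuming
/// an `x86_64` target):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use std::arch::x86_64::*;
/// // The *first* argument lands in the *high* lane.
/// let v = _mm_set_pd(2.0, 1.0);
/// assert_eq!(_mm_cvtsd_f64(v), 1.0); // the low lane holds the second argument
/// // `_mm_setr_pd` takes the lanes in memory (low-to-high) order instead.
/// # }
/// ```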
2429#[inline]
2430#[target_feature(enable = "sse2")]
2431#[stable(feature = "simd_x86", since = "1.27.0")]
2432pub unsafe fn _mm_set_pd(a: f64, b: f64) -> __m128d {
2433 __m128d(b, a)
2434}
2435
2436/// Sets packed double-precision (64-bit) floating-point elements in the return
2437/// value with the supplied values in reverse order.
2438///
2439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_pd)
2440#[inline]
2441#[target_feature(enable = "sse2")]
2442#[stable(feature = "simd_x86", since = "1.27.0")]
2443pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
    _mm_set_pd(b, a)
2445}
2446
2447/// Returns packed double-precision (64-bit) floating-point elements with all
2448/// zeros.
2449///
2450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_pd)
2451#[inline]
2452#[target_feature(enable = "sse2")]
2453#[cfg_attr(test, assert_instr(xorps))] // FIXME xorpd expected
2454#[stable(feature = "simd_x86", since = "1.27.0")]
2455pub unsafe fn _mm_setzero_pd() -> __m128d {
    _mm_set_pd(0.0, 0.0)
2457}
2458
2459/// Returns a mask of the most significant bit of each element in `a`.
2460///
2461/// The mask is stored in the 2 least significant bits of the return value.
2462/// All other bits are set to `0`.
2463///
2464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_pd)
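///
/// # Examples
///
/// A minimal sketch (assuming an `x86_64` target, where SSE2 is baseline):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use std::arch::x86_64::*;
/// let a = _mm_setr_pd(-1.0, 1.0);
/// // Bit 0 holds the sign bit of the low lane, bit 1 that of the high lane.
/// assert_eq!(_mm_movemask_pd(a), 0b01);
/// # }
/// ```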
2465#[inline]
2466#[target_feature(enable = "sse2")]
2467#[cfg_attr(test, assert_instr(movmskpd))]
2468#[stable(feature = "simd_x86", since = "1.27.0")]
2469pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 {
2470 // Propagate the highest bit to the rest, because simd_bitmask
2471 // requires all-1 or all-0.
    let mask: i64x2 = simd_lt(transmute(a), i64x2::splat(0));
2473 simd_bitmask::<i64x2, u8>(mask).into()
2474}
2475
2476/// Loads 128-bits (composed of 2 packed double-precision (64-bit)
2477/// floating-point elements) from memory into the returned vector.
2478/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
2479/// exception may be generated.
2480///
2481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd)
2482#[inline]
2483#[target_feature(enable = "sse2")]
2484#[cfg_attr(test, assert_instr(movaps))]
2485#[stable(feature = "simd_x86", since = "1.27.0")]
2486#[allow(clippy::cast_ptr_alignment)]
2487pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
2488 *(mem_addr as *const __m128d)
2489}
2490
/// Loads a 64-bit double-precision value to the low element of a
/// 128-bit vector of `[2 x double]` and clears the upper element.
2493///
2494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_sd)
2495#[inline]
2496#[target_feature(enable = "sse2")]
2497#[cfg_attr(test, assert_instr(movsd))]
2498#[stable(feature = "simd_x86", since = "1.27.0")]
2499pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
    _mm_setr_pd(*mem_addr, 0.)
2501}
2502
2503/// Loads a double-precision value into the high-order bits of a 128-bit
2504/// vector of `[2 x double]`. The low-order bits are copied from the low-order
2505/// bits of the first operand.
2506///
2507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadh_pd)
2508#[inline]
2509#[target_feature(enable = "sse2")]
2510#[cfg_attr(test, assert_instr(movhps))]
2511#[stable(feature = "simd_x86", since = "1.27.0")]
2512pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
    _mm_setr_pd(simd_extract!(a, 0), *mem_addr)
2514}
2515
2516/// Loads a double-precision value into the low-order bits of a 128-bit
2517/// vector of `[2 x double]`. The high-order bits are copied from the
2518/// high-order bits of the first operand.
2519///
2520/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_pd)
2521#[inline]
2522#[target_feature(enable = "sse2")]
2523#[cfg_attr(test, assert_instr(movlps))]
2524#[stable(feature = "simd_x86", since = "1.27.0")]
2525pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
    _mm_setr_pd(*mem_addr, simd_extract!(a, 1))
2527}
2528
2529/// Stores a 128-bit floating point vector of `[2 x double]` to a 128-bit
2530/// aligned memory location.
2531/// To minimize caching, the data is flagged as non-temporal (unlikely to be
2532/// used again soon).
2533///
2534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_pd)
2535///
2536/// # Safety of non-temporal stores
2537///
2538/// After using this intrinsic, but before any other access to the memory that this intrinsic
2539/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
2540/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
2541/// return.
2542///
2543/// See [`_mm_sfence`] for details.
2544#[inline]
2545#[target_feature(enable = "sse2")]
2546#[cfg_attr(test, assert_instr(movntps))] // FIXME movntpd
2547#[stable(feature = "simd_x86", since = "1.27.0")]
2548#[allow(clippy::cast_ptr_alignment)]
2549pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
    intrinsics::nontemporal_store(mem_addr as *mut __m128d, a);
2551}
2552
2553/// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
2554/// memory location.
2555///
2556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_sd)
2557#[inline]
2558#[target_feature(enable = "sse2")]
2559#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))]
2560#[stable(feature = "simd_x86", since = "1.27.0")]
2561pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
2562 *mem_addr = simd_extract!(a, 0)
2563}
2564
2565/// Stores 128-bits (composed of 2 packed double-precision (64-bit)
2566/// floating-point elements) from `a` into memory. `mem_addr` must be aligned
2567/// on a 16-byte boundary or a general-protection exception may be generated.
2568///
2569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd)
2570#[inline]
2571#[target_feature(enable = "sse2")]
2572#[cfg_attr(test, assert_instr(movaps))]
2573#[stable(feature = "simd_x86", since = "1.27.0")]
2574#[allow(clippy::cast_ptr_alignment)]
2575pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
2576 *(mem_addr as *mut __m128d) = a;
2577}
2578
2579/// Stores 128-bits (composed of 2 packed double-precision (64-bit)
2580/// floating-point elements) from `a` into memory.
2581/// `mem_addr` does not need to be aligned on any particular boundary.
2582///
2583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_pd)
2584#[inline]
2585#[target_feature(enable = "sse2")]
2586#[cfg_attr(test, assert_instr(movups))] // FIXME movupd expected
2587#[stable(feature = "simd_x86", since = "1.27.0")]
2588pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
    mem_addr.cast::<__m128d>().write_unaligned(a);
2590}
2591
2592/// Stores the lower double-precision (64-bit) floating-point element from `a`
2593/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
2594/// 16-byte boundary or a general-protection exception may be generated.
2595///
2596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store1_pd)
2597#[inline]
2598#[target_feature(enable = "sse2")]
2599#[stable(feature = "simd_x86", since = "1.27.0")]
2600#[allow(clippy::cast_ptr_alignment)]
2601pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
2602 let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2603 *(mem_addr as *mut __m128d) = b;
2604}
2605
2606/// Stores the lower double-precision (64-bit) floating-point element from `a`
2607/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
2608/// 16-byte boundary or a general-protection exception may be generated.
2609///
2610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd1)
2611#[inline]
2612#[target_feature(enable = "sse2")]
2613#[stable(feature = "simd_x86", since = "1.27.0")]
2614#[allow(clippy::cast_ptr_alignment)]
2615pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
2616 let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2617 *(mem_addr as *mut __m128d) = b;
2618}
2619
2620/// Stores 2 double-precision (64-bit) floating-point elements from `a` into
2621/// memory in reverse order.
2622/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
2623/// exception may be generated.
2624///
2625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storer_pd)
2626#[inline]
2627#[target_feature(enable = "sse2")]
2628#[stable(feature = "simd_x86", since = "1.27.0")]
2629#[allow(clippy::cast_ptr_alignment)]
2630pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
2631 let b: __m128d = simd_shuffle!(a, a, [1, 0]);
2632 *(mem_addr as *mut __m128d) = b;
2633}
2634
2635/// Stores the upper 64 bits of a 128-bit vector of `[2 x double]` to a
2636/// memory location.
2637///
2638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeh_pd)
2639#[inline]
2640#[target_feature(enable = "sse2")]
2641#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movhps))]
2642#[stable(feature = "simd_x86", since = "1.27.0")]
2643pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
2644 *mem_addr = simd_extract!(a, 1);
2645}
2646
2647/// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
2648/// memory location.
2649///
2650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_pd)
2651#[inline]
2652#[target_feature(enable = "sse2")]
2653#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))]
2654#[stable(feature = "simd_x86", since = "1.27.0")]
2655pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
2656 *mem_addr = simd_extract!(a, 0);
2657}
2658
2659/// Loads a double-precision (64-bit) floating-point element from memory
/// into both elements of the returned vector.
2661///
2662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load1_pd)
2663#[inline]
2664#[target_feature(enable = "sse2")]
2665// #[cfg_attr(test, assert_instr(movapd))] // FIXME LLVM uses different codegen
2666#[stable(feature = "simd_x86", since = "1.27.0")]
2667pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
2668 let d: f64 = *mem_addr;
    _mm_setr_pd(d, d)
2670}
2671
2672/// Loads a double-precision (64-bit) floating-point element from memory
/// into both elements of the returned vector.
2674///
2675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd1)
2676#[inline]
2677#[target_feature(enable = "sse2")]
2678// #[cfg_attr(test, assert_instr(movapd))] // FIXME same as _mm_load1_pd
2679#[stable(feature = "simd_x86", since = "1.27.0")]
2680pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
2681 _mm_load1_pd(mem_addr)
2682}
2683
2684/// Loads 2 double-precision (64-bit) floating-point elements from memory into
2685/// the returned vector in reverse order. `mem_addr` must be aligned on a
2686/// 16-byte boundary or a general-protection exception may be generated.
2687///
2688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadr_pd)
2689#[inline]
2690#[target_feature(enable = "sse2")]
2691#[cfg_attr(test, assert_instr(movaps))]
2692#[stable(feature = "simd_x86", since = "1.27.0")]
2693pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
2694 let a: __m128d = _mm_load_pd(mem_addr);
2695 simd_shuffle!(a, a, [1, 0])
2696}
2697
2698/// Loads 128-bits (composed of 2 packed double-precision (64-bit)
2699/// floating-point elements) from memory into the returned vector.
2700/// `mem_addr` does not need to be aligned on any particular boundary.
2701///
2702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_pd)
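///
/// # Examples
///
/// A sketch of an unaligned load from a slice (assuming an `x86_64` target,
/// where SSE2 is baseline):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use std::arch::x86_64::*;
/// let data = [1.0f64, 2.0, 3.0];
/// // No 16-byte alignment requirement: any valid `*const f64` works.
/// let v = _mm_loadu_pd(data[1..].as_ptr());
/// assert_eq!(_mm_cvtsd_f64(v), 2.0);
/// assert_eq!(_mm_cvtsd_f64(_mm_unpackhi_pd(v, v)), 3.0);
/// # }
/// ```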
2703#[inline]
2704#[target_feature(enable = "sse2")]
2705#[cfg_attr(test, assert_instr(movups))]
2706#[stable(feature = "simd_x86", since = "1.27.0")]
2707pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
2708 let mut dst: __m128d = _mm_undefined_pd();
2709 ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m128d>(),
2713 );
2714 dst
2715}
2716
2717/// Constructs a 128-bit floating-point vector of `[2 x double]` from two
2718/// 128-bit vector parameters of `[2 x double]`, using the immediate-value
2719/// parameter as a specifier.
2720///
2721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_pd)
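///
/// A minimal usage sketch (illustrative only; assumes SSE2 is enabled in the
/// calling context). Bit 0 of `MASK` picks which lane of `a` becomes the
/// lower lane of the result; bit 1 picks which lane of `b` becomes the upper
/// lane:
///
/// ```ignore
/// unsafe {
///     let a = _mm_setr_pd(1.0, 2.0);
///     let b = _mm_setr_pd(3.0, 4.0);
///     let r = _mm_shuffle_pd::<0b01>(a, b);
///     // r now holds [2.0, 3.0]
/// }
/// ```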
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(shufps, MASK = 2))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(MASK, 8);
    simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2])
}

/// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower
/// 64 bits are set to the lower 64 bits of the second parameter. The upper
/// 64 bits are set to the upper 64 bits of the first parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_sd)
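///
/// A minimal usage sketch (illustrative only; assumes SSE2 is enabled in the
/// calling context):
///
/// ```ignore
/// unsafe {
///     let a = _mm_setr_pd(1.0, 2.0);
///     let b = _mm_setr_pd(3.0, 4.0);
///     let r = _mm_move_sd(a, b);
///     // r now holds [3.0, 2.0]: low lane from `b`, high lane from `a`
/// }
/// ```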
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
    _mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1))
}

/// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
/// floating-point vector of `[4 x float]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_ps)
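///
/// A minimal usage sketch (illustrative only; assumes SSE2 is enabled in the
/// calling context). The casts in this family are bitwise reinterpretations,
/// so a round trip through another vector type preserves every bit:
///
/// ```ignore
/// unsafe {
///     let a = _mm_setr_pd(1.0, 2.0);
///     let r = _mm_castps_pd(_mm_castpd_ps(a));
///     // r is bit-identical to a
/// }
/// ```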
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castpd_ps(a: __m128d) -> __m128 {
    transmute(a)
}

/// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
/// integer vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castpd_si128(a: __m128d) -> __m128i {
    transmute(a)
}

/// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
/// floating-point vector of `[2 x double]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castps_pd(a: __m128) -> __m128d {
    transmute(a)
}

/// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
/// integer vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castps_si128(a: __m128) -> __m128i {
    transmute(a)
}

/// Casts a 128-bit integer vector into a 128-bit floating-point vector
/// of `[2 x double]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castsi128_pd(a: __m128i) -> __m128d {
    transmute(a)
}

/// Casts a 128-bit integer vector into a 128-bit floating-point vector
/// of `[4 x float]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castsi128_ps(a: __m128i) -> __m128 {
    transmute(a)
}

/// Returns a vector of type `__m128d` with indeterminate elements.
/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
/// In practice, this is equivalent to [`mem::zeroed`].
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_undefined_pd() -> __m128d {
    __m128d(0.0, 0.0)
}

/// Returns a vector of type `__m128i` with indeterminate elements.
/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
/// In practice, this is equivalent to [`mem::zeroed`].
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_undefined_si128() -> __m128i {
    __m128i(0, 0)
}

/// The resulting `__m128d` element is composed of the high-order values of
/// the two `__m128d` interleaved input elements, i.e.:
///
/// * The `[127:64]` bits are copied from the `[127:64]` bits of the second
///   input
/// * The `[63:0]` bits are copied from the `[127:64]` bits of the first
///   input
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_pd)
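///
/// A minimal usage sketch (illustrative only; assumes SSE2 is enabled in the
/// calling context):
///
/// ```ignore
/// unsafe {
///     let a = _mm_setr_pd(1.0, 2.0);
///     let b = _mm_setr_pd(3.0, 4.0);
///     let r = _mm_unpackhi_pd(a, b);
///     // r now holds [2.0, 4.0]: the upper lanes of `a` and `b`
/// }
/// ```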
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_shuffle!(a, b, [1, 3])
}

/// The resulting `__m128d` element is composed of the low-order values of
/// the two `__m128d` interleaved input elements, i.e.:
///
/// * The `[127:64]` bits are copied from the `[63:0]` bits of the second input
/// * The `[63:0]` bits are copied from the `[63:0]` bits of the first input
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_pd)
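///
/// A minimal usage sketch (illustrative only; assumes SSE2 is enabled in the
/// calling context):
///
/// ```ignore
/// unsafe {
///     let a = _mm_setr_pd(1.0, 2.0);
///     let b = _mm_setr_pd(3.0, 4.0);
///     let r = _mm_unpacklo_pd(a, b);
///     // r now holds [1.0, 3.0]: the lower lanes of `a` and `b`
/// }
/// ```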
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_shuffle!(a, b, [0, 2])
}

#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.sse2.pause"]
    fn pause();
    #[link_name = "llvm.x86.sse2.clflush"]
    fn clflush(p: *const u8);
    #[link_name = "llvm.x86.sse2.lfence"]
    fn lfence();
    #[link_name = "llvm.x86.sse2.mfence"]
    fn mfence();
    #[link_name = "llvm.x86.sse2.pmadd.wd"]
    fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
    #[link_name = "llvm.x86.sse2.psad.bw"]
    fn psadbw(a: u8x16, b: u8x16) -> u64x2;
    #[link_name = "llvm.x86.sse2.psll.w"]
    fn psllw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psll.d"]
    fn pslld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psll.q"]
    fn psllq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.psra.w"]
    fn psraw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psra.d"]
    fn psrad(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.w"]
    fn psrlw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psrl.d"]
    fn psrld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.q"]
    fn psrlq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.cvtps2dq"]
    fn cvtps2dq(a: __m128) -> i32x4;
    #[link_name = "llvm.x86.sse2.maskmov.dqu"]
    fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
    #[link_name = "llvm.x86.sse2.packsswb.128"]
    fn packsswb(a: i16x8, b: i16x8) -> i8x16;
    #[link_name = "llvm.x86.sse2.packssdw.128"]
    fn packssdw(a: i32x4, b: i32x4) -> i16x8;
    #[link_name = "llvm.x86.sse2.packuswb.128"]
    fn packuswb(a: i16x8, b: i16x8) -> u8x16;
    #[link_name = "llvm.x86.sse2.max.sd"]
    fn maxsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.max.pd"]
    fn maxpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.sd"]
    fn minsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.pd"]
    fn minpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.sqrt.sd"]
    fn sqrtsd(a: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.sqrt.pd"]
    fn sqrtpd(a: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.cmp.sd"]
    fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.sse2.cmp.pd"]
    fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.sse2.comieq.sd"]
    fn comieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comilt.sd"]
    fn comiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comile.sd"]
    fn comilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comigt.sd"]
    fn comigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comige.sd"]
    fn comigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comineq.sd"]
    fn comineqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomieq.sd"]
    fn ucomieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomilt.sd"]
    fn ucomiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomile.sd"]
    fn ucomilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomigt.sd"]
    fn ucomigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomige.sd"]
    fn ucomigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomineq.sd"]
    fn ucomineqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvtpd2dq"]
    fn cvtpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvtsd2si"]
    fn cvtsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvtsd2ss"]
    fn cvtsd2ss(a: __m128, b: __m128d) -> __m128;
    #[link_name = "llvm.x86.sse2.cvtss2sd"]
    fn cvtss2sd(a: __m128d, b: __m128) -> __m128d;
    #[link_name = "llvm.x86.sse2.cvttpd2dq"]
    fn cvttpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvttsd2si"]
    fn cvttsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvttps2dq"]
    fn cvttps2dq(a: __m128) -> i32x4;
}

#[cfg(test)]
mod tests {
    use crate::{
        core_arch::{simd::*, x86::*},
        hint::black_box,
    };
    use std::{
        boxed, f32,
        f64::{self, NAN},
        i32,
        mem::{self, transmute},
        ptr,
    };
    use stdarch_test::simd_test;

    #[test]
    fn test_mm_pause() {
        unsafe { _mm_pause() }
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_clflush() {
        let x = 0_u8;
        _mm_clflush(ptr::addr_of!(x));
    }

    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_lfence() {
        _mm_lfence();
    }

    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_mfence() {
        _mm_mfence();
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_add_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi8_overflow() {
        let a = _mm_set1_epi8(0x7F);
        let b = _mm_set1_epi8(1);
        let r = _mm_add_epi8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(-128));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_add_epi16(a, b);
        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(4, 5, 6, 7);
        let r = _mm_add_epi32(a, b);
        let e = _mm_setr_epi32(4, 6, 8, 10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi64() {
        let a = _mm_setr_epi64x(0, 1);
        let b = _mm_setr_epi64x(2, 3);
        let r = _mm_add_epi64(a, b);
        let e = _mm_setr_epi64x(2, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_adds_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi8_saturate_positive() {
        let a = _mm_set1_epi8(0x7F);
        let b = _mm_set1_epi8(1);
        let r = _mm_adds_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi8_saturate_negative() {
        let a = _mm_set1_epi8(-0x80);
        let b = _mm_set1_epi8(-1);
        let r = _mm_adds_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_adds_epi16(a, b);
        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi16_saturate_positive() {
        let a = _mm_set1_epi16(0x7FFF);
        let b = _mm_set1_epi16(1);
        let r = _mm_adds_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi16_saturate_negative() {
        let a = _mm_set1_epi16(-0x8000);
        let b = _mm_set1_epi16(-1);
        let r = _mm_adds_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_adds_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu8_saturate() {
        let a = _mm_set1_epi8(!0);
        let b = _mm_set1_epi8(1);
        let r = _mm_adds_epu8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_adds_epu16(a, b);
        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu16_saturate() {
        let a = _mm_set1_epi16(!0);
        let b = _mm_set1_epi16(1);
        let r = _mm_adds_epu16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_avg_epu8() {
        let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
        let r = _mm_avg_epu8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(6));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_avg_epu16() {
        let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
        let r = _mm_avg_epu16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(6));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_madd_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm_madd_epi16(a, b);
        let e = _mm_setr_epi32(29, 81, 149, 233);
        assert_eq_m128i(r, e);

        // Test large values.
        // MIN*MIN+MIN*MIN will overflow into i32::MIN.
        let a = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MAX,
            0,
            0,
        );
        let b = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MAX,
            i16::MIN,
            0,
            0,
        );
        let r = _mm_madd_epi16(a, b);
        let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_max_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let r = _mm_max_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_max_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(!0);
        let r = _mm_max_epu8(a, b);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_min_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let r = _mm_min_epi16(a, b);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_min_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(!0);
        let r = _mm_min_epu8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mulhi_epi16() {
        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
        let r = _mm_mulhi_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-16));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mulhi_epu16() {
        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
        let r = _mm_mulhi_epu16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(15));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mullo_epi16() {
        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
        let r = _mm_mullo_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-17960));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mul_epu32() {
        let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
        let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
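        // Only the low 32 bits of each 64-bit lane are multiplied; the second
        // lanes (1 << 34 and 1 << 35) have zero low words, hence the 0 below.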
        let r = _mm_mul_epu32(a, b);
        let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sad_epu8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
            1, 2, 3, 4,
            155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
            1, 2, 3, 4,
        );
        let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
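        // Each 64-bit result lane is the sum of absolute differences of the
        // corresponding eight byte pairs, e.g. 255 + 254 + 253 + 252 + 1 + 1 + 1 + 3 = 1020.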
        let r = _mm_sad_epu8(a, b);
        let e = _mm_setr_epi64x(1020, 614);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi8() {
        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
        let r = _mm_sub_epi8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi16() {
        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
        let r = _mm_sub_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi32() {
        let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
        let r = _mm_sub_epi32(a, b);
        assert_eq_m128i(r, _mm_set1_epi32(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi64() {
        let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
        let r = _mm_sub_epi64(a, b);
        assert_eq_m128i(r, _mm_set1_epi64x(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi8() {
        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
        let r = _mm_subs_epi8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi8_saturate_positive() {
        let a = _mm_set1_epi8(0x7F);
        let b = _mm_set1_epi8(-1);
        let r = _mm_subs_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi8_saturate_negative() {
        let a = _mm_set1_epi8(-0x80);
        let b = _mm_set1_epi8(1);
        let r = _mm_subs_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi16() {
        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
        let r = _mm_subs_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi16_saturate_positive() {
        let a = _mm_set1_epi16(0x7FFF);
        let b = _mm_set1_epi16(-1);
        let r = _mm_subs_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi16_saturate_negative() {
        let a = _mm_set1_epi16(-0x8000);
        let b = _mm_set1_epi16(1);
        let r = _mm_subs_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu8() {
        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
        let r = _mm_subs_epu8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu8_saturate() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let r = _mm_subs_epu8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu16() {
        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
        let r = _mm_subs_epu16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu16_saturate() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let r = _mm_subs_epu16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_si128() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_slli_si128::<1>(a);
        let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_slli_si128::<15>(a);
        let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_slli_si128::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi8(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_slli_epi16::<4>(a);
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
        );
        let r = _mm_slli_epi16::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi16(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sll_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
        );
        let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16));
        assert_eq_m128i(r, _mm_set1_epi16(0));
        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi16(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_slli_epi32::<4>(a);
        assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
        let r = _mm_slli_epi32::<32>(a);
        assert_eq_m128i(r, _mm_set1_epi32(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sll_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
        let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32));
        assert_eq_m128i(r, _mm_set1_epi32(0));
        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi32(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_epi64() {
        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
        let r = _mm_slli_epi64::<4>(a);
        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
        let r = _mm_slli_epi64::<64>(a);
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sll_epi64() {
        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
        let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64));
        assert_eq_m128i(r, _mm_set1_epi64x(0));
        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srai_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_srai_epi16::<4>(a);
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
        );
        let r = _mm_srai_epi16::<16>(a);
        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sra_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
        );
        let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16));
        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srai_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_srai_epi32::<4>(a);
        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
        let r = _mm_srai_epi32::<32>(a);
        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sra_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
        let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32));
        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_si128() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_srli_si128::<1>(a);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
        );
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_srli_si128::<15>(a);
        let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_srli_si128::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi8(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_srli_epi16::<4>(a);
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
        );
        let r = _mm_srli_epi16::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi16(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srl_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
        );
        let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16));
        assert_eq_m128i(r, _mm_set1_epi16(0));
        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi16(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_srli_epi32::<4>(a);
        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
        let r = _mm_srli_epi32::<32>(a);
        assert_eq_m128i(r, _mm_set1_epi32(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srl_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
        let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32));
        assert_eq_m128i(r, _mm_set1_epi32(0));
        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi32(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_epi64() {
        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
        let r = _mm_srli_epi64::<4>(a);
        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
        let r = _mm_srli_epi64::<64>(a);
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srl_epi64() {
        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
        let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64));
        assert_eq_m128i(r, _mm_set1_epi64x(0));
        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_and_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_and_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_andnot_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
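        // `_mm_andnot_si128` computes `!a & b`, so `!5 & 3 == 2`.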
        let r = _mm_andnot_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(2));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_or_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_or_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(7));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_xor_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_xor_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(6));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_epi8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_cmpeq_epi8(a, b);
        #[rustfmt::skip]
        assert_eq_m128i(
            r,
            _mm_setr_epi8(
                0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
            )
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
        let r = _mm_cmpeq_epi16(a, b);
        assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(3, 2, 2, 0);
        let r = _mm_cmpeq_epi32(a, b);
        assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_epi8() {
        let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        let b = _mm_set1_epi8(0);
        let r = _mm_cmpgt_epi8(a, b);
        let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_epi16() {
        let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
        let b = _mm_set1_epi16(0);
        let r = _mm_cmpgt_epi16(a, b);
        let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_epi32() {
        let a = _mm_set_epi32(5, 0, 0, 0);
        let b = _mm_set1_epi32(0);
        let r = _mm_cmpgt_epi32(a, b);
        assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_epi8() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm_cmplt_epi8(a, b);
        let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_epi16() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm_cmplt_epi16(a, b);
        let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_epi32() {
        let a = _mm_set1_epi32(0);
        let b = _mm_set_epi32(5, 0, 0, 0);
        let r = _mm_cmplt_epi32(a, b);
        assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtepi32_pd() {
        let a = _mm_set_epi32(35, 25, 15, 5);
        let r = _mm_cvtepi32_pd(a);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsi32_sd() {
        let a = _mm_set1_pd(3.5);
        let r = _mm_cvtsi32_sd(a, 5);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtepi32_ps() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let r = _mm_cvtepi32_ps(a);
        assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtps_epi32() {
        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let r = _mm_cvtps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsi32_si128() {
        let r = _mm_cvtsi32_si128(5);
        assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsi128_si32() {
        let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
        assert_eq!(r, 5);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_epi64x() {
        let r = _mm_set_epi64x(0, 1);
        assert_eq_m128i(r, _mm_setr_epi64x(1, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_epi32() {
        let r = _mm_set_epi32(0, 1, 2, 3);
        assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_epi16() {
        let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_epi8() {
        #[rustfmt::skip]
        let r = _mm_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            15, 14, 13, 12, 11, 10, 9, 8,
            7, 6, 5, 4, 3, 2, 1, 0,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_epi64x() {
        let r = _mm_set1_epi64x(1);
        assert_eq_m128i(r, _mm_set1_epi64x(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_epi32() {
        let r = _mm_set1_epi32(1);
        assert_eq_m128i(r, _mm_set1_epi32(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_epi16() {
        let r = _mm_set1_epi16(1);
        assert_eq_m128i(r, _mm_set1_epi16(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_epi8() {
        let r = _mm_set1_epi8(1);
        assert_eq_m128i(r, _mm_set1_epi8(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setr_epi32() {
        let r = _mm_setr_epi32(0, 1, 2, 3);
        assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setr_epi16() {
        let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setr_epi8() {
        #[rustfmt::skip]
        let r = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setzero_si128() {
        let r = _mm_setzero_si128();
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadl_epi64() {
        let a = _mm_setr_epi64x(6, 5);
        let r = _mm_loadl_epi64(ptr::addr_of!(a));
        assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_si128() {
        let a = _mm_set_epi64x(5, 6);
        let r = _mm_load_si128(ptr::addr_of!(a) as *const _);
        assert_eq_m128i(a, r);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadu_si128() {
        let a = _mm_set_epi64x(5, 6);
        let r = _mm_loadu_si128(ptr::addr_of!(a) as *const _);
        assert_eq_m128i(a, r);
    }

    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    // (non-temporal store)
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_maskmoveu_si128() {
        let a = _mm_set1_epi8(9);
        #[rustfmt::skip]
        let mask = _mm_set_epi8(
            0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0,
        );
        let mut r = _mm_set1_epi8(0);
        _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8);
        let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_si128() {
        let a = _mm_set1_epi8(9);
        let mut r = _mm_set1_epi8(0);
        _mm_store_si128(&mut r, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeu_si128() {
        let a = _mm_set1_epi8(9);
        let mut r = _mm_set1_epi8(0);
        _mm_storeu_si128(&mut r, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storel_epi64() {
        let a = _mm_setr_epi64x(2, 9);
        let mut r = _mm_set1_epi8(0);
        _mm_storel_epi64(&mut r, a);
        assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
    }

    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    // (non-temporal store)
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_stream_si128() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let mut r = _mm_undefined_si128();
        _mm_stream_si128(ptr::addr_of_mut!(r), a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    // (non-temporal store)
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_stream_si32() {
        let a: i32 = 7;
        let mut mem = boxed::Box::<i32>::new(-1);
        _mm_stream_si32(ptr::addr_of_mut!(*mem), a);
        assert_eq!(a, *mem);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_move_epi64() {
        let a = _mm_setr_epi64x(5, 6);
        let r = _mm_move_epi64(a);
        assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_packs_epi16() {
        let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
        let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
        let r = _mm_packs_epi16(a, b);
        #[rustfmt::skip]
        assert_eq_m128i(
            r,
            _mm_setr_epi8(
                0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
            )
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_packs_epi32() {
        let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
        let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
        let r = _mm_packs_epi32(a, b);
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF),
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_packus_epi16() {
        let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
        let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
        let r = _mm_packus_epi16(a, b);
        assert_eq_m128i(
            r,
            _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0),
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_extract_epi16() {
        let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
        let r1 = _mm_extract_epi16::<0>(a);
        let r2 = _mm_extract_epi16::<3>(a);
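        // The extracted word is zero-extended into the `i32` return value,
        // so lane 0 (-1_i16) reads back as 0xFFFF.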
        assert_eq!(r1, 0xFFFF);
        assert_eq!(r2, 3);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_insert_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_insert_epi16::<0>(a, 9);
        let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_movemask_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
            0b0101, 0b1111_0000u8 as i8, 0, 0,
            0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101,
            0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
        );
        let r = _mm_movemask_epi8(a);
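        // Bit i of the mask is the most significant bit of byte lane i, so the
        // binary literal below lists lane 15 down to lane 0.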
4023 assert_eq!(r, 0b10100110_00100101);
4024 }
4025
4026 #[simd_test(enable = "sse2")]
4027 unsafe fn test_mm_shuffle_epi32() {
4028 let a = _mm_setr_epi32(5, 10, 15, 20);
4029 let r = _mm_shuffle_epi32::<0b00_01_01_11>(a);
4030 let e = _mm_setr_epi32(20, 10, 10, 5);
4031 assert_eq_m128i(r, e);
4032 }
4033
4034 #[simd_test(enable = "sse2")]
4035 unsafe fn test_mm_shufflehi_epi16() {
4036 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
4037 let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a);
4038 let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
4039 assert_eq_m128i(r, e);
4040 }
4041
4042 #[simd_test(enable = "sse2")]
4043 unsafe fn test_mm_shufflelo_epi16() {
4044 let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
4045 let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a);
4046 let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
4047 assert_eq_m128i(r, e);
4048 }
4049
4050 #[simd_test(enable = "sse2")]
4051 unsafe fn test_mm_unpackhi_epi8() {
4052 #[rustfmt::skip]
4053 let a = _mm_setr_epi8(
4054 0, 1, 2, 3, 4, 5, 6, 7,
4055 8, 9, 10, 11, 12, 13, 14, 15,
4056 );
4057 #[rustfmt::skip]
4058 let b = _mm_setr_epi8(
4059 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4060 );
4061 let r = _mm_unpackhi_epi8(a, b);
4062 #[rustfmt::skip]
4063 let e = _mm_setr_epi8(
4064 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
4065 );
4066 assert_eq_m128i(r, e);
4067 }
4068
4069 #[simd_test(enable = "sse2")]
4070 unsafe fn test_mm_unpackhi_epi16() {
4071 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4072 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4073 let r = _mm_unpackhi_epi16(a, b);
4074 let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15);
4075 assert_eq_m128i(r, e);
4076 }
4077
4078 #[simd_test(enable = "sse2")]
4079 unsafe fn test_mm_unpackhi_epi32() {
4080 let a = _mm_setr_epi32(0, 1, 2, 3);
4081 let b = _mm_setr_epi32(4, 5, 6, 7);
4082 let r = _mm_unpackhi_epi32(a, b);
4083 let e = _mm_setr_epi32(2, 6, 3, 7);
4084 assert_eq_m128i(r, e);
4085 }
4086
4087 #[simd_test(enable = "sse2")]
4088 unsafe fn test_mm_unpackhi_epi64() {
4089 let a = _mm_setr_epi64x(0, 1);
4090 let b = _mm_setr_epi64x(2, 3);
4091 let r = _mm_unpackhi_epi64(a, b);
4092 let e = _mm_setr_epi64x(1, 3);
4093 assert_eq_m128i(r, e);
4094 }
4095
4096 #[simd_test(enable = "sse2")]
4097 unsafe fn test_mm_unpacklo_epi8() {
4098 #[rustfmt::skip]
4099 let a = _mm_setr_epi8(
4100 0, 1, 2, 3, 4, 5, 6, 7,
4101 8, 9, 10, 11, 12, 13, 14, 15,
4102 );
4103 #[rustfmt::skip]
4104 let b = _mm_setr_epi8(
4105 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4106 );
4107 let r = _mm_unpacklo_epi8(a, b);
4108 #[rustfmt::skip]
4109 let e = _mm_setr_epi8(
4110 0, 16, 1, 17, 2, 18, 3, 19,
4111 4, 20, 5, 21, 6, 22, 7, 23,
4112 );
4113 assert_eq_m128i(r, e);
4114 }
4115
4116 #[simd_test(enable = "sse2")]
4117 unsafe fn test_mm_unpacklo_epi16() {
4118 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4119 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4120 let r = _mm_unpacklo_epi16(a, b);
4121 let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
4122 assert_eq_m128i(r, e);
4123 }
4124
4125 #[simd_test(enable = "sse2")]
4126 unsafe fn test_mm_unpacklo_epi32() {
4127 let a = _mm_setr_epi32(0, 1, 2, 3);
4128 let b = _mm_setr_epi32(4, 5, 6, 7);
4129 let r = _mm_unpacklo_epi32(a, b);
4130 let e = _mm_setr_epi32(0, 4, 1, 5);
4131 assert_eq_m128i(r, e);
4132 }
4133
4134 #[simd_test(enable = "sse2")]
4135 unsafe fn test_mm_unpacklo_epi64() {
4136 let a = _mm_setr_epi64x(0, 1);
4137 let b = _mm_setr_epi64x(2, 3);
4138 let r = _mm_unpacklo_epi64(a, b);
4139 let e = _mm_setr_epi64x(0, 2);
4140 assert_eq_m128i(r, e);
4141 }
4142
4143 #[simd_test(enable = "sse2")]
4144 unsafe fn test_mm_add_sd() {
4145 let a = _mm_setr_pd(1.0, 2.0);
4146 let b = _mm_setr_pd(5.0, 10.0);
4147 let r = _mm_add_sd(a, b);
4148 assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0));
4149 }
4150
4151 #[simd_test(enable = "sse2")]
4152 unsafe fn test_mm_add_pd() {
4153 let a = _mm_setr_pd(1.0, 2.0);
4154 let b = _mm_setr_pd(5.0, 10.0);
4155 let r = _mm_add_pd(a, b);
4156 assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
4157 }
4158
4159 #[simd_test(enable = "sse2")]
4160 unsafe fn test_mm_div_sd() {
4161 let a = _mm_setr_pd(1.0, 2.0);
4162 let b = _mm_setr_pd(5.0, 10.0);
4163 let r = _mm_div_sd(a, b);
4164 assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0));
4165 }
4166
4167 #[simd_test(enable = "sse2")]
4168 unsafe fn test_mm_div_pd() {
4169 let a = _mm_setr_pd(1.0, 2.0);
4170 let b = _mm_setr_pd(5.0, 10.0);
4171 let r = _mm_div_pd(a, b);
4172 assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2));
4173 }
4174
4175 #[simd_test(enable = "sse2")]
4176 unsafe fn test_mm_max_sd() {
4177 let a = _mm_setr_pd(1.0, 2.0);
4178 let b = _mm_setr_pd(5.0, 10.0);
4179 let r = _mm_max_sd(a, b);
4180 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4181 }
4182
4183 #[simd_test(enable = "sse2")]
4184 unsafe fn test_mm_max_pd() {
4185 let a = _mm_setr_pd(1.0, 2.0);
4186 let b = _mm_setr_pd(5.0, 10.0);
4187 let r = _mm_max_pd(a, b);
4188 assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0));
4189
4190 // Check SSE(2)-specific semantics for -0.0 handling.
4191 let a = _mm_setr_pd(-0.0, 0.0);
4192 let b = _mm_setr_pd(0.0, 0.0);
4193 let r1: [u8; 16] = transmute(_mm_max_pd(a, b));
4194 let r2: [u8; 16] = transmute(_mm_max_pd(b, a));
4195 let a: [u8; 16] = transmute(a);
4196 let b: [u8; 16] = transmute(b);
4197 assert_eq!(r1, b);
4198 assert_eq!(r2, a);
4199 assert_ne!(a, b); // sanity check that -0.0 is actually present
4200 }
4201
4202 #[simd_test(enable = "sse2")]
4203 unsafe fn test_mm_min_sd() {
4204 let a = _mm_setr_pd(1.0, 2.0);
4205 let b = _mm_setr_pd(5.0, 10.0);
4206 let r = _mm_min_sd(a, b);
4207 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4208 }
4209
4210 #[simd_test(enable = "sse2")]
4211 unsafe fn test_mm_min_pd() {
4212 let a = _mm_setr_pd(1.0, 2.0);
4213 let b = _mm_setr_pd(5.0, 10.0);
4214 let r = _mm_min_pd(a, b);
4215 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4216
4217 // Check SSE(2)-specific semantics for -0.0 handling.
4218 let a = _mm_setr_pd(-0.0, 0.0);
4219 let b = _mm_setr_pd(0.0, 0.0);
4220 let r1: [u8; 16] = transmute(_mm_min_pd(a, b));
4221 let r2: [u8; 16] = transmute(_mm_min_pd(b, a));
4222 let a: [u8; 16] = transmute(a);
4223 let b: [u8; 16] = transmute(b);
4224 assert_eq!(r1, b);
4225 assert_eq!(r2, a);
4226 assert_ne!(a, b); // sanity check that -0.0 is actually present
4227 }
4228
4229 #[simd_test(enable = "sse2")]
4230 unsafe fn test_mm_mul_sd() {
4231 let a = _mm_setr_pd(1.0, 2.0);
4232 let b = _mm_setr_pd(5.0, 10.0);
4233 let r = _mm_mul_sd(a, b);
4234 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4235 }
4236
4237 #[simd_test(enable = "sse2")]
4238 unsafe fn test_mm_mul_pd() {
4239 let a = _mm_setr_pd(1.0, 2.0);
4240 let b = _mm_setr_pd(5.0, 10.0);
4241 let r = _mm_mul_pd(a, b);
4242 assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0));
4243 }
4244
4245 #[simd_test(enable = "sse2")]
4246 unsafe fn test_mm_sqrt_sd() {
4247 let a = _mm_setr_pd(1.0, 2.0);
4248 let b = _mm_setr_pd(5.0, 10.0);
4249 let r = _mm_sqrt_sd(a, b);
4250 assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
4251 }
4252
4253 #[simd_test(enable = "sse2")]
4254 unsafe fn test_mm_sqrt_pd() {
4255 let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
4256 assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
4257 }
4258
4259 #[simd_test(enable = "sse2")]
4260 unsafe fn test_mm_sub_sd() {
4261 let a = _mm_setr_pd(1.0, 2.0);
4262 let b = _mm_setr_pd(5.0, 10.0);
4263 let r = _mm_sub_sd(a, b);
4264 assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0));
4265 }
4266
4267 #[simd_test(enable = "sse2")]
4268 unsafe fn test_mm_sub_pd() {
4269 let a = _mm_setr_pd(1.0, 2.0);
4270 let b = _mm_setr_pd(5.0, 10.0);
4271 let r = _mm_sub_pd(a, b);
4272 assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0));
4273 }
4274
4275 #[simd_test(enable = "sse2")]
4276 unsafe fn test_mm_and_pd() {
4277 let a = transmute(u64x2::splat(5));
4278 let b = transmute(u64x2::splat(3));
4279 let r = _mm_and_pd(a, b);
4280 let e = transmute(u64x2::splat(1));
4281 assert_eq_m128d(r, e);
4282 }
4283
4284 #[simd_test(enable = "sse2")]
4285 unsafe fn test_mm_andnot_pd() {
4286 let a = transmute(u64x2::splat(5));
4287 let b = transmute(u64x2::splat(3));
4288 let r = _mm_andnot_pd(a, b);
4289 let e = transmute(u64x2::splat(2));
4290 assert_eq_m128d(r, e);
4291 }
4292
4293 #[simd_test(enable = "sse2")]
4294 unsafe fn test_mm_or_pd() {
4295 let a = transmute(u64x2::splat(5));
4296 let b = transmute(u64x2::splat(3));
4297 let r = _mm_or_pd(a, b);
4298 let e = transmute(u64x2::splat(7));
4299 assert_eq_m128d(r, e);
4300 }
4301
4302 #[simd_test(enable = "sse2")]
4303 unsafe fn test_mm_xor_pd() {
4304 let a = transmute(u64x2::splat(5));
4305 let b = transmute(u64x2::splat(3));
4306 let r = _mm_xor_pd(a, b);
4307 let e = transmute(u64x2::splat(6));
4308 assert_eq_m128d(r, e);
4309 }
4310
4311 #[simd_test(enable = "sse2")]
4312 unsafe fn test_mm_cmpeq_sd() {
4313 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4314 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4315 let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
4316 assert_eq_m128i(r, e);
4317 }
4318
4319 #[simd_test(enable = "sse2")]
4320 unsafe fn test_mm_cmplt_sd() {
4321 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4322 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4323 let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
4324 assert_eq_m128i(r, e);
4325 }
4326
4327 #[simd_test(enable = "sse2")]
4328 unsafe fn test_mm_cmple_sd() {
4329 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4330 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4331 let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
4332 assert_eq_m128i(r, e);
4333 }
4334
4335 #[simd_test(enable = "sse2")]
4336 unsafe fn test_mm_cmpgt_sd() {
4337 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4338 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4339 let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
4340 assert_eq_m128i(r, e);
4341 }
4342
4343 #[simd_test(enable = "sse2")]
4344 unsafe fn test_mm_cmpge_sd() {
4345 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4346 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4347 let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
4348 assert_eq_m128i(r, e);
4349 }
4350
4351 #[simd_test(enable = "sse2")]
4352 unsafe fn test_mm_cmpord_sd() {
4353 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4354 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4355 let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
4356 assert_eq_m128i(r, e);
4357 }
4358
4359 #[simd_test(enable = "sse2")]
4360 unsafe fn test_mm_cmpunord_sd() {
4361 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4362 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4363 let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
4364 assert_eq_m128i(r, e);
4365 }
4366
4367 #[simd_test(enable = "sse2")]
4368 unsafe fn test_mm_cmpneq_sd() {
4369 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4370 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4371 let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
4372 assert_eq_m128i(r, e);
4373 }
4374
4375 #[simd_test(enable = "sse2")]
4376 unsafe fn test_mm_cmpnlt_sd() {
4377 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4378 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4379 let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
4380 assert_eq_m128i(r, e);
4381 }
4382
4383 #[simd_test(enable = "sse2")]
4384 unsafe fn test_mm_cmpnle_sd() {
4385 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4386 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4387 let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
4388 assert_eq_m128i(r, e);
4389 }
4390
4391 #[simd_test(enable = "sse2")]
4392 unsafe fn test_mm_cmpngt_sd() {
4393 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4394 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4395 let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
4396 assert_eq_m128i(r, e);
4397 }
4398
4399 #[simd_test(enable = "sse2")]
4400 unsafe fn test_mm_cmpnge_sd() {
4401 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4402 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4403 let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
4404 assert_eq_m128i(r, e);
4405 }
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmple_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, !0);
        let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpge_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpord_pd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpunord_pd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpneq_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnlt_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnle_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpngt_pd() {
        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnge_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b));
        assert_eq_m128i(r, e);
    }
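
    // `_mm_comi*_sd`/`_mm_ucomi*_sd` compare only the lower lanes and return a
    // scalar 0 or 1; as the NaN cases below show, an unordered comparison
    // makes the `eq` predicates report false.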
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comieq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comieq_sd(a, b) != 0);

        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comieq_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comilt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comilt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comile_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comile_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comigt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comigt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comige_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comige_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comineq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comineq_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomieq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomieq_sd(a, b) != 0);

        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
        assert!(_mm_ucomieq_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomilt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomilt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomile_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomile_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomigt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomigt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomige_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomige_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomineq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomineq_sd(a, b) == 0);
    }
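
    // `_mm_movemask_pd` packs the sign bit of each lane into the low two bits
    // of the result (bit 0 = lower lane, bit 1 = upper lane).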
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_movemask_pd() {
        let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
        assert_eq!(r, 0b01);

        let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
        assert_eq!(r, 0b11);
    }
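
    // Backing storage for the load/store tests below; the 16-byte alignment is
    // required by the aligned variants such as `_mm_load_pd` and
    // `_mm_store_pd`.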
    #[repr(align(16))]
    struct Memory {
        data: [f64; 4],
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_pd() {
        let mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mem.data;
        let d = vals.as_ptr();

        let r = _mm_load_pd(d);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_sd() {
        let a = 1.;
        let expected = _mm_setr_pd(a, 0.);
        let r = _mm_load_sd(&a);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadh_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = 3.;
        let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
        let r = _mm_loadh_pd(a, &b);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadl_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = 3.;
        let expected = _mm_setr_pd(3., get_m128d(a, 1));
        let r = _mm_loadl_pd(a, &b);
        assert_eq_m128d(r, expected);
    }
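
    // `_mm_stream_pd` is a non-temporal (cache-bypassing) store, which is also
    // why Miri skips this test.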
    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    // (non-temporal store)
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_stream_pd() {
        #[repr(align(128))]
        struct Memory {
            pub data: [f64; 2],
        }
        let a = _mm_set1_pd(7.0);
        let mut mem = Memory { data: [-1.0; 2] };

        _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
        for i in 0..2 {
            assert_eq!(mem.data[i], get_m128d(a, i));
        }
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_sd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_store_sd(&mut dest, a);
        assert_eq!(dest, _mm_cvtsd_f64(a));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store_pd(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 2.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeu_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);

        let mut ofs = 0;
        let mut p = vals.as_mut_ptr();

        // Make sure p is **not** aligned to 16-byte boundary
        if (p as usize) & 0xf == 0 {
            ofs = 1;
            p = p.add(1);
        }

        _mm_storeu_pd(p, *black_box(&a));

        if ofs > 0 {
            assert_eq!(vals[ofs - 1], 0.0);
        }
        assert_eq!(vals[ofs + 0], 1.0);
        assert_eq!(vals[ofs + 1], 2.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store1_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store1_pd(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 1.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_pd1() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store_pd1(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 1.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storer_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_storer_pd(d, *black_box(&a));
        assert_eq!(vals[0], 2.0);
        assert_eq!(vals[1], 1.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeh_pd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_storeh_pd(&mut dest, a);
        assert_eq!(dest, get_m128d(a, 1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storel_pd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_storel_pd(&mut dest, a);
        assert_eq!(dest, _mm_cvtsd_f64(a));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadr_pd() {
        let mut mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mut mem.data;
        let d = vals.as_ptr();

        let r = _mm_loadr_pd(d);
        assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadu_pd() {
        let mut mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mut mem.data;
        let mut d = vals.as_ptr();

        // make sure d is not aligned to 16-byte boundary
        let mut offset = 0;
        if (d as usize) & 0xf == 0 {
            offset = 1;
            d = d.add(offset);
        }

        let r = _mm_loadu_pd(d);
        let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64));
        assert_eq_m128d(r, e);
    }
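
    // `_mm_cvtpd_ps` narrows each f64 lane to f32 and zeroes the upper two
    // result lanes; values beyond the f32 range overflow to infinity.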
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtpd_ps() {
        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
        assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
        assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
        assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtps_pd() {
        let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
        assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));

        let r = _mm_cvtps_pd(_mm_setr_ps(
            f32::MAX,
            f32::INFINITY,
            f32::NEG_INFINITY,
            f32::MIN,
        ));
        assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
    }
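
    // For the double-to-integer conversions below, out-of-range, infinite, and
    // NaN inputs all produce the "integer indefinite" value, i.e. `i32::MIN`.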
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtpd_epi32() {
        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
        assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
        assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_si32() {
        let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
        assert_eq!(r, -2);

        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq!(r, i32::MIN);

        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
        assert_eq!(r, i32::MIN);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_ss() {
        let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
        let b = _mm_setr_pd(2.0, -5.0);

        let r = _mm_cvtsd_ss(a, b);

        assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));

        let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
        let b = _mm_setr_pd(f64::INFINITY, -5.0);

        let r = _mm_cvtsd_ss(a, b);

        assert_eq_m128(
            r,
            _mm_setr_ps(
                f32::INFINITY,
                f32::NEG_INFINITY,
                f32::MAX,
                f32::NEG_INFINITY,
            ),
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_f64() {
        let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
        assert_eq!(r, -1.1);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtss_sd() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);

        let r = _mm_cvtss_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));

        let a = _mm_setr_pd(-1.1, f64::INFINITY);
        let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);

        let r = _mm_cvtss_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
    }
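
    // The `_mm_cvtt*` variants convert with truncation toward zero rather than
    // using the current rounding mode.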
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttpd_epi32() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let r = _mm_cvttpd_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));

        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let r = _mm_cvttpd_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttsd_si32() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let r = _mm_cvttsd_si32(a);
        assert_eq!(r, -1);

        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let r = _mm_cvttsd_si32(a);
        assert_eq!(r, i32::MIN);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttps_epi32() {
        let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
        let r = _mm_cvttps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));

        let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
        let r = _mm_cvttps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
    }
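
    // `_mm_set_*` takes its arguments with the highest lane first, while
    // `_mm_setr_*` takes them in memory (lowest-lane-first) order, which is
    // what `test_mm_set_pd` below checks.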
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_sd() {
        let r = _mm_set_sd(-1.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_pd() {
        let r = _mm_set1_pd(-1.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_pd1() {
        let r = _mm_set_pd1(-2.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_pd() {
        let r = _mm_set_pd(1.0_f64, 5.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setr_pd() {
        let r = _mm_setr_pd(1.0_f64, -5.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setzero_pd() {
        let r = _mm_setzero_pd();
        assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load1_pd() {
        let d = -5.0;
        let r = _mm_load1_pd(&d);
        assert_eq_m128d(r, _mm_setr_pd(d, d));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_pd1() {
        let d = -5.0;
        let r = _mm_load_pd1(&d);
        assert_eq_m128d(r, _mm_setr_pd(d, d));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(3.0, 4.0);
        let r = _mm_unpackhi_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(3.0, 4.0);
        let r = _mm_unpacklo_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0));
    }
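
    // For `_mm_shuffle_pd`, bit 0 of the constant selects the lower result
    // lane from `a` and bit 1 selects the upper result lane from `b`; a mask
    // of 0 therefore yields `(a[0], b[0])`.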
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shuffle_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(3., 4.);
        let expected = _mm_setr_pd(1., 3.);
        let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_move_sd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(3., 4.);
        let expected = _mm_setr_pd(3., 2.);
        let r = _mm_move_sd(a, b);
        assert_eq_m128d(r, expected);
    }
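
    // The `_mm_cast*` intrinsics only reinterpret the bits of a vector (they
    // are intended to compile to no instructions), so the all-zero patterns
    // below compare equal across every element type.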
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castpd_ps() {
        let a = _mm_set1_pd(0.);
        let expected = _mm_set1_ps(0.);
        let r = _mm_castpd_ps(a);
        assert_eq_m128(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castpd_si128() {
        let a = _mm_set1_pd(0.);
        let expected = _mm_set1_epi64x(0);
        let r = _mm_castpd_si128(a);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castps_pd() {
        let a = _mm_set1_ps(0.);
        let expected = _mm_set1_pd(0.);
        let r = _mm_castps_pd(a);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castps_si128() {
        let a = _mm_set1_ps(0.);
        let expected = _mm_set1_epi32(0);
        let r = _mm_castps_si128(a);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castsi128_pd() {
        let a = _mm_set1_epi64x(0);
        let expected = _mm_set1_pd(0.);
        let r = _mm_castsi128_pd(a);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castsi128_ps() {
        let a = _mm_set1_epi32(0);
        let expected = _mm_set1_ps(0.);
        let r = _mm_castsi128_ps(a);
        assert_eq_m128(r, expected);
    }
}