//! Streaming SIMD Extensions 2 (SSE2)

#[cfg(test)]
use stdarch_test::assert_instr;

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    intrinsics::sqrtf64,
    mem, ptr,
};

/// Provides a hint to the processor that the code sequence is a spin-wait loop.
///
/// This can help improve the performance and power consumption of spin-wait
/// loops.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_pause)
#[inline]
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_pause() {
    // note: `pause` is guaranteed to be interpreted as a `nop` by CPUs without
    // the SSE2 target-feature - therefore it does not require any target features
    pause()
}

/// Invalidates and flushes the cache line that contains `p` from all levels of
/// the cache hierarchy.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clflush)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(clflush))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_clflush(p: *const u8) {
    clflush(p)
}

/// Performs a serializing operation on all load-from-memory instructions
/// that were issued prior to this instruction.
///
/// Guarantees that every load instruction that precedes, in program order, the
/// load fence instruction is globally visible before any load instruction
/// which follows the fence in program order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(lfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_lfence() {
    lfence()
}

/// Performs a serializing operation on all load-from-memory and store-to-memory
/// instructions that were issued prior to this instruction.
///
/// Guarantees that every memory access that precedes, in program order, the
/// memory fence instruction is globally visible before any memory instruction
/// which follows the fence in program order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mfence() {
    mfence()
}

/// Adds packed 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_add(a.as_i8x16(), b.as_i8x16())) }
}

/// Adds packed 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_add(a.as_i16x8(), b.as_i16x8())) }
}

/// Adds packed 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_add(a.as_i32x4(), b.as_i32x4())) }
}

/// Adds packed 64-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_add(a.as_i64x2(), b.as_i64x2())) }
}

/// Adds packed 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi8)
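///
/// # Examples
///
/// A minimal sketch of the saturating behavior (illustrative values; assumes
/// the SSE2 intrinsics are in scope):
///
/// ```ignore
/// let a = _mm_set1_epi8(100);
/// let b = _mm_set1_epi8(50);
/// // Each lane computes 100 + 50 = 150, which saturates to i8::MAX = 127
/// // instead of wrapping.
/// let r = _mm_adds_epi8(a, b);
/// ```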
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16())) }
}

/// Adds packed 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8())) }
}

/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16())) }
}

/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8())) }
}

/// Averages packed unsigned 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu8)
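///
/// # Examples
///
/// A minimal sketch of the rounding average, `(a + b + 1) >> 1` (illustrative
/// values; assumes the SSE2 intrinsics are in scope):
///
/// ```ignore
/// let a = _mm_set1_epi8(7);
/// let b = _mm_set1_epi8(4);
/// // Each lane computes (7 + 4 + 1) >> 1 = 6, i.e. the average rounded up.
/// let r = _mm_avg_epu8(a, b);
/// ```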
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: u16x16 = simd_cast::<_, u16x16>(a.as_u8x16());
        let b: u16x16 = simd_cast::<_, u16x16>(b.as_u8x16());
        let r: u16x16 = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1));
        transmute(simd_cast::<_, u8x16>(r))
    }
}

/// Averages packed unsigned 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: u32x8 = simd_cast::<_, u32x8>(a.as_u16x8());
        let b: u32x8 = simd_cast::<_, u32x8>(b.as_u16x8());
        let r: u32x8 = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1));
        transmute(simd_cast::<_, u16x8>(r))
    }
}

/// Multiplies and then horizontally adds signed 16-bit integers in `a` and `b`.
///
/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Horizontally adds adjacent pairs of
/// intermediate 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd_epi16)
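///
/// # Examples
///
/// A minimal sketch (illustrative values; assumes the SSE2 intrinsics are in
/// scope):
///
/// ```ignore
/// let a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
/// let b = _mm_set1_epi16(10);
/// // Adjacent pairs are multiplied and summed:
/// // lowest 32-bit lane = 1*10 + 2*10 = 30, next = 3*10 + 4*10 = 70, ...
/// let r = _mm_madd_epi16(a, b);
/// ```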
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) }
}

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// maximum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: i16x8 = a.as_i16x8();
        let b: i16x8 = b.as_i16x8();
        transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
    }
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed maximum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: u8x16 = a.as_u8x16();
        let b: u8x16 = b.as_u8x16();
        transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
    }
}

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// minimum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: i16x8 = a.as_i16x8();
        let b: i16x8 = b.as_i16x8();
        transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
    }
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed minimum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: u8x16 = a.as_u8x16();
        let b: u8x16 = b.as_u8x16();
        transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
    }
}

/// Multiplies the packed 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epi16)
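///
/// # Examples
///
/// A minimal sketch (illustrative values; assumes the SSE2 intrinsics are in
/// scope):
///
/// ```ignore
/// let a = _mm_set1_epi16(1000);
/// let b = _mm_set1_epi16(1000);
/// // 1000 * 1000 = 1_000_000 = 0x000F_4240; the high 16 bits are 0x000F = 15.
/// let r = _mm_mulhi_epi16(a, b);
/// ```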
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: i32x8 = simd_cast::<_, i32x8>(a.as_i16x8());
        let b: i32x8 = simd_cast::<_, i32x8>(b.as_i16x8());
        let r: i32x8 = simd_shr(simd_mul(a, b), i32x8::splat(16));
        transmute(simd_cast::<i32x8, i16x8>(r))
    }
}

/// Multiplies the packed unsigned 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: u32x8 = simd_cast::<_, u32x8>(a.as_u16x8());
        let b: u32x8 = simd_cast::<_, u32x8>(b.as_u16x8());
        let r: u32x8 = simd_shr(simd_mul(a, b), u32x8::splat(16));
        transmute(simd_cast::<u32x8, u16x8>(r))
    }
}

/// Multiplies the packed 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// low 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmullw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) }
}

/// Multiplies the low unsigned 32-bit integers from each packed 64-bit element
/// in `a` and `b`.
///
/// Returns the unsigned 64-bit results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_epu32)
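///
/// # Examples
///
/// A minimal sketch (illustrative values; assumes the SSE2 intrinsics are in
/// scope):
///
/// ```ignore
/// let a = _mm_set_epi32(0, 5, 0, 3);
/// let b = _mm_set_epi32(0, 7, 0, 4);
/// // Only the low 32 bits of each 64-bit lane are multiplied:
/// // lane 0 = 3 * 4 = 12, lane 1 = 5 * 7 = 35, as unsigned 64-bit results.
/// let r = _mm_mul_epu32(a, b);
/// ```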
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmuludq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: u64x2 = a.as_u64x2();
        let b: u64x2 = b.as_u64x2();
        let mask: u64x2 = u64x2::splat(u32::MAX.into());
        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
    }
}

/// Sums the absolute differences of packed unsigned 8-bit integers.
///
/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
/// and `b`, then horizontally sums each consecutive 8 differences to produce
/// two unsigned 16-bit integers, and packs these unsigned 16-bit integers in
/// the low 16 bits of the 64-bit elements returned.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sad_epu8)
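///
/// # Examples
///
/// A minimal sketch (illustrative values; assumes the SSE2 intrinsics are in
/// scope):
///
/// ```ignore
/// let a = _mm_set1_epi8(10);
/// let b = _mm_set1_epi8(3);
/// // |10 - 3| = 7 per byte; each group of 8 bytes sums to 56, stored in the
/// // low 16 bits of each of the two 64-bit lanes.
/// let r = _mm_sad_epu8(a, b);
/// ```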
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psadbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(psadbw(a.as_u8x16(), b.as_u8x16())) }
}

/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) }
}

/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) }
}

/// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) }
}

/// Subtracts packed 64-bit integers in `b` from packed 64-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) }
}

/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`
/// using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16())) }
}

/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`
/// using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8())) }
}

/// Subtracts packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
/// integers in `a` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16())) }
}

/// Subtracts packed unsigned 16-bit integers in `b` from packed unsigned 16-bit
/// integers in `a` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8())) }
}

/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_si128)
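///
/// # Examples
///
/// A minimal sketch (illustrative values; assumes the SSE2 intrinsics are in
/// scope):
///
/// ```ignore
/// let a = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
/// // Shift the whole vector left by 2 bytes (toward higher byte indices);
/// // the two lowest bytes become zero.
/// let r = _mm_slli_si128::<2>(a);
/// ```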
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { _mm_slli_si128_impl::<IMM8>(a) }
}

/// Implementation detail: converts the immediate argument of the
/// `_mm_slli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        if shift > 15 { i } else { 16 - shift + i }
    }
    transmute::<i8x16, _>(simd_shuffle!(
        i8x16::ZERO,
        a.as_i8x16(),
        [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    ))
}

/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bslli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        _mm_slli_si128_impl::<IMM8>(a)
    }
}

/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bsrli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        _mm_srli_si128_impl::<IMM8>(a)
    }
}

/// Shifts packed 16-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 16 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
        }
    }
}

/// Shifts packed 16-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psllw(a.as_i16x8(), count.as_i16x8())) }
}

/// Shifts packed 32-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 32 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
        }
    }
}

/// Shifts packed 32-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(pslld(a.as_i32x4(), count.as_i32x4())) }
}

/// Shifts packed 64-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 64 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
        }
    }
}

/// Shifts packed 64-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psllq(a.as_i64x2(), count.as_i64x2())) }
}

/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi16)
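///
/// # Examples
///
/// A minimal sketch of the arithmetic (sign-preserving) shift (illustrative
/// values; assumes the SSE2 intrinsics are in scope):
///
/// ```ignore
/// let a = _mm_set1_epi16(-8);
/// // The sign bit is shifted in from the left: -8 >> 2 = -2 in every lane.
/// let r = _mm_srai_epi16::<2>(a);
/// ```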
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16))) }
}

/// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psraw(a.as_i16x8(), count.as_i16x8())) }
}

/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31)))) }
}

/// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psrad(a.as_i32x4(), count.as_i32x4())) }
}

/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { _mm_srli_si128_impl::<IMM8>(a) }
}

/// Implementation detail: converts the immediate argument of the
/// `_mm_srli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    const fn mask(shift: i32, i: u32) -> u32 {
        if (shift as u32) > 15 {
            i + 16
        } else {
            i + (shift as u32)
        }
    }
    let x: i8x16 = simd_shuffle!(
        a.as_i8x16(),
        i8x16::ZERO,
        [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    );
    transmute(x)
}

/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 16 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
        }
    }
}

/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psrlw(a.as_i16x8(), count.as_i16x8())) }
}

/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 32 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
        }
    }
}

/// Shifts packed 32-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psrld(a.as_i32x4(), count.as_i32x4())) }
}

/// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 64 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
        }
    }
}

/// Shifts packed 64-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psrlq(a.as_i64x2(), count.as_i64x2())) }
}

/// Computes the bitwise AND of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_and(a, b) }
}

/// Computes the bitwise NOT of 128 bits (representing integer data) in `a` and
/// then AND with `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_si128)
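///
/// # Examples
///
/// A minimal sketch (illustrative values; assumes the SSE2 intrinsics are in
/// scope):
///
/// ```ignore
/// let a = _mm_set1_epi8(0b0000_1111);
/// let b = _mm_set1_epi8(0b0011_1100);
/// // (!a) & b keeps only the bits of `b` that are clear in `a`: 0b0011_0000.
/// let r = _mm_andnot_si128(a, b);
/// ```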
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_and(simd_xor(_mm_set1_epi8(-1), a), b) }
}

/// Computes the bitwise OR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_or(a, b) }
}

/// Computes the bitwise XOR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_xor(a, b) }
}

/// Compares packed 8-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
}

/// Compares packed 16-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
}

/// Compares packed 32-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
}

/// Compares packed 8-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
}

/// Compares packed 16-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
}

/// Compares packed 32-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
}

/// Compares packed 8-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
}

/// Compares packed 16-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
}

/// Compares packed 32-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
}

/// Converts the lower two packed 32-bit integers in `a` to packed
/// double-precision (64-bit) floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
    unsafe {
        let a: i32x4 = a.as_i32x4();
        simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1]))
    }
}

/// Returns `a` with its lower element replaced by `b` after converting it to
/// an `f64`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsi2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
    unsafe { simd_insert!(a, 0, b as f64) }
}

/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit)
/// floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
    unsafe { transmute(simd_cast::<_, f32x4>(a.as_i32x4())) }
}

/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtps_epi32(a: __m128) -> __m128i {
    unsafe { transmute(cvtps2dq(a)) }
}

/// Returns a vector whose lowest element is `a` and all higher elements are
/// `0`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsi32_si128(a: i32) -> __m128i {
    unsafe { transmute(i32x4::new(a, 0, 0, 0)) }
}

/// Returns the lowest element of `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi128_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
    unsafe { simd_extract!(a.as_i32x4(), 0) }
}

/// Sets packed 64-bit integers with the supplied values, from highest to
/// lowest.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi64x)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
    unsafe { transmute(i64x2::new(e0, e1)) }
}

/// Sets packed 32-bit integers with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi32)
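///
/// # Examples
///
/// A minimal sketch of the argument ordering (illustrative values; assumes the
/// SSE2 intrinsics are in scope):
///
/// ```ignore
/// // Arguments are given from the highest lane down to the lowest:
/// let v = _mm_set_epi32(3, 2, 1, 0);
/// // Lane 0 (the lowest) now holds 0 and lane 3 (the highest) holds 3.
/// assert_eq!(_mm_cvtsi128_si32(v), 0);
/// ```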
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    unsafe { transmute(i32x4::new(e0, e1, e2, e3)) }
}

/// Sets packed 16-bit integers with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    unsafe { transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) }
}

/// Sets packed 8-bit integers with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    unsafe {
        #[rustfmt::skip]
        transmute(i8x16::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
        ))
    }
}

/// Broadcasts 64-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi64x)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi64x(a: i64) -> __m128i {
    _mm_set_epi64x(a, a)
}

/// Broadcasts 32-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi32(a: i32) -> __m128i {
    _mm_set_epi32(a, a, a, a)
}

/// Broadcasts 16-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi16(a: i16) -> __m128i {
    _mm_set_epi16(a, a, a, a, a, a, a, a)
}

/// Broadcasts 8-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi8(a: i8) -> __m128i {
    _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
}

/// Sets packed 32-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    _mm_set_epi32(e0, e1, e2, e3)
}

/// Sets packed 16-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
}

/// Sets packed 8-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    #[rustfmt::skip]
    _mm_set_epi8(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    )
}

/// Returns a vector with all elements set to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setzero_si128() -> __m128i {
    const { unsafe { mem::zeroed() } }
}

/// Loads a 64-bit integer from memory into the first element of the returned
/// vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
    _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
}

/// Loads 128-bits of integer data from memory into a new vector.
///
/// `mem_addr` must be aligned on a 16-byte boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
    *mem_addr
}

/// Loads 128-bits of integer data from memory into a new vector.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
    let mut dst: __m128i = _mm_undefined_si128();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m128i>(),
    );
    dst
}

/// Conditionally stores 8-bit integer elements from `a` into memory using
/// `mask`.
///
/// Elements are not stored when the highest bit is not set in the
/// corresponding element.
///
/// `mem_addr` should correspond to a 128-bit memory location and does not need
/// to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskmoveu_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maskmovdqu))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) {
    maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
}

/// Stores 128-bits of integer data from `a` into memory.
///
/// `mem_addr` must be aligned on a 16-byte boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
    *mem_addr = a;
}

/// Stores 128-bits of integer data from `a` into memory.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))] // FIXME movdqu expected
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
    mem_addr.write_unaligned(a);
}

/// Stores the lower 64-bit integer `a` to a memory location.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
    ptr::copy_nonoverlapping(ptr::addr_of!(a) as *const u8, mem_addr as *mut u8, 8);
}

/// Stores a 128-bit integer vector to a 128-bit aligned memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si128)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
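///
/// # Examples
///
/// A minimal sketch of the store-then-fence pattern (assumes the SSE and SSE2
/// intrinsics are in scope; the `Aligned` wrapper is a hypothetical helper used
/// here only to guarantee the required 16-byte alignment):
///
/// ```ignore
/// #[repr(align(16))]
/// struct Aligned([i32; 4]);
///
/// let mut buf = Aligned([0; 4]);
/// unsafe {
///     _mm_stream_si128(buf.0.as_mut_ptr() as *mut __m128i, _mm_set1_epi32(1));
///     // Publish the non-temporal store before any other access to `buf`.
///     _mm_sfence();
/// }
/// ```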
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
    crate::arch::asm!(
        vps!("movntdq", ",{a}"),
        p = in(reg) mem_addr,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}

/// Stores a 32-bit integer value in the specified memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si32)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movnti))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
    crate::arch::asm!(
        vps!("movnti", ",{a:e}"), // `:e` for a 32-bit value
        p = in(reg) mem_addr,
        a = in(reg) a,
        options(nostack, preserves_flags),
    );
}

/// Returns a vector where the low element is extracted from `a` and its upper
/// element is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// FIXME movd on msvc, movd on i686
#[cfg_attr(
    all(test, not(target_env = "msvc"), target_arch = "x86_64"),
    assert_instr(movq)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_move_epi64(a: __m128i) -> __m128i {
    unsafe {
        let r: i64x2 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 2]);
        transmute(r)
    }
}

/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using signed saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi16)
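///
/// # Examples
///
/// A minimal sketch (illustrative values; assumes the SSE2 intrinsics are in
/// scope):
///
/// ```ignore
/// let a = _mm_set1_epi16(300);
/// let b = _mm_set1_epi16(-300);
/// // When narrowed to i8, 300 saturates to 127 and -300 to -128; the low
/// // 8 bytes of the result come from `a`, the high 8 bytes from `b`.
/// let r = _mm_packs_epi16(a, b);
/// ```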
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packsswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(packsswb(a.as_i16x8(), b.as_i16x8())) }
}

/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using signed saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packssdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(packssdw(a.as_i32x4(), b.as_i32x4())) }
}

/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using unsigned saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packuswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(packuswb(a.as_i16x8(), b.as_i16x8())) }
}

/// Returns the `imm8` element of `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi16)
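///
/// A usage sketch (assumes an `x86_64` target, where SSE2 is always enabled);
/// note that the extracted lane is zero-extended:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
///     assert_eq!(_mm_extract_epi16::<5>(a), 5);
///     // Negative lanes come back as their `u16` bit pattern.
///     assert_eq!(_mm_extract_epi16::<0>(_mm_set1_epi16(-1)), 0xFFFF);
/// }
/// ```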
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 {
    static_assert_uimm_bits!(IMM8, 3);
    unsafe { simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32 }
}

/// Returns a new vector where the `imm8` element of `a` is replaced with `i`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi16)
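///
/// A usage sketch (assumes an `x86_64` target, where SSE2 is always enabled):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     let r = _mm_insert_epi16::<3>(_mm_setzero_si128(), 42);
///     assert_eq!(_mm_extract_epi16::<3>(r), 42);
/// }
/// ```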
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
    static_assert_uimm_bits!(IMM8, 3);
    unsafe { transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16)) }
}

/// Returns a mask of the most significant bit of each element in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_epi8)
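///
/// A usage sketch (assumes an `x86_64` target, where SSE2 is always enabled):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     let a = _mm_setr_epi8(-1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128);
///     // Bit i of the result is the most significant bit of byte i.
///     assert_eq!(_mm_movemask_epi8(a), 0b1000_0000_0000_0101);
/// }
/// ```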
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmovmskb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_movemask_epi8(a: __m128i) -> i32 {
    unsafe {
        let z: i8x16 = i8x16::ZERO;
        let m: i8x16 = simd_lt(a.as_i8x16(), z);
        simd_bitmask::<_, u16>(m) as u32 as i32
    }
}

/// Shuffles 32-bit integers in `a` using the control in `IMM8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi32)
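///
/// A usage sketch (assumes an `x86_64` target, where SSE2 is always enabled):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     let a = _mm_setr_epi32(10, 20, 30, 40);
///     // Each 2-bit field of `IMM8` selects a source lane, so 0 broadcasts lane 0.
///     let r = _mm_shuffle_epi32::<0b00_00_00_00>(a);
///     assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi32(r, _mm_set1_epi32(10))), 0xFFFF);
/// }
/// ```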
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a: i32x4 = a.as_i32x4();
        let x: i32x4 = simd_shuffle!(
            a,
            a,
            [
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
            ],
        );
        transmute(x)
    }
}

/// Shuffles 16-bit integers in the high 64 bits of `a` using the control in
/// `IMM8`.
///
/// Puts the results in the high 64 bits of the returned vector, with the low
/// 64 bits being copied from `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflehi_epi16)
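///
/// A usage sketch (assumes an `x86_64` target, where SSE2 is always enabled):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
///     // Reverse the four high lanes; the low half passes through unchanged.
///     let r = _mm_shufflehi_epi16::<0b00_01_10_11>(a);
///     let e = _mm_setr_epi16(0, 1, 2, 3, 7, 6, 5, 4);
///     assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi16(r, e)), 0xFFFF);
/// }
/// ```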
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a: i16x8 = a.as_i16x8();
        let x: i16x8 = simd_shuffle!(
            a,
            a,
            [
                0,
                1,
                2,
                3,
                (IMM8 as u32 & 0b11) + 4,
                ((IMM8 as u32 >> 2) & 0b11) + 4,
                ((IMM8 as u32 >> 4) & 0b11) + 4,
                ((IMM8 as u32 >> 6) & 0b11) + 4,
            ],
        );
        transmute(x)
    }
}

/// Shuffles 16-bit integers in the low 64 bits of `a` using the control in
/// `IMM8`.
///
/// Puts the results in the low 64 bits of the returned vector, with the high
/// 64 bits being copied from `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflelo_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a: i16x8 = a.as_i16x8();
        let x: i16x8 = simd_shuffle!(
            a,
            a,
            [
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
                4,
                5,
                6,
                7,
            ],
        );
        transmute(x)
    }
}

/// Unpacks and interleaves 8-bit integers from the high half of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpckhbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        transmute::<i8x16, _>(simd_shuffle!(
            a.as_i8x16(),
            b.as_i8x16(),
            [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
        ))
    }
}

/// Unpacks and interleaves 16-bit integers from the high half of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpckhwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let x: i16x8 = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
        transmute::<i16x8, _>(x)
    }
}

/// Unpacks and interleaves 32-bit integers from the high half of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) }
}

/// Unpacks and interleaves 64-bit integers from the high half of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3])) }
}

/// Unpacks and interleaves 8-bit integers from the low half of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi8)
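///
/// A usage sketch (assumes an `x86_64` target, where SSE2 is always enabled):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     let b = _mm_set1_epi8(-1);
///     // Bytes from the two low halves alternate: a0, b0, a1, b1, ...
///     let r = _mm_unpacklo_epi8(a, b);
///     let e = _mm_setr_epi8(0, -1, 1, -1, 2, -1, 3, -1, 4, -1, 5, -1, 6, -1, 7, -1);
///     assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi8(r, e)), 0xFFFF);
/// }
/// ```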
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpcklbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        transmute::<i8x16, _>(simd_shuffle!(
            a.as_i8x16(),
            b.as_i8x16(),
            [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
        ))
    }
}

/// Unpacks and interleaves 16-bit integers from the low half of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpcklwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let x: i16x8 = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
        transmute::<i16x8, _>(x)
    }
}

/// Unpacks and interleaves 32-bit integers from the low half of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) }
}

/// Unpacks and interleaves 64-bit integers from the low half of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2])) }
}

/// Returns a new vector with the low element of `a` replaced by the sum of the
/// low elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_sd)
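///
/// A sketch of the scalar semantics (assumes an `x86_64` target, where SSE2
/// is always enabled):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     let a = _mm_setr_pd(1.0, 10.0);
///     let b = _mm_setr_pd(2.0, 20.0);
///     // Only the low lanes are summed; the high lane is copied from `a`.
///     let r = _mm_add_sd(a, b);
///     assert_eq!(_mm_cvtsd_f64(r), 3.0);
///     assert_eq!(_mm_cvtsd_f64(_mm_unpackhi_pd(r, r)), 10.0);
/// }
/// ```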
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(addsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) }
}

/// Adds packed double-precision (64-bit) floating-point elements in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(addpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_add(a, b) }
}

/// Returns a new vector with the low element of `a` replaced by the result of
/// dividing the lower element of `a` by the lower element of `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(divsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) }
}

/// Divides packed double-precision (64-bit) floating-point elements in `a` by
/// packed elements in `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(divpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_div(a, b) }
}

/// Returns a new vector with the low element of `a` replaced by the maximum
/// of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { maxsd(a, b) }
}

/// Returns a new vector with the maximum values from corresponding elements in
/// `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { maxpd(a, b) }
}

/// Returns a new vector with the low element of `a` replaced by the minimum
/// of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { minsd(a, b) }
}

/// Returns a new vector with the minimum values from corresponding elements in
/// `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { minpd(a, b) }
}

/// Returns a new vector with the low element of `a` replaced by the product of
/// the low elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mulsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) }
}

/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
/// and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mulpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_mul(a, b) }
}

/// Returns a new vector with the low element of `a` replaced by the square
/// root of the lower element of `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(sqrtsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_insert!(a, 0, sqrtf64(_mm_cvtsd_f64(b))) }
}

/// Returns a new vector with the square root of each of the values in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_pd)
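///
/// A usage sketch (assumes an `x86_64` target, where SSE2 is always enabled):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     let r = _mm_sqrt_pd(_mm_setr_pd(4.0, 9.0));
///     assert_eq!(_mm_cvtsd_f64(r), 2.0);
///     assert_eq!(_mm_cvtsd_f64(_mm_unpackhi_pd(r, r)), 3.0);
/// }
/// ```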
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(sqrtpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sqrt_pd(a: __m128d) -> __m128d {
    unsafe { simd_fsqrt(a) }
}

/// Returns a new vector with the low element of `a` replaced by subtracting
/// the low element of `b` from the low element of `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(subsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) }
}

/// Subtracts packed double-precision (64-bit) floating-point elements in `b`
/// from `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(subpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_sub(a, b) }
}

/// Computes the bitwise AND of packed double-precision (64-bit) floating-point
/// elements in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let a: __m128i = transmute(a);
        let b: __m128i = transmute(b);
        transmute(_mm_and_si128(a, b))
    }
}

/// Computes the bitwise NOT of `a` and then AND with `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let a: __m128i = transmute(a);
        let b: __m128i = transmute(b);
        transmute(_mm_andnot_si128(a, b))
    }
}

/// Computes the bitwise OR of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let a: __m128i = transmute(a);
        let b: __m128i = transmute(b);
        transmute(_mm_or_si128(a, b))
    }
}

/// Computes the bitwise XOR of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let a: __m128i = transmute(a);
        let b: __m128i = transmute(b);
        transmute(_mm_xor_si128(a, b))
    }
}

/// Returns a new vector with the low element of `a` replaced by the equality
/// comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpeqsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmpsd(a, b, 0) }
}

/// Returns a new vector with the low element of `a` replaced by the less-than
/// comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmpsd(a, b, 1) }
}

/// Returns a new vector with the low element of `a` replaced by the
/// less-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmpsd(a, b, 2) }
}

/// Returns a new vector with the low element of `a` replaced by the
/// greater-than comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
}

/// Returns a new vector with the low element of `a` replaced by the
/// greater-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64)) }
}

/// Returns a new vector with the low element of `a` replaced by the result
/// of comparing both of the lower elements of `a` and `b` to `NaN`. If
/// neither is equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0`
/// otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpordsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmpsd(a, b, 7) }
}

/// Returns a new vector with the low element of `a` replaced by the result of
/// comparing both of the lower elements of `a` and `b` to `NaN`. If either is
/// equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpunordsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmpsd(a, b, 3) }
}

/// Returns a new vector with the low element of `a` replaced by the not-equal
/// comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpneqsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmpsd(a, b, 4) }
}

/// Returns a new vector with the low element of `a` replaced by the
/// not-less-than comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmpsd(a, b, 5) }
}

/// Returns a new vector with the low element of `a` replaced by the
/// not-less-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmpsd(a, b, 6) }
}

/// Returns a new vector with the low element of `a` replaced by the
/// not-greater-than comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
}

/// Returns a new vector with the low element of `a` replaced by the
/// not-greater-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64)) }
}

/// Compares corresponding elements in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpeqpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmppd(a, b, 0) }
}

/// Compares corresponding elements in `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmppd(a, b, 1) }
}

/// Compares corresponding elements in `a` and `b` for less-than-or-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmppd(a, b, 2) }
}

/// Compares corresponding elements in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmplt_pd(b, a)
}

/// Compares corresponding elements in `a` and `b` for greater-than-or-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmple_pd(b, a)
}

/// Compares corresponding elements in `a` and `b` to see if neither is `NaN`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpordpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmppd(a, b, 7) }
}

/// Compares corresponding elements in `a` and `b` to see if either is `NaN`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpunordpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmppd(a, b, 3) }
}

/// Compares corresponding elements in `a` and `b` for not-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpneqpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmppd(a, b, 4) }
}

/// Compares corresponding elements in `a` and `b` for not-less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmppd(a, b, 5) }
}

/// Compares corresponding elements in `a` and `b` for not-less-than-or-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { cmppd(a, b, 6) }
}

/// Compares corresponding elements in `a` and `b` for not-greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmpnlt_pd(b, a)
}

/// Compares corresponding elements in `a` and `b` for
/// not-greater-than-or-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmpnle_pd(b, a)
}

/// Compares the lower element of `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comieq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { comieqsd(a, b) }
}

/// Compares the lower element of `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comilt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { comiltsd(a, b) }
}

/// Compares the lower element of `a` and `b` for less-than-or-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comile_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { comilesd(a, b) }
}

/// Compares the lower element of `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comigt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { comigtsd(a, b) }
}

/// Compares the lower element of `a` and `b` for greater-than-or-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comige_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { comigesd(a, b) }
}

/// Compares the lower element of `a` and `b` for not-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comineq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { comineqsd(a, b) }
}

/// Compares the lower element of `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomieq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomieqsd(a, b) }
}

/// Compares the lower element of `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomilt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomiltsd(a, b) }
}

/// Compares the lower element of `a` and `b` for less-than-or-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomile_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomilesd(a, b) }
}

/// Compares the lower element of `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomigt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomigtsd(a, b) }
}

/// Compares the lower element of `a` and `b` for greater-than-or-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomige_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomigesd(a, b) }
}

/// Compares the lower element of `a` and `b` for not-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomineq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomineqsd(a, b) }
}

/// Converts packed double-precision (64-bit) floating-point elements in `a` to
/// packed single-precision (32-bit) floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtpd2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
    unsafe {
        let r: f32x2 = simd_cast::<_, f32x2>(a.as_f64x2());
        let zero: f32x2 = f32x2::ZERO;
        transmute::<f32x4, _>(simd_shuffle!(r, zero, [0, 1, 2, 3]))
    }
}

/// Converts packed single-precision (32-bit) floating-point elements in `a` to
/// packed double-precision (64-bit) floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtps2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtps_pd(a: __m128) -> __m128d {
    unsafe {
        let a: f32x4 = a.as_f32x4();
        transmute(simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
    }
}

/// Converts packed double-precision (64-bit) floating-point elements in `a` to
/// packed 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtpd_epi32(a: __m128d) -> __m128i {
    unsafe { transmute(cvtpd2dq(a)) }
}

/// Converts the lower double-precision (64-bit) floating-point element in `a`
/// to a 32-bit integer.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsd_si32(a: __m128d) -> i32 {
    unsafe { cvtsd2si(a) }
}

/// Converts the lower double-precision (64-bit) floating-point element in `b`
/// to a single-precision (32-bit) floating-point element, stores the result in
/// the lower element of the return value, and copies the upper 3 elements from
/// `a` to the upper elements of the return value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_ss)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
    unsafe { cvtsd2ss(a, b) }
}

/// Returns the lower double-precision (64-bit) floating-point element of `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_f64)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsd_f64(a: __m128d) -> f64 {
    unsafe { simd_extract!(a, 0) }
}

/// Converts the lower single-precision (32-bit) floating-point element in `b`
/// to a double-precision (64-bit) floating-point element, stores the result in
/// the lower element of the return value, and copies the upper element from
/// `a` to the upper element of the return value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtss2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
    unsafe { cvtss2sd(a, b) }
}

/// Converts packed double-precision (64-bit) floating-point elements in `a` to
/// packed 32-bit integers with truncation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epi32)
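///
/// A sketch contrasting truncation with `_mm_cvtpd_epi32` (assumes an
/// `x86_64` target and the default round-to-nearest MXCSR rounding mode):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     let a = _mm_setr_pd(1.7, -1.7);
///     // Truncation always rounds toward zero.
///     assert_eq!(_mm_cvtsi128_si32(_mm_cvttpd_epi32(a)), 1);
///     // `_mm_cvtpd_epi32` uses the current rounding mode instead.
///     assert_eq!(_mm_cvtsi128_si32(_mm_cvtpd_epi32(a)), 2);
/// }
/// ```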
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvttpd_epi32(a: __m128d) -> __m128i {
    unsafe { transmute(cvttpd2dq(a)) }
}

/// Converts the lower double-precision (64-bit) floating-point element in `a`
/// to a 32-bit integer with truncation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvttsd_si32(a: __m128d) -> i32 {
    unsafe { cvttsd2si(a) }
}

/// Converts packed single-precision (32-bit) floating-point elements in `a` to
/// packed 32-bit integers with truncation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvttps_epi32(a: __m128) -> __m128i {
    unsafe { transmute(cvttps2dq(a)) }
}

/// Copies double-precision (64-bit) floating-point element `a` to the lower
/// element of the return value, and zeroes the upper element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_sd(a: f64) -> __m128d {
    _mm_set_pd(0.0, a)
}

/// Broadcasts double-precision (64-bit) floating-point value `a` to all
/// elements of the return value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_pd(a: f64) -> __m128d {
    _mm_set_pd(a, a)
}

/// Broadcasts double-precision (64-bit) floating-point value `a` to all
/// elements of the return value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd1)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_pd1(a: f64) -> __m128d {
    _mm_set_pd(a, a)
}

/// Sets packed double-precision (64-bit) floating-point elements in the return
/// value with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd)
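///
/// A usage sketch showing the argument order (assumes an `x86_64` target,
/// where SSE2 is always enabled):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     // The second argument lands in the low lane.
///     let v = _mm_set_pd(2.0, 1.0);
///     assert_eq!(_mm_cvtsd_f64(v), 1.0);
/// }
/// ```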
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_pd(a: f64, b: f64) -> __m128d {
    __m128d([b, a])
}

/// Sets packed double-precision (64-bit) floating-point elements in the return
/// value with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
    _mm_set_pd(b, a)
}

/// Returns packed double-precision (64-bit) floating-point elements with all
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setzero_pd() -> __m128d {
    const { unsafe { mem::zeroed() } }
}

/// Returns a mask of the most significant bit of each element in `a`.
///
/// The mask is stored in the 2 least significant bits of the return value.
/// All other bits are set to `0`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_pd)
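///
/// A usage sketch (assumes an `x86_64` target, where SSE2 is always enabled):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     let m = _mm_cmplt_pd(_mm_setr_pd(1.0, 4.0), _mm_setr_pd(2.0, 3.0));
///     // Only the low lane satisfies `1.0 < 2.0`, so only bit 0 is set.
///     assert_eq!(_mm_movemask_pd(m), 0b01);
/// }
/// ```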
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movmskpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_movemask_pd(a: __m128d) -> i32 {
    // Propagate the highest bit to the rest, because simd_bitmask
    // requires all-1 or all-0.
    unsafe {
        let mask: i64x2 = simd_lt(transmute(a), i64x2::ZERO);
        simd_bitmask::<i64x2, u8>(mask).into()
    }
}

/// Loads 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from memory into the returned vector.
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
    *(mem_addr as *const __m128d)
}

/// Loads a 64-bit double-precision value to the low element of a 128-bit
/// vector of `[2 x double]` and clears the upper element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
    _mm_setr_pd(*mem_addr, 0.)
}

/// Loads a double-precision value into the high-order bits of a 128-bit
/// vector of `[2 x double]`. The low-order bits are copied from the low-order
/// bits of the first operand.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadh_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
    _mm_setr_pd(simd_extract!(a, 0), *mem_addr)
}

/// Loads a double-precision value into the low-order bits of a 128-bit
/// vector of `[2 x double]`. The high-order bits are copied from the
/// high-order bits of the first operand.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
    _mm_setr_pd(*mem_addr, simd_extract!(a, 1))
}

/// Stores a 128-bit floating point vector of `[2 x double]` to a 128-bit
/// aligned memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_pd)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
    crate::arch::asm!(
        vps!("movntpd", ",{a}"),
        p = in(reg) mem_addr,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}

/// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
    *mem_addr = simd_extract!(a, 0)
}

/// Stores 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from `a` into memory. `mem_addr` must be aligned
/// on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
    *(mem_addr as *mut __m128d) = a;
}

/// Stores 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from `a` into memory.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))] // FIXME movupd expected
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
    mem_addr.cast::<__m128d>().write_unaligned(a);
}

/// Stores a 16-bit integer from the first element of `a` into memory.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si16)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub unsafe fn _mm_storeu_si16(mem_addr: *mut u8, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut i16, simd_extract(a.as_i16x8(), 0))
}

/// Stores a 32-bit integer from the first element of `a` into memory.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub unsafe fn _mm_storeu_si32(mem_addr: *mut u8, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut i32, simd_extract(a.as_i32x4(), 0))
}

/// Stores a 64-bit integer from the first element of `a` into memory.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si64)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub unsafe fn _mm_storeu_si64(mem_addr: *mut u8, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut i64, simd_extract(a.as_i64x2(), 0))
}

/// Stores the lower double-precision (64-bit) floating-point element from `a`
/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store1_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
    let b: __m128d = simd_shuffle!(a, a, [0, 0]);
    *(mem_addr as *mut __m128d) = b;
}

/// Stores the lower double-precision (64-bit) floating-point element from `a`
/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd1)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
    let b: __m128d = simd_shuffle!(a, a, [0, 0]);
    *(mem_addr as *mut __m128d) = b;
}

/// Stores 2 double-precision (64-bit) floating-point elements from `a` into
/// memory in reverse order.
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storer_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
    let b: __m128d = simd_shuffle!(a, a, [1, 0]);
    *(mem_addr as *mut __m128d) = b;
}

/// Stores the upper 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeh_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
    *mem_addr = simd_extract!(a, 1);
}

/// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
    *mem_addr = simd_extract!(a, 0);
}
2756
2757/// Loads a double-precision (64-bit) floating-point element from memory
2758/// into both elements of returned vector.
2759///
2760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load1_pd)
2761#[inline]
2762#[target_feature(enable = "sse2")]
2763// #[cfg_attr(test, assert_instr(movapd))] // FIXME LLVM uses different codegen
2764#[stable(feature = "simd_x86", since = "1.27.0")]
2765pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
2766 let d: f64 = *mem_addr;
2767 _mm_setr_pd(a:d, b:d)
2768}
2769
/// Loads a double-precision (64-bit) floating-point element from memory
/// into both elements of the returned vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd1)
#[inline]
#[target_feature(enable = "sse2")]
// #[cfg_attr(test, assert_instr(movapd))] // FIXME same as _mm_load1_pd
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
    _mm_load1_pd(mem_addr)
}

/// Loads 2 double-precision (64-bit) floating-point elements from memory into
/// the returned vector in reverse order. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadr_pd)
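///
/// A minimal usage sketch (illustrative; not part of Intel's documentation):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let src = _mm_setr_pd(1.0, 2.0); // 16-byte aligned source
///             let r = _mm_loadr_pd(&src as *const __m128d as *const f64);
///             // `r` holds the elements swapped: [2.0, 1.0].
///         }
///     }
/// }
/// ```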
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
    let a = _mm_load_pd(mem_addr);
    simd_shuffle!(a, a, [1, 0])
}

/// Loads 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from memory into the returned vector.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_pd)
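///
/// A minimal usage sketch (illustrative; not part of Intel's documentation).
/// Unlike `_mm_load_pd`, the source may be a plain `[f64; 2]`, which is only
/// guaranteed 8-byte alignment:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let data = [1.0_f64, 2.0];
///             let v = _mm_loadu_pd(data.as_ptr()); // `v` holds [1.0, 2.0]
///         }
///     }
/// }
/// ```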
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
    let mut dst = _mm_undefined_pd();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m128d>(),
    );
    dst
}

/// Loads 16 bits of unaligned integer data from memory into a new vector.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si16)
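///
/// A minimal usage sketch (illustrative; not part of Intel's documentation)
/// showing that the remaining lanes are zeroed:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let bytes = [0xCD_u8, 0xAB, 0xFF]; // only the first two bytes are read
///             let v = _mm_loadu_si16(bytes.as_ptr());
///             assert_eq!(_mm_extract_epi16::<0>(v), 0xABCD); // little-endian
///             assert_eq!(_mm_extract_epi16::<1>(v), 0); // upper lanes are zero
///         }
///     }
/// }
/// ```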
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub unsafe fn _mm_loadu_si16(mem_addr: *const u8) -> __m128i {
    transmute(i16x8::new(
        ptr::read_unaligned(mem_addr as *const i16),
        0,
        0,
        0,
        0,
        0,
        0,
        0,
    ))
}

/// Loads 32 bits of unaligned integer data from memory into a new vector.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub unsafe fn _mm_loadu_si32(mem_addr: *const u8) -> __m128i {
    transmute(i32x4::new(
        ptr::read_unaligned(mem_addr as *const i32),
        0,
        0,
        0,
    ))
}

/// Loads 64 bits of unaligned integer data from memory into a new vector.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si64)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86_mm_loadu_si64", since = "1.46.0")]
pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
    transmute(i64x2::new(ptr::read_unaligned(mem_addr as *const i64), 0))
}

/// Constructs a 128-bit floating-point vector of `[2 x double]` from two
/// 128-bit vector parameters of `[2 x double]`, using the immediate-value
/// parameter as a specifier.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_pd)
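///
/// A minimal usage sketch (illustrative; not part of Intel's documentation).
/// Bit 0 of `MASK` selects the low lane from `a`, bit 1 the high lane from `b`:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let a = _mm_setr_pd(1.0, 2.0);
///             let b = _mm_setr_pd(3.0, 4.0);
///             // MASK = 0b10: lane 0 = a[0], lane 1 = b[1].
///             let r = _mm_shuffle_pd::<0b10>(a, b); // [1.0, 4.0]
///         }
///     }
/// }
/// ```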
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(shufps, MASK = 2))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(MASK, 8);
    unsafe { simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) }
}

/// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower
/// 64 bits are set to the lower 64 bits of the second parameter. The upper
/// 64 bits are set to the upper 64 bits of the first parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_sd)
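///
/// A minimal usage sketch (illustrative; not part of Intel's documentation):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let a = _mm_setr_pd(1.0, 2.0);
///             let b = _mm_setr_pd(3.0, 4.0);
///             let r = _mm_move_sd(a, b); // [b[0], a[1]] = [3.0, 2.0]
///         }
///     }
/// }
/// ```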
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { _mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1)) }
}

/// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
/// floating-point vector of `[4 x float]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_ps)
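///
/// A minimal usage sketch (illustrative; not part of Intel's documentation).
/// This cast, like the other `_mm_cast*` intrinsics below, is a free bit
/// reinterpretation rather than a numeric conversion:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let d = _mm_setr_pd(1.0, 2.0);
///             // `f` holds the raw bits of [1.0f64, 2.0f64] viewed as four
///             // `f32` lanes; no conversion of the values takes place.
///             let f = _mm_castpd_ps(d);
///         }
///     }
/// }
/// ```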
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_castpd_ps(a: __m128d) -> __m128 {
    unsafe { transmute(a) }
}

/// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
/// integer vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_castpd_si128(a: __m128d) -> __m128i {
    unsafe { transmute(a) }
}

/// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
/// floating-point vector of `[2 x double]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_castps_pd(a: __m128) -> __m128d {
    unsafe { transmute(a) }
}

/// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
/// integer vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_castps_si128(a: __m128) -> __m128i {
    unsafe { transmute(a) }
}

/// Casts a 128-bit integer vector into a 128-bit floating-point vector
/// of `[2 x double]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_castsi128_pd(a: __m128i) -> __m128d {
    unsafe { transmute(a) }
}

/// Casts a 128-bit integer vector into a 128-bit floating-point vector
/// of `[4 x float]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_castsi128_ps(a: __m128i) -> __m128 {
    unsafe { transmute(a) }
}

/// Returns a vector of type `__m128d` with indeterminate elements.
/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
/// In practice, this is equivalent to [`mem::zeroed`].
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_undefined_pd() -> __m128d {
    const { unsafe { mem::zeroed() } }
}

/// Returns a vector of type `__m128i` with indeterminate elements.
/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
/// In practice, this is equivalent to [`mem::zeroed`].
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_undefined_si128() -> __m128i {
    const { unsafe { mem::zeroed() } }
}

/// The resulting `__m128d` element is composed of the high-order values of
/// the two `__m128d` interleaved input elements, i.e.:
///
/// * The `[127:64]` bits are copied from the `[127:64]` bits of the second input
/// * The `[63:0]` bits are copied from the `[127:64]` bits of the first input
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_pd)
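///
/// A minimal usage sketch (illustrative; not part of Intel's documentation):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let a = _mm_setr_pd(1.0, 2.0);
///             let b = _mm_setr_pd(3.0, 4.0);
///             let r = _mm_unpackhi_pd(a, b); // [a[1], b[1]] = [2.0, 4.0]
///         }
///     }
/// }
/// ```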
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_shuffle!(a, b, [1, 3]) }
}

/// The resulting `__m128d` element is composed of the low-order values of
/// the two `__m128d` interleaved input elements, i.e.:
///
/// * The `[127:64]` bits are copied from the `[63:0]` bits of the second input
/// * The `[63:0]` bits are copied from the `[63:0]` bits of the first input
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_shuffle!(a, b, [0, 2]) }
}

#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.sse2.pause"]
    unsafe fn pause();
    #[link_name = "llvm.x86.sse2.clflush"]
    unsafe fn clflush(p: *const u8);
    #[link_name = "llvm.x86.sse2.lfence"]
    unsafe fn lfence();
    #[link_name = "llvm.x86.sse2.mfence"]
    unsafe fn mfence();
    #[link_name = "llvm.x86.sse2.pmadd.wd"]
    unsafe fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
    #[link_name = "llvm.x86.sse2.psad.bw"]
    unsafe fn psadbw(a: u8x16, b: u8x16) -> u64x2;
    #[link_name = "llvm.x86.sse2.psll.w"]
    unsafe fn psllw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psll.d"]
    unsafe fn pslld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psll.q"]
    unsafe fn psllq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.psra.w"]
    unsafe fn psraw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psra.d"]
    unsafe fn psrad(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.w"]
    unsafe fn psrlw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psrl.d"]
    unsafe fn psrld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.q"]
    unsafe fn psrlq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.cvtps2dq"]
    unsafe fn cvtps2dq(a: __m128) -> i32x4;
    #[link_name = "llvm.x86.sse2.maskmov.dqu"]
    unsafe fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
    #[link_name = "llvm.x86.sse2.packsswb.128"]
    unsafe fn packsswb(a: i16x8, b: i16x8) -> i8x16;
    #[link_name = "llvm.x86.sse2.packssdw.128"]
    unsafe fn packssdw(a: i32x4, b: i32x4) -> i16x8;
    #[link_name = "llvm.x86.sse2.packuswb.128"]
    unsafe fn packuswb(a: i16x8, b: i16x8) -> u8x16;
    #[link_name = "llvm.x86.sse2.max.sd"]
    unsafe fn maxsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.max.pd"]
    unsafe fn maxpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.sd"]
    unsafe fn minsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.pd"]
    unsafe fn minpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.cmp.sd"]
    unsafe fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.sse2.cmp.pd"]
    unsafe fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.sse2.comieq.sd"]
    unsafe fn comieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comilt.sd"]
    unsafe fn comiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comile.sd"]
    unsafe fn comilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comigt.sd"]
    unsafe fn comigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comige.sd"]
    unsafe fn comigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comineq.sd"]
    unsafe fn comineqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomieq.sd"]
    unsafe fn ucomieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomilt.sd"]
    unsafe fn ucomiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomile.sd"]
    unsafe fn ucomilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomigt.sd"]
    unsafe fn ucomigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomige.sd"]
    unsafe fn ucomigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomineq.sd"]
    unsafe fn ucomineqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvtpd2dq"]
    unsafe fn cvtpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvtsd2si"]
    unsafe fn cvtsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvtsd2ss"]
    unsafe fn cvtsd2ss(a: __m128, b: __m128d) -> __m128;
    #[link_name = "llvm.x86.sse2.cvtss2sd"]
    unsafe fn cvtss2sd(a: __m128d, b: __m128) -> __m128d;
    #[link_name = "llvm.x86.sse2.cvttpd2dq"]
    unsafe fn cvttpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvttsd2si"]
    unsafe fn cvttsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvttps2dq"]
    unsafe fn cvttps2dq(a: __m128) -> i32x4;
}

#[cfg(test)]
mod tests {
    use crate::{
        core_arch::{simd::*, x86::*},
        hint::black_box,
    };
    use std::{
        boxed, f32, f64,
        mem::{self, transmute},
        ptr,
    };
    use stdarch_test::simd_test;

    const NAN: f64 = f64::NAN;

    #[test]
    fn test_mm_pause() {
        unsafe { _mm_pause() }
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_clflush() {
        let x = 0_u8;
        _mm_clflush(ptr::addr_of!(x));
    }

    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_lfence() {
        _mm_lfence();
    }

    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_mfence() {
        _mm_mfence();
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_add_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi8_overflow() {
        let a = _mm_set1_epi8(0x7F);
        let b = _mm_set1_epi8(1);
        let r = _mm_add_epi8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(-128));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_add_epi16(a, b);
        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(4, 5, 6, 7);
        let r = _mm_add_epi32(a, b);
        let e = _mm_setr_epi32(4, 6, 8, 10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi64() {
        let a = _mm_setr_epi64x(0, 1);
        let b = _mm_setr_epi64x(2, 3);
        let r = _mm_add_epi64(a, b);
        let e = _mm_setr_epi64x(2, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_adds_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi8_saturate_positive() {
        let a = _mm_set1_epi8(0x7F);
        let b = _mm_set1_epi8(1);
        let r = _mm_adds_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi8_saturate_negative() {
        let a = _mm_set1_epi8(-0x80);
        let b = _mm_set1_epi8(-1);
        let r = _mm_adds_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_adds_epi16(a, b);
        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi16_saturate_positive() {
        let a = _mm_set1_epi16(0x7FFF);
        let b = _mm_set1_epi16(1);
        let r = _mm_adds_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi16_saturate_negative() {
        let a = _mm_set1_epi16(-0x8000);
        let b = _mm_set1_epi16(-1);
        let r = _mm_adds_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_adds_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu8_saturate() {
        let a = _mm_set1_epi8(!0);
        let b = _mm_set1_epi8(1);
        let r = _mm_adds_epu8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_adds_epu16(a, b);
        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu16_saturate() {
        let a = _mm_set1_epi16(!0);
        let b = _mm_set1_epi16(1);
        let r = _mm_adds_epu16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_avg_epu8() {
        let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
        let r = _mm_avg_epu8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(6));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_avg_epu16() {
        let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
        let r = _mm_avg_epu16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(6));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_madd_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm_madd_epi16(a, b);
        let e = _mm_setr_epi32(29, 81, 149, 233);
        assert_eq_m128i(r, e);

        // Test large values.
        // MIN*MIN+MIN*MIN will overflow into i32::MIN.
        let a = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MAX,
            0,
            0,
        );
        let b = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MAX,
            i16::MIN,
            0,
            0,
        );
        let r = _mm_madd_epi16(a, b);
        let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_max_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let r = _mm_max_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_max_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(!0);
        let r = _mm_max_epu8(a, b);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_min_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let r = _mm_min_epi16(a, b);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_min_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(!0);
        let r = _mm_min_epu8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mulhi_epi16() {
        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
        let r = _mm_mulhi_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-16));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mulhi_epu16() {
        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
        let r = _mm_mulhi_epu16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(15));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mullo_epi16() {
        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
        let r = _mm_mullo_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-17960));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mul_epu32() {
        let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
        let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
        let r = _mm_mul_epu32(a, b);
        let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sad_epu8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
            1, 2, 3, 4,
            155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
            1, 2, 3, 4,
        );
        let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
        let r = _mm_sad_epu8(a, b);
        let e = _mm_setr_epi64x(1020, 614);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi8() {
        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
        let r = _mm_sub_epi8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi16() {
        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
        let r = _mm_sub_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi32() {
        let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
        let r = _mm_sub_epi32(a, b);
        assert_eq_m128i(r, _mm_set1_epi32(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi64() {
        let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
        let r = _mm_sub_epi64(a, b);
        assert_eq_m128i(r, _mm_set1_epi64x(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi8() {
        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
        let r = _mm_subs_epi8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi8_saturate_positive() {
        let a = _mm_set1_epi8(0x7F);
        let b = _mm_set1_epi8(-1);
        let r = _mm_subs_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi8_saturate_negative() {
        let a = _mm_set1_epi8(-0x80);
        let b = _mm_set1_epi8(1);
        let r = _mm_subs_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi16() {
        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
        let r = _mm_subs_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi16_saturate_positive() {
        let a = _mm_set1_epi16(0x7FFF);
        let b = _mm_set1_epi16(-1);
        let r = _mm_subs_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi16_saturate_negative() {
        let a = _mm_set1_epi16(-0x8000);
        let b = _mm_set1_epi16(1);
        let r = _mm_subs_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu8() {
        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
        let r = _mm_subs_epu8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu8_saturate() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let r = _mm_subs_epu8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu16() {
        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
        let r = _mm_subs_epu16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu16_saturate() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let r = _mm_subs_epu16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_si128() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_slli_si128::<1>(a);
        let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_slli_si128::<15>(a);
        let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_slli_si128::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi8(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_slli_epi16::<4>(a);
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
        );
        let r = _mm_slli_epi16::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi16(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sll_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
        );
        let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16));
        assert_eq_m128i(r, _mm_set1_epi16(0));
        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi16(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_slli_epi32::<4>(a);
        assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
        let r = _mm_slli_epi32::<32>(a);
        assert_eq_m128i(r, _mm_set1_epi32(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sll_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
        let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32));
        assert_eq_m128i(r, _mm_set1_epi32(0));
        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi32(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_epi64() {
        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
        let r = _mm_slli_epi64::<4>(a);
        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
        let r = _mm_slli_epi64::<64>(a);
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sll_epi64() {
        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
        let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64));
        assert_eq_m128i(r, _mm_set1_epi64x(0));
        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srai_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_srai_epi16::<4>(a);
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
        );
        let r = _mm_srai_epi16::<16>(a);
        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sra_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
        );
        let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16));
        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srai_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_srai_epi32::<4>(a);
        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
        let r = _mm_srai_epi32::<32>(a);
        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sra_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
        let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32));
        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_si128() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_srli_si128::<1>(a);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
        );
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_srli_si128::<15>(a);
        let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_srli_si128::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi8(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_srli_epi16::<4>(a);
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
        );
        let r = _mm_srli_epi16::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi16(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srl_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
        );
        let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16));
        assert_eq_m128i(r, _mm_set1_epi16(0));
        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi16(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_srli_epi32::<4>(a);
        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
        let r = _mm_srli_epi32::<32>(a);
        assert_eq_m128i(r, _mm_set1_epi32(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srl_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
        let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32));
        assert_eq_m128i(r, _mm_set1_epi32(0));
        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi32(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_epi64() {
        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
        let r = _mm_srli_epi64::<4>(a);
        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
        let r = _mm_srli_epi64::<64>(a);
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srl_epi64() {
        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
        let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64));
        assert_eq_m128i(r, _mm_set1_epi64x(0));
        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_and_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_and_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_andnot_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_andnot_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(2));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_or_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_or_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(7));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_xor_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_xor_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(6));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_epi8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_cmpeq_epi8(a, b);
        #[rustfmt::skip]
        assert_eq_m128i(
            r,
            _mm_setr_epi8(
                0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
            )
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
        let r = _mm_cmpeq_epi16(a, b);
        assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(3, 2, 2, 0);
        let r = _mm_cmpeq_epi32(a, b);
        assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_epi8() {
        let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        let b = _mm_set1_epi8(0);
        let r = _mm_cmpgt_epi8(a, b);
        let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_epi16() {
        let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
        let b = _mm_set1_epi16(0);
        let r = _mm_cmpgt_epi16(a, b);
        let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_epi32() {
        let a = _mm_set_epi32(5, 0, 0, 0);
        let b = _mm_set1_epi32(0);
        let r = _mm_cmpgt_epi32(a, b);
        assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_epi8() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm_cmplt_epi8(a, b);
        let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_epi16() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm_cmplt_epi16(a, b);
        let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_epi32() {
        let a = _mm_set1_epi32(0);
        let b = _mm_set_epi32(5, 0, 0, 0);
        let r = _mm_cmplt_epi32(a, b);
        assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtepi32_pd() {
        let a = _mm_set_epi32(35, 25, 15, 5);
        let r = _mm_cvtepi32_pd(a);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsi32_sd() {
        let a = _mm_set1_pd(3.5);
        let r = _mm_cvtsi32_sd(a, 5);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtepi32_ps() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let r = _mm_cvtepi32_ps(a);
        assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtps_epi32() {
        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let r = _mm_cvtps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsi32_si128() {
        let r = _mm_cvtsi32_si128(5);
        assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsi128_si32() {
        let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
        assert_eq!(r, 5);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_epi64x() {
        let r = _mm_set_epi64x(0, 1);
        assert_eq_m128i(r, _mm_setr_epi64x(1, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_epi32() {
        let r = _mm_set_epi32(0, 1, 2, 3);
        assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_epi16() {
        let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_epi8() {
        #[rustfmt::skip]
        let r = _mm_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            15, 14, 13, 12, 11, 10, 9, 8,
            7, 6, 5, 4, 3, 2, 1, 0,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_epi64x() {
        let r = _mm_set1_epi64x(1);
        assert_eq_m128i(r, _mm_set1_epi64x(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_epi32() {
        let r = _mm_set1_epi32(1);
        assert_eq_m128i(r, _mm_set1_epi32(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_epi16() {
        let r = _mm_set1_epi16(1);
        assert_eq_m128i(r, _mm_set1_epi16(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_epi8() {
        let r = _mm_set1_epi8(1);
        assert_eq_m128i(r, _mm_set1_epi8(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setr_epi32() {
        let r = _mm_setr_epi32(0, 1, 2, 3);
        assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setr_epi16() {
        let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setr_epi8() {
        #[rustfmt::skip]
        let r = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setzero_si128() {
        let r = _mm_setzero_si128();
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadl_epi64() {
        let a = _mm_setr_epi64x(6, 5);
        let r = _mm_loadl_epi64(ptr::addr_of!(a));
        assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_si128() {
        let a = _mm_set_epi64x(5, 6);
        let r = _mm_load_si128(ptr::addr_of!(a) as *const _);
        assert_eq_m128i(a, r);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadu_si128() {
        let a = _mm_set_epi64x(5, 6);
        let r = _mm_loadu_si128(ptr::addr_of!(a) as *const _);
        assert_eq_m128i(a, r);
    }

    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    // (non-temporal store)
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_maskmoveu_si128() {
        let a = _mm_set1_epi8(9);
        #[rustfmt::skip]
        let mask = _mm_set_epi8(
            0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0,
        );
        let mut r = _mm_set1_epi8(0);
        _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8);
        let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_si128() {
        let a = _mm_set1_epi8(9);
        let mut r = _mm_set1_epi8(0);
        _mm_store_si128(&mut r, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeu_si128() {
        let a = _mm_set1_epi8(9);
        let mut r = _mm_set1_epi8(0);
        _mm_storeu_si128(&mut r, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storel_epi64() {
        let a = _mm_setr_epi64x(2, 9);
        let mut r = _mm_set1_epi8(0);
        _mm_storel_epi64(&mut r, a);
        assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
    }

    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    // (non-temporal store)
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_stream_si128() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let mut r = _mm_undefined_si128();
        _mm_stream_si128(ptr::addr_of_mut!(r), a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    // (non-temporal store)
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_stream_si32() {
        let a: i32 = 7;
        let mut mem = boxed::Box::<i32>::new(-1);
        _mm_stream_si32(ptr::addr_of_mut!(*mem), a);
        assert_eq!(a, *mem);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_move_epi64() {
        let a = _mm_setr_epi64x(5, 6);
        let r = _mm_move_epi64(a);
        assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_packs_epi16() {
        let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
        let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
        let r = _mm_packs_epi16(a, b);
        #[rustfmt::skip]
        assert_eq_m128i(
            r,
            _mm_setr_epi8(
                0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
            )
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_packs_epi32() {
        let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
        let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
        let r = _mm_packs_epi32(a, b);
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF),
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_packus_epi16() {
        let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
        let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
        let r = _mm_packus_epi16(a, b);
        assert_eq_m128i(
            r,
            _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0),
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_extract_epi16() {
        let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
        let r1 = _mm_extract_epi16::<0>(a);
        let r2 = _mm_extract_epi16::<3>(a);
        assert_eq!(r1, 0xFFFF);
        assert_eq!(r2, 3);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_insert_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_insert_epi16::<0>(a, 9);
        let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_movemask_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
            0b0101, 0b1111_0000u8 as i8, 0, 0,
            0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101,
            0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
        );
        let r = _mm_movemask_epi8(a);
        assert_eq!(r, 0b10100110_00100101);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shuffle_epi32() {
        let a = _mm_setr_epi32(5, 10, 15, 20);
        let r = _mm_shuffle_epi32::<0b00_01_01_11>(a);
        let e = _mm_setr_epi32(20, 10, 10, 5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shufflehi_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
        let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a);
        let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shufflelo_epi16() {
        let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
        let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a);
        let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_unpackhi_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_unpackhi_epi16(a, b);
        let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(4, 5, 6, 7);
        let r = _mm_unpackhi_epi32(a, b);
        let e = _mm_setr_epi32(2, 6, 3, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_epi64() {
        let a = _mm_setr_epi64x(0, 1);
        let b = _mm_setr_epi64x(2, 3);
        let r = _mm_unpackhi_epi64(a, b);
        let e = _mm_setr_epi64x(1, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_unpacklo_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            0, 16, 1, 17, 2, 18, 3, 19,
            4, 20, 5, 21, 6, 22, 7, 23,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_unpacklo_epi16(a, b);
        let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(4, 5, 6, 7);
        let r = _mm_unpacklo_epi32(a, b);
        let e = _mm_setr_epi32(0, 4, 1, 5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_epi64() {
        let a = _mm_setr_epi64x(0, 1);
        let b = _mm_setr_epi64x(2, 3);
        let r = _mm_unpacklo_epi64(a, b);
        let e = _mm_setr_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }
4285
4286 #[simd_test(enable = "sse2")]
4287 unsafe fn test_mm_add_sd() {
4288 let a = _mm_setr_pd(1.0, 2.0);
4289 let b = _mm_setr_pd(5.0, 10.0);
4290 let r = _mm_add_sd(a, b);
4291 assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0));
4292 }
4293
4294 #[simd_test(enable = "sse2")]
4295 unsafe fn test_mm_add_pd() {
4296 let a = _mm_setr_pd(1.0, 2.0);
4297 let b = _mm_setr_pd(5.0, 10.0);
4298 let r = _mm_add_pd(a, b);
4299 assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
4300 }
4301
4302 #[simd_test(enable = "sse2")]
4303 unsafe fn test_mm_div_sd() {
4304 let a = _mm_setr_pd(1.0, 2.0);
4305 let b = _mm_setr_pd(5.0, 10.0);
4306 let r = _mm_div_sd(a, b);
4307 assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0));
4308 }
4309
4310 #[simd_test(enable = "sse2")]
4311 unsafe fn test_mm_div_pd() {
4312 let a = _mm_setr_pd(1.0, 2.0);
4313 let b = _mm_setr_pd(5.0, 10.0);
4314 let r = _mm_div_pd(a, b);
4315 assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2));
4316 }
4317
4318 #[simd_test(enable = "sse2")]
4319 unsafe fn test_mm_max_sd() {
4320 let a = _mm_setr_pd(1.0, 2.0);
4321 let b = _mm_setr_pd(5.0, 10.0);
4322 let r = _mm_max_sd(a, b);
4323 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4324 }
4325
4326 #[simd_test(enable = "sse2")]
4327 unsafe fn test_mm_max_pd() {
4328 let a = _mm_setr_pd(1.0, 2.0);
4329 let b = _mm_setr_pd(5.0, 10.0);
4330 let r = _mm_max_pd(a, b);
4331 assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0));
4332
4333 // Check SSE(2)-specific semantics for -0.0 handling.
4334 let a = _mm_setr_pd(-0.0, 0.0);
4335 let b = _mm_setr_pd(0.0, 0.0);
4336 let r1: [u8; 16] = transmute(_mm_max_pd(a, b));
4337 let r2: [u8; 16] = transmute(_mm_max_pd(b, a));
4338 let a: [u8; 16] = transmute(a);
4339 let b: [u8; 16] = transmute(b);
4340 assert_eq!(r1, b);
4341 assert_eq!(r2, a);
4342 assert_ne!(a, b); // sanity check that -0.0 is actually present
4343 }
4344
4345 #[simd_test(enable = "sse2")]
4346 unsafe fn test_mm_min_sd() {
4347 let a = _mm_setr_pd(1.0, 2.0);
4348 let b = _mm_setr_pd(5.0, 10.0);
4349 let r = _mm_min_sd(a, b);
4350 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4351 }
4352
4353 #[simd_test(enable = "sse2")]
4354 unsafe fn test_mm_min_pd() {
4355 let a = _mm_setr_pd(1.0, 2.0);
4356 let b = _mm_setr_pd(5.0, 10.0);
4357 let r = _mm_min_pd(a, b);
4358 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4359
4360 // Check SSE(2)-specific semantics for -0.0 handling.
4361 let a = _mm_setr_pd(-0.0, 0.0);
4362 let b = _mm_setr_pd(0.0, 0.0);
4363 let r1: [u8; 16] = transmute(_mm_min_pd(a, b));
4364 let r2: [u8; 16] = transmute(_mm_min_pd(b, a));
4365 let a: [u8; 16] = transmute(a);
4366 let b: [u8; 16] = transmute(b);
4367 assert_eq!(r1, b);
4368 assert_eq!(r2, a);
4369 assert_ne!(a, b); // sanity check that -0.0 is actually present
4370 }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mul_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_mul_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mul_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_mul_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sqrt_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_sqrt_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sqrt_pd() {
        let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
        assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_sub_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_sub_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_and_pd() {
        let a = transmute(u64x2::splat(5));
        let b = transmute(u64x2::splat(3));
        let r = _mm_and_pd(a, b);
        let e = transmute(u64x2::splat(1));
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_andnot_pd() {
        let a = transmute(u64x2::splat(5));
        let b = transmute(u64x2::splat(3));
        let r = _mm_andnot_pd(a, b);
        let e = transmute(u64x2::splat(2));
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_or_pd() {
        let a = transmute(u64x2::splat(5));
        let b = transmute(u64x2::splat(3));
        let r = _mm_or_pd(a, b);
        let e = transmute(u64x2::splat(7));
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_xor_pd() {
        let a = transmute(u64x2::splat(5));
        let b = transmute(u64x2::splat(3));
        let r = _mm_xor_pd(a, b);
        let e = transmute(u64x2::splat(6));
        assert_eq_m128d(r, e);
    }
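
    // Illustrative sketch (not part of the original suite): a common use of
    // `_mm_xor_pd` is sign negation, XORing with a vector whose only set bits
    // are the sign bits (i.e. a splat of -0.0).
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_xor_pd_negate_sketch() {
        let a = _mm_setr_pd(1.5, -2.5);
        let sign_mask = _mm_set1_pd(-0.0);
        assert_eq_m128d(_mm_xor_pd(a, sign_mask), _mm_setr_pd(-1.5, 2.5));
    }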

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
        assert_eq_m128i(r, e);
    }
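
    // Note on these `_sd` comparisons: only the low lane is compared; the
    // upper lane of the result is copied through from `a` unchanged, which is
    // why the expected values embed `2.0f64.to_bits()` in their upper half.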

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmple_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_sd() {
        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpge_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpord_sd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpunord_sd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpneq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnlt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnle_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpngt_sd() {
        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnge_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
        assert_eq_m128i(r, e);
    }
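
    // Illustrative sketch (not part of the original suite): `cmpeqpd` is an
    // ordered comparison, so a NaN lane always compares false.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_pd_nan_sketch() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 2.0));
        let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
        assert_eq_m128i(r, _mm_setr_epi64x(0, !0)); // NaN != NaN here
    }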

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmple_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, !0);
        let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpge_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpord_pd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpunord_pd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpneq_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnlt_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnle_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpngt_pd() {
        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnge_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b));
        assert_eq_m128i(r, e);
    }
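
    // Illustrative sketch (not part of the original suite): the negated
    // predicates are *unordered* comparisons, so a NaN lane satisfies
    // `cmpnlt` even though it would not satisfy `cmpge`.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnlt_pd_nan_sketch() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
        let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
        assert_eq_m128i(r, _mm_setr_epi64x(!0, 0)); // NaN lane -> true
    }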

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comieq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comieq_sd(a, b) != 0);

        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comieq_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comilt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comilt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comile_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comile_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comigt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comigt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comige_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comige_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comineq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comineq_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomieq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomieq_sd(a, b) != 0);

        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
        assert!(_mm_ucomieq_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomilt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomilt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomile_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomile_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomigt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomigt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomige_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomige_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomineq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomineq_sd(a, b) == 0);
    }
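
    // Note on the `comi*`/`ucomi*` tests above: both families return the same
    // results; they differ only in exception behavior. `comisd` signals an
    // invalid-operation exception on any NaN operand, while `ucomisd` signals
    // only on a signaling NaN.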

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_movemask_pd() {
        let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
        assert_eq!(r, 0b01);

        let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
        assert_eq!(r, 0b11);
    }
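
    // Illustrative sketch (not part of the original suite): the mask collects
    // raw sign bits, so -0.0 counts as "negative" even though it compares
    // equal to 0.0.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_movemask_pd_signed_zero_sketch() {
        let r = _mm_movemask_pd(_mm_setr_pd(3.0, -0.0));
        assert_eq!(r, 0b10);
    }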

    #[repr(align(16))]
    struct Memory {
        data: [f64; 4],
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_pd() {
        let mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mem.data;
        let d = vals.as_ptr();

        let r = _mm_load_pd(d);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_sd() {
        let a = 1.;
        let expected = _mm_setr_pd(a, 0.);
        let r = _mm_load_sd(&a);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadh_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = 3.;
        let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
        let r = _mm_loadh_pd(a, &b);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadl_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = 3.;
        let expected = _mm_setr_pd(3., get_m128d(a, 1));
        let r = _mm_loadl_pd(a, &b);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    // (non-temporal store)
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_stream_pd() {
        #[repr(align(128))]
        struct Memory {
            pub data: [f64; 2],
        }
        let a = _mm_set1_pd(7.0);
        let mut mem = Memory { data: [-1.0; 2] };

        _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
        for i in 0..2 {
            assert_eq!(mem.data[i], get_m128d(a, i));
        }
    }
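
    // Note: `_mm_stream_pd` is a non-temporal store that bypasses the cache
    // hierarchy; real code typically needs an `_mm_sfence()` before other
    // threads can safely observe the stored data.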

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_sd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_store_sd(&mut dest, a);
        assert_eq!(dest, _mm_cvtsd_f64(a));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store_pd(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 2.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeu_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);

        let mut ofs = 0;
        let mut p = vals.as_mut_ptr();

        // Make sure p is **not** aligned to a 16-byte boundary
        if (p as usize) & 0xf == 0 {
            ofs = 1;
            p = p.add(1);
        }

        _mm_storeu_pd(p, *black_box(&a));

        if ofs > 0 {
            assert_eq!(vals[ofs - 1], 0.0);
        }
        assert_eq!(vals[ofs + 0], 1.0);
        assert_eq!(vals[ofs + 1], 2.0);
    }
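
    // Illustrative sketch (not part of the original suite): the unaligned
    // store pairs with `_mm_loadu_pd` to round-trip through a buffer of
    // arbitrary alignment.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeu_loadu_pd_roundtrip_sketch() {
        let mut buf = [0.0f64; 3];
        let p = buf.as_mut_ptr().add(1); // offset so 16-byte alignment is not guaranteed
        _mm_storeu_pd(p, _mm_setr_pd(7.0, 8.0));
        assert_eq_m128d(_mm_loadu_pd(p), _mm_setr_pd(7.0, 8.0));
    }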

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeu_si16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let mut r = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
        _mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a);
        let e = _mm_setr_epi16(1, 10, 11, 12, 13, 14, 15, 16);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeu_si32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let mut r = _mm_setr_epi32(5, 6, 7, 8);
        _mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a);
        let e = _mm_setr_epi32(1, 6, 7, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeu_si64() {
        let a = _mm_setr_epi64x(1, 2);
        let mut r = _mm_setr_epi64x(3, 4);
        _mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a);
        let e = _mm_setr_epi64x(1, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store1_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store1_pd(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 1.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_pd1() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store_pd1(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 1.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storer_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_storer_pd(d, *black_box(&a));
        assert_eq!(vals[0], 2.0);
        assert_eq!(vals[1], 1.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeh_pd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_storeh_pd(&mut dest, a);
        assert_eq!(dest, get_m128d(a, 1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storel_pd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_storel_pd(&mut dest, a);
        assert_eq!(dest, _mm_cvtsd_f64(a));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadr_pd() {
        let mut mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mut mem.data;
        let d = vals.as_ptr();

        let r = _mm_loadr_pd(d);
        assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadu_pd() {
        let mut mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mut mem.data;
        let mut d = vals.as_ptr();

        // make sure d is not aligned to a 16-byte boundary
        let mut offset = 0;
        if (d as usize) & 0xf == 0 {
            offset = 1;
            d = d.add(offset);
        }

        let r = _mm_loadu_pd(d);
        let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64));
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadu_si16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm_loadu_si16(ptr::addr_of!(a) as *const _);
        assert_eq_m128i(r, _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadu_si32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let r = _mm_loadu_si32(ptr::addr_of!(a) as *const _);
        assert_eq_m128i(r, _mm_setr_epi32(1, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadu_si64() {
        let a = _mm_setr_epi64x(5, 6);
        let r = _mm_loadu_si64(ptr::addr_of!(a) as *const _);
        assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtpd_ps() {
        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
        assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
        assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
        assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtps_pd() {
        let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
        assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));

        let r = _mm_cvtps_pd(_mm_setr_ps(
            f32::MAX,
            f32::INFINITY,
            f32::NEG_INFINITY,
            f32::MIN,
        ));
        assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtpd_epi32() {
        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
        assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
        assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
    }
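
    // Illustrative sketch (not part of the original suite): `cvtpd2dq` rounds
    // according to MXCSR, which defaults to round-to-nearest-even, so halfway
    // cases round to the even integer.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtpd_epi32_rounding_sketch() {
        let r = _mm_cvtpd_epi32(_mm_setr_pd(2.5, 3.5));
        assert_eq_m128i(r, _mm_setr_epi32(2, 4, 0, 0)); // ties round to even
    }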

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_si32() {
        let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
        assert_eq!(r, -2);

        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq!(r, i32::MIN);

        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
        assert_eq!(r, i32::MIN);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_ss() {
        let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
        let b = _mm_setr_pd(2.0, -5.0);

        let r = _mm_cvtsd_ss(a, b);

        assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));

        let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
        let b = _mm_setr_pd(f64::INFINITY, -5.0);

        let r = _mm_cvtsd_ss(a, b);

        assert_eq_m128(
            r,
            _mm_setr_ps(
                f32::INFINITY,
                f32::NEG_INFINITY,
                f32::MAX,
                f32::NEG_INFINITY,
            ),
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_f64() {
        let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
        assert_eq!(r, -1.1);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtss_sd() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);

        let r = _mm_cvtss_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));

        let a = _mm_setr_pd(-1.1, f64::INFINITY);
        let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);

        let r = _mm_cvtss_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttpd_epi32() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let r = _mm_cvttpd_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));

        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let r = _mm_cvttpd_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttsd_si32() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let r = _mm_cvttsd_si32(a);
        assert_eq!(r, -1);

        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let r = _mm_cvttsd_si32(a);
        assert_eq!(r, i32::MIN);
    }
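
    // Illustrative sketch (not part of the original suite): the extra `t`
    // stands for truncation. `_mm_cvttsd_si32` always rounds toward zero,
    // while `_mm_cvtsd_si32` uses the current rounding mode (round-to-nearest
    // by default).
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvt_vs_cvtt_si32_sketch() {
        let a = _mm_setr_pd(1.7, 0.0);
        assert_eq!(_mm_cvttsd_si32(a), 1); // truncated toward zero
        assert_eq!(_mm_cvtsd_si32(a), 2); // rounded to nearest
    }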

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttps_epi32() {
        let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
        let r = _mm_cvttps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));

        let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
        let r = _mm_cvttps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_sd() {
        let r = _mm_set_sd(-1.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_pd() {
        let r = _mm_set1_pd(-1.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_pd1() {
        let r = _mm_set_pd1(-2.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_pd() {
        let r = _mm_set_pd(1.0_f64, 5.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setr_pd() {
        let r = _mm_setr_pd(1.0_f64, -5.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setzero_pd() {
        let r = _mm_setzero_pd();
        assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load1_pd() {
        let d = -5.0;
        let r = _mm_load1_pd(&d);
        assert_eq_m128d(r, _mm_setr_pd(d, d));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_pd1() {
        let d = -5.0;
        let r = _mm_load_pd1(&d);
        assert_eq_m128d(r, _mm_setr_pd(d, d));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(3.0, 4.0);
        let r = _mm_unpackhi_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(3.0, 4.0);
        let r = _mm_unpacklo_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shuffle_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(3., 4.);
        let expected = _mm_setr_pd(1., 3.);
        let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b);
        assert_eq_m128d(r, expected);
    }
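
    // Illustrative sketch (not part of the original suite): only the two low
    // bits of the immediate matter. Bit 0 selects the lane of `a` for the low
    // result lane, bit 1 selects the lane of `b` for the high result lane.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shuffle_pd_high_lanes_sketch() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(3., 4.);
        let r = _mm_shuffle_pd::<0b11>(a, b); // high lane of a, high lane of b
        assert_eq_m128d(r, _mm_setr_pd(2., 4.));
    }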

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_move_sd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(3., 4.);
        let expected = _mm_setr_pd(3., 2.);
        let r = _mm_move_sd(a, b);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castpd_ps() {
        let a = _mm_set1_pd(0.);
        let expected = _mm_set1_ps(0.);
        let r = _mm_castpd_ps(a);
        assert_eq_m128(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castpd_si128() {
        let a = _mm_set1_pd(0.);
        let expected = _mm_set1_epi64x(0);
        let r = _mm_castpd_si128(a);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castps_pd() {
        let a = _mm_set1_ps(0.);
        let expected = _mm_set1_pd(0.);
        let r = _mm_castps_pd(a);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castps_si128() {
        let a = _mm_set1_ps(0.);
        let expected = _mm_set1_epi32(0);
        let r = _mm_castps_si128(a);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castsi128_pd() {
        let a = _mm_set1_epi64x(0);
        let expected = _mm_set1_pd(0.);
        let r = _mm_castsi128_pd(a);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castsi128_ps() {
        let a = _mm_set1_epi32(0);
        let expected = _mm_set1_ps(0.);
        let r = _mm_castsi128_ps(a);
        assert_eq_m128(r, expected);
    }
}