//! Streaming SIMD Extensions 2 (SSE2)

3#[cfg(test)]
4use stdarch_test::assert_instr;
5
6use crate::{
7 core_arch::{simd::*, x86::*},
8 intrinsics::simd::*,
9 intrinsics::sqrtf64,
10 mem, ptr,
11};
12
13/// Provides a hint to the processor that the code sequence is a spin-wait loop.
14///
15/// This can help improve the performance and power consumption of spin-wait
16/// loops.
17///
18/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_pause)
19#[inline]
20#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
21#[stable(feature = "simd_x86", since = "1.27.0")]
22pub fn _mm_pause() {
23 // note: `pause` is guaranteed to be interpreted as a `nop` by CPUs without
24 // the SSE2 target-feature - therefore it does not require any target features
25 unsafe { pause() }
26}
27
28/// Invalidates and flushes the cache line that contains `p` from all levels of
29/// the cache hierarchy.
30///
31/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clflush)
32#[inline]
33#[target_feature(enable = "sse2")]
34#[cfg_attr(test, assert_instr(clflush))]
35#[stable(feature = "simd_x86", since = "1.27.0")]
36pub unsafe fn _mm_clflush(p: *const u8) {
37 clflush(p)
38}
39
40/// Performs a serializing operation on all load-from-memory instructions
41/// that were issued prior to this instruction.
42///
43/// Guarantees that every load instruction that precedes, in program order, is
44/// globally visible before any load instruction which follows the fence in
45/// program order.
46///
47/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lfence)
48#[inline]
49#[target_feature(enable = "sse2")]
50#[cfg_attr(test, assert_instr(lfence))]
51#[stable(feature = "simd_x86", since = "1.27.0")]
52pub fn _mm_lfence() {
53 unsafe { lfence() }
54}
55
56/// Performs a serializing operation on all load-from-memory and store-to-memory
57/// instructions that were issued prior to this instruction.
58///
59/// Guarantees that every memory access that precedes, in program order, the
60/// memory fence instruction is globally visible before any memory instruction
61/// which follows the fence in program order.
62///
63/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mfence)
64#[inline]
65#[target_feature(enable = "sse2")]
66#[cfg_attr(test, assert_instr(mfence))]
67#[stable(feature = "simd_x86", since = "1.27.0")]
68pub fn _mm_mfence() {
69 unsafe { mfence() }
70}
71
72/// Adds packed 8-bit integers in `a` and `b`.
73///
74/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi8)
75#[inline]
76#[target_feature(enable = "sse2")]
77#[cfg_attr(test, assert_instr(paddb))]
78#[stable(feature = "simd_x86", since = "1.27.0")]
79pub fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
80 unsafe { transmute(src:simd_add(x:a.as_i8x16(), y:b.as_i8x16())) }
81}
82
83/// Adds packed 16-bit integers in `a` and `b`.
84///
85/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi16)
86#[inline]
87#[target_feature(enable = "sse2")]
88#[cfg_attr(test, assert_instr(paddw))]
89#[stable(feature = "simd_x86", since = "1.27.0")]
90pub fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
91 unsafe { transmute(src:simd_add(x:a.as_i16x8(), y:b.as_i16x8())) }
92}
93
94/// Adds packed 32-bit integers in `a` and `b`.
95///
96/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi32)
97#[inline]
98#[target_feature(enable = "sse2")]
99#[cfg_attr(test, assert_instr(paddd))]
100#[stable(feature = "simd_x86", since = "1.27.0")]
101pub fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
102 unsafe { transmute(src:simd_add(x:a.as_i32x4(), y:b.as_i32x4())) }
103}
104
105/// Adds packed 64-bit integers in `a` and `b`.
106///
107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi64)
108#[inline]
109#[target_feature(enable = "sse2")]
110#[cfg_attr(test, assert_instr(paddq))]
111#[stable(feature = "simd_x86", since = "1.27.0")]
112pub fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
113 unsafe { transmute(src:simd_add(x:a.as_i64x2(), y:b.as_i64x2())) }
114}
115
116/// Adds packed 8-bit integers in `a` and `b` using saturation.
117///
118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi8)
119#[inline]
120#[target_feature(enable = "sse2")]
121#[cfg_attr(test, assert_instr(paddsb))]
122#[stable(feature = "simd_x86", since = "1.27.0")]
123pub fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
124 unsafe { transmute(src:simd_saturating_add(x:a.as_i8x16(), y:b.as_i8x16())) }
125}
126
127/// Adds packed 16-bit integers in `a` and `b` using saturation.
128///
129/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi16)
130#[inline]
131#[target_feature(enable = "sse2")]
132#[cfg_attr(test, assert_instr(paddsw))]
133#[stable(feature = "simd_x86", since = "1.27.0")]
134pub fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
135 unsafe { transmute(src:simd_saturating_add(x:a.as_i16x8(), y:b.as_i16x8())) }
136}
137
138/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
139///
140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu8)
141#[inline]
142#[target_feature(enable = "sse2")]
143#[cfg_attr(test, assert_instr(paddusb))]
144#[stable(feature = "simd_x86", since = "1.27.0")]
145pub fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
146 unsafe { transmute(src:simd_saturating_add(x:a.as_u8x16(), y:b.as_u8x16())) }
147}
148
149/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
150///
151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu16)
152#[inline]
153#[target_feature(enable = "sse2")]
154#[cfg_attr(test, assert_instr(paddusw))]
155#[stable(feature = "simd_x86", since = "1.27.0")]
156pub fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
157 unsafe { transmute(src:simd_saturating_add(x:a.as_u16x8(), y:b.as_u16x8())) }
158}
159
160/// Averages packed unsigned 8-bit integers in `a` and `b`.
161///
162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu8)
163#[inline]
164#[target_feature(enable = "sse2")]
165#[cfg_attr(test, assert_instr(pavgb))]
166#[stable(feature = "simd_x86", since = "1.27.0")]
167pub fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
168 unsafe {
169 let a: u16x16 = simd_cast::<_, u16x16>(a.as_u8x16());
170 let b: u16x16 = simd_cast::<_, u16x16>(b.as_u8x16());
171 let r: u16x16 = simd_shr(lhs:simd_add(simd_add(a, b), u16x16::splat(1)), rhs:u16x16::splat(1));
172 transmute(src:simd_cast::<_, u8x16>(r))
173 }
174}
175
176/// Averages packed unsigned 16-bit integers in `a` and `b`.
177///
178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu16)
179#[inline]
180#[target_feature(enable = "sse2")]
181#[cfg_attr(test, assert_instr(pavgw))]
182#[stable(feature = "simd_x86", since = "1.27.0")]
183pub fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
184 unsafe {
185 let a: u32x8 = simd_cast::<_, u32x8>(a.as_u16x8());
186 let b: u32x8 = simd_cast::<_, u32x8>(b.as_u16x8());
187 let r: u32x8 = simd_shr(lhs:simd_add(simd_add(a, b), u32x8::splat(1)), rhs:u32x8::splat(1));
188 transmute(src:simd_cast::<_, u16x8>(r))
189 }
190}
191
192/// Multiplies and then horizontally add signed 16 bit integers in `a` and `b`.
193///
194/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
195/// intermediate signed 32-bit integers. Horizontally add adjacent pairs of
196/// intermediate 32-bit integers.
197///
198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd_epi16)
199#[inline]
200#[target_feature(enable = "sse2")]
201#[cfg_attr(test, assert_instr(pmaddwd))]
202#[stable(feature = "simd_x86", since = "1.27.0")]
203pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
204 // It's a trick used in the Adler-32 algorithm to perform a widening addition.
205 //
206 // ```rust
207 // #[target_feature(enable = "sse2")]
208 // unsafe fn widening_add(mad: __m128i) -> __m128i {
209 // _mm_madd_epi16(mad, _mm_set1_epi16(1))
210 // }
211 // ```
212 //
213 // If we implement this using generic vector intrinsics, the optimizer
214 // will eliminate this pattern, and `pmaddwd` will no longer be emitted.
215 // For this reason, we use x86 intrinsics.
216 unsafe { transmute(src:pmaddwd(a.as_i16x8(), b.as_i16x8())) }
217}
218
219/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
220/// maximum values.
221///
222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi16)
223#[inline]
224#[target_feature(enable = "sse2")]
225#[cfg_attr(test, assert_instr(pmaxsw))]
226#[stable(feature = "simd_x86", since = "1.27.0")]
227pub fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
228 unsafe {
229 let a: i16x8 = a.as_i16x8();
230 let b: i16x8 = b.as_i16x8();
231 transmute(src:simd_select::<i16x8, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
232 }
233}
234
235/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
236/// packed maximum values.
237///
238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu8)
239#[inline]
240#[target_feature(enable = "sse2")]
241#[cfg_attr(test, assert_instr(pmaxub))]
242#[stable(feature = "simd_x86", since = "1.27.0")]
243pub fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
244 unsafe {
245 let a: u8x16 = a.as_u8x16();
246 let b: u8x16 = b.as_u8x16();
247 transmute(src:simd_select::<i8x16, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
248 }
249}
250
251/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
252/// minimum values.
253///
254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi16)
255#[inline]
256#[target_feature(enable = "sse2")]
257#[cfg_attr(test, assert_instr(pminsw))]
258#[stable(feature = "simd_x86", since = "1.27.0")]
259pub fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
260 unsafe {
261 let a: i16x8 = a.as_i16x8();
262 let b: i16x8 = b.as_i16x8();
263 transmute(src:simd_select::<i16x8, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
264 }
265}
266
267/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
268/// packed minimum values.
269///
270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu8)
271#[inline]
272#[target_feature(enable = "sse2")]
273#[cfg_attr(test, assert_instr(pminub))]
274#[stable(feature = "simd_x86", since = "1.27.0")]
275pub fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
276 unsafe {
277 let a: u8x16 = a.as_u8x16();
278 let b: u8x16 = b.as_u8x16();
279 transmute(src:simd_select::<i8x16, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
280 }
281}
282
283/// Multiplies the packed 16-bit integers in `a` and `b`.
284///
285/// The multiplication produces intermediate 32-bit integers, and returns the
286/// high 16 bits of the intermediate integers.
287///
288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epi16)
289#[inline]
290#[target_feature(enable = "sse2")]
291#[cfg_attr(test, assert_instr(pmulhw))]
292#[stable(feature = "simd_x86", since = "1.27.0")]
293pub fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
294 unsafe {
295 let a: i32x8 = simd_cast::<_, i32x8>(a.as_i16x8());
296 let b: i32x8 = simd_cast::<_, i32x8>(b.as_i16x8());
297 let r: i32x8 = simd_shr(lhs:simd_mul(a, b), rhs:i32x8::splat(16));
298 transmute(src:simd_cast::<i32x8, i16x8>(r))
299 }
300}
301
302/// Multiplies the packed unsigned 16-bit integers in `a` and `b`.
303///
304/// The multiplication produces intermediate 32-bit integers, and returns the
305/// high 16 bits of the intermediate integers.
306///
307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epu16)
308#[inline]
309#[target_feature(enable = "sse2")]
310#[cfg_attr(test, assert_instr(pmulhuw))]
311#[stable(feature = "simd_x86", since = "1.27.0")]
312pub fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
313 unsafe {
314 let a: u32x8 = simd_cast::<_, u32x8>(a.as_u16x8());
315 let b: u32x8 = simd_cast::<_, u32x8>(b.as_u16x8());
316 let r: u32x8 = simd_shr(lhs:simd_mul(a, b), rhs:u32x8::splat(16));
317 transmute(src:simd_cast::<u32x8, u16x8>(r))
318 }
319}
320
321/// Multiplies the packed 16-bit integers in `a` and `b`.
322///
323/// The multiplication produces intermediate 32-bit integers, and returns the
324/// low 16 bits of the intermediate integers.
325///
326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi16)
327#[inline]
328#[target_feature(enable = "sse2")]
329#[cfg_attr(test, assert_instr(pmullw))]
330#[stable(feature = "simd_x86", since = "1.27.0")]
331pub fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
332 unsafe { transmute(src:simd_mul(x:a.as_i16x8(), y:b.as_i16x8())) }
333}
334
335/// Multiplies the low unsigned 32-bit integers from each packed 64-bit element
336/// in `a` and `b`.
337///
338/// Returns the unsigned 64-bit results.
339///
340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_epu32)
341#[inline]
342#[target_feature(enable = "sse2")]
343#[cfg_attr(test, assert_instr(pmuludq))]
344#[stable(feature = "simd_x86", since = "1.27.0")]
345pub fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
346 unsafe {
347 let a: u64x2 = a.as_u64x2();
348 let b: u64x2 = b.as_u64x2();
349 let mask: u64x2 = u64x2::splat(u32::MAX.into());
350 transmute(src:simd_mul(x:simd_and(a, mask), y:simd_and(x:b, y:mask)))
351 }
352}
353
354/// Sum the absolute differences of packed unsigned 8-bit integers.
355///
356/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
357/// and `b`, then horizontally sum each consecutive 8 differences to produce
358/// two unsigned 16-bit integers, and pack these unsigned 16-bit integers in
359/// the low 16 bits of 64-bit elements returned.
360///
361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sad_epu8)
362#[inline]
363#[target_feature(enable = "sse2")]
364#[cfg_attr(test, assert_instr(psadbw))]
365#[stable(feature = "simd_x86", since = "1.27.0")]
366pub fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
367 unsafe { transmute(src:psadbw(a.as_u8x16(), b.as_u8x16())) }
368}
369
370/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`.
371///
372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi8)
373#[inline]
374#[target_feature(enable = "sse2")]
375#[cfg_attr(test, assert_instr(psubb))]
376#[stable(feature = "simd_x86", since = "1.27.0")]
377pub fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
378 unsafe { transmute(src:simd_sub(lhs:a.as_i8x16(), rhs:b.as_i8x16())) }
379}
380
381/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`.
382///
383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi16)
384#[inline]
385#[target_feature(enable = "sse2")]
386#[cfg_attr(test, assert_instr(psubw))]
387#[stable(feature = "simd_x86", since = "1.27.0")]
388pub fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
389 unsafe { transmute(src:simd_sub(lhs:a.as_i16x8(), rhs:b.as_i16x8())) }
390}
391
392/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`.
393///
394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi32)
395#[inline]
396#[target_feature(enable = "sse2")]
397#[cfg_attr(test, assert_instr(psubd))]
398#[stable(feature = "simd_x86", since = "1.27.0")]
399pub fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
400 unsafe { transmute(src:simd_sub(lhs:a.as_i32x4(), rhs:b.as_i32x4())) }
401}
402
403/// Subtract packed 64-bit integers in `b` from packed 64-bit integers in `a`.
404///
405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi64)
406#[inline]
407#[target_feature(enable = "sse2")]
408#[cfg_attr(test, assert_instr(psubq))]
409#[stable(feature = "simd_x86", since = "1.27.0")]
410pub fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
411 unsafe { transmute(src:simd_sub(lhs:a.as_i64x2(), rhs:b.as_i64x2())) }
412}
413
414/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
415/// using saturation.
416///
417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi8)
418#[inline]
419#[target_feature(enable = "sse2")]
420#[cfg_attr(test, assert_instr(psubsb))]
421#[stable(feature = "simd_x86", since = "1.27.0")]
422pub fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
423 unsafe { transmute(src:simd_saturating_sub(lhs:a.as_i8x16(), rhs:b.as_i8x16())) }
424}
425
426/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
427/// using saturation.
428///
429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi16)
430#[inline]
431#[target_feature(enable = "sse2")]
432#[cfg_attr(test, assert_instr(psubsw))]
433#[stable(feature = "simd_x86", since = "1.27.0")]
434pub fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
435 unsafe { transmute(src:simd_saturating_sub(lhs:a.as_i16x8(), rhs:b.as_i16x8())) }
436}
437
438/// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
439/// integers in `a` using saturation.
440///
441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu8)
442#[inline]
443#[target_feature(enable = "sse2")]
444#[cfg_attr(test, assert_instr(psubusb))]
445#[stable(feature = "simd_x86", since = "1.27.0")]
446pub fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
447 unsafe { transmute(src:simd_saturating_sub(lhs:a.as_u8x16(), rhs:b.as_u8x16())) }
448}
449
450/// Subtract packed unsigned 16-bit integers in `b` from packed unsigned 16-bit
451/// integers in `a` using saturation.
452///
453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu16)
454#[inline]
455#[target_feature(enable = "sse2")]
456#[cfg_attr(test, assert_instr(psubusw))]
457#[stable(feature = "simd_x86", since = "1.27.0")]
458pub fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
459 unsafe { transmute(src:simd_saturating_sub(lhs:a.as_u16x8(), rhs:b.as_u16x8())) }
460}
461
462/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
463///
464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_si128)
465#[inline]
466#[target_feature(enable = "sse2")]
467#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
468#[rustc_legacy_const_generics(1)]
469#[stable(feature = "simd_x86", since = "1.27.0")]
470pub fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
471 static_assert_uimm_bits!(IMM8, 8);
472 unsafe { _mm_slli_si128_impl::<IMM8>(a) }
473}
474
475/// Implementation detail: converts the immediate argument of the
476/// `_mm_slli_si128` intrinsic into a compile-time constant.
477#[inline]
478#[target_feature(enable = "sse2")]
479unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
480 const fn mask(shift: i32, i: u32) -> u32 {
481 let shift = shift as u32 & 0xff;
482 if shift > 15 { i } else { 16 - shift + i }
483 }
484 transmute::<i8x16, _>(simd_shuffle!(
485 i8x16::ZERO,
486 a.as_i8x16(),
487 [
488 mask(IMM8, 0),
489 mask(IMM8, 1),
490 mask(IMM8, 2),
491 mask(IMM8, 3),
492 mask(IMM8, 4),
493 mask(IMM8, 5),
494 mask(IMM8, 6),
495 mask(IMM8, 7),
496 mask(IMM8, 8),
497 mask(IMM8, 9),
498 mask(IMM8, 10),
499 mask(IMM8, 11),
500 mask(IMM8, 12),
501 mask(IMM8, 13),
502 mask(IMM8, 14),
503 mask(IMM8, 15),
504 ],
505 ))
506}
507
508/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
509///
510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bslli_si128)
511#[inline]
512#[target_feature(enable = "sse2")]
513#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
514#[rustc_legacy_const_generics(1)]
515#[stable(feature = "simd_x86", since = "1.27.0")]
516pub fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
517 unsafe {
518 static_assert_uimm_bits!(IMM8, 8);
519 _mm_slli_si128_impl::<IMM8>(a)
520 }
521}
522
523/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
524///
525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bsrli_si128)
526#[inline]
527#[target_feature(enable = "sse2")]
528#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
529#[rustc_legacy_const_generics(1)]
530#[stable(feature = "simd_x86", since = "1.27.0")]
531pub fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
532 unsafe {
533 static_assert_uimm_bits!(IMM8, 8);
534 _mm_srli_si128_impl::<IMM8>(a)
535 }
536}
537
538/// Shifts packed 16-bit integers in `a` left by `IMM8` while shifting in zeros.
539///
540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi16)
541#[inline]
542#[target_feature(enable = "sse2")]
543#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
544#[rustc_legacy_const_generics(1)]
545#[stable(feature = "simd_x86", since = "1.27.0")]
546pub fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
547 static_assert_uimm_bits!(IMM8, 8);
548 unsafe {
549 if IMM8 >= 16 {
550 _mm_setzero_si128()
551 } else {
552 transmute(src:simd_shl(lhs:a.as_u16x8(), rhs:u16x8::splat(IMM8 as u16)))
553 }
554 }
555}
556
557/// Shifts packed 16-bit integers in `a` left by `count` while shifting in
558/// zeros.
559///
560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi16)
561#[inline]
562#[target_feature(enable = "sse2")]
563#[cfg_attr(test, assert_instr(psllw))]
564#[stable(feature = "simd_x86", since = "1.27.0")]
565pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
566 unsafe { transmute(src:psllw(a.as_i16x8(), count.as_i16x8())) }
567}
568
569/// Shifts packed 32-bit integers in `a` left by `IMM8` while shifting in zeros.
570///
571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi32)
572#[inline]
573#[target_feature(enable = "sse2")]
574#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
575#[rustc_legacy_const_generics(1)]
576#[stable(feature = "simd_x86", since = "1.27.0")]
577pub fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
578 static_assert_uimm_bits!(IMM8, 8);
579 unsafe {
580 if IMM8 >= 32 {
581 _mm_setzero_si128()
582 } else {
583 transmute(src:simd_shl(lhs:a.as_u32x4(), rhs:u32x4::splat(IMM8 as u32)))
584 }
585 }
586}
587
588/// Shifts packed 32-bit integers in `a` left by `count` while shifting in
589/// zeros.
590///
591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi32)
592#[inline]
593#[target_feature(enable = "sse2")]
594#[cfg_attr(test, assert_instr(pslld))]
595#[stable(feature = "simd_x86", since = "1.27.0")]
596pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
597 unsafe { transmute(src:pslld(a.as_i32x4(), count.as_i32x4())) }
598}
599
600/// Shifts packed 64-bit integers in `a` left by `IMM8` while shifting in zeros.
601///
602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi64)
603#[inline]
604#[target_feature(enable = "sse2")]
605#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
606#[rustc_legacy_const_generics(1)]
607#[stable(feature = "simd_x86", since = "1.27.0")]
608pub fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
609 static_assert_uimm_bits!(IMM8, 8);
610 unsafe {
611 if IMM8 >= 64 {
612 _mm_setzero_si128()
613 } else {
614 transmute(src:simd_shl(lhs:a.as_u64x2(), rhs:u64x2::splat(IMM8 as u64)))
615 }
616 }
617}
618
619/// Shifts packed 64-bit integers in `a` left by `count` while shifting in
620/// zeros.
621///
622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi64)
623#[inline]
624#[target_feature(enable = "sse2")]
625#[cfg_attr(test, assert_instr(psllq))]
626#[stable(feature = "simd_x86", since = "1.27.0")]
627pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
628 unsafe { transmute(src:psllq(a.as_i64x2(), count.as_i64x2())) }
629}
630
631/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in sign
632/// bits.
633///
634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi16)
635#[inline]
636#[target_feature(enable = "sse2")]
637#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
638#[rustc_legacy_const_generics(1)]
639#[stable(feature = "simd_x86", since = "1.27.0")]
640pub fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
641 static_assert_uimm_bits!(IMM8, 8);
642 unsafe { transmute(src:simd_shr(lhs:a.as_i16x8(), rhs:i16x8::splat(IMM8.min(15) as i16))) }
643}
644
645/// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign
646/// bits.
647///
648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi16)
649#[inline]
650#[target_feature(enable = "sse2")]
651#[cfg_attr(test, assert_instr(psraw))]
652#[stable(feature = "simd_x86", since = "1.27.0")]
653pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
654 unsafe { transmute(src:psraw(a.as_i16x8(), count.as_i16x8())) }
655}
656
657/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in sign
658/// bits.
659///
660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi32)
661#[inline]
662#[target_feature(enable = "sse2")]
663#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
664#[rustc_legacy_const_generics(1)]
665#[stable(feature = "simd_x86", since = "1.27.0")]
666pub fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
667 static_assert_uimm_bits!(IMM8, 8);
668 unsafe { transmute(src:simd_shr(lhs:a.as_i32x4(), rhs:i32x4::splat(IMM8.min(31)))) }
669}
670
671/// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign
672/// bits.
673///
674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi32)
675#[inline]
676#[target_feature(enable = "sse2")]
677#[cfg_attr(test, assert_instr(psrad))]
678#[stable(feature = "simd_x86", since = "1.27.0")]
679pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
680 unsafe { transmute(src:psrad(a.as_i32x4(), count.as_i32x4())) }
681}
682
683/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
684///
685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_si128)
686#[inline]
687#[target_feature(enable = "sse2")]
688#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
689#[rustc_legacy_const_generics(1)]
690#[stable(feature = "simd_x86", since = "1.27.0")]
691pub fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
692 static_assert_uimm_bits!(IMM8, 8);
693 unsafe { _mm_srli_si128_impl::<IMM8>(a) }
694}
695
696/// Implementation detail: converts the immediate argument of the
697/// `_mm_srli_si128` intrinsic into a compile-time constant.
698#[inline]
699#[target_feature(enable = "sse2")]
700unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
701 const fn mask(shift: i32, i: u32) -> u32 {
702 if (shift as u32) > 15 {
703 i + 16
704 } else {
705 i + (shift as u32)
706 }
707 }
708 let x: i8x16 = simd_shuffle!(
709 a.as_i8x16(),
710 i8x16::ZERO,
711 [
712 mask(IMM8, 0),
713 mask(IMM8, 1),
714 mask(IMM8, 2),
715 mask(IMM8, 3),
716 mask(IMM8, 4),
717 mask(IMM8, 5),
718 mask(IMM8, 6),
719 mask(IMM8, 7),
720 mask(IMM8, 8),
721 mask(IMM8, 9),
722 mask(IMM8, 10),
723 mask(IMM8, 11),
724 mask(IMM8, 12),
725 mask(IMM8, 13),
726 mask(IMM8, 14),
727 mask(IMM8, 15),
728 ],
729 );
730 transmute(x)
731}
732
733/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in
734/// zeros.
735///
736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi16)
737#[inline]
738#[target_feature(enable = "sse2")]
739#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
740#[rustc_legacy_const_generics(1)]
741#[stable(feature = "simd_x86", since = "1.27.0")]
742pub fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
743 static_assert_uimm_bits!(IMM8, 8);
744 unsafe {
745 if IMM8 >= 16 {
746 _mm_setzero_si128()
747 } else {
748 transmute(src:simd_shr(lhs:a.as_u16x8(), rhs:u16x8::splat(IMM8 as u16)))
749 }
750 }
751}
752
753/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
754/// zeros.
755///
756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi16)
757#[inline]
758#[target_feature(enable = "sse2")]
759#[cfg_attr(test, assert_instr(psrlw))]
760#[stable(feature = "simd_x86", since = "1.27.0")]
761pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
762 unsafe { transmute(src:psrlw(a.as_i16x8(), count.as_i16x8())) }
763}
764
765/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in
766/// zeros.
767///
768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi32)
769#[inline]
770#[target_feature(enable = "sse2")]
771#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
772#[rustc_legacy_const_generics(1)]
773#[stable(feature = "simd_x86", since = "1.27.0")]
774pub fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
775 static_assert_uimm_bits!(IMM8, 8);
776 unsafe {
777 if IMM8 >= 32 {
778 _mm_setzero_si128()
779 } else {
780 transmute(src:simd_shr(lhs:a.as_u32x4(), rhs:u32x4::splat(IMM8 as u32)))
781 }
782 }
783}
784
785/// Shifts packed 32-bit integers in `a` right by `count` while shifting in
786/// zeros.
787///
788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi32)
789#[inline]
790#[target_feature(enable = "sse2")]
791#[cfg_attr(test, assert_instr(psrld))]
792#[stable(feature = "simd_x86", since = "1.27.0")]
793pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
794 unsafe { transmute(src:psrld(a.as_i32x4(), count.as_i32x4())) }
795}
796
797/// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in
798/// zeros.
799///
800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi64)
801#[inline]
802#[target_feature(enable = "sse2")]
803#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
804#[rustc_legacy_const_generics(1)]
805#[stable(feature = "simd_x86", since = "1.27.0")]
806pub fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
807 static_assert_uimm_bits!(IMM8, 8);
808 unsafe {
809 if IMM8 >= 64 {
810 _mm_setzero_si128()
811 } else {
812 transmute(src:simd_shr(lhs:a.as_u64x2(), rhs:u64x2::splat(IMM8 as u64)))
813 }
814 }
815}
816
817/// Shifts packed 64-bit integers in `a` right by `count` while shifting in
818/// zeros.
819///
820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi64)
821#[inline]
822#[target_feature(enable = "sse2")]
823#[cfg_attr(test, assert_instr(psrlq))]
824#[stable(feature = "simd_x86", since = "1.27.0")]
825pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
826 unsafe { transmute(src:psrlq(a.as_i64x2(), count.as_i64x2())) }
827}
828
829/// Computes the bitwise AND of 128 bits (representing integer data) in `a` and
830/// `b`.
831///
832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_si128)
833#[inline]
834#[target_feature(enable = "sse2")]
835#[cfg_attr(test, assert_instr(andps))]
836#[stable(feature = "simd_x86", since = "1.27.0")]
837pub fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
838 unsafe { simd_and(x:a, y:b) }
839}
840
841/// Computes the bitwise NOT of 128 bits (representing integer data) in `a` and
842/// then AND with `b`.
843///
844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_si128)
845#[inline]
846#[target_feature(enable = "sse2")]
847#[cfg_attr(test, assert_instr(andnps))]
848#[stable(feature = "simd_x86", since = "1.27.0")]
849pub fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
850 unsafe { simd_and(x:simd_xor(_mm_set1_epi8(-1), a), y:b) }
851}
852
853/// Computes the bitwise OR of 128 bits (representing integer data) in `a` and
854/// `b`.
855///
856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_si128)
857#[inline]
858#[target_feature(enable = "sse2")]
859#[cfg_attr(test, assert_instr(orps))]
860#[stable(feature = "simd_x86", since = "1.27.0")]
861pub fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
862 unsafe { simd_or(x:a, y:b) }
863}
864
865/// Computes the bitwise XOR of 128 bits (representing integer data) in `a` and
866/// `b`.
867///
868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_si128)
869#[inline]
870#[target_feature(enable = "sse2")]
871#[cfg_attr(test, assert_instr(xorps))]
872#[stable(feature = "simd_x86", since = "1.27.0")]
873pub fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
874 unsafe { simd_xor(x:a, y:b) }
875}
876
877/// Compares packed 8-bit integers in `a` and `b` for equality.
878///
879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8)
880#[inline]
881#[target_feature(enable = "sse2")]
882#[cfg_attr(test, assert_instr(pcmpeqb))]
883#[stable(feature = "simd_x86", since = "1.27.0")]
884pub fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
885 unsafe { transmute::<i8x16, _>(src:simd_eq(x:a.as_i8x16(), y:b.as_i8x16())) }
886}
887
888/// Compares packed 16-bit integers in `a` and `b` for equality.
889///
890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16)
891#[inline]
892#[target_feature(enable = "sse2")]
893#[cfg_attr(test, assert_instr(pcmpeqw))]
894#[stable(feature = "simd_x86", since = "1.27.0")]
895pub fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
896 unsafe { transmute::<i16x8, _>(src:simd_eq(x:a.as_i16x8(), y:b.as_i16x8())) }
897}
898
899/// Compares packed 32-bit integers in `a` and `b` for equality.
900///
901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32)
902#[inline]
903#[target_feature(enable = "sse2")]
904#[cfg_attr(test, assert_instr(pcmpeqd))]
905#[stable(feature = "simd_x86", since = "1.27.0")]
906pub fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
907 unsafe { transmute::<i32x4, _>(src:simd_eq(x:a.as_i32x4(), y:b.as_i32x4())) }
908}
909
910/// Compares packed 8-bit integers in `a` and `b` for greater-than.
911///
912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8)
913#[inline]
914#[target_feature(enable = "sse2")]
915#[cfg_attr(test, assert_instr(pcmpgtb))]
916#[stable(feature = "simd_x86", since = "1.27.0")]
917pub fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
918 unsafe { transmute::<i8x16, _>(src:simd_gt(x:a.as_i8x16(), y:b.as_i8x16())) }
919}
920
921/// Compares packed 16-bit integers in `a` and `b` for greater-than.
922///
923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16)
924#[inline]
925#[target_feature(enable = "sse2")]
926#[cfg_attr(test, assert_instr(pcmpgtw))]
927#[stable(feature = "simd_x86", since = "1.27.0")]
928pub fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
929 unsafe { transmute::<i16x8, _>(src:simd_gt(x:a.as_i16x8(), y:b.as_i16x8())) }
930}
931
932/// Compares packed 32-bit integers in `a` and `b` for greater-than.
933///
934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32)
935#[inline]
936#[target_feature(enable = "sse2")]
937#[cfg_attr(test, assert_instr(pcmpgtd))]
938#[stable(feature = "simd_x86", since = "1.27.0")]
939pub fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
940 unsafe { transmute::<i32x4, _>(src:simd_gt(x:a.as_i32x4(), y:b.as_i32x4())) }
941}
942
943/// Compares packed 8-bit integers in `a` and `b` for less-than.
944///
945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8)
946#[inline]
947#[target_feature(enable = "sse2")]
948#[cfg_attr(test, assert_instr(pcmpgtb))]
949#[stable(feature = "simd_x86", since = "1.27.0")]
950pub fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
951 unsafe { transmute::<i8x16, _>(src:simd_lt(x:a.as_i8x16(), y:b.as_i8x16())) }
952}
953
954/// Compares packed 16-bit integers in `a` and `b` for less-than.
955///
956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16)
957#[inline]
958#[target_feature(enable = "sse2")]
959#[cfg_attr(test, assert_instr(pcmpgtw))]
960#[stable(feature = "simd_x86", since = "1.27.0")]
961pub fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
962 unsafe { transmute::<i16x8, _>(src:simd_lt(x:a.as_i16x8(), y:b.as_i16x8())) }
963}
964
965/// Compares packed 32-bit integers in `a` and `b` for less-than.
966///
967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32)
968#[inline]
969#[target_feature(enable = "sse2")]
970#[cfg_attr(test, assert_instr(pcmpgtd))]
971#[stable(feature = "simd_x86", since = "1.27.0")]
972pub fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
973 unsafe { transmute::<i32x4, _>(src:simd_lt(x:a.as_i32x4(), y:b.as_i32x4())) }
974}
975
/// Converts the lower two packed 32-bit integers in `a` to packed
/// double-precision (64-bit) floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
    unsafe {
        let a: i32x4 = a.as_i32x4();
        // Select lanes 0 and 1, then widen each i32 to f64; i32 -> f64 is
        // always exact, so no rounding occurs.
        simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1]))
    }
}
990
/// Returns `a` with its lower element replaced by `b` after converting it to
/// an `f64`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsi2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
    // Only lane 0 is replaced; the upper lane of `a` passes through unchanged.
    unsafe { simd_insert!(a, 0, b as f64) }
}
1002
1003/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit)
1004/// floating-point elements.
1005///
1006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_ps)
1007#[inline]
1008#[target_feature(enable = "sse2")]
1009#[cfg_attr(test, assert_instr(cvtdq2ps))]
1010#[stable(feature = "simd_x86", since = "1.27.0")]
1011pub fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
1012 unsafe { transmute(src:simd_cast::<_, f32x4>(a.as_i32x4())) }
1013}
1014
1015/// Converts packed single-precision (32-bit) floating-point elements in `a`
1016/// to packed 32-bit integers.
1017///
1018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epi32)
1019#[inline]
1020#[target_feature(enable = "sse2")]
1021#[cfg_attr(test, assert_instr(cvtps2dq))]
1022#[stable(feature = "simd_x86", since = "1.27.0")]
1023pub fn _mm_cvtps_epi32(a: __m128) -> __m128i {
1024 unsafe { transmute(src:cvtps2dq(a)) }
1025}
1026
1027/// Returns a vector whose lowest element is `a` and all higher elements are
1028/// `0`.
1029///
1030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_si128)
1031#[inline]
1032#[target_feature(enable = "sse2")]
1033#[stable(feature = "simd_x86", since = "1.27.0")]
1034pub fn _mm_cvtsi32_si128(a: i32) -> __m128i {
1035 unsafe { transmute(src:i32x4::new(x0:a, x1:0, x2:0, x3:0)) }
1036}
1037
/// Returns the lowest element of `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi128_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
    // Extract lane 0 of the vector viewed as four i32 lanes.
    unsafe { simd_extract!(a.as_i32x4(), 0) }
}
1047
1048/// Sets packed 64-bit integers with the supplied values, from highest to
1049/// lowest.
1050///
1051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi64x)
1052#[inline]
1053#[target_feature(enable = "sse2")]
1054// no particular instruction to test
1055#[stable(feature = "simd_x86", since = "1.27.0")]
1056pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
1057 unsafe { transmute(src:i64x2::new(x0:e0, x1:e1)) }
1058}
1059
1060/// Sets packed 32-bit integers with the supplied values.
1061///
1062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi32)
1063#[inline]
1064#[target_feature(enable = "sse2")]
1065// no particular instruction to test
1066#[stable(feature = "simd_x86", since = "1.27.0")]
1067pub fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1068 unsafe { transmute(src:i32x4::new(x0:e0, x1:e1, x2:e2, x3:e3)) }
1069}
1070
1071/// Sets packed 16-bit integers with the supplied values.
1072///
1073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi16)
1074#[inline]
1075#[target_feature(enable = "sse2")]
1076// no particular instruction to test
1077#[stable(feature = "simd_x86", since = "1.27.0")]
1078pub fn _mm_set_epi16(
1079 e7: i16,
1080 e6: i16,
1081 e5: i16,
1082 e4: i16,
1083 e3: i16,
1084 e2: i16,
1085 e1: i16,
1086 e0: i16,
1087) -> __m128i {
1088 unsafe { transmute(src:i16x8::new(x0:e0, x1:e1, x2:e2, x3:e3, x4:e4, x5:e5, x6:e6, x7:e7)) }
1089}
1090
1091/// Sets packed 8-bit integers with the supplied values.
1092///
1093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi8)
1094#[inline]
1095#[target_feature(enable = "sse2")]
1096// no particular instruction to test
1097#[stable(feature = "simd_x86", since = "1.27.0")]
1098pub fn _mm_set_epi8(
1099 e15: i8,
1100 e14: i8,
1101 e13: i8,
1102 e12: i8,
1103 e11: i8,
1104 e10: i8,
1105 e9: i8,
1106 e8: i8,
1107 e7: i8,
1108 e6: i8,
1109 e5: i8,
1110 e4: i8,
1111 e3: i8,
1112 e2: i8,
1113 e1: i8,
1114 e0: i8,
1115) -> __m128i {
1116 unsafe {
1117 #[rustfmt::skip]
1118 transmute(src:i8x16::new(
1119 x0:e0, x1:e1, x2:e2, x3:e3, x4:e4, x5:e5, x6:e6, x7:e7, x8:e8, x9:e9, x10:e10, x11:e11, x12:e12, x13:e13, x14:e14, x15:e15,
1120 ))
1121 }
1122}
1123
1124/// Broadcasts 64-bit integer `a` to all elements.
1125///
1126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi64x)
1127#[inline]
1128#[target_feature(enable = "sse2")]
1129// no particular instruction to test
1130#[stable(feature = "simd_x86", since = "1.27.0")]
1131pub fn _mm_set1_epi64x(a: i64) -> __m128i {
1132 _mm_set_epi64x(e1:a, e0:a)
1133}
1134
1135/// Broadcasts 32-bit integer `a` to all elements.
1136///
1137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi32)
1138#[inline]
1139#[target_feature(enable = "sse2")]
1140// no particular instruction to test
1141#[stable(feature = "simd_x86", since = "1.27.0")]
1142pub fn _mm_set1_epi32(a: i32) -> __m128i {
1143 _mm_set_epi32(e3:a, e2:a, e1:a, e0:a)
1144}
1145
1146/// Broadcasts 16-bit integer `a` to all elements.
1147///
1148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi16)
1149#[inline]
1150#[target_feature(enable = "sse2")]
1151// no particular instruction to test
1152#[stable(feature = "simd_x86", since = "1.27.0")]
1153pub fn _mm_set1_epi16(a: i16) -> __m128i {
1154 _mm_set_epi16(e7:a, e6:a, e5:a, e4:a, e3:a, e2:a, e1:a, e0:a)
1155}
1156
1157/// Broadcasts 8-bit integer `a` to all elements.
1158///
1159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi8)
1160#[inline]
1161#[target_feature(enable = "sse2")]
1162// no particular instruction to test
1163#[stable(feature = "simd_x86", since = "1.27.0")]
1164pub fn _mm_set1_epi8(a: i8) -> __m128i {
1165 _mm_set_epi8(e15:a, e14:a, e13:a, e12:a, e11:a, e10:a, e9:a, e8:a, e7:a, e6:a, e5:a, e4:a, e3:a, e2:a, e1:a, e0:a)
1166}
1167
1168/// Sets packed 32-bit integers with the supplied values in reverse order.
1169///
1170/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi32)
1171#[inline]
1172#[target_feature(enable = "sse2")]
1173// no particular instruction to test
1174#[stable(feature = "simd_x86", since = "1.27.0")]
1175pub fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1176 _mm_set_epi32(e3:e0, e2:e1, e1:e2, e0:e3)
1177}
1178
1179/// Sets packed 16-bit integers with the supplied values in reverse order.
1180///
1181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi16)
1182#[inline]
1183#[target_feature(enable = "sse2")]
1184// no particular instruction to test
1185#[stable(feature = "simd_x86", since = "1.27.0")]
1186pub fn _mm_setr_epi16(
1187 e7: i16,
1188 e6: i16,
1189 e5: i16,
1190 e4: i16,
1191 e3: i16,
1192 e2: i16,
1193 e1: i16,
1194 e0: i16,
1195) -> __m128i {
1196 _mm_set_epi16(e7:e0, e6:e1, e5:e2, e4:e3, e3:e4, e2:e5, e1:e6, e0:e7)
1197}
1198
1199/// Sets packed 8-bit integers with the supplied values in reverse order.
1200///
1201/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi8)
1202#[inline]
1203#[target_feature(enable = "sse2")]
1204// no particular instruction to test
1205#[stable(feature = "simd_x86", since = "1.27.0")]
1206pub fn _mm_setr_epi8(
1207 e15: i8,
1208 e14: i8,
1209 e13: i8,
1210 e12: i8,
1211 e11: i8,
1212 e10: i8,
1213 e9: i8,
1214 e8: i8,
1215 e7: i8,
1216 e6: i8,
1217 e5: i8,
1218 e4: i8,
1219 e3: i8,
1220 e2: i8,
1221 e1: i8,
1222 e0: i8,
1223) -> __m128i {
1224 #[rustfmt::skip]
1225 _mm_set_epi8(
1226 e15:e0, e14:e1, e13:e2, e12:e3, e11:e4, e10:e5, e9:e6, e8:e7, e7:e8, e6:e9, e5:e10, e4:e11, e3:e12, e2:e13, e1:e14, e0:e15,
1227 )
1228}
1229
/// Returns a vector with all elements set to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setzero_si128() -> __m128i {
    // SAFETY: an all-zero bit pattern is a valid `__m128i`. The `const`
    // block evaluates it at compile time.
    const { unsafe { mem::zeroed() } }
}
1240
1241/// Loads 64-bit integer from memory into first element of returned vector.
1242///
1243/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_epi64)
1244#[inline]
1245#[target_feature(enable = "sse2")]
1246#[stable(feature = "simd_x86", since = "1.27.0")]
1247pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
1248 _mm_set_epi64x(e1:0, e0:ptr::read_unaligned(src:mem_addr as *const i64))
1249}
1250
/// Loads 128-bits of integer data from memory into a new vector.
///
/// `mem_addr` must be aligned on a 16-byte boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
    // A plain aligned dereference; the caller guarantees 16-byte alignment.
    *mem_addr
}
1266
1267/// Loads 128-bits of integer data from memory into a new vector.
1268///
1269/// `mem_addr` does not need to be aligned on any particular boundary.
1270///
1271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si128)
1272#[inline]
1273#[target_feature(enable = "sse2")]
1274#[cfg_attr(test, assert_instr(movups))]
1275#[stable(feature = "simd_x86", since = "1.27.0")]
1276pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
1277 let mut dst: __m128i = _mm_undefined_si128();
1278 ptr::copy_nonoverlapping(
1279 src:mem_addr as *const u8,
1280 dst:ptr::addr_of_mut!(dst) as *mut u8,
1281 count:mem::size_of::<__m128i>(),
1282 );
1283 dst
1284}
1285
/// Conditionally store 8-bit integer elements from `a` into memory using
/// `mask` flagged as non-temporal (unlikely to be used again soon).
///
/// Elements are not stored when the highest bit is not set in the
/// corresponding element.
///
/// `mem_addr` should correspond to a 128-bit memory location and does not need
/// to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskmoveu_si128)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maskmovdqu))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) {
    // Delegates directly to the hardware intrinsic: bytes of `a` whose mask
    // byte has its MSB set are written to `mem_addr`, others left untouched.
    maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
}
1312
/// Stores 128-bits of integer data from `a` into memory.
///
/// `mem_addr` must be aligned on a 16-byte boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
    // A plain aligned store; the caller guarantees 16-byte alignment.
    *mem_addr = a;
}
1328
1329/// Stores 128-bits of integer data from `a` into memory.
1330///
1331/// `mem_addr` does not need to be aligned on any particular boundary.
1332///
1333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si128)
1334#[inline]
1335#[target_feature(enable = "sse2")]
1336#[cfg_attr(test, assert_instr(movups))] // FIXME movdqu expected
1337#[stable(feature = "simd_x86", since = "1.27.0")]
1338pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
1339 mem_addr.write_unaligned(val:a);
1340}
1341
1342/// Stores the lower 64-bit integer `a` to a memory location.
1343///
1344/// `mem_addr` does not need to be aligned on any particular boundary.
1345///
1346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_epi64)
1347#[inline]
1348#[target_feature(enable = "sse2")]
1349#[stable(feature = "simd_x86", since = "1.27.0")]
1350pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
1351 ptr::copy_nonoverlapping(src:ptr::addr_of!(a) as *const u8, dst:mem_addr as *mut u8, count:8);
1352}
1353
/// Stores a 128-bit integer vector to a 128-bit aligned memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si128)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
    // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
    // Inline asm guarantees the `movntdq` non-temporal store is emitted
    // verbatim rather than being optimized into a regular store.
    crate::arch::asm!(
        vps!("movntdq", ",{a}"),
        p = in(reg) mem_addr,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
1381
/// Stores a 32-bit integer value in the specified memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si32)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movnti))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
    // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
    // Inline asm guarantees the `movnti` non-temporal store is emitted
    // verbatim rather than being optimized into a regular store.
    crate::arch::asm!(
        vps!("movnti", ",{a:e}"), // `:e` for 32bit value
        p = in(reg) mem_addr,
        a = in(reg) a,
        options(nostack, preserves_flags),
    );
}
1409
1410/// Returns a vector where the low element is extracted from `a` and its upper
1411/// element is zero.
1412///
1413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_epi64)
1414#[inline]
1415#[target_feature(enable = "sse2")]
1416// FIXME movd on msvc, movd on i686
1417#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movq))]
1418#[stable(feature = "simd_x86", since = "1.27.0")]
1419pub fn _mm_move_epi64(a: __m128i) -> __m128i {
1420 unsafe {
1421 let r: i64x2 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 2]);
1422 transmute(src:r)
1423 }
1424}
1425
1426/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
1427/// using signed saturation.
1428///
1429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi16)
1430#[inline]
1431#[target_feature(enable = "sse2")]
1432#[cfg_attr(test, assert_instr(packsswb))]
1433#[stable(feature = "simd_x86", since = "1.27.0")]
1434pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
1435 unsafe { transmute(src:packsswb(a.as_i16x8(), b.as_i16x8())) }
1436}
1437
1438/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
1439/// using signed saturation.
1440///
1441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi32)
1442#[inline]
1443#[target_feature(enable = "sse2")]
1444#[cfg_attr(test, assert_instr(packssdw))]
1445#[stable(feature = "simd_x86", since = "1.27.0")]
1446pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
1447 unsafe { transmute(src:packssdw(a.as_i32x4(), b.as_i32x4())) }
1448}
1449
1450/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
1451/// using unsigned saturation.
1452///
1453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi16)
1454#[inline]
1455#[target_feature(enable = "sse2")]
1456#[cfg_attr(test, assert_instr(packuswb))]
1457#[stable(feature = "simd_x86", since = "1.27.0")]
1458pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
1459 unsafe { transmute(src:packuswb(a.as_i16x8(), b.as_i16x8())) }
1460}
1461
/// Returns the `imm8` element of `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 {
    // Only the low 3 bits of the index are meaningful (8 lanes).
    static_assert_uimm_bits!(IMM8, 3);
    // The u16 lane is zero-extended into the i32 result, matching `pextrw`.
    unsafe { simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32 }
}
1474
1475/// Returns a new vector where the `imm8` element of `a` is replaced with `i`.
1476///
1477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi16)
1478#[inline]
1479#[target_feature(enable = "sse2")]
1480#[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))]
1481#[rustc_legacy_const_generics(2)]
1482#[stable(feature = "simd_x86", since = "1.27.0")]
1483pub fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
1484 static_assert_uimm_bits!(IMM8, 3);
1485 unsafe { transmute(src:simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16)) }
1486}
1487
1488/// Returns a mask of the most significant bit of each element in `a`.
1489///
1490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_epi8)
1491#[inline]
1492#[target_feature(enable = "sse2")]
1493#[cfg_attr(test, assert_instr(pmovmskb))]
1494#[stable(feature = "simd_x86", since = "1.27.0")]
1495pub fn _mm_movemask_epi8(a: __m128i) -> i32 {
1496 unsafe {
1497 let z: i8x16 = i8x16::ZERO;
1498 let m: i8x16 = simd_lt(x:a.as_i8x16(), y:z);
1499 simd_bitmask::<_, u16>(m) as u32 as i32
1500 }
1501}
1502
1503/// Shuffles 32-bit integers in `a` using the control in `IMM8`.
1504///
1505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi32)
1506#[inline]
1507#[target_feature(enable = "sse2")]
1508#[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))]
1509#[rustc_legacy_const_generics(1)]
1510#[stable(feature = "simd_x86", since = "1.27.0")]
1511pub fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
1512 static_assert_uimm_bits!(IMM8, 8);
1513 unsafe {
1514 let a: i32x4 = a.as_i32x4();
1515 let x: i32x4 = simd_shuffle!(
1516 a,
1517 a,
1518 [
1519 IMM8 as u32 & 0b11,
1520 (IMM8 as u32 >> 2) & 0b11,
1521 (IMM8 as u32 >> 4) & 0b11,
1522 (IMM8 as u32 >> 6) & 0b11,
1523 ],
1524 );
1525 transmute(src:x)
1526 }
1527}
1528
1529/// Shuffles 16-bit integers in the high 64 bits of `a` using the control in
1530/// `IMM8`.
1531///
1532/// Put the results in the high 64 bits of the returned vector, with the low 64
1533/// bits being copied from `a`.
1534///
1535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflehi_epi16)
1536#[inline]
1537#[target_feature(enable = "sse2")]
1538#[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))]
1539#[rustc_legacy_const_generics(1)]
1540#[stable(feature = "simd_x86", since = "1.27.0")]
1541pub fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1542 static_assert_uimm_bits!(IMM8, 8);
1543 unsafe {
1544 let a: i16x8 = a.as_i16x8();
1545 let x: i16x8 = simd_shuffle!(
1546 a,
1547 a,
1548 [
1549 0,
1550 1,
1551 2,
1552 3,
1553 (IMM8 as u32 & 0b11) + 4,
1554 ((IMM8 as u32 >> 2) & 0b11) + 4,
1555 ((IMM8 as u32 >> 4) & 0b11) + 4,
1556 ((IMM8 as u32 >> 6) & 0b11) + 4,
1557 ],
1558 );
1559 transmute(src:x)
1560 }
1561}
1562
1563/// Shuffles 16-bit integers in the low 64 bits of `a` using the control in
1564/// `IMM8`.
1565///
1566/// Put the results in the low 64 bits of the returned vector, with the high 64
1567/// bits being copied from `a`.
1568///
1569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflelo_epi16)
1570#[inline]
1571#[target_feature(enable = "sse2")]
1572#[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))]
1573#[rustc_legacy_const_generics(1)]
1574#[stable(feature = "simd_x86", since = "1.27.0")]
1575pub fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1576 static_assert_uimm_bits!(IMM8, 8);
1577 unsafe {
1578 let a: i16x8 = a.as_i16x8();
1579 let x: i16x8 = simd_shuffle!(
1580 a,
1581 a,
1582 [
1583 IMM8 as u32 & 0b11,
1584 (IMM8 as u32 >> 2) & 0b11,
1585 (IMM8 as u32 >> 4) & 0b11,
1586 (IMM8 as u32 >> 6) & 0b11,
1587 4,
1588 5,
1589 6,
1590 7,
1591 ],
1592 );
1593 transmute(src:x)
1594 }
1595}
1596
1597/// Unpacks and interleave 8-bit integers from the high half of `a` and `b`.
1598///
1599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi8)
1600#[inline]
1601#[target_feature(enable = "sse2")]
1602#[cfg_attr(test, assert_instr(punpckhbw))]
1603#[stable(feature = "simd_x86", since = "1.27.0")]
1604pub fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
1605 unsafe {
1606 transmute::<i8x16, _>(src:simd_shuffle!(
1607 a.as_i8x16(),
1608 b.as_i8x16(),
1609 [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
1610 ))
1611 }
1612}
1613
1614/// Unpacks and interleave 16-bit integers from the high half of `a` and `b`.
1615///
1616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi16)
1617#[inline]
1618#[target_feature(enable = "sse2")]
1619#[cfg_attr(test, assert_instr(punpckhwd))]
1620#[stable(feature = "simd_x86", since = "1.27.0")]
1621pub fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
1622 unsafe {
1623 let x: i16x8 = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
1624 transmute::<i16x8, _>(src:x)
1625 }
1626}
1627
1628/// Unpacks and interleave 32-bit integers from the high half of `a` and `b`.
1629///
1630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi32)
1631#[inline]
1632#[target_feature(enable = "sse2")]
1633#[cfg_attr(test, assert_instr(unpckhps))]
1634#[stable(feature = "simd_x86", since = "1.27.0")]
1635pub fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
1636 unsafe { transmute::<i32x4, _>(src:simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) }
1637}
1638
1639/// Unpacks and interleave 64-bit integers from the high half of `a` and `b`.
1640///
1641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi64)
1642#[inline]
1643#[target_feature(enable = "sse2")]
1644#[cfg_attr(test, assert_instr(unpckhpd))]
1645#[stable(feature = "simd_x86", since = "1.27.0")]
1646pub fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
1647 unsafe { transmute::<i64x2, _>(src:simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3])) }
1648}
1649
1650/// Unpacks and interleave 8-bit integers from the low half of `a` and `b`.
1651///
1652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi8)
1653#[inline]
1654#[target_feature(enable = "sse2")]
1655#[cfg_attr(test, assert_instr(punpcklbw))]
1656#[stable(feature = "simd_x86", since = "1.27.0")]
1657pub fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
1658 unsafe {
1659 transmute::<i8x16, _>(src:simd_shuffle!(
1660 a.as_i8x16(),
1661 b.as_i8x16(),
1662 [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
1663 ))
1664 }
1665}
1666
1667/// Unpacks and interleave 16-bit integers from the low half of `a` and `b`.
1668///
1669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi16)
1670#[inline]
1671#[target_feature(enable = "sse2")]
1672#[cfg_attr(test, assert_instr(punpcklwd))]
1673#[stable(feature = "simd_x86", since = "1.27.0")]
1674pub fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
1675 unsafe {
1676 let x: i16x8 = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
1677 transmute::<i16x8, _>(src:x)
1678 }
1679}
1680
1681/// Unpacks and interleave 32-bit integers from the low half of `a` and `b`.
1682///
1683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi32)
1684#[inline]
1685#[target_feature(enable = "sse2")]
1686#[cfg_attr(test, assert_instr(unpcklps))]
1687#[stable(feature = "simd_x86", since = "1.27.0")]
1688pub fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
1689 unsafe { transmute::<i32x4, _>(src:simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) }
1690}
1691
1692/// Unpacks and interleave 64-bit integers from the low half of `a` and `b`.
1693///
1694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi64)
1695#[inline]
1696#[target_feature(enable = "sse2")]
1697#[cfg_attr(test, assert_instr(movlhps))]
1698#[stable(feature = "simd_x86", since = "1.27.0")]
1699pub fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
1700 unsafe { transmute::<i64x2, _>(src:simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2])) }
1701}
1702
/// Returns a new vector with the low element of `a` replaced by the sum of the
/// low elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(addsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
    // Scalar add of the two low lanes, inserted into lane 0 of `a`; lane 1
    // of `a` is preserved by `simd_insert!`.
    unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) }
}
1714
1715/// Adds packed double-precision (64-bit) floating-point elements in `a` and
1716/// `b`.
1717///
1718/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_pd)
1719#[inline]
1720#[target_feature(enable = "sse2")]
1721#[cfg_attr(test, assert_instr(addpd))]
1722#[stable(feature = "simd_x86", since = "1.27.0")]
1723pub fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
1724 unsafe { simd_add(x:a, y:b) }
1725}
1726
/// Returns a new vector with the low element of `a` replaced by the result of
/// diving the lower element of `a` by the lower element of `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(divsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
    // Scalar divide of the two low lanes, inserted into lane 0 of `a`;
    // lane 1 of `a` is preserved by `simd_insert!`.
    unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) }
}
1738
1739/// Divide packed double-precision (64-bit) floating-point elements in `a` by
1740/// packed elements in `b`.
1741///
1742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_pd)
1743#[inline]
1744#[target_feature(enable = "sse2")]
1745#[cfg_attr(test, assert_instr(divpd))]
1746#[stable(feature = "simd_x86", since = "1.27.0")]
1747pub fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
1748 unsafe { simd_div(lhs:a, rhs:b) }
1749}
1750
/// Returns a new vector with the low element of `a` replaced by the maximum
/// of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d {
    // Dedicated intrinsic call — presumably kept (rather than a generic
    // float max) to match maxsd's exact NaN/±0.0 semantics; confirm against
    // the `maxsd` declaration elsewhere in this module.
    unsafe { maxsd(a, b) }
}

/// Returns a new vector with the maximum values from corresponding elements in
/// `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d {
    // Lane-wise maximum via the dedicated maxpd intrinsic.
    unsafe { maxpd(a, b) }
}

/// Returns a new vector with the low element of `a` replaced by the minimum
/// of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d {
    // Scalar minimum of the low lanes; upper lane comes from `a`.
    unsafe { minsd(a, b) }
}

/// Returns a new vector with the minimum values from corresponding elements in
/// `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
    // Lane-wise minimum via the dedicated minpd intrinsic.
    unsafe { minpd(a, b) }
}
1798
/// Returns a new vector with the low element of `a` replaced by multiplying the
/// low elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mulsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
    // Scalar product of the two low lanes, inserted into lane 0 of `a`;
    // lane 1 of `a` is preserved by `simd_insert!`.
    unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) }
}
1810
1811/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
1812/// and `b`.
1813///
1814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_pd)
1815#[inline]
1816#[target_feature(enable = "sse2")]
1817#[cfg_attr(test, assert_instr(mulpd))]
1818#[stable(feature = "simd_x86", since = "1.27.0")]
1819pub fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
1820 unsafe { simd_mul(x:a, y:b) }
1821}
1822
/// Returns a new vector with the low element of `a` replaced by the square
/// root of the lower element `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(sqrtsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d {
    // Note the asymmetry: sqrt is taken from `b`'s low lane, while lane 1
    // of `a` is preserved by `simd_insert!`.
    unsafe { simd_insert!(a, 0, sqrtf64(_mm_cvtsd_f64(b))) }
}

/// Returns a new vector with the square root of each of the values in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(sqrtpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sqrt_pd(a: __m128d) -> __m128d {
    // Lane-wise square root via the generic SIMD intrinsic.
    unsafe { simd_fsqrt(a) }
}

/// Returns a new vector with the low element of `a` replaced by subtracting the
/// low element by `b` from the low element of `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(subsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
    // Scalar subtraction (a_low - b_low) inserted into lane 0 of `a`;
    // lane 1 of `a` is preserved by `simd_insert!`.
    unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) }
}
1857
1858/// Subtract packed double-precision (64-bit) floating-point elements in `b`
1859/// from `a`.
1860///
1861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_pd)
1862#[inline]
1863#[target_feature(enable = "sse2")]
1864#[cfg_attr(test, assert_instr(subpd))]
1865#[stable(feature = "simd_x86", since = "1.27.0")]
1866pub fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
1867 unsafe { simd_sub(lhs:a, rhs:b) }
1868}
1869
1870/// Computes the bitwise AND of packed double-precision (64-bit) floating-point
1871/// elements in `a` and `b`.
1872///
1873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_pd)
1874#[inline]
1875#[target_feature(enable = "sse2")]
1876#[cfg_attr(test, assert_instr(andps))]
1877#[stable(feature = "simd_x86", since = "1.27.0")]
1878pub fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
1879 unsafe {
1880 let a: __m128i = transmute(src:a);
1881 let b: __m128i = transmute(src:b);
1882 transmute(src:_mm_and_si128(a, b))
1883 }
1884}
1885
1886/// Computes the bitwise NOT of `a` and then AND with `b`.
1887///
1888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_pd)
1889#[inline]
1890#[target_feature(enable = "sse2")]
1891#[cfg_attr(test, assert_instr(andnps))]
1892#[stable(feature = "simd_x86", since = "1.27.0")]
1893pub fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
1894 unsafe {
1895 let a: __m128i = transmute(src:a);
1896 let b: __m128i = transmute(src:b);
1897 transmute(src:_mm_andnot_si128(a, b))
1898 }
1899}
1900
1901/// Computes the bitwise OR of `a` and `b`.
1902///
1903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_pd)
1904#[inline]
1905#[target_feature(enable = "sse2")]
1906#[cfg_attr(test, assert_instr(orps))]
1907#[stable(feature = "simd_x86", since = "1.27.0")]
1908pub fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
1909 unsafe {
1910 let a: __m128i = transmute(src:a);
1911 let b: __m128i = transmute(src:b);
1912 transmute(src:_mm_or_si128(a, b))
1913 }
1914}
1915
1916/// Computes the bitwise XOR of `a` and `b`.
1917///
1918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_pd)
1919#[inline]
1920#[target_feature(enable = "sse2")]
1921#[cfg_attr(test, assert_instr(xorps))]
1922#[stable(feature = "simd_x86", since = "1.27.0")]
1923pub fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
1924 unsafe {
1925 let a: __m128i = transmute(src:a);
1926 let b: __m128i = transmute(src:b);
1927 transmute(src:_mm_xor_si128(a, b))
1928 }
1929}
1930
1931/// Returns a new vector with the low element of `a` replaced by the equality
1932/// comparison of the lower elements of `a` and `b`.
1933///
1934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_sd)
1935#[inline]
1936#[target_feature(enable = "sse2")]
1937#[cfg_attr(test, assert_instr(cmpeqsd))]
1938#[stable(feature = "simd_x86", since = "1.27.0")]
1939pub fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d {
1940 unsafe { cmpsd(a, b, imm8:0) }
1941}
1942
1943/// Returns a new vector with the low element of `a` replaced by the less-than
1944/// comparison of the lower elements of `a` and `b`.
1945///
1946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_sd)
1947#[inline]
1948#[target_feature(enable = "sse2")]
1949#[cfg_attr(test, assert_instr(cmpltsd))]
1950#[stable(feature = "simd_x86", since = "1.27.0")]
1951pub fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d {
1952 unsafe { cmpsd(a, b, imm8:1) }
1953}
1954
1955/// Returns a new vector with the low element of `a` replaced by the
1956/// less-than-or-equal comparison of the lower elements of `a` and `b`.
1957///
1958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_sd)
1959#[inline]
1960#[target_feature(enable = "sse2")]
1961#[cfg_attr(test, assert_instr(cmplesd))]
1962#[stable(feature = "simd_x86", since = "1.27.0")]
1963pub fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
1964 unsafe { cmpsd(a, b, imm8:2) }
1965}
1966
/// Returns a new vector with the low element of `a` replaced by the
/// greater-than comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
    // GT is implemented as LT with swapped operands (hence the cmpltsd in
    // assert_instr above); lane 1 must then be restored from `a`, since the
    // swap would otherwise leave `b`'s upper lane in the result.
    unsafe { simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
}

/// Returns a new vector with the low element of `a` replaced by the
/// greater-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
    // GE is LE with swapped operands; lane 1 is restored from `a` as above.
    unsafe { simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64)) }
}
1990
1991/// Returns a new vector with the low element of `a` replaced by the result
1992/// of comparing both of the lower elements of `a` and `b` to `NaN`. If
1993/// neither are equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0`
1994/// otherwise.
1995///
1996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_sd)
1997#[inline]
1998#[target_feature(enable = "sse2")]
1999#[cfg_attr(test, assert_instr(cmpordsd))]
2000#[stable(feature = "simd_x86", since = "1.27.0")]
2001pub fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d {
2002 unsafe { cmpsd(a, b, imm8:7) }
2003}
2004
2005/// Returns a new vector with the low element of `a` replaced by the result of
2006/// comparing both of the lower elements of `a` and `b` to `NaN`. If either is
2007/// equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise.
2008///
2009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_sd)
2010#[inline]
2011#[target_feature(enable = "sse2")]
2012#[cfg_attr(test, assert_instr(cmpunordsd))]
2013#[stable(feature = "simd_x86", since = "1.27.0")]
2014pub fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d {
2015 unsafe { cmpsd(a, b, imm8:3) }
2016}
2017
2018/// Returns a new vector with the low element of `a` replaced by the not-equal
2019/// comparison of the lower elements of `a` and `b`.
2020///
2021/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_sd)
2022#[inline]
2023#[target_feature(enable = "sse2")]
2024#[cfg_attr(test, assert_instr(cmpneqsd))]
2025#[stable(feature = "simd_x86", since = "1.27.0")]
2026pub fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d {
2027 unsafe { cmpsd(a, b, imm8:4) }
2028}
2029
2030/// Returns a new vector with the low element of `a` replaced by the
2031/// not-less-than comparison of the lower elements of `a` and `b`.
2032///
2033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_sd)
2034#[inline]
2035#[target_feature(enable = "sse2")]
2036#[cfg_attr(test, assert_instr(cmpnltsd))]
2037#[stable(feature = "simd_x86", since = "1.27.0")]
2038pub fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d {
2039 unsafe { cmpsd(a, b, imm8:5) }
2040}
2041
2042/// Returns a new vector with the low element of `a` replaced by the
2043/// not-less-than-or-equal comparison of the lower elements of `a` and `b`.
2044///
2045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_sd)
2046#[inline]
2047#[target_feature(enable = "sse2")]
2048#[cfg_attr(test, assert_instr(cmpnlesd))]
2049#[stable(feature = "simd_x86", since = "1.27.0")]
2050pub fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d {
2051 unsafe { cmpsd(a, b, imm8:6) }
2052}
2053
/// Returns a new vector with the low element of `a` replaced by the
/// not-greater-than comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
    // NGT is NLT with swapped operands (hence cmpnltsd in assert_instr
    // above); lane 1 is then restored from `a`, since the swap would
    // otherwise leave `b`'s upper lane in the result.
    unsafe { simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
}

/// Returns a new vector with the low element of `a` replaced by the
/// not-greater-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
    // NGE is NLE with swapped operands; lane 1 is restored from `a` as above.
    unsafe { simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64)) }
}
2077
2078/// Compares corresponding elements in `a` and `b` for equality.
2079///
2080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_pd)
2081#[inline]
2082#[target_feature(enable = "sse2")]
2083#[cfg_attr(test, assert_instr(cmpeqpd))]
2084#[stable(feature = "simd_x86", since = "1.27.0")]
2085pub fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d {
2086 unsafe { cmppd(a, b, imm8:0) }
2087}
2088
2089/// Compares corresponding elements in `a` and `b` for less-than.
2090///
2091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_pd)
2092#[inline]
2093#[target_feature(enable = "sse2")]
2094#[cfg_attr(test, assert_instr(cmpltpd))]
2095#[stable(feature = "simd_x86", since = "1.27.0")]
2096pub fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d {
2097 unsafe { cmppd(a, b, imm8:1) }
2098}
2099
2100/// Compares corresponding elements in `a` and `b` for less-than-or-equal
2101///
2102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_pd)
2103#[inline]
2104#[target_feature(enable = "sse2")]
2105#[cfg_attr(test, assert_instr(cmplepd))]
2106#[stable(feature = "simd_x86", since = "1.27.0")]
2107pub fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d {
2108 unsafe { cmppd(a, b, imm8:2) }
2109}
2110
2111/// Compares corresponding elements in `a` and `b` for greater-than.
2112///
2113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_pd)
2114#[inline]
2115#[target_feature(enable = "sse2")]
2116#[cfg_attr(test, assert_instr(cmpltpd))]
2117#[stable(feature = "simd_x86", since = "1.27.0")]
2118pub fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d {
2119 _mm_cmplt_pd(a:b, b:a)
2120}
2121
2122/// Compares corresponding elements in `a` and `b` for greater-than-or-equal.
2123///
2124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_pd)
2125#[inline]
2126#[target_feature(enable = "sse2")]
2127#[cfg_attr(test, assert_instr(cmplepd))]
2128#[stable(feature = "simd_x86", since = "1.27.0")]
2129pub fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d {
2130 _mm_cmple_pd(a:b, b:a)
2131}
2132
2133/// Compares corresponding elements in `a` and `b` to see if neither is `NaN`.
2134///
2135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_pd)
2136#[inline]
2137#[target_feature(enable = "sse2")]
2138#[cfg_attr(test, assert_instr(cmpordpd))]
2139#[stable(feature = "simd_x86", since = "1.27.0")]
2140pub fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d {
2141 unsafe { cmppd(a, b, imm8:7) }
2142}
2143
2144/// Compares corresponding elements in `a` and `b` to see if either is `NaN`.
2145///
2146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_pd)
2147#[inline]
2148#[target_feature(enable = "sse2")]
2149#[cfg_attr(test, assert_instr(cmpunordpd))]
2150#[stable(feature = "simd_x86", since = "1.27.0")]
2151pub fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d {
2152 unsafe { cmppd(a, b, imm8:3) }
2153}
2154
2155/// Compares corresponding elements in `a` and `b` for not-equal.
2156///
2157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_pd)
2158#[inline]
2159#[target_feature(enable = "sse2")]
2160#[cfg_attr(test, assert_instr(cmpneqpd))]
2161#[stable(feature = "simd_x86", since = "1.27.0")]
2162pub fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d {
2163 unsafe { cmppd(a, b, imm8:4) }
2164}
2165
2166/// Compares corresponding elements in `a` and `b` for not-less-than.
2167///
2168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_pd)
2169#[inline]
2170#[target_feature(enable = "sse2")]
2171#[cfg_attr(test, assert_instr(cmpnltpd))]
2172#[stable(feature = "simd_x86", since = "1.27.0")]
2173pub fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d {
2174 unsafe { cmppd(a, b, imm8:5) }
2175}
2176
2177/// Compares corresponding elements in `a` and `b` for not-less-than-or-equal.
2178///
2179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_pd)
2180#[inline]
2181#[target_feature(enable = "sse2")]
2182#[cfg_attr(test, assert_instr(cmpnlepd))]
2183#[stable(feature = "simd_x86", since = "1.27.0")]
2184pub fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d {
2185 unsafe { cmppd(a, b, imm8:6) }
2186}
2187
2188/// Compares corresponding elements in `a` and `b` for not-greater-than.
2189///
2190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_pd)
2191#[inline]
2192#[target_feature(enable = "sse2")]
2193#[cfg_attr(test, assert_instr(cmpnltpd))]
2194#[stable(feature = "simd_x86", since = "1.27.0")]
2195pub fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d {
2196 _mm_cmpnlt_pd(a:b, b:a)
2197}
2198
2199/// Compares corresponding elements in `a` and `b` for
2200/// not-greater-than-or-equal.
2201///
2202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_pd)
2203#[inline]
2204#[target_feature(enable = "sse2")]
2205#[cfg_attr(test, assert_instr(cmpnlepd))]
2206#[stable(feature = "simd_x86", since = "1.27.0")]
2207pub fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d {
2208 _mm_cmpnle_pd(a:b, b:a)
2209}
2210
/// Compares the lower element of `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comieq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 {
    // Returns 1 when the comparison holds, 0 otherwise (comisd-based,
    // per assert_instr above).
    unsafe { comieqsd(a, b) }
}

/// Compares the lower element of `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comilt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 {
    // Boolean (0/1) result via the comisd-based intrinsic.
    unsafe { comiltsd(a, b) }
}

/// Compares the lower element of `a` and `b` for less-than-or-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comile_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 {
    // Boolean (0/1) result via the comisd-based intrinsic.
    unsafe { comilesd(a, b) }
}

/// Compares the lower element of `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comigt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 {
    // Boolean (0/1) result via the comisd-based intrinsic.
    unsafe { comigtsd(a, b) }
}

/// Compares the lower element of `a` and `b` for greater-than-or-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comige_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 {
    // Boolean (0/1) result via the comisd-based intrinsic.
    unsafe { comigesd(a, b) }
}

/// Compares the lower element of `a` and `b` for not-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comineq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 {
    // Boolean (0/1) result via the comisd-based intrinsic.
    unsafe { comineqsd(a, b) }
}
2276
/// Compares the lower element of `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomieq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 {
    // Unordered variant (ucomisd, per assert_instr above); returns 0/1.
    unsafe { ucomieqsd(a, b) }
}

/// Compares the lower element of `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomilt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 {
    // Boolean (0/1) result via the ucomisd-based intrinsic.
    unsafe { ucomiltsd(a, b) }
}

/// Compares the lower element of `a` and `b` for less-than-or-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomile_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 {
    // Boolean (0/1) result via the ucomisd-based intrinsic.
    unsafe { ucomilesd(a, b) }
}

/// Compares the lower element of `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomigt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 {
    // Boolean (0/1) result via the ucomisd-based intrinsic.
    unsafe { ucomigtsd(a, b) }
}

/// Compares the lower element of `a` and `b` for greater-than-or-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomige_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 {
    // Boolean (0/1) result via the ucomisd-based intrinsic.
    unsafe { ucomigesd(a, b) }
}

/// Compares the lower element of `a` and `b` for not-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomineq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
    // Boolean (0/1) result via the ucomisd-based intrinsic.
    unsafe { ucomineqsd(a, b) }
}
2342
2343/// Converts packed double-precision (64-bit) floating-point elements in `a` to
2344/// packed single-precision (32-bit) floating-point elements
2345///
2346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_ps)
2347#[inline]
2348#[target_feature(enable = "sse2")]
2349#[cfg_attr(test, assert_instr(cvtpd2ps))]
2350#[stable(feature = "simd_x86", since = "1.27.0")]
2351pub fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
2352 unsafe {
2353 let r: f32x2 = simd_cast::<_, f32x2>(a.as_f64x2());
2354 let zero: f32x2 = f32x2::ZERO;
2355 transmute::<f32x4, _>(src:simd_shuffle!(r, zero, [0, 1, 2, 3]))
2356 }
2357}
2358
2359/// Converts packed single-precision (32-bit) floating-point elements in `a` to
2360/// packed
2361/// double-precision (64-bit) floating-point elements.
2362///
2363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_pd)
2364#[inline]
2365#[target_feature(enable = "sse2")]
2366#[cfg_attr(test, assert_instr(cvtps2pd))]
2367#[stable(feature = "simd_x86", since = "1.27.0")]
2368pub fn _mm_cvtps_pd(a: __m128) -> __m128d {
2369 unsafe {
2370 let a: f32x4 = a.as_f32x4();
2371 transmute(src:simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
2372 }
2373}
2374
2375/// Converts packed double-precision (64-bit) floating-point elements in `a` to
2376/// packed 32-bit integers.
2377///
2378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epi32)
2379#[inline]
2380#[target_feature(enable = "sse2")]
2381#[cfg_attr(test, assert_instr(cvtpd2dq))]
2382#[stable(feature = "simd_x86", since = "1.27.0")]
2383pub fn _mm_cvtpd_epi32(a: __m128d) -> __m128i {
2384 unsafe { transmute(src:cvtpd2dq(a)) }
2385}
2386
/// Converts the lower double-precision (64-bit) floating-point element in a to
/// a 32-bit integer.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsd_si32(a: __m128d) -> i32 {
    // Rounding behavior is that of the `cvtsd2si` instruction (MXCSR-controlled).
    unsafe { cvtsd2si(a) }
}
2398
/// Converts the lower double-precision (64-bit) floating-point element in `b`
/// to a single-precision (32-bit) floating-point element, store the result in
/// the lower element of the return value, and copies the upper element from `a`
/// to the upper element the return value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_ss)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
    // Delegates to the `cvtsd2ss` LLVM intrinsic; note the (a, b) operand order.
    unsafe { cvtsd2ss(a, b) }
}
2412
/// Returns the lower double-precision (64-bit) floating-point element of `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_f64)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsd_f64(a: __m128d) -> f64 {
    // Extract lane 0; no conversion is performed.
    unsafe { simd_extract!(a, 0) }
}
2422
/// Converts the lower single-precision (32-bit) floating-point element in `b`
/// to a double-precision (64-bit) floating-point element, store the result in
/// the lower element of the return value, and copies the upper element from `a`
/// to the upper element the return value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtss2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
    unsafe {
        // Widen lane 0 of `b` (f32 -> f64 is lossless) and splice it into `a`.
        let elt: f32 = simd_extract!(b, 0);
        simd_insert!(a, 0, elt as f64)
    }
}
2439
2440/// Converts packed double-precision (64-bit) floating-point elements in `a` to
2441/// packed 32-bit integers with truncation.
2442///
2443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epi32)
2444#[inline]
2445#[target_feature(enable = "sse2")]
2446#[cfg_attr(test, assert_instr(cvttpd2dq))]
2447#[stable(feature = "simd_x86", since = "1.27.0")]
2448pub fn _mm_cvttpd_epi32(a: __m128d) -> __m128i {
2449 unsafe { transmute(src:cvttpd2dq(a)) }
2450}
2451
/// Converts the lower double-precision (64-bit) floating-point element in `a`
/// to a 32-bit integer with truncation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvttsd_si32(a: __m128d) -> i32 {
    // Truncating (round-toward-zero) conversion via the `cvttsd2si` intrinsic.
    unsafe { cvttsd2si(a) }
}
2463
2464/// Converts packed single-precision (32-bit) floating-point elements in `a` to
2465/// packed 32-bit integers with truncation.
2466///
2467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi32)
2468#[inline]
2469#[target_feature(enable = "sse2")]
2470#[cfg_attr(test, assert_instr(cvttps2dq))]
2471#[stable(feature = "simd_x86", since = "1.27.0")]
2472pub fn _mm_cvttps_epi32(a: __m128) -> __m128i {
2473 unsafe { transmute(src:cvttps2dq(a)) }
2474}
2475
2476/// Copies double-precision (64-bit) floating-point element `a` to the lower
2477/// element of the packed 64-bit return value.
2478///
2479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_sd)
2480#[inline]
2481#[target_feature(enable = "sse2")]
2482#[stable(feature = "simd_x86", since = "1.27.0")]
2483pub fn _mm_set_sd(a: f64) -> __m128d {
2484 _mm_set_pd(a:0.0, b:a)
2485}
2486
2487/// Broadcasts double-precision (64-bit) floating-point value a to all elements
2488/// of the return value.
2489///
2490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_pd)
2491#[inline]
2492#[target_feature(enable = "sse2")]
2493#[stable(feature = "simd_x86", since = "1.27.0")]
2494pub fn _mm_set1_pd(a: f64) -> __m128d {
2495 _mm_set_pd(a, b:a)
2496}
2497
2498/// Broadcasts double-precision (64-bit) floating-point value a to all elements
2499/// of the return value.
2500///
2501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd1)
2502#[inline]
2503#[target_feature(enable = "sse2")]
2504#[stable(feature = "simd_x86", since = "1.27.0")]
2505pub fn _mm_set_pd1(a: f64) -> __m128d {
2506 _mm_set_pd(a, b:a)
2507}
2508
/// Sets packed double-precision (64-bit) floating-point elements in the return
/// value with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_pd(a: f64, b: f64) -> __m128d {
    // Intel order: `a` is the high lane, `b` the low lane; in-memory layout
    // is therefore [b, a].
    __m128d([b, a])
}
2519
2520/// Sets packed double-precision (64-bit) floating-point elements in the return
2521/// value with the supplied values in reverse order.
2522///
2523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_pd)
2524#[inline]
2525#[target_feature(enable = "sse2")]
2526#[stable(feature = "simd_x86", since = "1.27.0")]
2527pub fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
2528 _mm_set_pd(a:b, b:a)
2529}
2530
/// Returns packed double-precision (64-bit) floating-point elements with all
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setzero_pd() -> __m128d {
    // All-zero bits are a valid __m128d, so a const-evaluated zeroed value
    // is sound here.
    const { unsafe { mem::zeroed() } }
}
2542
2543/// Returns a mask of the most significant bit of each element in `a`.
2544///
2545/// The mask is stored in the 2 least significant bits of the return value.
2546/// All other bits are set to `0`.
2547///
2548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_pd)
2549#[inline]
2550#[target_feature(enable = "sse2")]
2551#[cfg_attr(test, assert_instr(movmskpd))]
2552#[stable(feature = "simd_x86", since = "1.27.0")]
2553pub fn _mm_movemask_pd(a: __m128d) -> i32 {
2554 // Propagate the highest bit to the rest, because simd_bitmask
2555 // requires all-1 or all-0.
2556 unsafe {
2557 let mask: i64x2 = simd_lt(x:transmute(a), y:i64x2::ZERO);
2558 simd_bitmask::<i64x2, u8>(mask).into()
2559 }
2560}
2561
/// Loads 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from memory into the returned vector.
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
    // Caller guarantees 16-byte alignment (see doc above), making this
    // aligned vector read valid.
    *(mem_addr as *const __m128d)
}
2579
2580/// Loads a 64-bit double-precision value to the low element of a
2581/// 128-bit integer vector and clears the upper element.
2582///
2583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_sd)
2584#[inline]
2585#[target_feature(enable = "sse2")]
2586#[cfg_attr(test, assert_instr(movsd))]
2587#[stable(feature = "simd_x86", since = "1.27.0")]
2588pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
2589 _mm_setr_pd(*mem_addr, b:0.)
2590}
2591
2592/// Loads a double-precision value into the high-order bits of a 128-bit
2593/// vector of `[2 x double]`. The low-order bits are copied from the low-order
2594/// bits of the first operand.
2595///
2596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadh_pd)
2597#[inline]
2598#[target_feature(enable = "sse2")]
2599#[cfg_attr(test, assert_instr(movhps))]
2600#[stable(feature = "simd_x86", since = "1.27.0")]
2601pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2602 _mm_setr_pd(a:simd_extract!(a, 0), *mem_addr)
2603}
2604
2605/// Loads a double-precision value into the low-order bits of a 128-bit
2606/// vector of `[2 x double]`. The high-order bits are copied from the
2607/// high-order bits of the first operand.
2608///
2609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_pd)
2610#[inline]
2611#[target_feature(enable = "sse2")]
2612#[cfg_attr(test, assert_instr(movlps))]
2613#[stable(feature = "simd_x86", since = "1.27.0")]
2614pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2615 _mm_setr_pd(*mem_addr, b:simd_extract!(a, 1))
2616}
2617
/// Stores a 128-bit floating point vector of `[2 x double]` to a 128-bit
/// aligned memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_pd)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
    // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
    // `vps!` builds the memory-operand spelling around the `p` register
    // operand for the current asm syntax (see the macro's definition).
    crate::arch::asm!(
        vps!("movntpd", ",{a}"),
        p = in(reg) mem_addr,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
2647
/// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
    // Write only lane 0; `mem_addr` needs just f64 (8-byte) validity.
    *mem_addr = simd_extract!(a, 0)
}
2659
/// Stores 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from `a` into memory. `mem_addr` must be aligned
/// on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
    // Caller guarantees 16-byte alignment (see doc above), making this
    // aligned vector write valid.
    *(mem_addr as *mut __m128d) = a;
}
2676
2677/// Stores 128-bits (composed of 2 packed double-precision (64-bit)
2678/// floating-point elements) from `a` into memory.
2679/// `mem_addr` does not need to be aligned on any particular boundary.
2680///
2681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_pd)
2682#[inline]
2683#[target_feature(enable = "sse2")]
2684#[cfg_attr(test, assert_instr(movups))] // FIXME movupd expected
2685#[stable(feature = "simd_x86", since = "1.27.0")]
2686pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
2687 mem_addr.cast::<__m128d>().write_unaligned(val:a);
2688}
2689
2690/// Store 16-bit integer from the first element of a into memory.
2691///
2692/// `mem_addr` does not need to be aligned on any particular boundary.
2693///
2694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si16)
2695#[inline]
2696#[target_feature(enable = "sse2")]
2697#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2698pub unsafe fn _mm_storeu_si16(mem_addr: *mut u8, a: __m128i) {
2699 ptr::write_unaligned(dst:mem_addr as *mut i16, src:simd_extract(x:a.as_i16x8(), idx:0))
2700}
2701
2702/// Store 32-bit integer from the first element of a into memory.
2703///
2704/// `mem_addr` does not need to be aligned on any particular boundary.
2705///
2706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si32)
2707#[inline]
2708#[target_feature(enable = "sse2")]
2709#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2710pub unsafe fn _mm_storeu_si32(mem_addr: *mut u8, a: __m128i) {
2711 ptr::write_unaligned(dst:mem_addr as *mut i32, src:simd_extract(x:a.as_i32x4(), idx:0))
2712}
2713
2714/// Store 64-bit integer from the first element of a into memory.
2715///
2716/// `mem_addr` does not need to be aligned on any particular boundary.
2717///
2718/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si64)
2719#[inline]
2720#[target_feature(enable = "sse2")]
2721#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2722pub unsafe fn _mm_storeu_si64(mem_addr: *mut u8, a: __m128i) {
2723 ptr::write_unaligned(dst:mem_addr as *mut i64, src:simd_extract(x:a.as_i64x2(), idx:0))
2724}
2725
/// Stores the lower double-precision (64-bit) floating-point element from `a`
/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store1_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
    // Broadcast lane 0 into both lanes, then do one aligned 128-bit store.
    let b: __m128d = simd_shuffle!(a, a, [0, 0]);
    *(mem_addr as *mut __m128d) = b;
}
2739
/// Stores the lower double-precision (64-bit) floating-point element from `a`
/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd1)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
    // Same operation as `_mm_store1_pd` (Intel naming alias): broadcast
    // lane 0, then one aligned 128-bit store.
    let b: __m128d = simd_shuffle!(a, a, [0, 0]);
    *(mem_addr as *mut __m128d) = b;
}
2753
/// Stores 2 double-precision (64-bit) floating-point elements from `a` into
/// memory in reverse order.
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storer_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
    // Swap the two lanes first, then do one aligned 128-bit store.
    let b: __m128d = simd_shuffle!(a, a, [1, 0]);
    *(mem_addr as *mut __m128d) = b;
}
2768
/// Stores the upper 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeh_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
    // Write only lane 1 (the high half); an 8-byte store.
    *mem_addr = simd_extract!(a, 1);
}
2780
/// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
    // Write only lane 0 (the low half); an 8-byte store.
    *mem_addr = simd_extract!(a, 0);
}
2792
2793/// Loads a double-precision (64-bit) floating-point element from memory
2794/// into both elements of returned vector.
2795///
2796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load1_pd)
2797#[inline]
2798#[target_feature(enable = "sse2")]
2799// #[cfg_attr(test, assert_instr(movapd))] // FIXME LLVM uses different codegen
2800#[stable(feature = "simd_x86", since = "1.27.0")]
2801pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
2802 let d: f64 = *mem_addr;
2803 _mm_setr_pd(a:d, b:d)
2804}
2805
/// Loads a double-precision (64-bit) floating-point element from memory
/// into both elements of returned vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd1)
#[inline]
#[target_feature(enable = "sse2")]
// #[cfg_attr(test, assert_instr(movapd))] // FIXME same as _mm_load1_pd
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
    // Intel naming alias for `_mm_load1_pd`.
    _mm_load1_pd(mem_addr)
}
2817
/// Loads 2 double-precision (64-bit) floating-point elements from memory into
/// the returned vector in reverse order. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadr_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
    // Aligned 128-bit load, then swap the two lanes.
    let a = _mm_load_pd(mem_addr);
    simd_shuffle!(a, a, [1, 0])
}
2834
2835/// Loads 128-bits (composed of 2 packed double-precision (64-bit)
2836/// floating-point elements) from memory into the returned vector.
2837/// `mem_addr` does not need to be aligned on any particular boundary.
2838///
2839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_pd)
2840#[inline]
2841#[target_feature(enable = "sse2")]
2842#[cfg_attr(test, assert_instr(movups))]
2843#[stable(feature = "simd_x86", since = "1.27.0")]
2844pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
2845 let mut dst: __m128d = _mm_undefined_pd();
2846 ptr::copy_nonoverlapping(
2847 src:mem_addr as *const u8,
2848 dst:ptr::addr_of_mut!(dst) as *mut u8,
2849 count:mem::size_of::<__m128d>(),
2850 );
2851 dst
2852}
2853
2854/// Loads unaligned 16-bits of integer data from memory into new vector.
2855///
2856/// `mem_addr` does not need to be aligned on any particular boundary.
2857///
2858/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si16)
2859#[inline]
2860#[target_feature(enable = "sse2")]
2861#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2862pub unsafe fn _mm_loadu_si16(mem_addr: *const u8) -> __m128i {
2863 transmute(src:i16x8::new(
2864 x0:ptr::read_unaligned(mem_addr as *const i16),
2865 x1:0,
2866 x2:0,
2867 x3:0,
2868 x4:0,
2869 x5:0,
2870 x6:0,
2871 x7:0,
2872 ))
2873}
2874
2875/// Loads unaligned 32-bits of integer data from memory into new vector.
2876///
2877/// `mem_addr` does not need to be aligned on any particular boundary.
2878///
2879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si32)
2880#[inline]
2881#[target_feature(enable = "sse2")]
2882#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2883pub unsafe fn _mm_loadu_si32(mem_addr: *const u8) -> __m128i {
2884 transmute(src:i32x4::new(
2885 x0:ptr::read_unaligned(mem_addr as *const i32),
2886 x1:0,
2887 x2:0,
2888 x3:0,
2889 ))
2890}
2891
2892/// Loads unaligned 64-bits of integer data from memory into new vector.
2893///
2894/// `mem_addr` does not need to be aligned on any particular boundary.
2895///
2896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si64)
2897#[inline]
2898#[target_feature(enable = "sse2")]
2899#[stable(feature = "simd_x86_mm_loadu_si64", since = "1.46.0")]
2900pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
2901 transmute(src:i64x2::new(x0:ptr::read_unaligned(mem_addr as *const i64), x1:0))
2902}
2903
/// Constructs a 128-bit floating-point vector of `[2 x double]` from two
/// 128-bit vector parameters of `[2 x double]`, using the immediate-value
/// parameter as a specifier.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(shufps, MASK = 2))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(MASK, 8);
    // MASK bit 0 selects the low lane from `a`; bit 1 selects the high lane
    // from `b` (indices 2..4 in simd_shuffle address `b`'s lanes).
    unsafe { simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) }
}
2918
2919/// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower
2920/// 64 bits are set to the lower 64 bits of the second parameter. The upper
2921/// 64 bits are set to the upper 64 bits of the first parameter.
2922///
2923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_sd)
2924#[inline]
2925#[target_feature(enable = "sse2")]
2926#[cfg_attr(test, assert_instr(movsd))]
2927#[stable(feature = "simd_x86", since = "1.27.0")]
2928pub fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
2929 unsafe { _mm_setr_pd(a:simd_extract!(b, 0), b:simd_extract!(a, 1)) }
2930}
2931
2932/// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
2933/// floating-point vector of `[4 x float]`.
2934///
2935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_ps)
2936#[inline]
2937#[target_feature(enable = "sse2")]
2938#[stable(feature = "simd_x86", since = "1.27.0")]
2939pub fn _mm_castpd_ps(a: __m128d) -> __m128 {
2940 unsafe { transmute(src:a) }
2941}
2942
2943/// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
2944/// integer vector.
2945///
2946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_si128)
2947#[inline]
2948#[target_feature(enable = "sse2")]
2949#[stable(feature = "simd_x86", since = "1.27.0")]
2950pub fn _mm_castpd_si128(a: __m128d) -> __m128i {
2951 unsafe { transmute(src:a) }
2952}
2953
2954/// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
2955/// floating-point vector of `[2 x double]`.
2956///
2957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_pd)
2958#[inline]
2959#[target_feature(enable = "sse2")]
2960#[stable(feature = "simd_x86", since = "1.27.0")]
2961pub fn _mm_castps_pd(a: __m128) -> __m128d {
2962 unsafe { transmute(src:a) }
2963}
2964
2965/// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
2966/// integer vector.
2967///
2968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_si128)
2969#[inline]
2970#[target_feature(enable = "sse2")]
2971#[stable(feature = "simd_x86", since = "1.27.0")]
2972pub fn _mm_castps_si128(a: __m128) -> __m128i {
2973 unsafe { transmute(src:a) }
2974}
2975
2976/// Casts a 128-bit integer vector into a 128-bit floating-point vector
2977/// of `[2 x double]`.
2978///
2979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_pd)
2980#[inline]
2981#[target_feature(enable = "sse2")]
2982#[stable(feature = "simd_x86", since = "1.27.0")]
2983pub fn _mm_castsi128_pd(a: __m128i) -> __m128d {
2984 unsafe { transmute(src:a) }
2985}
2986
2987/// Casts a 128-bit integer vector into a 128-bit floating-point vector
2988/// of `[4 x float]`.
2989///
2990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_ps)
2991#[inline]
2992#[target_feature(enable = "sse2")]
2993#[stable(feature = "simd_x86", since = "1.27.0")]
2994pub fn _mm_castsi128_ps(a: __m128i) -> __m128 {
2995 unsafe { transmute(src:a) }
2996}
2997
/// Returns vector of type __m128d with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
/// In practice, this is typically equivalent to [`mem::zeroed`].
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_undefined_pd() -> __m128d {
    // All-zero bits are a valid __m128d; evaluated at compile time.
    const { unsafe { mem::zeroed() } }
}
3010
/// Returns vector of type __m128i with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
/// In practice, this is typically equivalent to [`mem::zeroed`].
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_undefined_si128() -> __m128i {
    // All-zero bits are a valid __m128i; evaluated at compile time.
    const { unsafe { mem::zeroed() } }
}
3023
/// The resulting `__m128d` element is composed by the high-order values of
/// the two `__m128d` interleaved input elements, i.e.:
///
/// * The `[127:64]` bits are copied from the `[127:64]` bits of the second input
/// * The `[63:0]` bits are copied from the `[127:64]` bits of the first input
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
    // Lane 1 of `a` and lane 1 of `b` (index 3 addresses `b`'s high lane).
    unsafe { simd_shuffle!(a, b, [1, 3]) }
}
3038
/// The resulting `__m128d` element is composed by the low-order values of
/// the two `__m128d` interleaved input elements, i.e.:
///
/// * The `[127:64]` bits are copied from the `[63:0]` bits of the second input
/// * The `[63:0]` bits are copied from the `[63:0]` bits of the first input
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
    // Lane 0 of `a` and lane 0 of `b` (index 2 addresses `b`'s low lane).
    unsafe { simd_shuffle!(a, b, [0, 2]) }
}
3053
3054#[allow(improper_ctypes)]
3055unsafe extern "C" {
3056 #[link_name = "llvm.x86.sse2.pause"]
3057 unsafefn pause();
3058 #[link_name = "llvm.x86.sse2.clflush"]
3059 unsafefn clflush(p: *const u8);
3060 #[link_name = "llvm.x86.sse2.lfence"]
3061 unsafefn lfence();
3062 #[link_name = "llvm.x86.sse2.mfence"]
3063 unsafefn mfence();
3064 #[link_name = "llvm.x86.sse2.pmadd.wd"]
3065 unsafefn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
3066 #[link_name = "llvm.x86.sse2.psad.bw"]
3067 unsafefn psadbw(a: u8x16, b: u8x16) -> u64x2;
3068 #[link_name = "llvm.x86.sse2.psll.w"]
3069 unsafefn psllw(a: i16x8, count: i16x8) -> i16x8;
3070 #[link_name = "llvm.x86.sse2.psll.d"]
3071 unsafefn pslld(a: i32x4, count: i32x4) -> i32x4;
3072 #[link_name = "llvm.x86.sse2.psll.q"]
3073 unsafefn psllq(a: i64x2, count: i64x2) -> i64x2;
3074 #[link_name = "llvm.x86.sse2.psra.w"]
3075 unsafefn psraw(a: i16x8, count: i16x8) -> i16x8;
3076 #[link_name = "llvm.x86.sse2.psra.d"]
3077 unsafefn psrad(a: i32x4, count: i32x4) -> i32x4;
3078 #[link_name = "llvm.x86.sse2.psrl.w"]
3079 unsafefn psrlw(a: i16x8, count: i16x8) -> i16x8;
3080 #[link_name = "llvm.x86.sse2.psrl.d"]
3081 unsafefn psrld(a: i32x4, count: i32x4) -> i32x4;
3082 #[link_name = "llvm.x86.sse2.psrl.q"]
3083 unsafefn psrlq(a: i64x2, count: i64x2) -> i64x2;
3084 #[link_name = "llvm.x86.sse2.cvtps2dq"]
3085 unsafefn cvtps2dq(a: __m128) -> i32x4;
3086 #[link_name = "llvm.x86.sse2.maskmov.dqu"]
3087 unsafefn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
3088 #[link_name = "llvm.x86.sse2.packsswb.128"]
3089 unsafefn packsswb(a: i16x8, b: i16x8) -> i8x16;
3090 #[link_name = "llvm.x86.sse2.packssdw.128"]
3091 unsafefn packssdw(a: i32x4, b: i32x4) -> i16x8;
3092 #[link_name = "llvm.x86.sse2.packuswb.128"]
3093 unsafefn packuswb(a: i16x8, b: i16x8) -> u8x16;
3094 #[link_name = "llvm.x86.sse2.max.sd"]
3095 unsafefn maxsd(a: __m128d, b: __m128d) -> __m128d;
3096 #[link_name = "llvm.x86.sse2.max.pd"]
3097 unsafefn maxpd(a: __m128d, b: __m128d) -> __m128d;
3098 #[link_name = "llvm.x86.sse2.min.sd"]
3099 unsafefn minsd(a: __m128d, b: __m128d) -> __m128d;
3100 #[link_name = "llvm.x86.sse2.min.pd"]
3101 unsafefn minpd(a: __m128d, b: __m128d) -> __m128d;
3102 #[link_name = "llvm.x86.sse2.cmp.sd"]
3103 unsafefn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
3104 #[link_name = "llvm.x86.sse2.cmp.pd"]
3105 unsafefn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
3106 #[link_name = "llvm.x86.sse2.comieq.sd"]
3107 unsafefn comieqsd(a: __m128d, b: __m128d) -> i32;
3108 #[link_name = "llvm.x86.sse2.comilt.sd"]
3109 unsafefn comiltsd(a: __m128d, b: __m128d) -> i32;
3110 #[link_name = "llvm.x86.sse2.comile.sd"]
3111 unsafefn comilesd(a: __m128d, b: __m128d) -> i32;
3112 #[link_name = "llvm.x86.sse2.comigt.sd"]
3113 unsafefn comigtsd(a: __m128d, b: __m128d) -> i32;
3114 #[link_name = "llvm.x86.sse2.comige.sd"]
3115 unsafefn comigesd(a: __m128d, b: __m128d) -> i32;
3116 #[link_name = "llvm.x86.sse2.comineq.sd"]
3117 unsafefn comineqsd(a: __m128d, b: __m128d) -> i32;
3118 #[link_name = "llvm.x86.sse2.ucomieq.sd"]
3119 unsafefn ucomieqsd(a: __m128d, b: __m128d) -> i32;
3120 #[link_name = "llvm.x86.sse2.ucomilt.sd"]
3121 unsafefn ucomiltsd(a: __m128d, b: __m128d) -> i32;
3122 #[link_name = "llvm.x86.sse2.ucomile.sd"]
3123 unsafefn ucomilesd(a: __m128d, b: __m128d) -> i32;
3124 #[link_name = "llvm.x86.sse2.ucomigt.sd"]
3125 unsafefn ucomigtsd(a: __m128d, b: __m128d) -> i32;
3126 #[link_name = "llvm.x86.sse2.ucomige.sd"]
3127 unsafefn ucomigesd(a: __m128d, b: __m128d) -> i32;
3128 #[link_name = "llvm.x86.sse2.ucomineq.sd"]
3129 unsafefn ucomineqsd(a: __m128d, b: __m128d) -> i32;
3130 #[link_name = "llvm.x86.sse2.cvtpd2dq"]
3131 unsafefn cvtpd2dq(a: __m128d) -> i32x4;
3132 #[link_name = "llvm.x86.sse2.cvtsd2si"]
3133 unsafefn cvtsd2si(a: __m128d) -> i32;
3134 #[link_name = "llvm.x86.sse2.cvtsd2ss"]
3135 unsafefn cvtsd2ss(a: __m128, b: __m128d) -> __m128;
3136 #[link_name = "llvm.x86.sse2.cvttpd2dq"]
3137 unsafefn cvttpd2dq(a: __m128d) -> i32x4;
3138 #[link_name = "llvm.x86.sse2.cvttsd2si"]
3139 unsafefn cvttsd2si(a: __m128d) -> i32;
3140 #[link_name = "llvm.x86.sse2.cvttps2dq"]
3141 unsafefn cvttps2dq(a: __m128) -> i32x4;
3142}
3143
3144#[cfg(test)]
3145mod tests {
3146 use crate::{
3147 core_arch::{simd::*, x86::*},
3148 hint::black_box,
3149 };
3150 use std::{
3151 boxed, f32, f64,
3152 mem::{self, transmute},
3153 ptr,
3154 };
3155 use stdarch_test::simd_test;
3156
3157 const NAN: f64 = f64::NAN;
3158
    // Smoke tests for hint/fence intrinsics: these have no observable
    // result to assert, so the tests only verify they execute.
    // `_mm_pause` is a plain `#[test]` — it requires no target feature.
    #[test]
    fn test_mm_pause() {
        _mm_pause()
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_clflush() {
        // Flush the cache line holding a local; any valid address works.
        let x = 0_u8;
        _mm_clflush(ptr::addr_of!(x));
    }

    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_lfence() {
        _mm_lfence();
    }

    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_mfence() {
        _mm_mfence();
    }
3183
    // Lane-wise wrapping addition at each element width (8/16/32/64-bit).
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_add_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi8_overflow() {
        // i8::MAX + 1 wraps to i8::MIN (no saturation for `add`).
        let a = _mm_set1_epi8(0x7F);
        let b = _mm_set1_epi8(1);
        let r = _mm_add_epi8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(-128));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_add_epi16(a, b);
        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(4, 5, 6, 7);
        let r = _mm_add_epi32(a, b);
        let e = _mm_setr_epi32(4, 6, 8, 10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi64() {
        let a = _mm_setr_epi64x(0, 1);
        let b = _mm_setr_epi64x(2, 3);
        let r = _mm_add_epi64(a, b);
        let e = _mm_setr_epi64x(2, 4);
        assert_eq_m128i(r, e);
    }
3233
    // Saturating addition: each width gets a plain case plus boundary
    // cases asserting the result clamps at the type's min/max instead of
    // wrapping.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_adds_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi8_saturate_positive() {
        // i8::MAX + 1 stays at i8::MAX.
        let a = _mm_set1_epi8(0x7F);
        let b = _mm_set1_epi8(1);
        let r = _mm_adds_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi8_saturate_negative() {
        // i8::MIN + (-1) stays at i8::MIN.
        let a = _mm_set1_epi8(-0x80);
        let b = _mm_set1_epi8(-1);
        let r = _mm_adds_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_adds_epi16(a, b);
        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi16_saturate_positive() {
        let a = _mm_set1_epi16(0x7FFF);
        let b = _mm_set1_epi16(1);
        let r = _mm_adds_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi16_saturate_negative() {
        let a = _mm_set1_epi16(-0x8000);
        let b = _mm_set1_epi16(-1);
        let r = _mm_adds_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_adds_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu8_saturate() {
        // Unsigned: 0xFF + 1 stays at 0xFF (`!0` is all-ones).
        let a = _mm_set1_epi8(!0);
        let b = _mm_set1_epi8(1);
        let r = _mm_adds_epu8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_adds_epu16(a, b);
        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu16_saturate() {
        let a = _mm_set1_epi16(!0);
        let b = _mm_set1_epi16(1);
        let r = _mm_adds_epu16(a, b);
        assert_eq_m128i(r, a);
    }
3329
    // Unsigned averages: (3 + 9) averaged is asserted to be 6 at both
    // element widths.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_avg_epu8() {
        let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
        let r = _mm_avg_epu8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(6));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_avg_epu16() {
        let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
        let r = _mm_avg_epu16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(6));
    }
3343
    // Multiply-add of adjacent 16-bit pairs into 32-bit results, e.g.
    // lane 0 asserts 1*9 + 2*10 == 29.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_madd_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm_madd_epi16(a, b);
        let e = _mm_setr_epi32(29, 81, 149, 233);
        assert_eq_m128i(r, e);

        // Test large values.
        // MIN*MIN+MIN*MIN will overflow into i32::MIN.
        let a = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MAX,
            0,
            0,
        );
        let b = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MAX,
            i16::MIN,
            0,
            0,
        );
        let r = _mm_madd_epi16(a, b);
        let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0);
        assert_eq_m128i(r, e);
    }
3378
    // Lane-wise min/max: signed for 16-bit lanes, unsigned for 8-bit
    // lanes (`!0` is 0xFF, the unsigned maximum).
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_max_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let r = _mm_max_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_max_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(!0);
        let r = _mm_max_epu8(a, b);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_min_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let r = _mm_min_epi16(a, b);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_min_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(!0);
        let r = _mm_min_epu8(a, b);
        assert_eq_m128i(r, a);
    }
3410
    // 16-bit multiply high/low halves, and 32x32 -> 64 unsigned multiply
    // of the even lanes.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mulhi_epi16() {
        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
        let r = _mm_mulhi_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-16));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mulhi_epu16() {
        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
        let r = _mm_mulhi_epu16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(15));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mullo_epi16() {
        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
        let r = _mm_mullo_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-17960));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mul_epu32() {
        // Only the low 32 bits of each 64-bit lane are multiplied, so
        // the high lane (1<<34 * 1<<35) comes out 0.
        let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
        let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
        let r = _mm_mul_epu32(a, b);
        let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
        assert_eq_m128i(r, e);
    }
3440
    // Sum of absolute byte differences, accumulated per 8-byte half into
    // the two 64-bit result lanes.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sad_epu8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
            1, 2, 3, 4,
            155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
            1, 2, 3, 4,
        );
        let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
        let r = _mm_sad_epu8(a, b);
        let e = _mm_setr_epi64x(1020, 614);
        assert_eq_m128i(r, e);
    }
3455
    // Lane-wise wrapping subtraction at each element width: 5 - 6 == -1.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi8() {
        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
        let r = _mm_sub_epi8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi16() {
        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
        let r = _mm_sub_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi32() {
        let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
        let r = _mm_sub_epi32(a, b);
        assert_eq_m128i(r, _mm_set1_epi32(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi64() {
        let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
        let r = _mm_sub_epi64(a, b);
        assert_eq_m128i(r, _mm_set1_epi64x(-1));
    }
3483
    // Saturating subtraction: plain cases plus boundary cases asserting
    // the result clamps at the type's min/max (or 0 for unsigned).
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi8() {
        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
        let r = _mm_subs_epi8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi8_saturate_positive() {
        // i8::MAX - (-1) stays at i8::MAX.
        let a = _mm_set1_epi8(0x7F);
        let b = _mm_set1_epi8(-1);
        let r = _mm_subs_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi8_saturate_negative() {
        // i8::MIN - 1 stays at i8::MIN.
        let a = _mm_set1_epi8(-0x80);
        let b = _mm_set1_epi8(1);
        let r = _mm_subs_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi16() {
        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
        let r = _mm_subs_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi16_saturate_positive() {
        let a = _mm_set1_epi16(0x7FFF);
        let b = _mm_set1_epi16(-1);
        let r = _mm_subs_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi16_saturate_negative() {
        let a = _mm_set1_epi16(-0x8000);
        let b = _mm_set1_epi16(1);
        let r = _mm_subs_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu8() {
        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
        let r = _mm_subs_epu8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu8_saturate() {
        // Unsigned: 0 - 1 stays at 0.
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let r = _mm_subs_epu8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu16() {
        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
        let r = _mm_subs_epu16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu16_saturate() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let r = _mm_subs_epu16(a, b);
        assert_eq_m128i(r, a);
    }
3559
    // Left shifts. `slli_si128` shifts whole bytes across the 128-bit
    // register; `slli_*` take an immediate count; `sll_*` take the count
    // from the low 64 bits of a vector. Each test asserts an all-zero
    // result when the count reaches (or exceeds) the element width, and
    // the `sll_*` tests also assert that the count vector's high 64 bits
    // are ignored.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_si128() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_slli_si128::<1>(a);
        let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_slli_si128::<15>(a);
        let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_slli_si128::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi8(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_slli_epi16::<4>(a);
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
        );
        let r = _mm_slli_epi16::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi16(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sll_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
        );
        let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16));
        assert_eq_m128i(r, _mm_set1_epi16(0));
        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi16(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_slli_epi32::<4>(a);
        assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
        let r = _mm_slli_epi32::<32>(a);
        assert_eq_m128i(r, _mm_set1_epi32(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sll_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
        let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32));
        assert_eq_m128i(r, _mm_set1_epi32(0));
        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi32(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_epi64() {
        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
        let r = _mm_slli_epi64::<4>(a);
        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
        let r = _mm_slli_epi64::<64>(a);
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sll_epi64() {
        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
        let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64));
        assert_eq_m128i(r, _mm_set1_epi64x(0));
        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }
3657
    // Arithmetic (sign-extending) right shifts. For out-of-range counts
    // the tests assert each lane becomes 0 or -1 according to its sign.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srai_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_srai_epi16::<4>(a);
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
        );
        let r = _mm_srai_epi16::<16>(a);
        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sra_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
        );
        // The count vector's high 64 bits are ignored.
        let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16));
        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srai_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_srai_epi32::<4>(a);
        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
        let r = _mm_srai_epi32::<32>(a);
        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sra_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
        let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32));
        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
    }
3707
    // Logical (zero-filling) right shifts, mirroring the left-shift
    // tests: byte-level `srli_si128`, immediate `srli_*`, and
    // vector-count `srl_*` with out-of-range counts asserted to zero the
    // result.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_si128() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_srli_si128::<1>(a);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
        );
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_srli_si128::<15>(a);
        let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_srli_si128::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi8(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_srli_epi16::<4>(a);
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
        );
        let r = _mm_srli_epi16::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi16(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srl_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
        );
        let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16));
        assert_eq_m128i(r, _mm_set1_epi16(0));
        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi16(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_srli_epi32::<4>(a);
        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
        let r = _mm_srli_epi32::<32>(a);
        assert_eq_m128i(r, _mm_set1_epi32(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srl_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
        let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32));
        assert_eq_m128i(r, _mm_set1_epi32(0));
        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi32(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_epi64() {
        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
        let r = _mm_srli_epi64::<4>(a);
        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
        let r = _mm_srli_epi64::<64>(a);
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srl_epi64() {
        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
        let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64));
        assert_eq_m128i(r, _mm_set1_epi64x(0));
        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }
3808
    // Bitwise operations on the full 128-bit register, checked via
    // per-byte splats: 5 & 3 == 1, !5 & 3 == 2, 5 | 3 == 7, 5 ^ 3 == 6.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_and_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_and_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_andnot_si128() {
        // andnot computes `!a & b`.
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_andnot_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(2));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_or_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_or_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(7));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_xor_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_xor_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(6));
    }
3840
    // Integer comparisons produce a lane mask: all-ones (!0) where the
    // predicate holds, zero elsewhere.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_epi8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_cmpeq_epi8(a, b);
        #[rustfmt::skip]
        assert_eq_m128i(
            r,
            _mm_setr_epi8(
                0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
            )
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
        let r = _mm_cmpeq_epi16(a, b);
        assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(3, 2, 2, 0);
        let r = _mm_cmpeq_epi32(a, b);
        assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_epi8() {
        let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        let b = _mm_set1_epi8(0);
        let r = _mm_cmpgt_epi8(a, b);
        let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_epi16() {
        let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
        let b = _mm_set1_epi16(0);
        let r = _mm_cmpgt_epi16(a, b);
        let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_epi32() {
        let a = _mm_set_epi32(5, 0, 0, 0);
        let b = _mm_set1_epi32(0);
        let r = _mm_cmpgt_epi32(a, b);
        assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_epi8() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm_cmplt_epi8(a, b);
        let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_epi16() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm_cmplt_epi16(a, b);
        let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_epi32() {
        let a = _mm_set1_epi32(0);
        let b = _mm_set_epi32(5, 0, 0, 0);
        let r = _mm_cmplt_epi32(a, b);
        assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
    }
3922
    // Conversions between integer and floating-point vectors, and scalar
    // moves in/out of the low lane of a vector.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtepi32_pd() {
        // Only the two low i32 lanes (5, 15) convert to the two f64 lanes.
        let a = _mm_set_epi32(35, 25, 15, 5);
        let r = _mm_cvtepi32_pd(a);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsi32_sd() {
        // Replaces the low f64 lane with the converted i32; the high lane
        // is carried over from `a`.
        let a = _mm_set1_pd(3.5);
        let r = _mm_cvtsi32_sd(a, 5);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtepi32_ps() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let r = _mm_cvtepi32_ps(a);
        assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtps_epi32() {
        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let r = _mm_cvtps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsi32_si128() {
        // The scalar lands in lane 0; the remaining lanes are zeroed.
        let r = _mm_cvtsi32_si128(5);
        assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsi128_si32() {
        let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
        assert_eq!(r, 5);
    }
3962
    // Constructors: `set*` take arguments in high-to-low lane order,
    // `setr*` in low-to-high order — the `set_*` tests assert the two
    // are reverses of each other.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_epi64x() {
        let r = _mm_set_epi64x(0, 1);
        assert_eq_m128i(r, _mm_setr_epi64x(1, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_epi32() {
        let r = _mm_set_epi32(0, 1, 2, 3);
        assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_epi16() {
        let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_epi8() {
        #[rustfmt::skip]
        let r = _mm_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            15, 14, 13, 12, 11, 10, 9, 8,
            7, 6, 5, 4, 3, 2, 1, 0,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_epi64x() {
        let r = _mm_set1_epi64x(1);
        assert_eq_m128i(r, _mm_set1_epi64x(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_epi32() {
        let r = _mm_set1_epi32(1);
        assert_eq_m128i(r, _mm_set1_epi32(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_epi16() {
        let r = _mm_set1_epi16(1);
        assert_eq_m128i(r, _mm_set1_epi16(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_epi8() {
        let r = _mm_set1_epi8(1);
        assert_eq_m128i(r, _mm_set1_epi8(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setr_epi32() {
        let r = _mm_setr_epi32(0, 1, 2, 3);
        assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setr_epi16() {
        let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setr_epi8() {
        #[rustfmt::skip]
        let r = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setzero_si128() {
        let r = _mm_setzero_si128();
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }
4050
4051 #[simd_test(enable = "sse2")]
4052 unsafe fn test_mm_loadl_epi64() {
4053 let a = _mm_setr_epi64x(6, 5);
4054 let r = _mm_loadl_epi64(ptr::addr_of!(a));
4055 assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
4056 }
4057
4058 #[simd_test(enable = "sse2")]
4059 unsafe fn test_mm_load_si128() {
4060 let a = _mm_set_epi64x(5, 6);
4061 let r = _mm_load_si128(ptr::addr_of!(a) as *const _);
4062 assert_eq_m128i(a, r);
4063 }
4064
4065 #[simd_test(enable = "sse2")]
4066 unsafe fn test_mm_loadu_si128() {
4067 let a = _mm_set_epi64x(5, 6);
4068 let r = _mm_loadu_si128(ptr::addr_of!(a) as *const _);
4069 assert_eq_m128i(a, r);
4070 }
4071
    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    // (non-temporal store)
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_maskmoveu_si128() {
        let a = _mm_set1_epi8(9);
        // Only the byte whose mask has the high bit (0x80) set is stored; here
        // that is the third-highest lane in `_mm_set_epi8` argument order.
        #[rustfmt::skip]
        let mask = _mm_set_epi8(
            0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0,
        );
        let mut r = _mm_set1_epi8(0);
        _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8);
        // maskmovdqu is a non-temporal store; fence before reading `r` back.
        _mm_sfence();
        // Exactly one byte of `r` (matching the mask position) receives 9.
        let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
4089
    // Aligned 128-bit store; `r` is an `__m128i` local, which is 16-byte aligned.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_si128() {
        let a = _mm_set1_epi8(9);
        let mut r = _mm_set1_epi8(0);
        _mm_store_si128(&mut r, a);
        assert_eq_m128i(r, a);
    }

    // Unaligned 128-bit store must round-trip the same value.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeu_si128() {
        let a = _mm_set1_epi8(9);
        let mut r = _mm_set1_epi8(0);
        _mm_storeu_si128(&mut r, a);
        assert_eq_m128i(r, a);
    }

    // `_mm_storel_epi64` writes only the low 64 bits; the upper lane of the
    // zero-initialized destination stays 0.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storel_epi64() {
        let a = _mm_setr_epi64x(2, 9);
        let mut r = _mm_set1_epi8(0);
        _mm_storel_epi64(&mut r, a);
        assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
    }

    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    // (non-temporal store)
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_stream_si128() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let mut r = _mm_undefined_si128();
        _mm_stream_si128(ptr::addr_of_mut!(r), a);
        // Non-temporal store: fence before reading the destination back.
        _mm_sfence();
        assert_eq_m128i(r, a);
    }
4125
    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    // (non-temporal store)
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_stream_si32() {
        let a: i32 = 7;
        let mut mem = boxed::Box::<i32>::new(-1);
        _mm_stream_si32(ptr::addr_of_mut!(*mem), a);
        // Non-temporal store: fence before reading the destination back.
        _mm_sfence();
        assert_eq!(a, *mem);
    }

    // `_mm_move_epi64` keeps the low 64-bit lane and zeroes the upper one.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_move_epi64() {
        let a = _mm_setr_epi64x(5, 6);
        let r = _mm_move_epi64(a);
        assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
    }
4144
    // packsswb: i16 -> i8 with signed saturation; 0x80 saturates to 0x7F and
    // -0x81 saturates to -0x80.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_packs_epi16() {
        let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
        let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
        let r = _mm_packs_epi16(a, b);
        #[rustfmt::skip]
        assert_eq_m128i(
            r,
            _mm_setr_epi8(
                0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
            )
        );
    }

    // packssdw: i32 -> i16 with signed saturation to [-0x8000, 0x7FFF].
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_packs_epi32() {
        let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
        let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
        let r = _mm_packs_epi32(a, b);
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF),
        );
    }

    // packuswb: i16 -> u8 with unsigned saturation; 0x100 -> 0xFF (!0), -1 -> 0.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_packus_epi16() {
        let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
        let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
        let r = _mm_packus_epi16(a, b);
        assert_eq_m128i(
            r,
            _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0),
        );
    }
4180
    // pextrw zero-extends the selected 16-bit lane into the i32 result, so
    // lane value -1 reads back as 0xFFFF, not -1.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_extract_epi16() {
        let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
        let r1 = _mm_extract_epi16::<0>(a);
        let r2 = _mm_extract_epi16::<3>(a);
        assert_eq!(r1, 0xFFFF);
        assert_eq!(r2, 3);
    }

    // pinsrw replaces exactly one 16-bit lane, leaving the rest untouched.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_insert_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_insert_epi16::<0>(a, 9);
        let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    // pmovmskb collects the sign (high) bit of each byte; byte 0 maps to bit 0.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_movemask_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
            0b0101, 0b1111_0000u8 as i8, 0, 0,
            0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101,
            0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
        );
        let r = _mm_movemask_epi8(a);
        assert_eq!(r, 0b10100110_00100101);
    }
4210
    // The IMM8 is read as four 2-bit lane selectors, lowest field first:
    // 0b00_01_01_11 selects source lanes 3, 1, 1, 0 for result lanes 0..4.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shuffle_epi32() {
        let a = _mm_setr_epi32(5, 10, 15, 20);
        let r = _mm_shuffle_epi32::<0b00_01_01_11>(a);
        let e = _mm_setr_epi32(20, 10, 10, 5);
        assert_eq_m128i(r, e);
    }

    // pshufhw shuffles only the upper four 16-bit lanes; the low four pass through.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shufflehi_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
        let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a);
        let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
        assert_eq_m128i(r, e);
    }

    // pshuflw shuffles only the lower four 16-bit lanes; the high four pass through.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shufflelo_epi16() {
        let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
        let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a);
        let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
        assert_eq_m128i(r, e);
    }
4234
    // unpackhi_* interleave the upper halves of `a` and `b`; unpacklo_* the
    // lower halves. The result alternates a-lane, b-lane.

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_unpackhi_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_unpackhi_epi16(a, b);
        let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(4, 5, 6, 7);
        let r = _mm_unpackhi_epi32(a, b);
        let e = _mm_setr_epi32(2, 6, 3, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_epi64() {
        let a = _mm_setr_epi64x(0, 1);
        let b = _mm_setr_epi64x(2, 3);
        let r = _mm_unpackhi_epi64(a, b);
        let e = _mm_setr_epi64x(1, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_unpacklo_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            0, 16, 1, 17, 2, 18, 3, 19,
            4, 20, 5, 21, 6, 22, 7, 23,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_unpacklo_epi16(a, b);
        let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(4, 5, 6, 7);
        let r = _mm_unpacklo_epi32(a, b);
        let e = _mm_setr_epi32(0, 4, 1, 5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_epi64() {
        let a = _mm_setr_epi64x(0, 1);
        let b = _mm_setr_epi64x(2, 3);
        let r = _mm_unpacklo_epi64(a, b);
        let e = _mm_setr_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }
4327
    // `_sd` arithmetic operates on lane 0 only; lane 1 is copied from `a`.

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_add_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_add_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_div_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_div_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_div_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_div_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2));
    }
4359
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_max_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_max_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_max_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_max_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0));

        // Check SSE(2)-specific semantics for -0.0 handling.
        // maxpd returns the SECOND operand when the lanes compare equal
        // (-0.0 == 0.0), so the results below are asymmetric in argument order.
        // Compared bitwise (via transmute) because -0.0 == 0.0 under `==`.
        let a = _mm_setr_pd(-0.0, 0.0);
        let b = _mm_setr_pd(0.0, 0.0);
        let r1: [u8; 16] = transmute(_mm_max_pd(a, b));
        let r2: [u8; 16] = transmute(_mm_max_pd(b, a));
        let a: [u8; 16] = transmute(a);
        let b: [u8; 16] = transmute(b);
        assert_eq!(r1, b);
        assert_eq!(r2, a);
        assert_ne!(a, b); // sanity check that -0.0 is actually present
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_min_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_min_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_min_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_min_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));

        // Check SSE(2)-specific semantics for -0.0 handling.
        // Like maxpd, minpd returns the SECOND operand on compare-equal lanes.
        let a = _mm_setr_pd(-0.0, 0.0);
        let b = _mm_setr_pd(0.0, 0.0);
        let r1: [u8; 16] = transmute(_mm_min_pd(a, b));
        let r2: [u8; 16] = transmute(_mm_min_pd(b, a));
        let a: [u8; 16] = transmute(a);
        let b: [u8; 16] = transmute(b);
        assert_eq!(r1, b);
        assert_eq!(r2, a);
        assert_ne!(a, b); // sanity check that -0.0 is actually present
    }
4413
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mul_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_mul_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mul_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_mul_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0));
    }

    // sqrtsd computes sqrt of `b`'s lane 0; lane 1 is copied from `a`.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sqrt_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_sqrt_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sqrt_pd() {
        let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
        assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_sub_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_sub_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0));
    }
4459
    // The `_pd` logic ops are pure bitwise operations on the 128-bit value, so
    // the tests build operands from integer bit patterns: 5 & 3 = 1,
    // !5 & 3 = 2, 5 | 3 = 7, 5 ^ 3 = 6.

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_and_pd() {
        let a = transmute(u64x2::splat(5));
        let b = transmute(u64x2::splat(3));
        let r = _mm_and_pd(a, b);
        let e = transmute(u64x2::splat(1));
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_andnot_pd() {
        let a = transmute(u64x2::splat(5));
        let b = transmute(u64x2::splat(3));
        let r = _mm_andnot_pd(a, b);
        let e = transmute(u64x2::splat(2));
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_or_pd() {
        let a = transmute(u64x2::splat(5));
        let b = transmute(u64x2::splat(3));
        let r = _mm_or_pd(a, b);
        let e = transmute(u64x2::splat(7));
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_xor_pd() {
        let a = transmute(u64x2::splat(5));
        let b = transmute(u64x2::splat(3));
        let r = _mm_xor_pd(a, b);
        let e = transmute(u64x2::splat(6));
        assert_eq_m128d(r, e);
    }
4495
    // `cmp*_sd` produce an all-ones (!0) or all-zeros 64-bit mask in lane 0 and
    // pass lane 1 of `a` through unchanged — hence the `2.0f64.to_bits()` in
    // every expected value. Results are checked bitwise via transmute because
    // an all-ones mask is a NaN when viewed as f64.

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmple_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_sd() {
        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpge_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
        assert_eq_m128i(r, e);
    }

    // ord: true iff neither operand's lane 0 is NaN.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpord_sd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
        assert_eq_m128i(r, e);
    }

    // unord: true iff either operand's lane 0 is NaN.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpunord_sd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpneq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
        assert_eq_m128i(r, e);
    }

    // The n* forms are the negations of the corresponding comparisons.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnlt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnle_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpngt_sd() {
        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnge_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
        assert_eq_m128i(r, e);
    }
4591
    // `cmp*_pd` compare both lanes, writing an all-ones (!0) or all-zeros
    // 64-bit mask per lane; checked bitwise via transmute.

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmple_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, !0);
        let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpge_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b));
        assert_eq_m128i(r, e);
    }

    // ord: per-lane "neither operand is NaN"; lane 0 has a NaN here.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpord_pd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b));
        assert_eq_m128i(r, e);
    }

    // unord: per-lane "either operand is NaN".
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpunord_pd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpneq_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b));
        assert_eq_m128i(r, e);
    }

    // The n* forms are the per-lane negations of the corresponding comparisons.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnlt_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnle_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpngt_pd() {
        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnge_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b));
        assert_eq_m128i(r, e);
    }
4687
    // `comi*`/`ucomi*` compare lane 0 of each operand and return an i32 flag
    // (non-zero = condition holds). All comparisons here use lane 0 values
    // 1.0 vs 1.0 unless noted.

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comieq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comieq_sd(a, b) != 0);

        // A NaN operand makes the comparison unordered, so eq is false.
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comieq_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comilt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comilt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comile_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comile_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comigt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comigt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comige_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comige_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comineq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comineq_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomieq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomieq_sd(a, b) != 0);

        // NaN vs NaN is unordered, so ucomieq reports not-equal (0).
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
        assert!(_mm_ucomieq_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomilt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomilt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomile_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomile_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomigt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomigt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomige_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomige_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomineq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomineq_sd(a, b) == 0);
    }
4765
4766 #[simd_test(enable = "sse2")]
4767 unsafe fn test_mm_movemask_pd() {
4768 let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
4769 assert_eq!(r, 0b01);
4770
4771 let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
4772 assert_eq!(r, 0b11);
4773 }
4774
    // Backing storage for the load/store tests below; 16-byte alignment so the
    // aligned forms (`_mm_load_pd`, `_mm_store_pd`, ...) are valid on `data[0]`.
    #[repr(align(16))]
    struct Memory {
        data: [f64; 4],
    }
4779
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_pd() {
        let mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mem.data;
        // `Memory` is 16-byte aligned, so the aligned load is valid here.
        let d = vals.as_ptr();

        let r = _mm_load_pd(d);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
    }

    // `_mm_load_sd` fills lane 0 from memory and zeroes lane 1.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_sd() {
        let a = 1.;
        let expected = _mm_setr_pd(a, 0.);
        let r = _mm_load_sd(&a);
        assert_eq_m128d(r, expected);
    }

    // `_mm_loadh_pd` replaces only the upper lane; lane 0 is kept from `a`.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadh_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = 3.;
        let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
        let r = _mm_loadh_pd(a, &b);
        assert_eq_m128d(r, expected);
    }

    // `_mm_loadl_pd` replaces only the lower lane; lane 1 is kept from `a`.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadl_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = 3.;
        let expected = _mm_setr_pd(3., get_m128d(a, 1));
        let r = _mm_loadl_pd(a, &b);
        assert_eq_m128d(r, expected);
    }
4817
4818 #[simd_test(enable = "sse2")]
4819 // Miri cannot support this until it is clear how it fits in the Rust memory model
4820 // (non-temporal store)
4821 #[cfg_attr(miri, ignore)]
4822 unsafe fn test_mm_stream_pd() {
4823 #[repr(align(128))]
4824 struct Memory {
4825 pub data: [f64; 2],
4826 }
4827 let a = _mm_set1_pd(7.0);
4828 let mut mem = Memory { data: [-1.0; 2] };
4829
4830 _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
4831 _mm_sfence();
4832 for i in 0..2 {
4833 assert_eq!(mem.data[i], get_m128d(a, i));
4834 }
4835 }
4836
    // `_mm_store_sd` writes only lane 0 to memory.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_sd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_store_sd(&mut dest, a);
        assert_eq!(dest, _mm_cvtsd_f64(a));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        // `Memory` is 16-byte aligned, so the aligned store is valid here.
        let d = vals.as_mut_ptr();

        // black_box prevents the compiler from folding the store away.
        _mm_store_pd(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 2.0);
    }
4856
4857 #[simd_test(enable = "sse2")]
4858 unsafe fn test_mm_storeu_pd() {
4859 let mut mem = Memory { data: [0.0f64; 4] };
4860 let vals = &mut mem.data;
4861 let a = _mm_setr_pd(1.0, 2.0);
4862
4863 let mut ofs = 0;
4864 let mut p = vals.as_mut_ptr();
4865
4866 // Make sure p is **not** aligned to 16-byte boundary
4867 if (p as usize) & 0xf == 0 {
4868 ofs = 1;
4869 p = p.add(1);
4870 }
4871
4872 _mm_storeu_pd(p, *black_box(&a));
4873
4874 if ofs > 0 {
4875 assert_eq!(vals[ofs - 1], 0.0);
4876 }
4877 assert_eq!(vals[ofs + 0], 1.0);
4878 assert_eq!(vals[ofs + 1], 2.0);
4879 }
4880
    // `_mm_storeu_si16` writes only the low 16 bits; remaining lanes of the
    // destination are untouched.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeu_si16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let mut r = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
        _mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a);
        let e = _mm_setr_epi16(1, 10, 11, 12, 13, 14, 15, 16);
        assert_eq_m128i(r, e);
    }

    // `_mm_storeu_si32` writes only the low 32 bits.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeu_si32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let mut r = _mm_setr_epi32(5, 6, 7, 8);
        _mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a);
        let e = _mm_setr_epi32(1, 6, 7, 8);
        assert_eq_m128i(r, e);
    }

    // `_mm_storeu_si64` writes only the low 64 bits.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeu_si64() {
        let a = _mm_setr_epi64x(1, 2);
        let mut r = _mm_setr_epi64x(3, 4);
        _mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a);
        let e = _mm_setr_epi64x(1, 4);
        assert_eq_m128i(r, e);
    }
4907
    // `_mm_store1_pd` duplicates lane 0 into both memory slots.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store1_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store1_pd(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 1.0);
    }

    // `_mm_store_pd1` is an alias for `_mm_store1_pd`.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_pd1() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store_pd1(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 1.0);
    }

    // `_mm_storer_pd` stores the lanes in reversed order.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storer_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_storer_pd(d, *black_box(&a));
        assert_eq!(vals[0], 2.0);
        assert_eq!(vals[1], 1.0);
    }

    // `_mm_storeh_pd` writes only the upper lane to memory.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeh_pd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_storeh_pd(&mut dest, a);
        assert_eq!(dest, get_m128d(a, 1));
    }

    // `_mm_storel_pd` writes only the lower lane to memory.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storel_pd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_storel_pd(&mut dest, a);
        assert_eq!(dest, _mm_cvtsd_f64(a));
    }
4959
    // `_mm_loadr_pd` loads the two doubles in reversed lane order.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadr_pd() {
        let mut mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mut mem.data;
        let d = vals.as_ptr();

        let r = _mm_loadr_pd(d);
        assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadu_pd() {
        let mut mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mut mem.data;
        let mut d = vals.as_ptr();

        // make sure d is not aligned to 16-byte boundary, so the unaligned
        // load path is actually exercised
        let mut offset = 0;
        if (d as usize) & 0xf == 0 {
            offset = 1;
            d = d.add(offset);
        }

        let r = _mm_loadu_pd(d);
        // Shifting by one element just shifts each expected value up by 1.0,
        // since `data` is the arithmetic sequence 1.0..=4.0.
        let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64));
        assert_eq_m128d(r, e);
    }
4991
    // `_mm_loadu_si16` loads 16 bits into lane 0 and zeroes the rest.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadu_si16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm_loadu_si16(ptr::addr_of!(a) as *const _);
        assert_eq_m128i(r, _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0));
    }

    // `_mm_loadu_si32` loads 32 bits into lane 0 and zeroes the rest.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadu_si32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let r = _mm_loadu_si32(ptr::addr_of!(a) as *const _);
        assert_eq_m128i(r, _mm_setr_epi32(1, 0, 0, 0));
    }

    // `_mm_loadu_si64` loads 64 bits into lane 0 and zeroes the rest.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadu_si64() {
        let a = _mm_setr_epi64x(5, 6);
        let r = _mm_loadu_si64(ptr::addr_of!(a) as *const _);
        assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
    }
5012
    // cvtpd2ps narrows two f64 lanes to f32 in the low half; the upper two f32
    // lanes of the result are zeroed. f64 values beyond f32 range become
    // infinities.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtpd_ps() {
        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
        assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
        assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
        assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
    }

    // cvtps2pd widens the two low f32 lanes; the upper two are ignored.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtps_pd() {
        let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
        assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));

        let r = _mm_cvtps_pd(_mm_setr_ps(
            f32::MAX,
            f32::INFINITY,
            f32::NEG_INFINITY,
            f32::MIN,
        ));
        assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
    }

    // cvtpd2epi32 converts with rounding; out-of-range, infinite, and NaN
    // inputs all produce the "integer indefinite" value i32::MIN. The upper
    // two i32 lanes of the result are zeroed.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtpd_epi32() {
        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
        assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
        assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
    }
5059
    // cvtsd2si converts lane 0; out-of-range and NaN yield i32::MIN.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_si32() {
        let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
        assert_eq!(r, -2);

        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq!(r, i32::MIN);

        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
        assert_eq!(r, i32::MIN);
    }

    // cvtsd2ss narrows `b`'s lane 0 into `a`'s lane 0; upper lanes of `a` pass
    // through unchanged.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_ss() {
        let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
        let b = _mm_setr_pd(2.0, -5.0);

        let r = _mm_cvtsd_ss(a, b);

        assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));

        let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
        let b = _mm_setr_pd(f64::INFINITY, -5.0);

        let r = _mm_cvtsd_ss(a, b);

        assert_eq_m128(
            r,
            _mm_setr_ps(
                f32::INFINITY,
                f32::NEG_INFINITY,
                f32::MAX,
                f32::NEG_INFINITY,
            ),
        );
    }

    // `_mm_cvtsd_f64` extracts lane 0 as a scalar f64.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_f64() {
        let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
        assert_eq!(r, -1.1);
    }
5102
    // cvtss2sd widens `b`'s lane 0 into `a`'s lane 0; `a`'s upper lane passes
    // through unchanged.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtss_sd() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);

        let r = _mm_cvtss_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));

        let a = _mm_setr_pd(-1.1, f64::INFINITY);
        let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);

        let r = _mm_cvtss_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
    }

    // cvtt* truncate toward zero instead of using the current rounding mode;
    // unrepresentable inputs (infinities, NaN) yield i32::MIN.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttpd_epi32() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let r = _mm_cvttpd_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));

        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let r = _mm_cvttpd_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttsd_si32() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let r = _mm_cvttsd_si32(a);
        assert_eq!(r, -1);

        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let r = _mm_cvttsd_si32(a);
        assert_eq!(r, i32::MIN);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttps_epi32() {
        let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
        let r = _mm_cvttps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));

        // f32::MIN/MAX are out of i32 range, so all four lanes saturate to
        // the integer indefinite value i32::MIN.
        let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
        let r = _mm_cvttps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
    }
5150
5151 #[simd_test(enable = "sse2")]
5152 unsafe fn test_mm_set_sd() {
5153 let r = _mm_set_sd(-1.0_f64);
5154 assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
5155 }
5156
5157 #[simd_test(enable = "sse2")]
5158 unsafe fn test_mm_set1_pd() {
5159 let r = _mm_set1_pd(-1.0_f64);
5160 assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
5161 }
5162
5163 #[simd_test(enable = "sse2")]
5164 unsafe fn test_mm_set_pd1() {
5165 let r = _mm_set_pd1(-2.0_f64);
5166 assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
5167 }
5168
5169 #[simd_test(enable = "sse2")]
5170 unsafe fn test_mm_set_pd() {
5171 let r = _mm_set_pd(1.0_f64, 5.0_f64);
5172 assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
5173 }
5174
5175 #[simd_test(enable = "sse2")]
5176 unsafe fn test_mm_setr_pd() {
5177 let r = _mm_setr_pd(1.0_f64, -5.0_f64);
5178 assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64));
5179 }
5180
5181 #[simd_test(enable = "sse2")]
5182 unsafe fn test_mm_setzero_pd() {
5183 let r = _mm_setzero_pd();
5184 assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
5185 }
5186
5187 #[simd_test(enable = "sse2")]
5188 unsafe fn test_mm_load1_pd() {
5189 let d = -5.0;
5190 let r = _mm_load1_pd(&d);
5191 assert_eq_m128d(r, _mm_setr_pd(d, d));
5192 }
5193
5194 #[simd_test(enable = "sse2")]
5195 unsafe fn test_mm_load_pd1() {
5196 let d = -5.0;
5197 let r = _mm_load_pd1(&d);
5198 assert_eq_m128d(r, _mm_setr_pd(d, d));
5199 }
5200
5201 #[simd_test(enable = "sse2")]
5202 unsafe fn test_mm_unpackhi_pd() {
5203 let a = _mm_setr_pd(1.0, 2.0);
5204 let b = _mm_setr_pd(3.0, 4.0);
5205 let r = _mm_unpackhi_pd(a, b);
5206 assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
5207 }
5208
5209 #[simd_test(enable = "sse2")]
5210 unsafe fn test_mm_unpacklo_pd() {
5211 let a = _mm_setr_pd(1.0, 2.0);
5212 let b = _mm_setr_pd(3.0, 4.0);
5213 let r = _mm_unpacklo_pd(a, b);
5214 assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0));
5215 }
5216
5217 #[simd_test(enable = "sse2")]
5218 unsafe fn test_mm_shuffle_pd() {
5219 let a = _mm_setr_pd(1., 2.);
5220 let b = _mm_setr_pd(3., 4.);
5221 let expected = _mm_setr_pd(1., 3.);
5222 let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b);
5223 assert_eq_m128d(r, expected);
5224 }
5225
5226 #[simd_test(enable = "sse2")]
5227 unsafe fn test_mm_move_sd() {
5228 let a = _mm_setr_pd(1., 2.);
5229 let b = _mm_setr_pd(3., 4.);
5230 let expected = _mm_setr_pd(3., 2.);
5231 let r = _mm_move_sd(a, b);
5232 assert_eq_m128d(r, expected);
5233 }
5234
5235 #[simd_test(enable = "sse2")]
5236 unsafe fn test_mm_castpd_ps() {
5237 let a = _mm_set1_pd(0.);
5238 let expected = _mm_set1_ps(0.);
5239 let r = _mm_castpd_ps(a);
5240 assert_eq_m128(r, expected);
5241 }
5242
5243 #[simd_test(enable = "sse2")]
5244 unsafe fn test_mm_castpd_si128() {
5245 let a = _mm_set1_pd(0.);
5246 let expected = _mm_set1_epi64x(0);
5247 let r = _mm_castpd_si128(a);
5248 assert_eq_m128i(r, expected);
5249 }
5250
5251 #[simd_test(enable = "sse2")]
5252 unsafe fn test_mm_castps_pd() {
5253 let a = _mm_set1_ps(0.);
5254 let expected = _mm_set1_pd(0.);
5255 let r = _mm_castps_pd(a);
5256 assert_eq_m128d(r, expected);
5257 }
5258
5259 #[simd_test(enable = "sse2")]
5260 unsafe fn test_mm_castps_si128() {
5261 let a = _mm_set1_ps(0.);
5262 let expected = _mm_set1_epi32(0);
5263 let r = _mm_castps_si128(a);
5264 assert_eq_m128i(r, expected);
5265 }
5266
5267 #[simd_test(enable = "sse2")]
5268 unsafe fn test_mm_castsi128_pd() {
5269 let a = _mm_set1_epi64x(0);
5270 let expected = _mm_set1_pd(0.);
5271 let r = _mm_castsi128_pd(a);
5272 assert_eq_m128d(r, expected);
5273 }
5274
5275 #[simd_test(enable = "sse2")]
5276 unsafe fn test_mm_castsi128_ps() {
5277 let a = _mm_set1_epi32(0);
5278 let expected = _mm_set1_ps(0.);
5279 let r = _mm_castsi128_ps(a);
5280 assert_eq_m128(r, expected);
5281 }
5282}
5283