//! Streaming SIMD Extensions 2 (SSE2)

#[cfg(test)]
use stdarch_test::assert_instr;

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    intrinsics::sqrtf64,
    mem, ptr,
};

/// Provides a hint to the processor that the code sequence is a spin-wait loop.
///
/// This can help improve the performance and power consumption of spin-wait
/// loops.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_pause)
#[inline]
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_pause() {
    // note: `pause` is guaranteed to be interpreted as a `nop` by CPUs without
    // the SSE2 target-feature - therefore it does not require any target features
    pause()
}
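
// A minimal spin-wait sketch (hypothetical helper, not part of this module's
// API): `pause` reduces power use and pipeline flushes while polling a flag.
#[cfg(test)]
#[allow(dead_code)]
fn example_spin_until_set(flag: &core::sync::atomic::AtomicBool) {
    while !flag.load(core::sync::atomic::Ordering::Acquire) {
        // SAFETY: `pause` degrades to a `nop` on CPUs without SSE2.
        unsafe { _mm_pause() };
    }
}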

/// Invalidates and flushes the cache line that contains `p` from all levels of
/// the cache hierarchy.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clflush)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(clflush))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_clflush(p: *const u8) {
    clflush(p)
}

/// Performs a serializing operation on all load-from-memory instructions
/// that were issued prior to this instruction.
///
/// Guarantees that every load instruction that precedes, in program order, the
/// load fence instruction is globally visible before any load instruction
/// which follows the fence in program order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(lfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_lfence() {
    lfence()
}

/// Performs a serializing operation on all load-from-memory and store-to-memory
/// instructions that were issued prior to this instruction.
///
/// Guarantees that every memory access that precedes, in program order, the
/// memory fence instruction is globally visible before any memory instruction
/// which follows the fence in program order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mfence() {
    mfence()
}
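
// An ordering sketch (hypothetical helper; the raw-pointer publish protocol
// is an assumption for illustration): make the payload store globally
// visible before the flag store.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn example_publish_with_mfence(data: *mut u64, flag: *mut u8) {
    data.write(42); // payload store
    _mm_mfence(); // the payload is globally visible past this point
    flag.write(1); // readers that observe the flag also observe the payload
}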

/// Adds packed 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_add(a.as_i8x16(), b.as_i8x16())) }
}

/// Adds packed 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_add(a.as_i16x8(), b.as_i16x8())) }
}

/// Adds packed 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_add(a.as_i32x4(), b.as_i32x4())) }
}

/// Adds packed 64-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_add(a.as_i64x2(), b.as_i64x2())) }
}

/// Adds packed 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16())) }
}

/// Adds packed 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8())) }
}

/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16())) }
}

/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8())) }
}
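
// A saturation sketch (hypothetical helper): 120 + 20 clamps to `i8::MAX`
// instead of wrapping to -116.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_adds_epi8_saturates() {
    let r = _mm_adds_epi8(_mm_set1_epi8(120), _mm_set1_epi8(20));
    assert_eq!(_mm_cvtsi128_si32(r) as i8, i8::MAX); // every lane is 127
}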

/// Averages packed unsigned 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = simd_cast::<_, u16x16>(a.as_u8x16());
        let b = simd_cast::<_, u16x16>(b.as_u8x16());
        let r = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1));
        transmute(simd_cast::<_, u8x16>(r))
    }
}

/// Averages packed unsigned 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = simd_cast::<_, u32x8>(a.as_u16x8());
        let b = simd_cast::<_, u32x8>(b.as_u16x8());
        let r = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1));
        transmute(simd_cast::<_, u16x8>(r))
    }
}
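
// A rounding-average sketch (hypothetical helper): `pavgb` computes
// (a + b + 1) >> 1 in a widened type, so 1 averaged with 2 rounds up to 2.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_avg_rounds_up() {
    let r = _mm_avg_epu8(_mm_set1_epi8(1), _mm_set1_epi8(2));
    assert_eq!(_mm_cvtsi128_si32(r) as u8, 2); // (1 + 2 + 1) >> 1
}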

/// Multiplies and then horizontally adds signed 16-bit integers in `a` and `b`.
///
/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Horizontally adds adjacent pairs of
/// intermediate 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) }
}
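
// A `pmaddwd` sketch (hypothetical helper): each 32-bit result lane is
// a[2i]*b[2i] + a[2i+1]*b[2i+1], the building block of dot products.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_madd_dot_pairs() {
    let a = _mm_set1_epi16(3);
    let b = _mm_set1_epi16(4);
    let r = _mm_madd_epi16(a, b);
    assert_eq!(_mm_cvtsi128_si32(r), 3 * 4 + 3 * 4); // 24 per 32-bit lane
}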

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// maximum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i16x8();
        let b = b.as_i16x8();
        transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
    }
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed maximum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
    }
}

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// minimum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i16x8();
        let b = b.as_i16x8();
        transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
    }
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed minimum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
    }
}
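
// A signedness sketch (hypothetical helper): `pmaxub` compares unsigned,
// so a 0xFF byte is 255 (the maximum), not -1.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_max_epu8_is_unsigned() {
    let r = _mm_max_epu8(_mm_set1_epi8(-1), _mm_set1_epi8(1));
    assert_eq!(_mm_cvtsi128_si32(r) as u8, 0xFF); // 255 > 1 unsigned
}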

/// Multiplies the packed 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = simd_cast::<_, i32x8>(a.as_i16x8());
        let b = simd_cast::<_, i32x8>(b.as_i16x8());
        let r = simd_shr(simd_mul(a, b), i32x8::splat(16));
        transmute(simd_cast::<i32x8, i16x8>(r))
    }
}

/// Multiplies the packed unsigned 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = simd_cast::<_, u32x8>(a.as_u16x8());
        let b = simd_cast::<_, u32x8>(b.as_u16x8());
        let r = simd_shr(simd_mul(a, b), u32x8::splat(16));
        transmute(simd_cast::<u32x8, u16x8>(r))
    }
}
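
// A high-half sketch (hypothetical helper): 1000 * 1000 = 1_000_000 =
// 0x000F_4240, whose high 16 bits are 0x000F.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_mulhi_keeps_high_bits() {
    let r = _mm_mulhi_epi16(_mm_set1_epi16(1000), _mm_set1_epi16(1000));
    assert_eq!(_mm_cvtsi128_si32(r) as i16, 0x000F);
}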

/// Multiplies the packed 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// low 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmullw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) }
}

/// Multiplies the low unsigned 32-bit integers from each packed 64-bit element
/// in `a` and `b`.
///
/// Returns the unsigned 64-bit results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_epu32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmuludq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = a.as_u64x2();
        let b = b.as_u64x2();
        let mask = u64x2::splat(u32::MAX.into());
        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
    }
}
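
// A widening-multiply sketch (hypothetical helper): only the low 32-bit
// lane of each 64-bit element is multiplied, yielding a full 64-bit product.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_mul_epu32_widens() {
    let a = _mm_set_epi32(0, -1, 0, -1); // low lanes hold u32::MAX
    let b = _mm_set_epi32(0, 2, 0, 2);
    let r = _mm_mul_epu32(a, b);
    // u32::MAX * 2 = 0x1_FFFF_FFFE does not fit in 32 bits; check the
    // carry in the upper half of the low 64-bit element.
    assert_eq!(_mm_cvtsi128_si32(_mm_srli_si128::<4>(r)), 1);
}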

/// Sums the absolute differences of packed unsigned 8-bit integers.
///
/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
/// and `b`, then horizontally sums each consecutive 8 differences to produce
/// two unsigned 16-bit integers, and packs these unsigned 16-bit integers in
/// the low 16 bits of the returned 64-bit elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sad_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psadbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(psadbw(a.as_u8x16(), b.as_u8x16())) }
}
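
// A SAD sketch (hypothetical helper): |3 - 1| summed over each group of
// eight bytes gives 16 in the low 16 bits of each 64-bit half.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_sad_epu8_sums_diffs() {
    let r = _mm_sad_epu8(_mm_set1_epi8(3), _mm_set1_epi8(1));
    assert_eq!(_mm_cvtsi128_si32(r), 8 * 2);
}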

/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) }
}

/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) }
}

/// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) }
}

/// Subtracts packed 64-bit integers in `b` from packed 64-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) }
}

/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`
/// using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16())) }
}

/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`
/// using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8())) }
}

/// Subtracts packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
/// integers in `a` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16())) }
}

/// Subtracts packed unsigned 16-bit integers in `b` from packed unsigned 16-bit
/// integers in `a` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8())) }
}
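
// An unsigned-saturation sketch (hypothetical helper): 10 - 20 clamps to 0
// rather than wrapping to 246.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_subs_epu8_clamps_at_zero() {
    let r = _mm_subs_epu8(_mm_set1_epi8(10), _mm_set1_epi8(20));
    assert_eq!(_mm_cvtsi128_si32(r), 0); // every lane saturates to 0
}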

/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { _mm_slli_si128_impl::<IMM8>(a) }
}

/// Implementation detail: converts the immediate argument of the
/// `_mm_slli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        if shift > 15 { i } else { 16 - shift + i }
    }
    transmute::<i8x16, _>(simd_shuffle!(
        i8x16::ZERO,
        a.as_i8x16(),
        [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    ))
}
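
// A byte-shift sketch (hypothetical helper): `pslldq` moves whole bytes
// toward the most-significant end, so 32-bit lane 0 ends up in lane 1.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_slli_si128_moves_bytes() {
    let x = _mm_set_epi32(0, 0, 0, 7);
    let y = _mm_slli_si128::<4>(x);
    assert_eq!(_mm_cvtsi128_si32(y), 0); // lane 0 is now zero
    assert_eq!(_mm_cvtsi128_si32(_mm_srli_si128::<4>(y)), 7); // old lane 0
}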

/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bslli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        _mm_slli_si128_impl::<IMM8>(a)
    }
}

/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bsrli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        _mm_srli_si128_impl::<IMM8>(a)
    }
}

/// Shifts packed 16-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 16 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
        }
    }
}

/// Shifts packed 16-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psllw(a.as_i16x8(), count.as_i16x8())) }
}

/// Shifts packed 32-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 32 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
        }
    }
}

/// Shifts packed 32-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(pslld(a.as_i32x4(), count.as_i32x4())) }
}

/// Shifts packed 64-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 64 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
        }
    }
}

/// Shifts packed 64-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psllq(a.as_i64x2(), count.as_i64x2())) }
}

/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16))) }
}

/// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psraw(a.as_i16x8(), count.as_i16x8())) }
}

/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31)))) }
}

/// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psrad(a.as_i32x4(), count.as_i32x4())) }
}
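
// An arithmetic-shift sketch (hypothetical helper): `psraw` copies the sign
// bit, so -8 >> 2 stays negative.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_srai_epi16_keeps_sign() {
    let r = _mm_srai_epi16::<2>(_mm_set1_epi16(-8));
    assert_eq!(_mm_cvtsi128_si32(r) as i16, -2);
}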

/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { _mm_srli_si128_impl::<IMM8>(a) }
}

/// Implementation detail: converts the immediate argument of the
/// `_mm_srli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    const fn mask(shift: i32, i: u32) -> u32 {
        if (shift as u32) > 15 {
            i + 16
        } else {
            i + (shift as u32)
        }
    }
    let x: i8x16 = simd_shuffle!(
        a.as_i8x16(),
        i8x16::ZERO,
        [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    );
    transmute(x)
}

/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 16 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
        }
    }
}

/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psrlw(a.as_i16x8(), count.as_i16x8())) }
}

/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 32 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
        }
    }
}

/// Shifts packed 32-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psrld(a.as_i32x4(), count.as_i32x4())) }
}

/// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 64 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
        }
    }
}

/// Shifts packed 64-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psrlq(a.as_i64x2(), count.as_i64x2())) }
}
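
// A logical-shift sketch (hypothetical helper): `psrlw` shifts in zeros, so
// the same -8 input from the `psraw` sketch above becomes a large positive
// value (0xFFF8 >> 2 = 0x3FFE).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_srli_epi16_shifts_in_zeros() {
    let r = _mm_srli_epi16::<2>(_mm_set1_epi16(-8));
    assert_eq!(_mm_cvtsi128_si32(r) as u16, 0x3FFE);
}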

/// Computes the bitwise AND of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_and(a, b) }
}

/// Computes the bitwise NOT of 128 bits (representing integer data) in `a` and
/// then AND with `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_and(simd_xor(_mm_set1_epi8(-1), a), b) }
}

/// Computes the bitwise OR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_or(a, b) }
}

/// Computes the bitwise XOR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_xor(a, b) }
}
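
// A bit-clear sketch (hypothetical helper): andnot computes `!a & b`, i.e.
// it clears exactly the bits of `b` that are set in `a`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_andnot_clears_bits() {
    let mask = _mm_set1_epi32(0b1100);
    let x = _mm_set1_epi32(0b1010);
    let r = _mm_andnot_si128(mask, x);
    assert_eq!(_mm_cvtsi128_si32(r), 0b0010);
}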

/// Compares packed 8-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
}

/// Compares packed 16-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
}

/// Compares packed 32-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
}

/// Compares packed 8-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
}

/// Compares packed 16-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
}

/// Compares packed 32-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
}

/// Compares packed 8-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
}

/// Compares packed 16-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
}

/// Compares packed 32-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
}
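
// A compare-mask sketch (hypothetical helper): comparisons return all-ones
// (-1) per matching lane and all-zeros otherwise, ready for masking.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_cmpeq_yields_masks() {
    let r = _mm_cmpeq_epi32(_mm_set1_epi32(5), _mm_set1_epi32(5));
    assert_eq!(_mm_cvtsi128_si32(r), -1); // 0xFFFF_FFFF per equal lane
}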

/// Converts the lower two packed 32-bit integers in `a` to packed
/// double-precision (64-bit) floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
    unsafe {
        let a = a.as_i32x4();
        simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1]))
    }
}

/// Returns `a` with its lower element replaced by `b` after converting it to
/// an `f64`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsi2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
    unsafe { simd_insert!(a, 0, b as f64) }
}

/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit)
/// floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
    unsafe { transmute(simd_cast::<_, f32x4>(a.as_i32x4())) }
}

/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtps_epi32(a: __m128) -> __m128i {
    unsafe { transmute(cvtps2dq(a)) }
}
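
// A rounding sketch (hypothetical helper): `cvtps2dq` uses the current MXCSR
// rounding mode; under the default round-to-nearest-even, 2.5 converts to 2
// rather than being truncated.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_cvtps_epi32_rounds_to_even() {
    let r = _mm_cvtps_epi32(_mm_set1_ps(2.5));
    assert_eq!(_mm_cvtsi128_si32(r), 2); // assumes the default rounding mode
}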

/// Returns a vector whose lowest element is `a` and all higher elements are
/// `0`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsi32_si128(a: i32) -> __m128i {
    unsafe { transmute(i32x4::new(a, 0, 0, 0)) }
}

/// Returns the lowest element of `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi128_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
    unsafe { simd_extract!(a.as_i32x4(), 0) }
}

/// Sets packed 64-bit integers with the supplied values, from highest to
/// lowest.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi64x)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
    unsafe { transmute(i64x2::new(e0, e1)) }
}

/// Sets packed 32-bit integers with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    unsafe { transmute(i32x4::new(e0, e1, e2, e3)) }
}

/// Sets packed 16-bit integers with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    unsafe { transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) }
}

/// Sets packed 8-bit integers with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    unsafe {
        #[rustfmt::skip]
        transmute(i8x16::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
        ))
    }
}

/// Broadcasts 64-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi64x)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi64x(a: i64) -> __m128i {
    _mm_set_epi64x(a, a)
}

/// Broadcasts 32-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi32(a: i32) -> __m128i {
    _mm_set_epi32(a, a, a, a)
}

/// Broadcasts 16-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi16(a: i16) -> __m128i {
    _mm_set_epi16(a, a, a, a, a, a, a, a)
}

/// Broadcasts 8-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi8(a: i8) -> __m128i {
    _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
}

/// Sets packed 32-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    _mm_set_epi32(e0, e1, e2, e3)
}

/// Sets packed 16-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
}

/// Sets packed 8-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    #[rustfmt::skip]
    _mm_set_epi8(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    )
}
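
// An argument-order sketch (hypothetical helper): `set` takes values from
// the highest lane down, `setr` from the lowest lane up, so these two calls
// build identical vectors.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_set_vs_setr() {
    let a = _mm_set_epi32(3, 2, 1, 0);
    let b = _mm_setr_epi32(0, 1, 2, 3);
    let eq = _mm_cmpeq_epi32(a, b);
    assert_eq!(_mm_cvtsi128_si32(eq), -1); // lane 0 matches (as do the rest)
}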

/// Returns a vector with all elements set to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setzero_si128() -> __m128i {
    const { unsafe { mem::zeroed() } }
}

/// Loads a 64-bit integer from memory into the first element of the returned
/// vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
    _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
}

/// Loads 128-bits of integer data from memory into a new vector.
///
/// `mem_addr` must be aligned on a 16-byte boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
    *mem_addr
}

/// Loads 128-bits of integer data from memory into a new vector.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
    let mut dst: __m128i = _mm_undefined_si128();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m128i>(),
    );
    dst
}
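
// An unaligned round-trip sketch (hypothetical helper): `movups`/`movdqu`
// tolerate any alignment, so an offset-by-one buffer works.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn example_unaligned_roundtrip() {
    let mut buf = [0u8; 17];
    let v = _mm_set1_epi8(0x5A);
    _mm_storeu_si128(buf.as_mut_ptr().add(1) as *mut __m128i, v);
    let r = _mm_loadu_si128(buf.as_ptr().add(1) as *const __m128i);
    assert_eq!(_mm_cvtsi128_si32(r), 0x5A5A5A5A);
}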
1273 | |
1274 | /// Conditionally store 8-bit integer elements from `a` into memory using |
1275 | /// `mask`. |
1276 | /// |
1277 | /// Elements are not stored when the highest bit is not set in the |
1278 | /// corresponding element. |
1279 | /// |
1280 | /// `mem_addr` should correspond to a 128-bit memory location and does not need |
1281 | /// to be aligned on any particular boundary. |
1282 | /// |
1283 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskmoveu_si128) |
1284 | #[inline] |
1285 | #[target_feature(enable = "sse2")] |
1286 | #[cfg_attr(test, assert_instr(maskmovdqu))] |
1287 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1288 | pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) { |
1289 | maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr) |
1290 | } |
1291 | |
1292 | /// Stores 128-bits of integer data from `a` into memory. |
1293 | /// |
1294 | /// `mem_addr` must be aligned on a 16-byte boundary. |
1295 | /// |
1296 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_si128) |
1297 | #[inline] |
1298 | #[target_feature(enable = "sse2")] |
1299 | #[cfg_attr( |
1300 | all(test, not(all(target_arch = "x86", target_env = "msvc"))), |
1301 | assert_instr(movaps) |
1302 | )] |
1303 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1304 | pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) { |
1305 | *mem_addr = a; |
1306 | } |
1307 | |
1308 | /// Stores 128-bits of integer data from `a` into memory. |
1309 | /// |
1310 | /// `mem_addr` does not need to be aligned on any particular boundary. |
1311 | /// |
1312 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si128) |
1313 | #[inline] |
1314 | #[target_feature(enable = "sse2")] |
1315 | #[cfg_attr(test, assert_instr(movups))] // FIXME movdqu expected |
1316 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1317 | pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) { |
    mem_addr.write_unaligned(a);
1319 | } |
1320 | |
1321 | /// Stores the lower 64-bit integer `a` to a memory location. |
1322 | /// |
1323 | /// `mem_addr` does not need to be aligned on any particular boundary. |
1324 | /// |
1325 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_epi64) |
1326 | #[inline] |
1327 | #[target_feature(enable = "sse2")] |
1328 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1329 | pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) { |
    ptr::copy_nonoverlapping(ptr::addr_of!(a) as *const u8, mem_addr as *mut u8, 8);
1331 | } |
1332 | |
1333 | /// Stores a 128-bit integer vector to a 128-bit aligned memory location. |
1334 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be |
1335 | /// used again soon). |
1336 | /// |
1337 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si128) |
1338 | /// |
1339 | /// # Safety of non-temporal stores |
1340 | /// |
1341 | /// After using this intrinsic, but before any other access to the memory that this intrinsic |
1342 | /// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In |
1343 | /// particular, functions that call this intrinsic should generally call `_mm_sfence` before they |
1344 | /// return. |
1345 | /// |
1346 | /// See [`_mm_sfence`] for details. |
1347 | #[inline] |
1348 | #[target_feature(enable = "sse2")] |
1349 | #[cfg_attr(test, assert_instr(movntdq))] |
1350 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1351 | pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) { |
1352 | crate::arch::asm!( |
1353 | vps!("movntdq", ",{a}"), |
1354 | p = in(reg) mem_addr, |
1355 | a = in(xmm_reg) a, |
1356 | options(nostack, preserves_flags), |
1357 | ); |
1358 | } |
1359 | |
1360 | /// Stores a 32-bit integer value in the specified memory location. |
1361 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be |
1362 | /// used again soon). |
1363 | /// |
1364 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si32) |
1365 | /// |
1366 | /// # Safety of non-temporal stores |
1367 | /// |
1368 | /// After using this intrinsic, but before any other access to the memory that this intrinsic |
1369 | /// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In |
1370 | /// particular, functions that call this intrinsic should generally call `_mm_sfence` before they |
1371 | /// return. |
1372 | /// |
1373 | /// See [`_mm_sfence`] for details. |
1374 | #[inline] |
1375 | #[target_feature(enable = "sse2")] |
1376 | #[cfg_attr(test, assert_instr(movnti))] |
1377 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1378 | pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) { |
1379 | crate::arch::asm!( |
1380 | vps!("movnti", ",{a:e}"), // `:e` for 32bit value |
1381 | p = in(reg) mem_addr, |
1382 | a = in(reg) a, |
1383 | options(nostack, preserves_flags), |
1384 | ); |
1385 | } |
1386 | |
1387 | /// Returns a vector where the low element is extracted from `a` and its upper |
1388 | /// element is zero. |
1389 | /// |
1390 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_epi64) |
1391 | #[inline] |
1392 | #[target_feature(enable = "sse2")] |
1393 | // FIXME movd on msvc, movd on i686 |
1394 | #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movq))] |
1395 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1396 | pub fn _mm_move_epi64(a: __m128i) -> __m128i { |
1397 | unsafe { |
1398 | let r: i64x2 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 2]); |
        transmute(r)
1400 | } |
1401 | } |
1402 | |
1403 | /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers |
1404 | /// using signed saturation. |
1405 | /// |
1406 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi16) |
1407 | #[inline] |
1408 | #[target_feature(enable = "sse2")] |
1409 | #[cfg_attr(test, assert_instr(packsswb))] |
1410 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1411 | pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i { |
    unsafe { transmute(packsswb(a.as_i16x8(), b.as_i16x8())) }
1413 | } |
1414 | |
1415 | /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers |
1416 | /// using signed saturation. |
1417 | /// |
1418 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi32) |
1419 | #[inline] |
1420 | #[target_feature(enable = "sse2")] |
1421 | #[cfg_attr(test, assert_instr(packssdw))] |
1422 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1423 | pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i { |
    unsafe { transmute(packssdw(a.as_i32x4(), b.as_i32x4())) }
1425 | } |
1426 | |
1427 | /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers |
1428 | /// using unsigned saturation. |
1429 | /// |
1430 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi16) |
1431 | #[inline] |
1432 | #[target_feature(enable = "sse2")] |
1433 | #[cfg_attr(test, assert_instr(packuswb))] |
1434 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1435 | pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i { |
    unsafe { transmute(packuswb(a.as_i16x8(), b.as_i16x8())) }
1437 | } |
1438 | |
1439 | /// Returns the `imm8` element of `a`. |
1440 | /// |
1441 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi16) |
1442 | #[inline] |
1443 | #[target_feature(enable = "sse2")] |
1444 | #[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))] |
1445 | #[rustc_legacy_const_generics(1)] |
1446 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1447 | pub fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 { |
1448 | static_assert_uimm_bits!(IMM8, 3); |
1449 | unsafe { simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32 } |
1450 | } |
1451 | |
1452 | /// Returns a new vector where the `imm8` element of `a` is replaced with `i`. |
1453 | /// |
1454 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi16) |
1455 | #[inline] |
1456 | #[target_feature(enable = "sse2")] |
1457 | #[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))] |
1458 | #[rustc_legacy_const_generics(2)] |
1459 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1460 | pub fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i { |
1461 | static_assert_uimm_bits!(IMM8, 3); |
    unsafe { transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16)) }
1463 | } |
1464 | |
1465 | /// Returns a mask of the most significant bit of each element in `a`. |
1466 | /// |
1467 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_epi8) |
1468 | #[inline] |
1469 | #[target_feature(enable = "sse2")] |
1470 | #[cfg_attr(test, assert_instr(pmovmskb))] |
1471 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1472 | pub fn _mm_movemask_epi8(a: __m128i) -> i32 { |
1473 | unsafe { |
1474 | let z: i8x16 = i8x16::ZERO; |
        let m: i8x16 = simd_lt(a.as_i8x16(), z);
1476 | simd_bitmask::<_, u16>(m) as u32 as i32 |
1477 | } |
1478 | } |
1479 | |
1480 | /// Shuffles 32-bit integers in `a` using the control in `IMM8`. |
1481 | /// |
1482 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi32) |
1483 | #[inline] |
1484 | #[target_feature(enable = "sse2")] |
1485 | #[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))] |
1486 | #[rustc_legacy_const_generics(1)] |
1487 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1488 | pub fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i { |
1489 | static_assert_uimm_bits!(IMM8, 8); |
1490 | unsafe { |
1491 | let a: i32x4 = a.as_i32x4(); |
1492 | let x: i32x4 = simd_shuffle!( |
1493 | a, |
1494 | a, |
1495 | [ |
1496 | IMM8 as u32 & 0b11, |
1497 | (IMM8 as u32 >> 2) & 0b11, |
1498 | (IMM8 as u32 >> 4) & 0b11, |
1499 | (IMM8 as u32 >> 6) & 0b11, |
1500 | ], |
1501 | ); |
        transmute(x)
1503 | } |
1504 | } |
1505 | |
1506 | /// Shuffles 16-bit integers in the high 64 bits of `a` using the control in |
1507 | /// `IMM8`. |
1508 | /// |
/// Puts the results in the high 64 bits of the returned vector, with the low 64
1510 | /// bits being copied from `a`. |
1511 | /// |
1512 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflehi_epi16) |
1513 | #[inline] |
1514 | #[target_feature(enable = "sse2")] |
1515 | #[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))] |
1516 | #[rustc_legacy_const_generics(1)] |
1517 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1518 | pub fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i { |
1519 | static_assert_uimm_bits!(IMM8, 8); |
1520 | unsafe { |
1521 | let a: i16x8 = a.as_i16x8(); |
1522 | let x: i16x8 = simd_shuffle!( |
1523 | a, |
1524 | a, |
1525 | [ |
1526 | 0, |
1527 | 1, |
1528 | 2, |
1529 | 3, |
1530 | (IMM8 as u32 & 0b11) + 4, |
1531 | ((IMM8 as u32 >> 2) & 0b11) + 4, |
1532 | ((IMM8 as u32 >> 4) & 0b11) + 4, |
1533 | ((IMM8 as u32 >> 6) & 0b11) + 4, |
1534 | ], |
1535 | ); |
        transmute(x)
1537 | } |
1538 | } |
1539 | |
1540 | /// Shuffles 16-bit integers in the low 64 bits of `a` using the control in |
1541 | /// `IMM8`. |
1542 | /// |
/// Puts the results in the low 64 bits of the returned vector, with the high 64
1544 | /// bits being copied from `a`. |
1545 | /// |
1546 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflelo_epi16) |
1547 | #[inline] |
1548 | #[target_feature(enable = "sse2")] |
1549 | #[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))] |
1550 | #[rustc_legacy_const_generics(1)] |
1551 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1552 | pub fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i { |
1553 | static_assert_uimm_bits!(IMM8, 8); |
1554 | unsafe { |
1555 | let a: i16x8 = a.as_i16x8(); |
1556 | let x: i16x8 = simd_shuffle!( |
1557 | a, |
1558 | a, |
1559 | [ |
1560 | IMM8 as u32 & 0b11, |
1561 | (IMM8 as u32 >> 2) & 0b11, |
1562 | (IMM8 as u32 >> 4) & 0b11, |
1563 | (IMM8 as u32 >> 6) & 0b11, |
1564 | 4, |
1565 | 5, |
1566 | 6, |
1567 | 7, |
1568 | ], |
1569 | ); |
        transmute(x)
1571 | } |
1572 | } |
1573 | |
/// Unpacks and interleaves 8-bit integers from the high half of `a` and `b`.
1575 | /// |
1576 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi8) |
1577 | #[inline] |
1578 | #[target_feature(enable = "sse2")] |
1579 | #[cfg_attr(test, assert_instr(punpckhbw))] |
1580 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1581 | pub fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i { |
1582 | unsafe { |
        transmute::<i8x16, _>(simd_shuffle!(
1584 | a.as_i8x16(), |
1585 | b.as_i8x16(), |
1586 | [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31], |
1587 | )) |
1588 | } |
1589 | } |
1590 | |
/// Unpacks and interleaves 16-bit integers from the high half of `a` and `b`.
1592 | /// |
1593 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi16) |
1594 | #[inline] |
1595 | #[target_feature(enable = "sse2")] |
1596 | #[cfg_attr(test, assert_instr(punpckhwd))] |
1597 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1598 | pub fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i { |
1599 | unsafe { |
1600 | let x: i16x8 = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]); |
        transmute::<i16x8, _>(x)
1602 | } |
1603 | } |
1604 | |
/// Unpacks and interleaves 32-bit integers from the high half of `a` and `b`.
1606 | /// |
1607 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi32) |
1608 | #[inline] |
1609 | #[target_feature(enable = "sse2")] |
1610 | #[cfg_attr(test, assert_instr(unpckhps))] |
1611 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1612 | pub fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i { |
    unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) }
1614 | } |
1615 | |
/// Unpacks and interleaves 64-bit integers from the high half of `a` and `b`.
1617 | /// |
1618 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi64) |
1619 | #[inline] |
1620 | #[target_feature(enable = "sse2")] |
1621 | #[cfg_attr(test, assert_instr(unpckhpd))] |
1622 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1623 | pub fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i { |
    unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3])) }
1625 | } |
1626 | |
/// Unpacks and interleaves 8-bit integers from the low half of `a` and `b`.
1628 | /// |
1629 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi8) |
1630 | #[inline] |
1631 | #[target_feature(enable = "sse2")] |
1632 | #[cfg_attr(test, assert_instr(punpcklbw))] |
1633 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1634 | pub fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i { |
1635 | unsafe { |
        transmute::<i8x16, _>(simd_shuffle!(
1637 | a.as_i8x16(), |
1638 | b.as_i8x16(), |
1639 | [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23], |
1640 | )) |
1641 | } |
1642 | } |
1643 | |
/// Unpacks and interleaves 16-bit integers from the low half of `a` and `b`.
1645 | /// |
1646 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi16) |
1647 | #[inline] |
1648 | #[target_feature(enable = "sse2")] |
1649 | #[cfg_attr(test, assert_instr(punpcklwd))] |
1650 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1651 | pub fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i { |
1652 | unsafe { |
1653 | let x: i16x8 = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]); |
        transmute::<i16x8, _>(x)
1655 | } |
1656 | } |
1657 | |
/// Unpacks and interleaves 32-bit integers from the low half of `a` and `b`.
1659 | /// |
1660 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi32) |
1661 | #[inline] |
1662 | #[target_feature(enable = "sse2")] |
1663 | #[cfg_attr(test, assert_instr(unpcklps))] |
1664 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1665 | pub fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i { |
    unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) }
1667 | } |
1668 | |
/// Unpacks and interleaves 64-bit integers from the low half of `a` and `b`.
1670 | /// |
1671 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi64) |
1672 | #[inline] |
1673 | #[target_feature(enable = "sse2")] |
1674 | #[cfg_attr(test, assert_instr(movlhps))] |
1675 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1676 | pub fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i { |
    unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2])) }
1678 | } |
1679 | |
1680 | /// Returns a new vector with the low element of `a` replaced by the sum of the |
1681 | /// low elements of `a` and `b`. |
1682 | /// |
1683 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_sd) |
1684 | #[inline] |
1685 | #[target_feature(enable = "sse2")] |
1686 | #[cfg_attr(test, assert_instr(addsd))] |
1687 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1688 | pub fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d { |
1689 | unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) } |
1690 | } |
1691 | |
1692 | /// Adds packed double-precision (64-bit) floating-point elements in `a` and |
1693 | /// `b`. |
1694 | /// |
1695 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_pd) |
1696 | #[inline] |
1697 | #[target_feature(enable = "sse2")] |
1698 | #[cfg_attr(test, assert_instr(addpd))] |
1699 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1700 | pub fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { simd_add(a, b) }
1702 | } |
1703 | |
1704 | /// Returns a new vector with the low element of `a` replaced by the result of |
/// dividing the lower element of `a` by the lower element of `b`.
1706 | /// |
1707 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_sd) |
1708 | #[inline] |
1709 | #[target_feature(enable = "sse2")] |
1710 | #[cfg_attr(test, assert_instr(divsd))] |
1711 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1712 | pub fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d { |
1713 | unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) } |
1714 | } |
1715 | |
/// Divides packed double-precision (64-bit) floating-point elements in `a` by
1717 | /// packed elements in `b`. |
1718 | /// |
1719 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_pd) |
1720 | #[inline] |
1721 | #[target_feature(enable = "sse2")] |
1722 | #[cfg_attr(test, assert_instr(divpd))] |
1723 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1724 | pub fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { simd_div(a, b) }
1726 | } |
1727 | |
1728 | /// Returns a new vector with the low element of `a` replaced by the maximum |
1729 | /// of the lower elements of `a` and `b`. |
1730 | /// |
1731 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_sd) |
1732 | #[inline] |
1733 | #[target_feature(enable = "sse2")] |
1734 | #[cfg_attr(test, assert_instr(maxsd))] |
1735 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1736 | pub fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d { |
1737 | unsafe { maxsd(a, b) } |
1738 | } |
1739 | |
1740 | /// Returns a new vector with the maximum values from corresponding elements in |
1741 | /// `a` and `b`. |
1742 | /// |
1743 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_pd) |
1744 | #[inline] |
1745 | #[target_feature(enable = "sse2")] |
1746 | #[cfg_attr(test, assert_instr(maxpd))] |
1747 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1748 | pub fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d { |
1749 | unsafe { maxpd(a, b) } |
1750 | } |
1751 | |
1752 | /// Returns a new vector with the low element of `a` replaced by the minimum |
1753 | /// of the lower elements of `a` and `b`. |
1754 | /// |
1755 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_sd) |
1756 | #[inline] |
1757 | #[target_feature(enable = "sse2")] |
1758 | #[cfg_attr(test, assert_instr(minsd))] |
1759 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1760 | pub fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d { |
1761 | unsafe { minsd(a, b) } |
1762 | } |
1763 | |
1764 | /// Returns a new vector with the minimum values from corresponding elements in |
1765 | /// `a` and `b`. |
1766 | /// |
1767 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_pd) |
1768 | #[inline] |
1769 | #[target_feature(enable = "sse2")] |
1770 | #[cfg_attr(test, assert_instr(minpd))] |
1771 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1772 | pub fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d { |
1773 | unsafe { minpd(a, b) } |
1774 | } |
1775 | |
1776 | /// Returns a new vector with the low element of `a` replaced by multiplying the |
1777 | /// low elements of `a` and `b`. |
1778 | /// |
1779 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_sd) |
1780 | #[inline] |
1781 | #[target_feature(enable = "sse2")] |
1782 | #[cfg_attr(test, assert_instr(mulsd))] |
1783 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1784 | pub fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d { |
1785 | unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) } |
1786 | } |
1787 | |
1788 | /// Multiplies packed double-precision (64-bit) floating-point elements in `a` |
1789 | /// and `b`. |
1790 | /// |
1791 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_pd) |
1792 | #[inline] |
1793 | #[target_feature(enable = "sse2")] |
1794 | #[cfg_attr(test, assert_instr(mulpd))] |
1795 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1796 | pub fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { simd_mul(a, b) }
1798 | } |
1799 | |
1800 | /// Returns a new vector with the low element of `a` replaced by the square |
/// root of the lower element of `b`.
1802 | /// |
1803 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_sd) |
1804 | #[inline] |
1805 | #[target_feature(enable = "sse2")] |
1806 | #[cfg_attr(test, assert_instr(sqrtsd))] |
1807 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1808 | pub fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d { |
1809 | unsafe { simd_insert!(a, 0, sqrtf64(_mm_cvtsd_f64(b))) } |
1810 | } |
1811 | |
1812 | /// Returns a new vector with the square root of each of the values in `a`. |
1813 | /// |
1814 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_pd) |
1815 | #[inline] |
1816 | #[target_feature(enable = "sse2")] |
1817 | #[cfg_attr(test, assert_instr(sqrtpd))] |
1818 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1819 | pub fn _mm_sqrt_pd(a: __m128d) -> __m128d { |
1820 | unsafe { simd_fsqrt(a) } |
1821 | } |
1822 | |
/// Returns a new vector with the low element of `a` replaced by subtracting the
/// low element of `b` from the low element of `a`.
1825 | /// |
1826 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_sd) |
1827 | #[inline] |
1828 | #[target_feature(enable = "sse2")] |
1829 | #[cfg_attr(test, assert_instr(subsd))] |
1830 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1831 | pub fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d { |
1832 | unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) } |
1833 | } |
1834 | |
/// Subtracts packed double-precision (64-bit) floating-point elements in `b`
1836 | /// from `a`. |
1837 | /// |
1838 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_pd) |
1839 | #[inline] |
1840 | #[target_feature(enable = "sse2")] |
1841 | #[cfg_attr(test, assert_instr(subpd))] |
1842 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1843 | pub fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { simd_sub(a, b) }
1845 | } |
1846 | |
1847 | /// Computes the bitwise AND of packed double-precision (64-bit) floating-point |
1848 | /// elements in `a` and `b`. |
1849 | /// |
1850 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_pd) |
1851 | #[inline] |
1852 | #[target_feature(enable = "sse2")] |
1853 | #[cfg_attr(test, assert_instr(andps))] |
1854 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1855 | pub fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d { |
1856 | unsafe { |
        let a: __m128i = transmute(a);
        let b: __m128i = transmute(b);
        transmute(_mm_and_si128(a, b))
1860 | } |
1861 | } |
1862 | |
1863 | /// Computes the bitwise NOT of `a` and then AND with `b`. |
1864 | /// |
1865 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_pd) |
1866 | #[inline] |
1867 | #[target_feature(enable = "sse2")] |
1868 | #[cfg_attr(test, assert_instr(andnps))] |
1869 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1870 | pub fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d { |
1871 | unsafe { |
        let a: __m128i = transmute(a);
        let b: __m128i = transmute(b);
        transmute(_mm_andnot_si128(a, b))
1875 | } |
1876 | } |
1877 | |
1878 | /// Computes the bitwise OR of `a` and `b`. |
1879 | /// |
1880 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_pd) |
1881 | #[inline] |
1882 | #[target_feature(enable = "sse2")] |
1883 | #[cfg_attr(test, assert_instr(orps))] |
1884 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1885 | pub fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d { |
1886 | unsafe { |
        let a: __m128i = transmute(a);
        let b: __m128i = transmute(b);
        transmute(_mm_or_si128(a, b))
1890 | } |
1891 | } |
1892 | |
1893 | /// Computes the bitwise XOR of `a` and `b`. |
1894 | /// |
1895 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_pd) |
1896 | #[inline] |
1897 | #[target_feature(enable = "sse2")] |
1898 | #[cfg_attr(test, assert_instr(xorps))] |
1899 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1900 | pub fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d { |
1901 | unsafe { |
        let a: __m128i = transmute(a);
        let b: __m128i = transmute(b);
        transmute(_mm_xor_si128(a, b))
1905 | } |
1906 | } |
1907 | |
1908 | /// Returns a new vector with the low element of `a` replaced by the equality |
1909 | /// comparison of the lower elements of `a` and `b`. |
1910 | /// |
1911 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_sd) |
1912 | #[inline] |
1913 | #[target_feature(enable = "sse2")] |
1914 | #[cfg_attr(test, assert_instr(cmpeqsd))] |
1915 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1916 | pub fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmpsd(a, b, 0) }
1918 | } |
1919 | |
1920 | /// Returns a new vector with the low element of `a` replaced by the less-than |
1921 | /// comparison of the lower elements of `a` and `b`. |
1922 | /// |
1923 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_sd) |
1924 | #[inline] |
1925 | #[target_feature(enable = "sse2")] |
1926 | #[cfg_attr(test, assert_instr(cmpltsd))] |
1927 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1928 | pub fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmpsd(a, b, 1) }
1930 | } |
1931 | |
1932 | /// Returns a new vector with the low element of `a` replaced by the |
1933 | /// less-than-or-equal comparison of the lower elements of `a` and `b`. |
1934 | /// |
1935 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_sd) |
1936 | #[inline] |
1937 | #[target_feature(enable = "sse2")] |
1938 | #[cfg_attr(test, assert_instr(cmplesd))] |
1939 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1940 | pub fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmpsd(a, b, 2) }
1942 | } |
1943 | |
1944 | /// Returns a new vector with the low element of `a` replaced by the |
1945 | /// greater-than comparison of the lower elements of `a` and `b`. |
1946 | /// |
1947 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_sd) |
1948 | #[inline] |
1949 | #[target_feature(enable = "sse2")] |
1950 | #[cfg_attr(test, assert_instr(cmpltsd))] |
1951 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1952 | pub fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d { |
1953 | unsafe { simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64)) } |
1954 | } |
1955 | |
1956 | /// Returns a new vector with the low element of `a` replaced by the |
1957 | /// greater-than-or-equal comparison of the lower elements of `a` and `b`. |
1958 | /// |
1959 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_sd) |
1960 | #[inline] |
1961 | #[target_feature(enable = "sse2")] |
1962 | #[cfg_attr(test, assert_instr(cmplesd))] |
1963 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1964 | pub fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d { |
1965 | unsafe { simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64)) } |
1966 | } |
1967 | |
1968 | /// Returns a new vector with the low element of `a` replaced by the result |
1969 | /// of comparing both of the lower elements of `a` and `b` to `NaN`. If |
/// neither is equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0`
1971 | /// otherwise. |
1972 | /// |
1973 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_sd) |
1974 | #[inline] |
1975 | #[target_feature(enable = "sse2")] |
1976 | #[cfg_attr(test, assert_instr(cmpordsd))] |
1977 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1978 | pub fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmpsd(a, b, 7) }
1980 | } |
1981 | |
1982 | /// Returns a new vector with the low element of `a` replaced by the result of |
1983 | /// comparing both of the lower elements of `a` and `b` to `NaN`. If either is |
1984 | /// equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise. |
1985 | /// |
1986 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_sd) |
1987 | #[inline] |
1988 | #[target_feature(enable = "sse2")] |
1989 | #[cfg_attr(test, assert_instr(cmpunordsd))] |
1990 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1991 | pub fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmpsd(a, b, 3) }
1993 | } |
1994 | |
1995 | /// Returns a new vector with the low element of `a` replaced by the not-equal |
1996 | /// comparison of the lower elements of `a` and `b`. |
1997 | /// |
1998 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_sd) |
1999 | #[inline] |
2000 | #[target_feature(enable = "sse2")] |
2001 | #[cfg_attr(test, assert_instr(cmpneqsd))] |
2002 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2003 | pub fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmpsd(a, b, 4) }
2005 | } |
2006 | |
2007 | /// Returns a new vector with the low element of `a` replaced by the |
2008 | /// not-less-than comparison of the lower elements of `a` and `b`. |
2009 | /// |
2010 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_sd) |
2011 | #[inline] |
2012 | #[target_feature(enable = "sse2")] |
2013 | #[cfg_attr(test, assert_instr(cmpnltsd))] |
2014 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2015 | pub fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmpsd(a, b, 5) }
2017 | } |
2018 | |
2019 | /// Returns a new vector with the low element of `a` replaced by the |
2020 | /// not-less-than-or-equal comparison of the lower elements of `a` and `b`. |
2021 | /// |
2022 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_sd) |
2023 | #[inline] |
2024 | #[target_feature(enable = "sse2")] |
2025 | #[cfg_attr(test, assert_instr(cmpnlesd))] |
2026 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2027 | pub fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmpsd(a, b, 6) }
2029 | } |
2030 | |
2031 | /// Returns a new vector with the low element of `a` replaced by the |
2032 | /// not-greater-than comparison of the lower elements of `a` and `b`. |
2033 | /// |
2034 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_sd) |
2035 | #[inline] |
2036 | #[target_feature(enable = "sse2")] |
2037 | #[cfg_attr(test, assert_instr(cmpnltsd))] |
2038 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2039 | pub fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d { |
2040 | unsafe { simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64)) } |
2041 | } |
2042 | |
2043 | /// Returns a new vector with the low element of `a` replaced by the |
2044 | /// not-greater-than-or-equal comparison of the lower elements of `a` and `b`. |
2045 | /// |
2046 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_sd) |
2047 | #[inline] |
2048 | #[target_feature(enable = "sse2")] |
2049 | #[cfg_attr(test, assert_instr(cmpnlesd))] |
2050 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2051 | pub fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d { |
2052 | unsafe { simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64)) } |
2053 | } |
2054 | |
2055 | /// Compares corresponding elements in `a` and `b` for equality. |
2056 | /// |
2057 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_pd) |
2058 | #[inline] |
2059 | #[target_feature(enable = "sse2")] |
2060 | #[cfg_attr(test, assert_instr(cmpeqpd))] |
2061 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2062 | pub fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmppd(a, b, 0) }
2064 | } |
2065 | |
2066 | /// Compares corresponding elements in `a` and `b` for less-than. |
2067 | /// |
2068 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_pd) |
2069 | #[inline] |
2070 | #[target_feature(enable = "sse2")] |
2071 | #[cfg_attr(test, assert_instr(cmpltpd))] |
2072 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2073 | pub fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmppd(a, b, 1) }
2075 | } |
2076 | |
/// Compares corresponding elements in `a` and `b` for less-than-or-equal.
2078 | /// |
2079 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_pd) |
2080 | #[inline] |
2081 | #[target_feature(enable = "sse2")] |
2082 | #[cfg_attr(test, assert_instr(cmplepd))] |
2083 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2084 | pub fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmppd(a, b, 2) }
2086 | } |
2087 | |
2088 | /// Compares corresponding elements in `a` and `b` for greater-than. |
2089 | /// |
2090 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_pd) |
2091 | #[inline] |
2092 | #[target_feature(enable = "sse2")] |
2093 | #[cfg_attr(test, assert_instr(cmpltpd))] |
2094 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2095 | pub fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d { |
    _mm_cmplt_pd(b, a)
2097 | } |
2098 | |
2099 | /// Compares corresponding elements in `a` and `b` for greater-than-or-equal. |
2100 | /// |
2101 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_pd) |
2102 | #[inline] |
2103 | #[target_feature(enable = "sse2")] |
2104 | #[cfg_attr(test, assert_instr(cmplepd))] |
2105 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2106 | pub fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d { |
    _mm_cmple_pd(b, a)
2108 | } |
2109 | |
2110 | /// Compares corresponding elements in `a` and `b` to see if neither is `NaN`. |
2111 | /// |
2112 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_pd) |
2113 | #[inline] |
2114 | #[target_feature(enable = "sse2")] |
2115 | #[cfg_attr(test, assert_instr(cmpordpd))] |
2116 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2117 | pub fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmppd(a, b, 7) }
2119 | } |
2120 | |
2121 | /// Compares corresponding elements in `a` and `b` to see if either is `NaN`. |
2122 | /// |
2123 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_pd) |
2124 | #[inline] |
2125 | #[target_feature(enable = "sse2")] |
2126 | #[cfg_attr(test, assert_instr(cmpunordpd))] |
2127 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2128 | pub fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmppd(a, b, 3) }
2130 | } |
2131 | |
2132 | /// Compares corresponding elements in `a` and `b` for not-equal. |
2133 | /// |
2134 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_pd) |
2135 | #[inline] |
2136 | #[target_feature(enable = "sse2")] |
2137 | #[cfg_attr(test, assert_instr(cmpneqpd))] |
2138 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2139 | pub fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmppd(a, b, 4) }
2141 | } |
2142 | |
2143 | /// Compares corresponding elements in `a` and `b` for not-less-than. |
2144 | /// |
2145 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_pd) |
2146 | #[inline] |
2147 | #[target_feature(enable = "sse2")] |
2148 | #[cfg_attr(test, assert_instr(cmpnltpd))] |
2149 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2150 | pub fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmppd(a, b, 5) }
2152 | } |
2153 | |
2154 | /// Compares corresponding elements in `a` and `b` for not-less-than-or-equal. |
2155 | /// |
2156 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_pd) |
2157 | #[inline] |
2158 | #[target_feature(enable = "sse2")] |
2159 | #[cfg_attr(test, assert_instr(cmpnlepd))] |
2160 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2161 | pub fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmppd(a, b, 6) }
2163 | } |
2164 | |
2165 | /// Compares corresponding elements in `a` and `b` for not-greater-than. |
2166 | /// |
2167 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_pd) |
2168 | #[inline] |
2169 | #[target_feature(enable = "sse2")] |
2170 | #[cfg_attr(test, assert_instr(cmpnltpd))] |
2171 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2172 | pub fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d { |
    _mm_cmpnlt_pd(b, a)
2174 | } |
2175 | |
2176 | /// Compares corresponding elements in `a` and `b` for |
2177 | /// not-greater-than-or-equal. |
2178 | /// |
2179 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_pd) |
2180 | #[inline] |
2181 | #[target_feature(enable = "sse2")] |
2182 | #[cfg_attr(test, assert_instr(cmpnlepd))] |
2183 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2184 | pub fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d { |
    _mm_cmpnle_pd(b, a)
2186 | } |
2187 | |
2188 | /// Compares the lower element of `a` and `b` for equality. |
2189 | /// |
2190 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comieq_sd) |
2191 | #[inline] |
2192 | #[target_feature(enable = "sse2")] |
2193 | #[cfg_attr(test, assert_instr(comisd))] |
2194 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2195 | pub fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 { |
2196 | unsafe { comieqsd(a, b) } |
2197 | } |
2198 | |
2199 | /// Compares the lower element of `a` and `b` for less-than. |
2200 | /// |
2201 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comilt_sd) |
2202 | #[inline] |
2203 | #[target_feature(enable = "sse2")] |
2204 | #[cfg_attr(test, assert_instr(comisd))] |
2205 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2206 | pub fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 { |
2207 | unsafe { comiltsd(a, b) } |
2208 | } |
2209 | |
2210 | /// Compares the lower element of `a` and `b` for less-than-or-equal. |
2211 | /// |
2212 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comile_sd) |
2213 | #[inline] |
2214 | #[target_feature(enable = "sse2")] |
2215 | #[cfg_attr(test, assert_instr(comisd))] |
2216 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2217 | pub fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 { |
2218 | unsafe { comilesd(a, b) } |
2219 | } |
2220 | |
2221 | /// Compares the lower element of `a` and `b` for greater-than. |
2222 | /// |
2223 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comigt_sd) |
2224 | #[inline] |
2225 | #[target_feature(enable = "sse2")] |
2226 | #[cfg_attr(test, assert_instr(comisd))] |
2227 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2228 | pub fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 { |
2229 | unsafe { comigtsd(a, b) } |
2230 | } |
2231 | |
2232 | /// Compares the lower element of `a` and `b` for greater-than-or-equal. |
2233 | /// |
2234 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comige_sd) |
2235 | #[inline] |
2236 | #[target_feature(enable = "sse2")] |
2237 | #[cfg_attr(test, assert_instr(comisd))] |
2238 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2239 | pub fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 { |
2240 | unsafe { comigesd(a, b) } |
2241 | } |
2242 | |
2243 | /// Compares the lower element of `a` and `b` for not-equal. |
2244 | /// |
2245 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comineq_sd) |
2246 | #[inline] |
2247 | #[target_feature(enable = "sse2")] |
2248 | #[cfg_attr(test, assert_instr(comisd))] |
2249 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2250 | pub fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 { |
2251 | unsafe { comineqsd(a, b) } |
2252 | } |
2253 | |
2254 | /// Compares the lower element of `a` and `b` for equality. |
2255 | /// |
2256 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomieq_sd) |
2257 | #[inline] |
2258 | #[target_feature(enable = "sse2")] |
2259 | #[cfg_attr(test, assert_instr(ucomisd))] |
2260 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2261 | pub fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 { |
2262 | unsafe { ucomieqsd(a, b) } |
2263 | } |
2264 | |
2265 | /// Compares the lower element of `a` and `b` for less-than. |
2266 | /// |
2267 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomilt_sd) |
2268 | #[inline] |
2269 | #[target_feature(enable = "sse2")] |
2270 | #[cfg_attr(test, assert_instr(ucomisd))] |
2271 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2272 | pub fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 { |
2273 | unsafe { ucomiltsd(a, b) } |
2274 | } |
2275 | |
2276 | /// Compares the lower element of `a` and `b` for less-than-or-equal. |
2277 | /// |
2278 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomile_sd) |
2279 | #[inline] |
2280 | #[target_feature(enable = "sse2")] |
2281 | #[cfg_attr(test, assert_instr(ucomisd))] |
2282 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2283 | pub fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 { |
2284 | unsafe { ucomilesd(a, b) } |
2285 | } |
2286 | |
2287 | /// Compares the lower element of `a` and `b` for greater-than. |
2288 | /// |
2289 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomigt_sd) |
2290 | #[inline] |
2291 | #[target_feature(enable = "sse2")] |
2292 | #[cfg_attr(test, assert_instr(ucomisd))] |
2293 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2294 | pub fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 { |
2295 | unsafe { ucomigtsd(a, b) } |
2296 | } |
2297 | |
2298 | /// Compares the lower element of `a` and `b` for greater-than-or-equal. |
2299 | /// |
2300 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomige_sd) |
2301 | #[inline] |
2302 | #[target_feature(enable = "sse2")] |
2303 | #[cfg_attr(test, assert_instr(ucomisd))] |
2304 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2305 | pub fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 { |
2306 | unsafe { ucomigesd(a, b) } |
2307 | } |
2308 | |
2309 | /// Compares the lower element of `a` and `b` for not-equal. |
2310 | /// |
2311 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomineq_sd) |
2312 | #[inline] |
2313 | #[target_feature(enable = "sse2")] |
2314 | #[cfg_attr(test, assert_instr(ucomisd))] |
2315 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2316 | pub fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 { |
2317 | unsafe { ucomineqsd(a, b) } |
2318 | } |
2319 | |
2320 | /// Converts packed double-precision (64-bit) floating-point elements in `a` to |
/// packed single-precision (32-bit) floating-point elements.
2322 | /// |
2323 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_ps) |
2324 | #[inline] |
2325 | #[target_feature(enable = "sse2")] |
2326 | #[cfg_attr(test, assert_instr(cvtpd2ps))] |
2327 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2328 | pub fn _mm_cvtpd_ps(a: __m128d) -> __m128 { |
2329 | unsafe { |
2330 | let r: f32x2 = simd_cast::<_, f32x2>(a.as_f64x2()); |
2331 | let zero: f32x2 = f32x2::ZERO; |
        transmute::<f32x4, _>(simd_shuffle!(r, zero, [0, 1, 2, 3]))
2333 | } |
2334 | } |
2335 | |
2336 | /// Converts packed single-precision (32-bit) floating-point elements in `a` to |
/// packed double-precision (64-bit) floating-point elements.
2339 | /// |
2340 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_pd) |
2341 | #[inline] |
2342 | #[target_feature(enable = "sse2")] |
2343 | #[cfg_attr(test, assert_instr(cvtps2pd))] |
2344 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2345 | pub fn _mm_cvtps_pd(a: __m128) -> __m128d { |
2346 | unsafe { |
2347 | let a: f32x4 = a.as_f32x4(); |
        transmute(simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
2349 | } |
2350 | } |
2351 | |
2352 | /// Converts packed double-precision (64-bit) floating-point elements in `a` to |
2353 | /// packed 32-bit integers. |
2354 | /// |
2355 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epi32) |
2356 | #[inline] |
2357 | #[target_feature(enable = "sse2")] |
2358 | #[cfg_attr(test, assert_instr(cvtpd2dq))] |
2359 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2360 | pub fn _mm_cvtpd_epi32(a: __m128d) -> __m128i { |
    unsafe { transmute(cvtpd2dq(a)) }
2362 | } |
2363 | |
/// Converts the lower double-precision (64-bit) floating-point element in `a` to
2365 | /// a 32-bit integer. |
2366 | /// |
2367 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_si32) |
2368 | #[inline] |
2369 | #[target_feature(enable = "sse2")] |
2370 | #[cfg_attr(test, assert_instr(cvtsd2si))] |
2371 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2372 | pub fn _mm_cvtsd_si32(a: __m128d) -> i32 { |
2373 | unsafe { cvtsd2si(a) } |
2374 | } |
2375 | |
2376 | /// Converts the lower double-precision (64-bit) floating-point element in `b` |
/// to a single-precision (32-bit) floating-point element, stores the result in
/// the lower element of the return value, and copies the upper element from `a`
/// to the upper element of the return value.
2380 | /// |
2381 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_ss) |
2382 | #[inline] |
2383 | #[target_feature(enable = "sse2")] |
2384 | #[cfg_attr(test, assert_instr(cvtsd2ss))] |
2385 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2386 | pub fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 { |
2387 | unsafe { cvtsd2ss(a, b) } |
2388 | } |
2389 | |
2390 | /// Returns the lower double-precision (64-bit) floating-point element of `a`. |
2391 | /// |
2392 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_f64) |
2393 | #[inline] |
2394 | #[target_feature(enable = "sse2")] |
2395 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2396 | pub fn _mm_cvtsd_f64(a: __m128d) -> f64 { |
2397 | unsafe { simd_extract!(a, 0) } |
2398 | } |
2399 | |
2400 | /// Converts the lower single-precision (32-bit) floating-point element in `b` |
/// to a double-precision (64-bit) floating-point element, stores the result in
/// the lower element of the return value, and copies the upper element from `a`
/// to the upper element of the return value.
2404 | /// |
2405 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_sd) |
2406 | #[inline] |
2407 | #[target_feature(enable = "sse2")] |
2408 | #[cfg_attr(test, assert_instr(cvtss2sd))] |
2409 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2410 | pub fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d { |
2411 | unsafe { cvtss2sd(a, b) } |
2412 | } |
2413 | |
2414 | /// Converts packed double-precision (64-bit) floating-point elements in `a` to |
2415 | /// packed 32-bit integers with truncation. |
2416 | /// |
2417 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epi32) |
2418 | #[inline] |
2419 | #[target_feature(enable = "sse2")] |
2420 | #[cfg_attr(test, assert_instr(cvttpd2dq))] |
2421 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2422 | pub fn _mm_cvttpd_epi32(a: __m128d) -> __m128i { |
    unsafe { transmute(cvttpd2dq(a)) }
2424 | } |
2425 | |
2426 | /// Converts the lower double-precision (64-bit) floating-point element in `a` |
2427 | /// to a 32-bit integer with truncation. |
2428 | /// |
2429 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_si32) |
2430 | #[inline] |
2431 | #[target_feature(enable = "sse2")] |
2432 | #[cfg_attr(test, assert_instr(cvttsd2si))] |
2433 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2434 | pub fn _mm_cvttsd_si32(a: __m128d) -> i32 { |
2435 | unsafe { cvttsd2si(a) } |
2436 | } |
2437 | |
2438 | /// Converts packed single-precision (32-bit) floating-point elements in `a` to |
2439 | /// packed 32-bit integers with truncation. |
2440 | /// |
2441 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi32) |
2442 | #[inline] |
2443 | #[target_feature(enable = "sse2")] |
2444 | #[cfg_attr(test, assert_instr(cvttps2dq))] |
2445 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2446 | pub fn _mm_cvttps_epi32(a: __m128) -> __m128i { |
    unsafe { transmute(cvttps2dq(a)) }
2448 | } |
2449 | |
/// Copies double-precision (64-bit) floating-point value `a` to the lower
/// element of the return value, and zeroes the upper element.
2452 | /// |
2453 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_sd) |
2454 | #[inline] |
2455 | #[target_feature(enable = "sse2")] |
2456 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2457 | pub fn _mm_set_sd(a: f64) -> __m128d { |
    _mm_set_pd(0.0, a)
2459 | } |
2460 | |
/// Broadcasts double-precision (64-bit) floating-point value `a` to all elements
2462 | /// of the return value. |
2463 | /// |
2464 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_pd) |
2465 | #[inline] |
2466 | #[target_feature(enable = "sse2")] |
2467 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2468 | pub fn _mm_set1_pd(a: f64) -> __m128d { |
    _mm_set_pd(a, a)
2470 | } |
2471 | |
/// Broadcasts double-precision (64-bit) floating-point value `a` to all elements
2473 | /// of the return value. |
2474 | /// |
2475 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd1) |
2476 | #[inline] |
2477 | #[target_feature(enable = "sse2")] |
2478 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2479 | pub fn _mm_set_pd1(a: f64) -> __m128d { |
    _mm_set_pd(a, a)
2481 | } |
2482 | |
2483 | /// Sets packed double-precision (64-bit) floating-point elements in the return |
2484 | /// value with the supplied values. |
2485 | /// |
2486 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd) |
2487 | #[inline] |
2488 | #[target_feature(enable = "sse2")] |
2489 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2490 | pub fn _mm_set_pd(a: f64, b: f64) -> __m128d { |
2491 | __m128d([b, a]) |
2492 | } |
2493 | |
2494 | /// Sets packed double-precision (64-bit) floating-point elements in the return |
2495 | /// value with the supplied values in reverse order. |
2496 | /// |
2497 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_pd) |
2498 | #[inline] |
2499 | #[target_feature(enable = "sse2")] |
2500 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2501 | pub fn _mm_setr_pd(a: f64, b: f64) -> __m128d { |
    _mm_set_pd(b, a)
2503 | } |
2504 | |
2505 | /// Returns packed double-precision (64-bit) floating-point elements with all |
2506 | /// zeros. |
2507 | /// |
2508 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_pd) |
2509 | #[inline] |
2510 | #[target_feature(enable = "sse2")] |
2511 | #[cfg_attr(test, assert_instr(xorp))] |
2512 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2513 | pub fn _mm_setzero_pd() -> __m128d { |
2514 | const { unsafe { mem::zeroed() } } |
2515 | } |

/// Returns a mask of the most significant bit of each element in `a`.
///
/// The mask is stored in the 2 least significant bits of the return value.
/// All other bits are set to `0`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movmskpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_movemask_pd(a: __m128d) -> i32 {
    // Propagate the highest bit to the rest, because simd_bitmask
    // requires all-1 or all-0.
    unsafe {
        let mask: i64x2 = simd_lt(transmute(a), i64x2::ZERO);
        simd_bitmask::<i64x2, u8>(mask).into()
    }
}
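
// A minimal sketch (hypothetical helper, assumes SSE2): bit 0 of the movemask
// result is the sign bit of the low lane, bit 1 that of the high lane; note
// that `-0.0` also has its sign bit set.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn sketch_movemask_pd_bits() {
    assert_eq!(_mm_movemask_pd(_mm_setr_pd(-1.0, 2.0)), 0b01);
    assert_eq!(_mm_movemask_pd(_mm_setr_pd(-0.0, -2.0)), 0b11);
}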

/// Loads 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from memory into the returned vector.
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
    *(mem_addr as *const __m128d)
}

/// Loads a 64-bit double-precision value to the low element of a
/// 128-bit vector of `[2 x double]` and clears the upper element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
    _mm_setr_pd(*mem_addr, 0.)
}

/// Loads a double-precision value into the high-order bits of a 128-bit
/// vector of `[2 x double]`. The low-order bits are copied from the low-order
/// bits of the first operand.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadh_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
    _mm_setr_pd(simd_extract!(a, 0), *mem_addr)
}

/// Loads a double-precision value into the low-order bits of a 128-bit
/// vector of `[2 x double]`. The high-order bits are copied from the
/// high-order bits of the first operand.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
    _mm_setr_pd(*mem_addr, simd_extract!(a, 1))
}
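
// A minimal sketch (hypothetical helper, assumes SSE2): `loadh`/`loadl`
// replace one lane of `a` from memory and keep the other lane unchanged.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn sketch_loadh_loadl_pd() {
    let a = _mm_setr_pd(1.0, 2.0);
    let x = 9.0f64;
    let mut out = [0.0f64; 2];
    _mm_storeu_pd(out.as_mut_ptr(), _mm_loadh_pd(a, &x));
    assert_eq!(out, [1.0, 9.0]); // high lane replaced
    _mm_storeu_pd(out.as_mut_ptr(), _mm_loadl_pd(a, &x));
    assert_eq!(out, [9.0, 2.0]); // low lane replaced
}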

/// Stores a 128-bit floating point vector of `[2 x double]` to a 128-bit
/// aligned memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_pd)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
    crate::arch::asm!(
        vps!("movntpd", ",{a}"),
        p = in(reg) mem_addr,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
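
// A minimal sketch of the fencing discipline described above (hypothetical
// helper, assumes SSE2 and a 16-byte-aligned `dst`): the thread that issues
// the non-temporal store runs `_mm_sfence` before anything else may touch the
// written memory.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn sketch_stream_pd_then_fence(dst: *mut f64, a: __m128d) {
    _mm_stream_pd(dst, a);
    _mm_sfence(); // publish the non-temporal store before returning
}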

/// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
    *mem_addr = simd_extract!(a, 0)
}

/// Stores 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from `a` into memory. `mem_addr` must be aligned
/// on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
    *(mem_addr as *mut __m128d) = a;
}

/// Stores 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from `a` into memory.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))] // FIXME movupd expected
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
    mem_addr.cast::<__m128d>().write_unaligned(a);
}

/// Stores the 16-bit integer from the first element of `a` into memory.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si16)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub unsafe fn _mm_storeu_si16(mem_addr: *mut u8, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut i16, simd_extract!(a.as_i16x8(), 0))
}

/// Stores the 32-bit integer from the first element of `a` into memory.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub unsafe fn _mm_storeu_si32(mem_addr: *mut u8, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut i32, simd_extract!(a.as_i32x4(), 0))
}

/// Stores the 64-bit integer from the first element of `a` into memory.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si64)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub unsafe fn _mm_storeu_si64(mem_addr: *mut u8, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut i64, simd_extract!(a.as_i64x2(), 0))
}
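
// A minimal sketch (hypothetical helper, assumes SSE2): the
// `_mm_storeu_si{16,32,64}` family writes only the low N bits of the vector;
// neighbouring bytes are left untouched, and no alignment is required.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn sketch_storeu_si32_partial_store() {
    let v = _mm_setr_epi32(0x11223344, 0, 0, 0);
    let mut buf = [0xAAu8; 8];
    _mm_storeu_si32(buf.as_mut_ptr().add(1), v); // deliberately misaligned
    assert_eq!(&buf[1..5], &0x11223344u32.to_le_bytes());
    assert_eq!(buf[0], 0xAA); // bytes outside the store are untouched
    assert_eq!(buf[5], 0xAA);
}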

/// Stores the lower double-precision (64-bit) floating-point element from `a`
/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store1_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
    let b: __m128d = simd_shuffle!(a, a, [0, 0]);
    *(mem_addr as *mut __m128d) = b;
}

/// Stores the lower double-precision (64-bit) floating-point element from `a`
/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd1)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
    let b: __m128d = simd_shuffle!(a, a, [0, 0]);
    *(mem_addr as *mut __m128d) = b;
}

/// Stores 2 double-precision (64-bit) floating-point elements from `a` into
/// memory in reverse order.
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storer_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
    let b: __m128d = simd_shuffle!(a, a, [1, 0]);
    *(mem_addr as *mut __m128d) = b;
}

/// Stores the upper 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeh_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
    *mem_addr = simd_extract!(a, 1);
}

/// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
    *mem_addr = simd_extract!(a, 0);
}

/// Loads a double-precision (64-bit) floating-point element from memory
/// into both elements of returned vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load1_pd)
#[inline]
#[target_feature(enable = "sse2")]
// #[cfg_attr(test, assert_instr(movapd))] // FIXME LLVM uses different codegen
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
    let d = *mem_addr;
    _mm_setr_pd(d, d)
}

/// Loads a double-precision (64-bit) floating-point element from memory
/// into both elements of returned vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd1)
#[inline]
#[target_feature(enable = "sse2")]
// #[cfg_attr(test, assert_instr(movapd))] // FIXME same as _mm_load1_pd
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
    _mm_load1_pd(mem_addr)
}

/// Loads 2 double-precision (64-bit) floating-point elements from memory into
/// the returned vector in reverse order. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadr_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
    let a = _mm_load_pd(mem_addr);
    simd_shuffle!(a, a, [1, 0])
}

/// Loads 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from memory into the returned vector.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
    let mut dst = _mm_undefined_pd();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m128d>(),
    );
    dst
}
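
// A minimal sketch (hypothetical helper, assumes SSE2): because no alignment
// is required, `_mm_loadu_pd` can read two doubles from an offset that
// `_mm_load_pd` would fault on.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn sketch_loadu_pd_unaligned() {
    let data = [1.0f64, 2.0, 3.0];
    // `data[1..]` is only guaranteed 8-byte aligned, not 16-byte aligned.
    let v = _mm_loadu_pd(data.as_ptr().add(1));
    let mut out = [0.0f64; 2];
    _mm_storeu_pd(out.as_mut_ptr(), v);
    assert_eq!(out, [2.0, 3.0]);
}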

/// Loads unaligned 16 bits of integer data from memory into a new vector.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si16)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub unsafe fn _mm_loadu_si16(mem_addr: *const u8) -> __m128i {
    transmute(i16x8::new(
        ptr::read_unaligned(mem_addr as *const i16),
        0,
        0,
        0,
        0,
        0,
        0,
        0,
    ))
}

/// Loads unaligned 32 bits of integer data from memory into a new vector.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub unsafe fn _mm_loadu_si32(mem_addr: *const u8) -> __m128i {
    transmute(i32x4::new(
        ptr::read_unaligned(mem_addr as *const i32),
        0,
        0,
        0,
    ))
}

/// Loads unaligned 64 bits of integer data from memory into a new vector.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si64)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86_mm_loadu_si64", since = "1.46.0")]
pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
    transmute(i64x2::new(ptr::read_unaligned(mem_addr as *const i64), 0))
}

/// Constructs a 128-bit floating-point vector of `[2 x double]` from two
/// 128-bit vector parameters of `[2 x double]`, using the immediate-value
/// parameter as a specifier.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(shufps, MASK = 2))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(MASK, 8);
    unsafe { simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) }
}
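
// A minimal sketch (hypothetical helper, assumes SSE2): bit 0 of `MASK`
// selects which lane of `a` becomes the low lane of the result, and bit 1
// selects which lane of `b` becomes the high lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn sketch_shuffle_pd_mask() {
    let a = _mm_setr_pd(1.0, 2.0);
    let b = _mm_setr_pd(3.0, 4.0);
    let r = _mm_shuffle_pd::<0b01>(a, b);
    let mut out = [0.0f64; 2];
    _mm_storeu_pd(out.as_mut_ptr(), r);
    assert_eq!(out, [2.0, 3.0]); // a's high lane, then b's low lane
}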

/// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower
/// 64 bits are set to the lower 64 bits of the second parameter. The upper
/// 64 bits are set to the upper 64 bits of the first parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { _mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1)) }
}
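
// A minimal sketch (hypothetical helper, assumes SSE2): `_mm_move_sd` merges
// `b`'s low lane into `a`, leaving `a`'s high lane in place.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn sketch_move_sd_merge() {
    let a = _mm_setr_pd(1.0, 2.0);
    let b = _mm_setr_pd(3.0, 4.0);
    let mut out = [0.0f64; 2];
    _mm_storeu_pd(out.as_mut_ptr(), _mm_move_sd(a, b));
    assert_eq!(out, [3.0, 2.0]); // low from `b`, high from `a`
}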

/// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
/// floating-point vector of `[4 x float]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_castpd_ps(a: __m128d) -> __m128 {
    unsafe { transmute(a) }
}

/// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
/// integer vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_castpd_si128(a: __m128d) -> __m128i {
    unsafe { transmute(a) }
}

/// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
/// floating-point vector of `[2 x double]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_castps_pd(a: __m128) -> __m128d {
    unsafe { transmute(a) }
}

/// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
/// integer vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_castps_si128(a: __m128) -> __m128i {
    unsafe { transmute(a) }
}

/// Casts a 128-bit integer vector into a 128-bit floating-point vector
/// of `[2 x double]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_castsi128_pd(a: __m128i) -> __m128d {
    unsafe { transmute(a) }
}

/// Casts a 128-bit integer vector into a 128-bit floating-point vector
/// of `[4 x float]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_castsi128_ps(a: __m128i) -> __m128 {
    unsafe { transmute(a) }
}
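
// A minimal sketch (hypothetical helper, assumes SSE2): the `_mm_cast*`
// family is a zero-cost bit reinterpretation, so a value round-trips
// unchanged through the integer view.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn sketch_cast_roundtrip() {
    let pd = _mm_set1_pd(1.5);
    let back = _mm_castsi128_pd(_mm_castpd_si128(pd));
    let mut out = [0.0f64; 2];
    _mm_storeu_pd(out.as_mut_ptr(), back);
    assert_eq!(out, [1.5, 1.5]); // bit pattern preserved
}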

/// Returns a vector of type `__m128d` with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
/// In practice, this is typically equivalent to [`mem::zeroed`].
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_undefined_pd() -> __m128d {
    const { unsafe { mem::zeroed() } }
}

/// Returns a vector of type `__m128i` with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
/// In practice, this is typically equivalent to [`mem::zeroed`].
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_undefined_si128() -> __m128i {
    const { unsafe { mem::zeroed() } }
}

/// The resulting `__m128d` element is composed of the high-order values of
/// the two `__m128d` interleaved input elements, i.e.:
///
/// * The `[127:64]` bits are copied from the `[127:64]` bits of the second input
/// * The `[63:0]` bits are copied from the `[127:64]` bits of the first input
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_shuffle!(a, b, [1, 3]) }
}

/// The resulting `__m128d` element is composed of the low-order values of
/// the two `__m128d` interleaved input elements, i.e.:
///
/// * The `[127:64]` bits are copied from the `[63:0]` bits of the second input
/// * The `[63:0]` bits are copied from the `[63:0]` bits of the first input
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_shuffle!(a, b, [0, 2]) }
}
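
// A minimal sketch (hypothetical helper, assumes SSE2): `unpacklo` pairs the
// low lanes of its two inputs and `unpackhi` pairs the high lanes.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn sketch_unpack_pd() {
    let a = _mm_setr_pd(1.0, 2.0);
    let b = _mm_setr_pd(3.0, 4.0);
    let mut lo = [0.0f64; 2];
    let mut hi = [0.0f64; 2];
    _mm_storeu_pd(lo.as_mut_ptr(), _mm_unpacklo_pd(a, b));
    _mm_storeu_pd(hi.as_mut_ptr(), _mm_unpackhi_pd(a, b));
    assert_eq!(lo, [1.0, 3.0]); // low lanes of `a` and `b`
    assert_eq!(hi, [2.0, 4.0]); // high lanes of `a` and `b`
}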

#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.sse2.pause"]
    unsafe fn pause();
    #[link_name = "llvm.x86.sse2.clflush"]
    unsafe fn clflush(p: *const u8);
    #[link_name = "llvm.x86.sse2.lfence"]
    unsafe fn lfence();
    #[link_name = "llvm.x86.sse2.mfence"]
    unsafe fn mfence();
    #[link_name = "llvm.x86.sse2.pmadd.wd"]
    unsafe fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
    #[link_name = "llvm.x86.sse2.psad.bw"]
    unsafe fn psadbw(a: u8x16, b: u8x16) -> u64x2;
    #[link_name = "llvm.x86.sse2.psll.w"]
    unsafe fn psllw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psll.d"]
    unsafe fn pslld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psll.q"]
    unsafe fn psllq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.psra.w"]
    unsafe fn psraw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psra.d"]
    unsafe fn psrad(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.w"]
    unsafe fn psrlw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psrl.d"]
    unsafe fn psrld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.q"]
    unsafe fn psrlq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.cvtps2dq"]
    unsafe fn cvtps2dq(a: __m128) -> i32x4;
    #[link_name = "llvm.x86.sse2.maskmov.dqu"]
    unsafe fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
    #[link_name = "llvm.x86.sse2.packsswb.128"]
    unsafe fn packsswb(a: i16x8, b: i16x8) -> i8x16;
    #[link_name = "llvm.x86.sse2.packssdw.128"]
    unsafe fn packssdw(a: i32x4, b: i32x4) -> i16x8;
    #[link_name = "llvm.x86.sse2.packuswb.128"]
    unsafe fn packuswb(a: i16x8, b: i16x8) -> u8x16;
    #[link_name = "llvm.x86.sse2.max.sd"]
    unsafe fn maxsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.max.pd"]
    unsafe fn maxpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.sd"]
    unsafe fn minsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.pd"]
    unsafe fn minpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.cmp.sd"]
    unsafe fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.sse2.cmp.pd"]
    unsafe fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.sse2.comieq.sd"]
    unsafe fn comieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comilt.sd"]
    unsafe fn comiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comile.sd"]
    unsafe fn comilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comigt.sd"]
    unsafe fn comigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comige.sd"]
    unsafe fn comigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comineq.sd"]
    unsafe fn comineqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomieq.sd"]
    unsafe fn ucomieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomilt.sd"]
    unsafe fn ucomiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomile.sd"]
    unsafe fn ucomilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomigt.sd"]
    unsafe fn ucomigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomige.sd"]
    unsafe fn ucomigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomineq.sd"]
    unsafe fn ucomineqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvtpd2dq"]
    unsafe fn cvtpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvtsd2si"]
    unsafe fn cvtsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvtsd2ss"]
    unsafe fn cvtsd2ss(a: __m128, b: __m128d) -> __m128;
    #[link_name = "llvm.x86.sse2.cvtss2sd"]
    unsafe fn cvtss2sd(a: __m128d, b: __m128) -> __m128d;
    #[link_name = "llvm.x86.sse2.cvttpd2dq"]
    unsafe fn cvttpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvttsd2si"]
    unsafe fn cvttsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvttps2dq"]
    unsafe fn cvttps2dq(a: __m128) -> i32x4;
}

#[cfg(test)]
mod tests {
    use crate::{
        core_arch::{simd::*, x86::*},
        hint::black_box,
    };
    use std::{
        boxed, f32, f64,
        mem::{self, transmute},
        ptr,
    };
    use stdarch_test::simd_test;

    const NAN: f64 = f64::NAN;

    #[test]
    fn test_mm_pause() {
        unsafe { _mm_pause() }
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_clflush() {
        let x = 0_u8;
        _mm_clflush(ptr::addr_of!(x));
    }

    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_lfence() {
        _mm_lfence();
    }

    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_mfence() {
        _mm_mfence();
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_add_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi8_overflow() {
        let a = _mm_set1_epi8(0x7F);
        let b = _mm_set1_epi8(1);
        let r = _mm_add_epi8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(-128));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_add_epi16(a, b);
        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(4, 5, 6, 7);
        let r = _mm_add_epi32(a, b);
        let e = _mm_setr_epi32(4, 6, 8, 10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi64() {
        let a = _mm_setr_epi64x(0, 1);
        let b = _mm_setr_epi64x(2, 3);
        let r = _mm_add_epi64(a, b);
        let e = _mm_setr_epi64x(2, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_adds_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi8_saturate_positive() {
        let a = _mm_set1_epi8(0x7F);
        let b = _mm_set1_epi8(1);
        let r = _mm_adds_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi8_saturate_negative() {
        let a = _mm_set1_epi8(-0x80);
        let b = _mm_set1_epi8(-1);
        let r = _mm_adds_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_adds_epi16(a, b);
        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi16_saturate_positive() {
        let a = _mm_set1_epi16(0x7FFF);
        let b = _mm_set1_epi16(1);
        let r = _mm_adds_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi16_saturate_negative() {
        let a = _mm_set1_epi16(-0x8000);
        let b = _mm_set1_epi16(-1);
        let r = _mm_adds_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_adds_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu8_saturate() {
        let a = _mm_set1_epi8(!0);
        let b = _mm_set1_epi8(1);
        let r = _mm_adds_epu8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_adds_epu16(a, b);
        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu16_saturate() {
        let a = _mm_set1_epi16(!0);
        let b = _mm_set1_epi16(1);
        let r = _mm_adds_epu16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_avg_epu8() {
        let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
        let r = _mm_avg_epu8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(6));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_avg_epu16() {
        let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
        let r = _mm_avg_epu16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(6));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_madd_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm_madd_epi16(a, b);
        let e = _mm_setr_epi32(29, 81, 149, 233);
        assert_eq_m128i(r, e);

        // Test large values.
        // MIN*MIN+MIN*MIN will overflow into i32::MIN.
        let a = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MAX,
            0,
            0,
        );
        let b = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MAX,
            i16::MIN,
            0,
            0,
        );
        let r = _mm_madd_epi16(a, b);
        let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_max_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let r = _mm_max_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_max_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(!0);
        let r = _mm_max_epu8(a, b);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_min_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let r = _mm_min_epi16(a, b);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_min_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(!0);
        let r = _mm_min_epu8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mulhi_epi16() {
        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
        let r = _mm_mulhi_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-16));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mulhi_epu16() {
        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
        let r = _mm_mulhi_epu16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(15));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mullo_epi16() {
        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
        let r = _mm_mullo_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-17960));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mul_epu32() {
        let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
        let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
        let r = _mm_mul_epu32(a, b);
        let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sad_epu8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
            1, 2, 3, 4,
            155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
            1, 2, 3, 4,
        );
        let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
        let r = _mm_sad_epu8(a, b);
        let e = _mm_setr_epi64x(1020, 614);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi8() {
        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
        let r = _mm_sub_epi8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi16() {
        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
        let r = _mm_sub_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi32() {
        let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
        let r = _mm_sub_epi32(a, b);
        assert_eq_m128i(r, _mm_set1_epi32(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi64() {
        let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
        let r = _mm_sub_epi64(a, b);
        assert_eq_m128i(r, _mm_set1_epi64x(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi8() {
        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
        let r = _mm_subs_epi8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi8_saturate_positive() {
        let a = _mm_set1_epi8(0x7F);
        let b = _mm_set1_epi8(-1);
        let r = _mm_subs_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi8_saturate_negative() {
        let a = _mm_set1_epi8(-0x80);
        let b = _mm_set1_epi8(1);
        let r = _mm_subs_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi16() {
        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
        let r = _mm_subs_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi16_saturate_positive() {
        let a = _mm_set1_epi16(0x7FFF);
        let b = _mm_set1_epi16(-1);
        let r = _mm_subs_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi16_saturate_negative() {
        let a = _mm_set1_epi16(-0x8000);
        let b = _mm_set1_epi16(1);
        let r = _mm_subs_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu8() {
        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
        let r = _mm_subs_epu8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu8_saturate() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let r = _mm_subs_epu8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu16() {
        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
        let r = _mm_subs_epu16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu16_saturate() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let r = _mm_subs_epu16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_si128() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_slli_si128::<1>(a);
        let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_slli_si128::<15>(a);
        let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_slli_si128::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi8(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_slli_epi16::<4>(a);
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
        );
        let r = _mm_slli_epi16::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi16(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sll_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
        );
        let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16));
        assert_eq_m128i(r, _mm_set1_epi16(0));
        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi16(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_slli_epi32::<4>(a);
        assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
        let r = _mm_slli_epi32::<32>(a);
        assert_eq_m128i(r, _mm_set1_epi32(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sll_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
        let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32));
        assert_eq_m128i(r, _mm_set1_epi32(0));
        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi32(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_epi64() {
        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
        let r = _mm_slli_epi64::<4>(a);
        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
        let r = _mm_slli_epi64::<64>(a);
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sll_epi64() {
        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
        let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64));
        assert_eq_m128i(r, _mm_set1_epi64x(0));
        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srai_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_srai_epi16::<4>(a);
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
        );
        let r = _mm_srai_epi16::<16>(a);
        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sra_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
        );
        let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16));
        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srai_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_srai_epi32::<4>(a);
        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
        let r = _mm_srai_epi32::<32>(a);
        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sra_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
        let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32));
        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_si128() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_srli_si128::<1>(a);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
        );
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_srli_si128::<15>(a);
        let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_srli_si128::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi8(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_srli_epi16::<4>(a);
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
        );
        let r = _mm_srli_epi16::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi16(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srl_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
        );
        let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16));
        assert_eq_m128i(r, _mm_set1_epi16(0));
        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi16(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_srli_epi32::<4>(a);
        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
        let r = _mm_srli_epi32::<32>(a);
        assert_eq_m128i(r, _mm_set1_epi32(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srl_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
        let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32));
        assert_eq_m128i(r, _mm_set1_epi32(0));
        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi32(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_epi64() {
        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
        let r = _mm_srli_epi64::<4>(a);
        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
        let r = _mm_srli_epi64::<64>(a);
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srl_epi64() {
        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
        let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64));
        assert_eq_m128i(r, _mm_set1_epi64x(0));
        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_and_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_and_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_andnot_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_andnot_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(2));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_or_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_or_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(7));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_xor_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_xor_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(6));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_epi8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_cmpeq_epi8(a, b);
        #[rustfmt::skip]
        assert_eq_m128i(
            r,
            _mm_setr_epi8(
                0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
            )
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
        let r = _mm_cmpeq_epi16(a, b);
        assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(3, 2, 2, 0);
        let r = _mm_cmpeq_epi32(a, b);
        assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_epi8() {
        let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        let b = _mm_set1_epi8(0);
        let r = _mm_cmpgt_epi8(a, b);
        let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_epi16() {
        let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
        let b = _mm_set1_epi16(0);
        let r = _mm_cmpgt_epi16(a, b);
        let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_epi32() {
        let a = _mm_set_epi32(5, 0, 0, 0);
        let b = _mm_set1_epi32(0);
        let r = _mm_cmpgt_epi32(a, b);
        assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_epi8() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm_cmplt_epi8(a, b);
        let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_epi16() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm_cmplt_epi16(a, b);
        let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_epi32() {
        let a = _mm_set1_epi32(0);
        let b = _mm_set_epi32(5, 0, 0, 0);
        let r = _mm_cmplt_epi32(a, b);
        assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
    }
3897 | |
3898 | #[simd_test(enable = "sse2")] |
3899 | unsafe fn test_mm_cvtepi32_pd() { |
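// Only the two low i32 lanes fit in a `__m128d`, so 35 and 25 are discarded.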
3900 | let a = _mm_set_epi32(35, 25, 15, 5); |
3901 | let r = _mm_cvtepi32_pd(a); |
3902 | assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0)); |
3903 | } |
3904 | |
3905 | #[simd_test(enable = "sse2")] |
3906 | unsafe fn test_mm_cvtsi32_sd() { |
3907 | let a = _mm_set1_pd(3.5); |
3908 | let r = _mm_cvtsi32_sd(a, 5); |
3909 | assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5)); |
3910 | } |
3911 | |
3912 | #[simd_test(enable = "sse2")] |
3913 | unsafe fn test_mm_cvtepi32_ps() { |
3914 | let a = _mm_setr_epi32(1, 2, 3, 4); |
3915 | let r = _mm_cvtepi32_ps(a); |
3916 | assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0)); |
3917 | } |
3918 | |
3919 | #[simd_test(enable = "sse2")] |
3920 | unsafe fn test_mm_cvtps_epi32() { |
3921 | let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
3922 | let r = _mm_cvtps_epi32(a); |
3923 | assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4)); |
3924 | } |
3925 | |
3926 | #[simd_test(enable = "sse2")] |
3927 | unsafe fn test_mm_cvtsi32_si128() { |
3928 | let r = _mm_cvtsi32_si128(5); |
3929 | assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0)); |
3930 | } |
3931 | |
3932 | #[simd_test(enable = "sse2")] |
3933 | unsafe fn test_mm_cvtsi128_si32() { |
3934 | let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0)); |
3935 | assert_eq!(r, 5); |
3936 | } |
3937 | |
3938 | #[simd_test(enable = "sse2")] |
3939 | unsafe fn test_mm_set_epi64x() { |
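// `_mm_set_*` takes its arguments highest lane first, while `_mm_setr_*`
// takes them in memory order, so `set(0, 1)` equals `setr(1, 0)`.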
3940 | let r = _mm_set_epi64x(0, 1); |
3941 | assert_eq_m128i(r, _mm_setr_epi64x(1, 0)); |
3942 | } |
3943 | |
3944 | #[simd_test(enable = "sse2")] |
3945 | unsafe fn test_mm_set_epi32() { |
3946 | let r = _mm_set_epi32(0, 1, 2, 3); |
3947 | assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0)); |
3948 | } |
3949 | |
3950 | #[simd_test(enable = "sse2")] |
3951 | unsafe fn test_mm_set_epi16() { |
3952 | let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
3953 | assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0)); |
3954 | } |
3955 | |
3956 | #[simd_test(enable = "sse2")] |
3957 | unsafe fn test_mm_set_epi8() { |
3958 | #[rustfmt::skip] |
3959 | let r = _mm_set_epi8( |
3960 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
3961 | ); |
3962 | #[rustfmt::skip] |
3963 | let e = _mm_setr_epi8( |
3964 | 15, 14, 13, 12, 11, 10, 9, 8, |
3965 | 7, 6, 5, 4, 3, 2, 1, 0, |
3966 | ); |
3967 | assert_eq_m128i(r, e); |
3968 | } |
3969 | |
3970 | #[simd_test(enable = "sse2")] |
3971 | unsafe fn test_mm_set1_epi64x() { |
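// Comparing `_mm_set1_*` against itself only smoke-tests that the
// intrinsic compiles and runs; the splatted values themselves are
// exercised by the tests above that build operands with `_mm_set1_*`.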
3972 | let r = _mm_set1_epi64x(1); |
3973 | assert_eq_m128i(r, _mm_set1_epi64x(1)); |
3974 | } |
3975 | |
3976 | #[simd_test(enable = "sse2")] |
3977 | unsafe fn test_mm_set1_epi32() { |
3978 | let r = _mm_set1_epi32(1); |
3979 | assert_eq_m128i(r, _mm_set1_epi32(1)); |
3980 | } |
3981 | |
3982 | #[simd_test(enable = "sse2")] |
3983 | unsafe fn test_mm_set1_epi16() { |
3984 | let r = _mm_set1_epi16(1); |
3985 | assert_eq_m128i(r, _mm_set1_epi16(1)); |
3986 | } |
3987 | |
3988 | #[simd_test(enable = "sse2")] |
3989 | unsafe fn test_mm_set1_epi8() { |
3990 | let r = _mm_set1_epi8(1); |
3991 | assert_eq_m128i(r, _mm_set1_epi8(1)); |
3992 | } |
3993 | |
3994 | #[simd_test(enable = "sse2")] |
3995 | unsafe fn test_mm_setr_epi32() { |
3996 | let r = _mm_setr_epi32(0, 1, 2, 3); |
3997 | assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3)); |
3998 | } |
3999 | |
4000 | #[simd_test(enable = "sse2")] |
4001 | unsafe fn test_mm_setr_epi16() { |
4002 | let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
4003 | assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7)); |
4004 | } |
4005 | |
4006 | #[simd_test(enable = "sse2")] |
4007 | unsafe fn test_mm_setr_epi8() { |
4008 | #[rustfmt::skip] |
4009 | let r = _mm_setr_epi8( |
4010 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
4011 | ); |
4012 | #[rustfmt::skip] |
4013 | let e = _mm_setr_epi8( |
4014 | 0, 1, 2, 3, 4, 5, 6, 7, |
4015 | 8, 9, 10, 11, 12, 13, 14, 15, |
4016 | ); |
4017 | assert_eq_m128i(r, e); |
4018 | } |
4019 | |
4020 | #[simd_test(enable = "sse2")] |
4021 | unsafe fn test_mm_setzero_si128() { |
4022 | let r = _mm_setzero_si128(); |
4023 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
4024 | } |
4025 | |
4026 | #[simd_test(enable = "sse2")] |
4027 | unsafe fn test_mm_loadl_epi64() { |
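// Loads only the low 64 bits of the source and zeroes the upper half.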
4028 | let a = _mm_setr_epi64x(6, 5); |
4029 | let r = _mm_loadl_epi64(ptr::addr_of!(a)); |
4030 | assert_eq_m128i(r, _mm_setr_epi64x(6, 0)); |
4031 | } |
4032 | |
4033 | #[simd_test(enable = "sse2")] |
4034 | unsafe fn test_mm_load_si128() { |
4035 | let a = _mm_set_epi64x(5, 6); |
4036 | let r = _mm_load_si128(ptr::addr_of!(a) as *const _); |
4037 | assert_eq_m128i(a, r); |
4038 | } |
4039 | |
4040 | #[simd_test(enable = "sse2")] |
4041 | unsafe fn test_mm_loadu_si128() { |
4042 | let a = _mm_set_epi64x(5, 6); |
4043 | let r = _mm_loadu_si128(ptr::addr_of!(a) as *const _); |
4044 | assert_eq_m128i(a, r); |
4045 | } |
4046 | |
4047 | #[simd_test(enable = "sse2")] |
4048 | // Miri cannot support this until it is clear how it fits in the Rust memory model |
4049 | // (non-temporal store) |
4050 | #[cfg_attr(miri, ignore)] |
4051 | unsafe fn test_mm_maskmoveu_si128() { |
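// Each byte of `a` is stored only where the corresponding mask byte has
// its most significant bit set; all other destination bytes are untouched.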
4052 | let a = _mm_set1_epi8(9); |
4053 | #[rustfmt::skip] |
4054 | let mask = _mm_set_epi8( |
4055 | 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0, |
4056 | 0, 0, 0, 0, 0, 0, 0, 0, |
4057 | ); |
4058 | let mut r = _mm_set1_epi8(0); |
4059 | _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8); |
4060 | let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
4061 | assert_eq_m128i(r, e); |
4062 | } |
4063 | |
4064 | #[simd_test(enable = "sse2")] |
4065 | unsafe fn test_mm_store_si128() { |
4066 | let a = _mm_set1_epi8(9); |
4067 | let mut r = _mm_set1_epi8(0); |
4068 | _mm_store_si128(&mut r, a); |
4069 | assert_eq_m128i(r, a); |
4070 | } |
4071 | |
4072 | #[simd_test(enable = "sse2")] |
4073 | unsafe fn test_mm_storeu_si128() { |
4074 | let a = _mm_set1_epi8(9); |
4075 | let mut r = _mm_set1_epi8(0); |
4076 | _mm_storeu_si128(&mut r, a); |
4077 | assert_eq_m128i(r, a); |
4078 | } |
4079 | |
4080 | #[simd_test(enable = "sse2")] |
4081 | unsafe fn test_mm_storel_epi64() { |
4082 | let a = _mm_setr_epi64x(2, 9); |
4083 | let mut r = _mm_set1_epi8(0); |
4084 | _mm_storel_epi64(&mut r, a); |
4085 | assert_eq_m128i(r, _mm_setr_epi64x(2, 0)); |
4086 | } |
4087 | |
4088 | #[simd_test(enable = "sse2")] |
4089 | // Miri cannot support this until it is clear how it fits in the Rust memory model |
4090 | // (non-temporal store) |
4091 | #[cfg_attr(miri, ignore)] |
4092 | unsafe fn test_mm_stream_si128() { |
4093 | let a = _mm_setr_epi32(1, 2, 3, 4); |
4094 | let mut r = _mm_undefined_si128(); |
4095 | _mm_stream_si128(ptr::addr_of_mut!(r), a); |
4096 | assert_eq_m128i(r, a); |
4097 | } |
4098 | |
4099 | #[simd_test(enable = "sse2")] |
4100 | // Miri cannot support this until it is clear how it fits in the Rust memory model |
4101 | // (non-temporal store) |
4102 | #[cfg_attr(miri, ignore)] |
4103 | unsafe fn test_mm_stream_si32() { |
4104 | let a: i32 = 7; |
4105 | let mut mem = boxed::Box::<i32>::new(-1); |
4106 | _mm_stream_si32(ptr::addr_of_mut!(*mem), a); |
4107 | assert_eq!(a, *mem); |
4108 | } |
4109 | |
4110 | #[simd_test(enable = "sse2")] |
4111 | unsafe fn test_mm_move_epi64() { |
4112 | let a = _mm_setr_epi64x(5, 6); |
4113 | let r = _mm_move_epi64(a); |
4114 | assert_eq_m128i(r, _mm_setr_epi64x(5, 0)); |
4115 | } |
4116 | |
4117 | #[simd_test(enable = "sse2")] |
4118 | unsafe fn test_mm_packs_epi16() { |
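// Packing uses signed saturation: 0x80 (128) exceeds i8::MAX and clamps
// to 0x7F, while -0x81 (-129) clamps to -0x80.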
4119 | let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0); |
4120 | let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80); |
4121 | let r = _mm_packs_epi16(a, b); |
4122 | #[rustfmt::skip] |
4123 | assert_eq_m128i( |
4124 | r, |
4125 | _mm_setr_epi8( |
4126 | 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F |
4127 | ) |
4128 | ); |
4129 | } |
4130 | |
4131 | #[simd_test(enable = "sse2")] |
4132 | unsafe fn test_mm_packs_epi32() { |
4133 | let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0); |
4134 | let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000); |
4135 | let r = _mm_packs_epi32(a, b); |
4136 | assert_eq_m128i( |
4137 | r, |
4138 | _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF), |
4139 | ); |
4140 | } |
4141 | |
4142 | #[simd_test(enable = "sse2")] |
4143 | unsafe fn test_mm_packus_epi16() { |
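// Packing uses unsigned saturation: 0x100 clamps to 0xFF (`!0` as i8)
// and -1 clamps to 0.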
4144 | let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0); |
4145 | let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100); |
4146 | let r = _mm_packus_epi16(a, b); |
4147 | assert_eq_m128i( |
4148 | r, |
4149 | _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0), |
4150 | ); |
4151 | } |
4152 | |
4153 | #[simd_test(enable = "sse2")] |
4154 | unsafe fn test_mm_extract_epi16() { |
4155 | let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7); |
4156 | let r1 = _mm_extract_epi16::<0>(a); |
4157 | let r2 = _mm_extract_epi16::<3>(a); |
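// The extracted u16 is zero-extended into the i32 result, so the -1 in
// lane 0 reads back as 0xFFFF rather than -1.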
4158 | assert_eq!(r1, 0xFFFF); |
4159 | assert_eq!(r2, 3); |
4160 | } |
4161 | |
4162 | #[simd_test(enable = "sse2")] |
4163 | unsafe fn test_mm_insert_epi16() { |
4164 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
4165 | let r = _mm_insert_epi16::<0>(a, 9); |
4166 | let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7); |
4167 | assert_eq_m128i(r, e); |
4168 | } |
4169 | |
4170 | #[simd_test(enable = "sse2")] |
4171 | unsafe fn test_mm_movemask_epi8() { |
4172 | #[rustfmt::skip] |
4173 | let a = _mm_setr_epi8( |
4174 | 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01, |
4175 | 0b0101, 0b1111_0000u8 as i8, 0, 0, |
4176 | 0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101, |
4177 | 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, |
4178 | ); |
4179 | let r = _mm_movemask_epi8(a); |
4180 | assert_eq!(r, 0b10100110_00100101); |
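// The mask gathers the most significant bit of each byte, lane 0 into
// bit 0. As an extra check, an all-ones vector sets all 16 mask bits:
assert_eq!(_mm_movemask_epi8(_mm_set1_epi8(-1)), 0xFFFF);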
4181 | } |
4182 | |
4183 | #[simd_test(enable = "sse2")] |
4184 | unsafe fn test_mm_shuffle_epi32() { |
4185 | let a = _mm_setr_epi32(5, 10, 15, 20); |
4186 | let r = _mm_shuffle_epi32::<0b00_01_01_11>(a); |
4187 | let e = _mm_setr_epi32(20, 10, 10, 5); |
4188 | assert_eq_m128i(r, e); |
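// Each two-bit field of the immediate selects a source lane, lowest field
// first, so 0b00_01_01_11 yields (a[3], a[1], a[1], a[0]). The identity
// shuffle leaves the input unchanged:
assert_eq_m128i(_mm_shuffle_epi32::<0b11_10_01_00>(a), a);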
4189 | } |
4190 | |
4191 | #[simd_test(enable = "sse2")] |
4192 | unsafe fn test_mm_shufflehi_epi16() { |
4193 | let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20); |
4194 | let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a); |
4195 | let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5); |
4196 | assert_eq_m128i(r, e); |
4197 | } |
4198 | |
4199 | #[simd_test(enable = "sse2")] |
4200 | unsafe fn test_mm_shufflelo_epi16() { |
4201 | let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4); |
4202 | let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a); |
4203 | let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4); |
4204 | assert_eq_m128i(r, e); |
4205 | } |
4206 | |
4207 | #[simd_test(enable = "sse2")] |
4208 | unsafe fn test_mm_unpackhi_epi8() { |
4209 | #[rustfmt::skip] |
4210 | let a = _mm_setr_epi8( |
4211 | 0, 1, 2, 3, 4, 5, 6, 7, |
4212 | 8, 9, 10, 11, 12, 13, 14, 15, |
4213 | ); |
4214 | #[rustfmt::skip] |
4215 | let b = _mm_setr_epi8( |
4216 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
4217 | ); |
4218 | let r = _mm_unpackhi_epi8(a, b); |
4219 | #[rustfmt::skip] |
4220 | let e = _mm_setr_epi8( |
4221 | 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, |
4222 | ); |
4223 | assert_eq_m128i(r, e); |
4224 | } |
4225 | |
4226 | #[simd_test(enable = "sse2")] |
4227 | unsafe fn test_mm_unpackhi_epi16() { |
4228 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
4229 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
4230 | let r = _mm_unpackhi_epi16(a, b); |
4231 | let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15); |
4232 | assert_eq_m128i(r, e); |
4233 | } |
4234 | |
4235 | #[simd_test(enable = "sse2")] |
4236 | unsafe fn test_mm_unpackhi_epi32() { |
4237 | let a = _mm_setr_epi32(0, 1, 2, 3); |
4238 | let b = _mm_setr_epi32(4, 5, 6, 7); |
4239 | let r = _mm_unpackhi_epi32(a, b); |
4240 | let e = _mm_setr_epi32(2, 6, 3, 7); |
4241 | assert_eq_m128i(r, e); |
4242 | } |
4243 | |
4244 | #[simd_test(enable = "sse2")] |
4245 | unsafe fn test_mm_unpackhi_epi64() { |
4246 | let a = _mm_setr_epi64x(0, 1); |
4247 | let b = _mm_setr_epi64x(2, 3); |
4248 | let r = _mm_unpackhi_epi64(a, b); |
4249 | let e = _mm_setr_epi64x(1, 3); |
4250 | assert_eq_m128i(r, e); |
4251 | } |
4252 | |
4253 | #[simd_test(enable = "sse2")] |
4254 | unsafe fn test_mm_unpacklo_epi8() { |
4255 | #[rustfmt::skip] |
4256 | let a = _mm_setr_epi8( |
4257 | 0, 1, 2, 3, 4, 5, 6, 7, |
4258 | 8, 9, 10, 11, 12, 13, 14, 15, |
4259 | ); |
4260 | #[rustfmt::skip] |
4261 | let b = _mm_setr_epi8( |
4262 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
4263 | ); |
4264 | let r = _mm_unpacklo_epi8(a, b); |
4265 | #[rustfmt::skip] |
4266 | let e = _mm_setr_epi8( |
4267 | 0, 16, 1, 17, 2, 18, 3, 19, |
4268 | 4, 20, 5, 21, 6, 22, 7, 23, |
4269 | ); |
4270 | assert_eq_m128i(r, e); |
4271 | } |
4272 | |
4273 | #[simd_test(enable = "sse2")] |
4274 | unsafe fn test_mm_unpacklo_epi16() { |
4275 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
4276 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
4277 | let r = _mm_unpacklo_epi16(a, b); |
4278 | let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11); |
4279 | assert_eq_m128i(r, e); |
4280 | } |
4281 | |
4282 | #[simd_test(enable = "sse2")] |
4283 | unsafe fn test_mm_unpacklo_epi32() { |
4284 | let a = _mm_setr_epi32(0, 1, 2, 3); |
4285 | let b = _mm_setr_epi32(4, 5, 6, 7); |
4286 | let r = _mm_unpacklo_epi32(a, b); |
4287 | let e = _mm_setr_epi32(0, 4, 1, 5); |
4288 | assert_eq_m128i(r, e); |
4289 | } |
4290 | |
4291 | #[simd_test(enable = "sse2")] |
4292 | unsafe fn test_mm_unpacklo_epi64() { |
4293 | let a = _mm_setr_epi64x(0, 1); |
4294 | let b = _mm_setr_epi64x(2, 3); |
4295 | let r = _mm_unpacklo_epi64(a, b); |
4296 | let e = _mm_setr_epi64x(0, 2); |
4297 | assert_eq_m128i(r, e); |
4298 | } |
4299 | |
4300 | #[simd_test(enable = "sse2")] |
4301 | unsafe fn test_mm_add_sd() { |
4302 | let a = _mm_setr_pd(1.0, 2.0); |
4303 | let b = _mm_setr_pd(5.0, 10.0); |
4304 | let r = _mm_add_sd(a, b); |
4305 | assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0)); |
4306 | } |
4307 | |
4308 | #[simd_test(enable = "sse2")] |
4309 | unsafe fn test_mm_add_pd() { |
4310 | let a = _mm_setr_pd(1.0, 2.0); |
4311 | let b = _mm_setr_pd(5.0, 10.0); |
4312 | let r = _mm_add_pd(a, b); |
4313 | assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0)); |
4314 | } |
4315 | |
4316 | #[simd_test(enable = "sse2")] |
4317 | unsafe fn test_mm_div_sd() { |
4318 | let a = _mm_setr_pd(1.0, 2.0); |
4319 | let b = _mm_setr_pd(5.0, 10.0); |
4320 | let r = _mm_div_sd(a, b); |
4321 | assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0)); |
4322 | } |
4323 | |
4324 | #[simd_test(enable = "sse2")] |
4325 | unsafe fn test_mm_div_pd() { |
4326 | let a = _mm_setr_pd(1.0, 2.0); |
4327 | let b = _mm_setr_pd(5.0, 10.0); |
4328 | let r = _mm_div_pd(a, b); |
4329 | assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2)); |
4330 | } |
4331 | |
4332 | #[simd_test(enable = "sse2")] |
4333 | unsafe fn test_mm_max_sd() { |
4334 | let a = _mm_setr_pd(1.0, 2.0); |
4335 | let b = _mm_setr_pd(5.0, 10.0); |
4336 | let r = _mm_max_sd(a, b); |
4337 | assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0)); |
4338 | } |
4339 | |
4340 | #[simd_test(enable = "sse2")] |
4341 | unsafe fn test_mm_max_pd() { |
4342 | let a = _mm_setr_pd(1.0, 2.0); |
4343 | let b = _mm_setr_pd(5.0, 10.0); |
4344 | let r = _mm_max_pd(a, b); |
4345 | assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0)); |
4346 | |
4347 | // Check SSE(2)-specific semantics for -0.0 handling. |
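// `maxpd` returns its second operand whenever the inputs compare equal
// (and -0.0 == 0.0 under IEEE comparison), which is why each call below
// returns its second operand.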
4348 | let a = _mm_setr_pd(-0.0, 0.0); |
4349 | let b = _mm_setr_pd(0.0, 0.0); |
4350 | let r1: [u8; 16] = transmute(_mm_max_pd(a, b)); |
4351 | let r2: [u8; 16] = transmute(_mm_max_pd(b, a)); |
4352 | let a: [u8; 16] = transmute(a); |
4353 | let b: [u8; 16] = transmute(b); |
4354 | assert_eq!(r1, b); |
4355 | assert_eq!(r2, a); |
4356 | assert_ne!(a, b); // sanity check that -0.0 is actually present |
4357 | } |
4358 | |
4359 | #[simd_test(enable = "sse2")] |
4360 | unsafe fn test_mm_min_sd() { |
4361 | let a = _mm_setr_pd(1.0, 2.0); |
4362 | let b = _mm_setr_pd(5.0, 10.0); |
4363 | let r = _mm_min_sd(a, b); |
4364 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); |
4365 | } |
4366 | |
4367 | #[simd_test(enable = "sse2")] |
4368 | unsafe fn test_mm_min_pd() { |
4369 | let a = _mm_setr_pd(1.0, 2.0); |
4370 | let b = _mm_setr_pd(5.0, 10.0); |
4371 | let r = _mm_min_pd(a, b); |
4372 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); |
4373 | |
4374 | // Check SSE(2)-specific semantics for -0.0 handling. |
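// As with `maxpd`, `minpd` returns its second operand on equal inputs.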
4375 | let a = _mm_setr_pd(-0.0, 0.0); |
4376 | let b = _mm_setr_pd(0.0, 0.0); |
4377 | let r1: [u8; 16] = transmute(_mm_min_pd(a, b)); |
4378 | let r2: [u8; 16] = transmute(_mm_min_pd(b, a)); |
4379 | let a: [u8; 16] = transmute(a); |
4380 | let b: [u8; 16] = transmute(b); |
4381 | assert_eq!(r1, b); |
4382 | assert_eq!(r2, a); |
4383 | assert_ne!(a, b); // sanity check that -0.0 is actually present |
4384 | } |
4385 | |
4386 | #[simd_test(enable = "sse2")] |
4387 | unsafe fn test_mm_mul_sd() { |
4388 | let a = _mm_setr_pd(1.0, 2.0); |
4389 | let b = _mm_setr_pd(5.0, 10.0); |
4390 | let r = _mm_mul_sd(a, b); |
4391 | assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0)); |
4392 | } |
4393 | |
4394 | #[simd_test(enable = "sse2")] |
4395 | unsafe fn test_mm_mul_pd() { |
4396 | let a = _mm_setr_pd(1.0, 2.0); |
4397 | let b = _mm_setr_pd(5.0, 10.0); |
4398 | let r = _mm_mul_pd(a, b); |
4399 | assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0)); |
4400 | } |
4401 | |
4402 | #[simd_test(enable = "sse2")] |
4403 | unsafe fn test_mm_sqrt_sd() { |
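// Computes the square root of `b`'s low lane; the upper lane is copied
// from `a`.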
4404 | let a = _mm_setr_pd(1.0, 2.0); |
4405 | let b = _mm_setr_pd(5.0, 10.0); |
4406 | let r = _mm_sqrt_sd(a, b); |
4407 | assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0)); |
4408 | } |
4409 | |
4410 | #[simd_test(enable = "sse2")] |
4411 | unsafe fn test_mm_sqrt_pd() { |
4412 | let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0)); |
4413 | assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt())); |
4414 | } |
4415 | |
4416 | #[simd_test(enable = "sse2")] |
4417 | unsafe fn test_mm_sub_sd() { |
4418 | let a = _mm_setr_pd(1.0, 2.0); |
4419 | let b = _mm_setr_pd(5.0, 10.0); |
4420 | let r = _mm_sub_sd(a, b); |
4421 | assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0)); |
4422 | } |
4423 | |
4424 | #[simd_test(enable = "sse2")] |
4425 | unsafe fn test_mm_sub_pd() { |
4426 | let a = _mm_setr_pd(1.0, 2.0); |
4427 | let b = _mm_setr_pd(5.0, 10.0); |
4428 | let r = _mm_sub_pd(a, b); |
4429 | assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0)); |
4430 | } |
4431 | |
4432 | #[simd_test(enable = "sse2")] |
4433 | unsafe fn test_mm_and_pd() { |
4434 | let a = transmute(u64x2::splat(5)); |
4435 | let b = transmute(u64x2::splat(3)); |
4436 | let r = _mm_and_pd(a, b); |
4437 | let e = transmute(u64x2::splat(1)); |
4438 | assert_eq_m128d(r, e); |
4439 | } |
4440 | |
4441 | #[simd_test(enable = "sse2")] |
4442 | unsafe fn test_mm_andnot_pd() { |
4443 | let a = transmute(u64x2::splat(5)); |
4444 | let b = transmute(u64x2::splat(3)); |
4445 | let r = _mm_andnot_pd(a, b); |
4446 | let e = transmute(u64x2::splat(2)); |
4447 | assert_eq_m128d(r, e); |
4448 | } |
4449 | |
4450 | #[simd_test(enable = "sse2")] |
4451 | unsafe fn test_mm_or_pd() { |
4452 | let a = transmute(u64x2::splat(5)); |
4453 | let b = transmute(u64x2::splat(3)); |
4454 | let r = _mm_or_pd(a, b); |
4455 | let e = transmute(u64x2::splat(7)); |
4456 | assert_eq_m128d(r, e); |
4457 | } |
4458 | |
4459 | #[simd_test(enable = "sse2")] |
4460 | unsafe fn test_mm_xor_pd() { |
4461 | let a = transmute(u64x2::splat(5)); |
4462 | let b = transmute(u64x2::splat(3)); |
4463 | let r = _mm_xor_pd(a, b); |
4464 | let e = transmute(u64x2::splat(6)); |
4465 | assert_eq_m128d(r, e); |
4466 | } |
4467 | |
4468 | #[simd_test(enable = "sse2")] |
4469 | unsafe fn test_mm_cmpeq_sd() { |
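// Scalar comparisons fill lane 0 with all ones or all zeros and pass the
// upper lane of `a` through unchanged, hence `2.0f64.to_bits()` in the
// expected value here and in the `_sd` comparison tests below.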
4470 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4471 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4472 | let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b)); |
4473 | assert_eq_m128i(r, e); |
4474 | } |
4475 | |
4476 | #[simd_test(enable = "sse2")] |
4477 | unsafe fn test_mm_cmplt_sd() { |
4478 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
4479 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4480 | let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b)); |
4481 | assert_eq_m128i(r, e); |
4482 | } |
4483 | |
4484 | #[simd_test(enable = "sse2")] |
4485 | unsafe fn test_mm_cmple_sd() { |
4486 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4487 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4488 | let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b)); |
4489 | assert_eq_m128i(r, e); |
4490 | } |
4491 | |
4492 | #[simd_test(enable = "sse2")] |
4493 | unsafe fn test_mm_cmpgt_sd() { |
4494 | let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4495 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4496 | let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b)); |
4497 | assert_eq_m128i(r, e); |
4498 | } |
4499 | |
4500 | #[simd_test(enable = "sse2")] |
4501 | unsafe fn test_mm_cmpge_sd() { |
4502 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4503 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4504 | let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b)); |
4505 | assert_eq_m128i(r, e); |
4506 | } |
4507 | |
4508 | #[simd_test(enable = "sse2")] |
4509 | unsafe fn test_mm_cmpord_sd() { |
4510 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); |
4511 | let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); |
4512 | let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b)); |
4513 | assert_eq_m128i(r, e); |
4514 | } |
4515 | |
4516 | #[simd_test(enable = "sse2")] |
4517 | unsafe fn test_mm_cmpunord_sd() { |
4518 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); |
4519 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4520 | let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b)); |
4521 | assert_eq_m128i(r, e); |
4522 | } |
4523 | |
4524 | #[simd_test(enable = "sse2")] |
4525 | unsafe fn test_mm_cmpneq_sd() { |
4526 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
4527 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4528 | let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b)); |
4529 | assert_eq_m128i(r, e); |
4530 | } |
4531 | |
4532 | #[simd_test(enable = "sse2")] |
4533 | unsafe fn test_mm_cmpnlt_sd() { |
4534 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
4535 | let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); |
4536 | let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b)); |
4537 | assert_eq_m128i(r, e); |
4538 | } |
4539 | |
4540 | #[simd_test(enable = "sse2")] |
4541 | unsafe fn test_mm_cmpnle_sd() { |
4542 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4543 | let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); |
4544 | let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b)); |
4545 | assert_eq_m128i(r, e); |
4546 | } |
4547 | |
4548 | #[simd_test(enable = "sse2")] |
4549 | unsafe fn test_mm_cmpngt_sd() { |
4550 | let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4551 | let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); |
4552 | let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b)); |
4553 | assert_eq_m128i(r, e); |
4554 | } |
4555 | |
4556 | #[simd_test(enable = "sse2")] |
4557 | unsafe fn test_mm_cmpnge_sd() { |
4558 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4559 | let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); |
4560 | let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b)); |
4561 | assert_eq_m128i(r, e); |
4562 | } |
4563 | |
4564 | #[simd_test(enable = "sse2")] |
4565 | unsafe fn test_mm_cmpeq_pd() { |
4566 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4567 | let e = _mm_setr_epi64x(!0, 0); |
4568 | let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b)); |
4569 | assert_eq_m128i(r, e); |
4570 | } |
4571 | |
4572 | #[simd_test(enable = "sse2")] |
4573 | unsafe fn test_mm_cmplt_pd() { |
4574 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4575 | let e = _mm_setr_epi64x(0, !0); |
4576 | let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b)); |
4577 | assert_eq_m128i(r, e); |
4578 | } |
4579 | |
4580 | #[simd_test(enable = "sse2")] |
4581 | unsafe fn test_mm_cmple_pd() { |
4582 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4583 | let e = _mm_setr_epi64x(!0, !0); |
4584 | let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b)); |
4585 | assert_eq_m128i(r, e); |
4586 | } |
4587 | |
4588 | #[simd_test(enable = "sse2")] |
4589 | unsafe fn test_mm_cmpgt_pd() { |
4590 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4591 | let e = _mm_setr_epi64x(0, 0); |
4592 | let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b)); |
4593 | assert_eq_m128i(r, e); |
4594 | } |
4595 | |
4596 | #[simd_test(enable = "sse2")] |
4597 | unsafe fn test_mm_cmpge_pd() { |
4598 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4599 | let e = _mm_setr_epi64x(!0, 0); |
4600 | let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b)); |
4601 | assert_eq_m128i(r, e); |
4602 | } |
4603 | |
4604 | #[simd_test(enable = "sse2")] |
4605 | unsafe fn test_mm_cmpord_pd() { |
4606 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); |
4607 | let e = _mm_setr_epi64x(0, !0); |
4608 | let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b)); |
4609 | assert_eq_m128i(r, e); |
4610 | } |
4611 | |
4612 | #[simd_test(enable = "sse2")] |
4613 | unsafe fn test_mm_cmpunord_pd() { |
4614 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); |
4615 | let e = _mm_setr_epi64x(!0, 0); |
4616 | let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b)); |
4617 | assert_eq_m128i(r, e); |
4618 | } |
4619 | |
4620 | #[simd_test(enable = "sse2")] |
4621 | unsafe fn test_mm_cmpneq_pd() { |
4622 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
4623 | let e = _mm_setr_epi64x(!0, !0); |
4624 | let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b)); |
4625 | assert_eq_m128i(r, e); |
4626 | } |
4627 | |
4628 | #[simd_test(enable = "sse2")] |
4629 | unsafe fn test_mm_cmpnlt_pd() { |
4630 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
4631 | let e = _mm_setr_epi64x(0, 0); |
4632 | let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b)); |
4633 | assert_eq_m128i(r, e); |
4634 | } |
4635 | |
4636 | #[simd_test(enable = "sse2")] |
4637 | unsafe fn test_mm_cmpnle_pd() { |
4638 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4639 | let e = _mm_setr_epi64x(0, 0); |
4640 | let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b)); |
4641 | assert_eq_m128i(r, e); |
4642 | } |
4643 | |
4644 | #[simd_test(enable = "sse2")] |
4645 | unsafe fn test_mm_cmpngt_pd() { |
4646 | let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4647 | let e = _mm_setr_epi64x(0, !0); |
4648 | let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b)); |
4649 | assert_eq_m128i(r, e); |
4650 | } |
4651 | |
4652 | #[simd_test(enable = "sse2")] |
4653 | unsafe fn test_mm_cmpnge_pd() { |
4654 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4655 | let e = _mm_setr_epi64x(0, !0); |
4656 | let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b)); |
4657 | assert_eq_m128i(r, e); |
4658 | } |
4659 | |
4660 | #[simd_test(enable = "sse2")] |
4661 | unsafe fn test_mm_comieq_sd() { |
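// `comi*` and `ucomi*` both return 1 when the predicate holds and 0
// otherwise (including for an unordered NaN operand, as checked below);
// they differ only in whether a quiet NaN signals an invalid-operation
// exception.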
4662 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4663 | assert!(_mm_comieq_sd(a, b) != 0); |
4664 | |
4665 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0)); |
4666 | assert!(_mm_comieq_sd(a, b) == 0); |
4667 | } |
4668 | |
4669 | #[simd_test(enable = "sse2")] |
4670 | unsafe fn test_mm_comilt_sd() { |
4671 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4672 | assert!(_mm_comilt_sd(a, b) == 0); |
4673 | } |
4674 | |
4675 | #[simd_test(enable = "sse2")] |
4676 | unsafe fn test_mm_comile_sd() { |
4677 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4678 | assert!(_mm_comile_sd(a, b) != 0); |
4679 | } |
4680 | |
4681 | #[simd_test(enable = "sse2")] |
4682 | unsafe fn test_mm_comigt_sd() { |
4683 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4684 | assert!(_mm_comigt_sd(a, b) == 0); |
4685 | } |
4686 | |
4687 | #[simd_test(enable = "sse2")] |
4688 | unsafe fn test_mm_comige_sd() { |
4689 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4690 | assert!(_mm_comige_sd(a, b) != 0); |
4691 | } |
4692 | |
4693 | #[simd_test(enable = "sse2")] |
4694 | unsafe fn test_mm_comineq_sd() { |
4695 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4696 | assert!(_mm_comineq_sd(a, b) == 0); |
4697 | } |
4698 | |
4699 | #[simd_test(enable = "sse2")] |
4700 | unsafe fn test_mm_ucomieq_sd() { |
4701 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4702 | assert!(_mm_ucomieq_sd(a, b) != 0); |
4703 | |
4704 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0)); |
4705 | assert!(_mm_ucomieq_sd(a, b) == 0); |
4706 | } |
4707 | |
4708 | #[simd_test(enable = "sse2")] |
4709 | unsafe fn test_mm_ucomilt_sd() { |
4710 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4711 | assert!(_mm_ucomilt_sd(a, b) == 0); |
4712 | } |
4713 | |
4714 | #[simd_test(enable = "sse2")] |
4715 | unsafe fn test_mm_ucomile_sd() { |
4716 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4717 | assert!(_mm_ucomile_sd(a, b) != 0); |
4718 | } |
4719 | |
4720 | #[simd_test(enable = "sse2")] |
4721 | unsafe fn test_mm_ucomigt_sd() { |
4722 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4723 | assert!(_mm_ucomigt_sd(a, b) == 0); |
4724 | } |
4725 | |
4726 | #[simd_test(enable = "sse2")] |
4727 | unsafe fn test_mm_ucomige_sd() { |
4728 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4729 | assert!(_mm_ucomige_sd(a, b) != 0); |
4730 | } |
4731 | |
4732 | #[simd_test(enable = "sse2")] |
4733 | unsafe fn test_mm_ucomineq_sd() { |
4734 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4735 | assert!(_mm_ucomineq_sd(a, b) == 0); |
4736 | } |
4737 | |
4738 | #[simd_test(enable = "sse2")] |
4739 | unsafe fn test_mm_movemask_pd() { |
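// Gathers the sign bit of each f64 lane; lane 0 maps to bit 0.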
4740 | let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0)); |
4741 | assert_eq!(r, 0b01); |
4742 | |
4743 | let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0)); |
4744 | assert_eq!(r, 0b11); |
4745 | } |
4746 | |
4747 | #[repr(align(16))] |
4748 | struct Memory { |
4749 | data: [f64; 4], |
4750 | } |
4751 | |
4752 | #[simd_test(enable = "sse2")] |
4753 | unsafe fn test_mm_load_pd() { |
4754 | let mem = Memory { |
4755 | data: [1.0f64, 2.0, 3.0, 4.0], |
4756 | }; |
4757 | let vals = &mem.data; |
4758 | let d = vals.as_ptr(); |
4759 | |
4760 | let r = _mm_load_pd(d); |
4761 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); |
4762 | } |
4763 | |
4764 | #[simd_test(enable = "sse2")] |
4765 | unsafe fn test_mm_load_sd() { |
4766 | let a = 1.; |
4767 | let expected = _mm_setr_pd(a, 0.); |
4768 | let r = _mm_load_sd(&a); |
4769 | assert_eq_m128d(r, expected); |
4770 | } |
4771 | |
4772 | #[simd_test(enable = "sse2")] |
4773 | unsafe fn test_mm_loadh_pd() { |
4774 | let a = _mm_setr_pd(1., 2.); |
4775 | let b = 3.; |
4776 | let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.); |
4777 | let r = _mm_loadh_pd(a, &b); |
4778 | assert_eq_m128d(r, expected); |
4779 | } |
4780 | |
4781 | #[simd_test(enable = "sse2")] |
4782 | unsafe fn test_mm_loadl_pd() { |
4783 | let a = _mm_setr_pd(1., 2.); |
4784 | let b = 3.; |
4785 | let expected = _mm_setr_pd(3., get_m128d(a, 1)); |
4786 | let r = _mm_loadl_pd(a, &b); |
4787 | assert_eq_m128d(r, expected); |
4788 | } |
4789 | |
4790 | #[simd_test(enable = "sse2")] |
4791 | // Miri cannot support this until it is clear how it fits in the Rust memory model |
4792 | // (non-temporal store) |
4793 | #[cfg_attr(miri, ignore)] |
4794 | unsafe fn test_mm_stream_pd() { |
4795 | #[repr(align(128))] |
4796 | struct Memory { |
4797 | pub data: [f64; 2], |
4798 | } |
4799 | let a = _mm_set1_pd(7.0); |
4800 | let mut mem = Memory { data: [-1.0; 2] }; |
4801 | |
4802 | _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a); |
4803 | for i in 0..2 { |
4804 | assert_eq!(mem.data[i], get_m128d(a, i)); |
4805 | } |
4806 | } |
4807 | |
4808 | #[simd_test(enable = "sse2")] |
4809 | unsafe fn test_mm_store_sd() { |
4810 | let mut dest = 0.; |
4811 | let a = _mm_setr_pd(1., 2.); |
4812 | _mm_store_sd(&mut dest, a); |
4813 | assert_eq!(dest, _mm_cvtsd_f64(a)); |
4814 | } |
4815 | |
4816 | #[simd_test(enable = "sse2")] |
4817 | unsafe fn test_mm_store_pd() { |
4818 | let mut mem = Memory { data: [0.0f64; 4] }; |
4819 | let vals = &mut mem.data; |
4820 | let a = _mm_setr_pd(1.0, 2.0); |
4821 | let d = vals.as_mut_ptr(); |
4822 | |
4823 | _mm_store_pd(d, *black_box(&a)); |
4824 | assert_eq!(vals[0], 1.0); |
4825 | assert_eq!(vals[1], 2.0); |
4826 | } |
4827 | |
4828 | #[simd_test(enable = "sse2")] |
4829 | unsafe fn test_mm_storeu_pd() { |
4830 | let mut mem = Memory { data: [0.0f64; 4] }; |
4831 | let vals = &mut mem.data; |
4832 | let a = _mm_setr_pd(1.0, 2.0); |
4833 | |
4834 | let mut ofs = 0; |
4835 | let mut p = vals.as_mut_ptr(); |
4836 | |
4837 | // Make sure p is **not** aligned to 16-byte boundary |
4838 | if (p as usize) & 0xf == 0 { |
4839 | ofs = 1; |
4840 | p = p.add(1); |
4841 | } |
4842 | |
4843 | _mm_storeu_pd(p, *black_box(&a)); |
4844 | |
4845 | if ofs > 0 { |
4846 | assert_eq!(vals[ofs - 1], 0.0); |
4847 | } |
4848 | assert_eq!(vals[ofs + 0], 1.0); |
4849 | assert_eq!(vals[ofs + 1], 2.0); |
4850 | } |
4851 | |
4852 | #[simd_test(enable = "sse2")] |
4853 | unsafe fn test_mm_storeu_si16() { |
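// Stores only the low 16 bits of `a`; the remaining destination bytes
// are untouched, as with the si32/si64 variants below.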
4854 | let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
4855 | let mut r = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16); |
4856 | _mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a); |
4857 | let e = _mm_setr_epi16(1, 10, 11, 12, 13, 14, 15, 16); |
4858 | assert_eq_m128i(r, e); |
4859 | } |
4860 | |
4861 | #[simd_test(enable = "sse2")] |
4862 | unsafe fn test_mm_storeu_si32() { |
4863 | let a = _mm_setr_epi32(1, 2, 3, 4); |
4864 | let mut r = _mm_setr_epi32(5, 6, 7, 8); |
4865 | _mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a); |
4866 | let e = _mm_setr_epi32(1, 6, 7, 8); |
4867 | assert_eq_m128i(r, e); |
4868 | } |
4869 | |
4870 | #[simd_test(enable = "sse2")] |
4871 | unsafe fn test_mm_storeu_si64() { |
4872 | let a = _mm_setr_epi64x(1, 2); |
4873 | let mut r = _mm_setr_epi64x(3, 4); |
4874 | _mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a); |
4875 | let e = _mm_setr_epi64x(1, 4); |
4876 | assert_eq_m128i(r, e); |
4877 | } |
4878 | |
4879 | #[simd_test(enable = "sse2")] |
4880 | unsafe fn test_mm_store1_pd() { |
4881 | let mut mem = Memory { data: [0.0f64; 4] }; |
4882 | let vals = &mut mem.data; |
4883 | let a = _mm_setr_pd(1.0, 2.0); |
4884 | let d = vals.as_mut_ptr(); |
4885 | |
4886 | _mm_store1_pd(d, *black_box(&a)); |
4887 | assert_eq!(vals[0], 1.0); |
4888 | assert_eq!(vals[1], 1.0); |
4889 | } |
4890 | |
4891 | #[simd_test(enable = "sse2")] |
4892 | unsafe fn test_mm_store_pd1() { |
4893 | let mut mem = Memory { data: [0.0f64; 4] }; |
4894 | let vals = &mut mem.data; |
4895 | let a = _mm_setr_pd(1.0, 2.0); |
4896 | let d = vals.as_mut_ptr(); |
4897 | |
4898 | _mm_store_pd1(d, *black_box(&a)); |
4899 | assert_eq!(vals[0], 1.0); |
4900 | assert_eq!(vals[1], 1.0); |
4901 | } |
4902 | |
4903 | #[simd_test(enable = "sse2")] |
4904 | unsafe fn test_mm_storer_pd() { |
4905 | let mut mem = Memory { data: [0.0f64; 4] }; |
4906 | let vals = &mut mem.data; |
4907 | let a = _mm_setr_pd(1.0, 2.0); |
4908 | let d = vals.as_mut_ptr(); |
4909 | |
4910 | _mm_storer_pd(d, *black_box(&a)); |
4911 | assert_eq!(vals[0], 2.0); |
4912 | assert_eq!(vals[1], 1.0); |
4913 | } |
4914 | |
4915 | #[simd_test(enable = "sse2")] |
4916 | unsafe fn test_mm_storeh_pd() { |
4917 | let mut dest = 0.; |
4918 | let a = _mm_setr_pd(1., 2.); |
4919 | _mm_storeh_pd(&mut dest, a); |
4920 | assert_eq!(dest, get_m128d(a, 1)); |
4921 | } |
4922 | |
4923 | #[simd_test(enable = "sse2")] |
4924 | unsafe fn test_mm_storel_pd() { |
4925 | let mut dest = 0.; |
4926 | let a = _mm_setr_pd(1., 2.); |
4927 | _mm_storel_pd(&mut dest, a); |
4928 | assert_eq!(dest, _mm_cvtsd_f64(a)); |
4929 | } |
4930 | |
4931 | #[simd_test(enable = "sse2")] |
4932 | unsafe fn test_mm_loadr_pd() { |
4933 | let mut mem = Memory { |
4934 | data: [1.0f64, 2.0, 3.0, 4.0], |
4935 | }; |
4936 | let vals = &mut mem.data; |
4937 | let d = vals.as_ptr(); |
4938 | |
4939 | let r = _mm_loadr_pd(d); |
4940 | assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0)); |
4941 | } |
4942 | |
4943 | #[simd_test(enable = "sse2")] |
4944 | unsafe fn test_mm_loadu_pd() { |
4945 | let mut mem = Memory { |
4946 | data: [1.0f64, 2.0, 3.0, 4.0], |
4947 | }; |
4948 | let vals = &mut mem.data; |
4949 | let mut d = vals.as_ptr(); |
4950 | |
4951 | // make sure d is not aligned to 16-byte boundary |
4952 | let mut offset = 0; |
4953 | if (d as usize) & 0xf == 0 { |
4954 | offset = 1; |
4955 | d = d.add(offset); |
4956 | } |
4957 | |
4958 | let r = _mm_loadu_pd(d); |
4959 | let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64)); |
4960 | assert_eq_m128d(r, e); |
4961 | } |
4962 | |
4963 | #[simd_test(enable = "sse2")] |
4964 | unsafe fn test_mm_loadu_si16() { |
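// Loads 16 bits from unaligned memory and zeroes the remaining lanes,
// as with the si32/si64 variants below.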
4965 | let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
4966 | let r = _mm_loadu_si16(ptr::addr_of!(a) as *const _); |
4967 | assert_eq_m128i(r, _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0)); |
4968 | } |
4969 | |
4970 | #[simd_test(enable = "sse2")] |
4971 | unsafe fn test_mm_loadu_si32() { |
4972 | let a = _mm_setr_epi32(1, 2, 3, 4); |
4973 | let r = _mm_loadu_si32(ptr::addr_of!(a) as *const _); |
4974 | assert_eq_m128i(r, _mm_setr_epi32(1, 0, 0, 0)); |
4975 | } |
4976 | |
4977 | #[simd_test(enable = "sse2")] |
4978 | unsafe fn test_mm_loadu_si64() { |
4979 | let a = _mm_setr_epi64x(5, 6); |
4980 | let r = _mm_loadu_si64(ptr::addr_of!(a) as *const _); |
4981 | assert_eq_m128i(r, _mm_setr_epi64x(5, 0)); |
4982 | } |
4983 | |
4984 | #[simd_test(enable = "sse2")] |
4985 | unsafe fn test_mm_cvtpd_ps() { |
4986 | let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0)); |
4987 | assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0)); |
4988 | |
4989 | let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0)); |
4990 | assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0)); |
4991 | |
4992 | let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN)); |
4993 | assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0)); |
4994 | |
4995 | let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64)); |
4996 | assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0)); |
4997 | } |
4998 | |
4999 | #[simd_test(enable = "sse2")] |
5000 | unsafe fn test_mm_cvtps_pd() { |
5001 | let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0)); |
5002 | assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0)); |
5003 | |
5004 | let r = _mm_cvtps_pd(_mm_setr_ps( |
5005 | f32::MAX, |
5006 | f32::INFINITY, |
5007 | f32::NEG_INFINITY, |
5008 | f32::MIN, |
5009 | )); |
5010 | assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY)); |
5011 | } |
5012 | |
5013 | #[simd_test(enable = "sse2")] |
5014 | unsafe fn test_mm_cvtpd_epi32() { |
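// Values that do not fit in an i32 (including infinities and NaN)
// convert to the "integer indefinite" value i32::MIN; the upper two
// result lanes are always zeroed.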
5015 | let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0)); |
5016 | assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0)); |
5017 | |
5018 | let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0)); |
5019 | assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0)); |
5020 | |
5021 | let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN)); |
5022 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); |
5023 | |
5024 | let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY)); |
5025 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); |
5026 | |
5027 | let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN)); |
5028 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); |
5029 | } |
5030 | |
5031 | #[simd_test(enable = "sse2")] |
5032 | unsafe fn test_mm_cvtsd_si32() { |
5033 | let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0)); |
5034 | assert_eq!(r, -2); |
5035 | |
5036 | let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN)); |
5037 | assert_eq!(r, i32::MIN); |
5038 | |
5039 | let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN)); |
5040 | assert_eq!(r, i32::MIN); |
5041 | } |
5042 | |
5043 | #[simd_test(enable = "sse2")] |
5044 | unsafe fn test_mm_cvtsd_ss() { |
5045 | let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4); |
5046 | let b = _mm_setr_pd(2.0, -5.0); |
5047 | |
5048 | let r = _mm_cvtsd_ss(a, b); |
5049 | |
5050 | assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4)); |
5051 | |
5052 | let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY); |
5053 | let b = _mm_setr_pd(f64::INFINITY, -5.0); |
5054 | |
5055 | let r = _mm_cvtsd_ss(a, b); |
5056 | |
5057 | assert_eq_m128( |
5058 | r, |
5059 | _mm_setr_ps( |
5060 | f32::INFINITY, |
5061 | f32::NEG_INFINITY, |
5062 | f32::MAX, |
5063 | f32::NEG_INFINITY, |
5064 | ), |
5065 | ); |
5066 | } |
5067 | |
5068 | #[simd_test(enable = "sse2")] |
5069 | unsafe fn test_mm_cvtsd_f64() { |
5070 | let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2)); |
5071 | assert_eq!(r, -1.1); |
5072 | } |
5073 | |
5074 | #[simd_test(enable = "sse2")] |
5075 | unsafe fn test_mm_cvtss_sd() { |
5076 | let a = _mm_setr_pd(-1.1, 2.2); |
5077 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
5078 | |
5079 | let r = _mm_cvtss_sd(a, b); |
5080 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2)); |
5081 | |
5082 | let a = _mm_setr_pd(-1.1, f64::INFINITY); |
5083 | let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0); |
5084 | |
5085 | let r = _mm_cvtss_sd(a, b); |
5086 | assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY)); |
5087 | } |
5088 | |
5089 | #[simd_test(enable = "sse2")] |
5090 | unsafe fn test_mm_cvttpd_epi32() { |
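// The `tt` variants always truncate toward zero instead of using the
// current MXCSR rounding mode; out-of-range and NaN inputs still produce
// i32::MIN.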
5091 | let a = _mm_setr_pd(-1.1, 2.2); |
5092 | let r = _mm_cvttpd_epi32(a); |
5093 | assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0)); |
5094 | |
5095 | let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); |
5096 | let r = _mm_cvttpd_epi32(a); |
5097 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); |
5098 | } |
5099 | |
5100 | #[simd_test(enable = "sse2")] |
5101 | unsafe fn test_mm_cvttsd_si32() { |
5102 | let a = _mm_setr_pd(-1.1, 2.2); |
5103 | let r = _mm_cvttsd_si32(a); |
5104 | assert_eq!(r, -1); |
5105 | |
5106 | let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); |
5107 | let r = _mm_cvttsd_si32(a); |
5108 | assert_eq!(r, i32::MIN); |
5109 | } |
5110 | |
5111 | #[simd_test(enable = "sse2")] |
5112 | unsafe fn test_mm_cvttps_epi32() { |
5113 | let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6); |
5114 | let r = _mm_cvttps_epi32(a); |
5115 | assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6)); |
5116 | |
5117 | let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX); |
5118 | let r = _mm_cvttps_epi32(a); |
5119 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN)); |
5120 | } |
5121 | |
5122 | #[simd_test(enable = "sse2")] |
5123 | unsafe fn test_mm_set_sd() { |
5124 | let r = _mm_set_sd(-1.0_f64); |
5125 | assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64)); |
5126 | } |
5127 | |
5128 | #[simd_test(enable = "sse2")] |
5129 | unsafe fn test_mm_set1_pd() { |
5130 | let r = _mm_set1_pd(-1.0_f64); |
5131 | assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64)); |
5132 | } |
5133 | |
5134 | #[simd_test(enable = "sse2")] |
5135 | unsafe fn test_mm_set_pd1() { |
5136 | let r = _mm_set_pd1(-2.0_f64); |
5137 | assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64)); |
5138 | } |
5139 | |
5140 | #[simd_test(enable = "sse2")] |
5141 | unsafe fn test_mm_set_pd() { |
5142 | let r = _mm_set_pd(1.0_f64, 5.0_f64); |
5143 | assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64)); |
5144 | } |
5145 | |
5146 | #[simd_test(enable = "sse2")] |
5147 | unsafe fn test_mm_setr_pd() { |
5148 | let r = _mm_setr_pd(1.0_f64, -5.0_f64); |
5149 | assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64)); |
5150 | } |
5151 | |
5152 | #[simd_test(enable = "sse2")] |
5153 | unsafe fn test_mm_setzero_pd() { |
5154 | let r = _mm_setzero_pd(); |
5155 | assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64)); |
5156 | } |
5157 | |
5158 | #[simd_test(enable = "sse2")] |
5159 | unsafe fn test_mm_load1_pd() { |
5160 | let d = -5.0; |
5161 | let r = _mm_load1_pd(&d); |
5162 | assert_eq_m128d(r, _mm_setr_pd(d, d)); |
5163 | } |
5164 | |
5165 | #[simd_test(enable = "sse2")] |
5166 | unsafe fn test_mm_load_pd1() { |
5167 | let d = -5.0; |
5168 | let r = _mm_load_pd1(&d); |
5169 | assert_eq_m128d(r, _mm_setr_pd(d, d)); |
5170 | } |
5171 | |
5172 | #[simd_test(enable = "sse2")] |
5173 | unsafe fn test_mm_unpackhi_pd() { |
5174 | let a = _mm_setr_pd(1.0, 2.0); |
5175 | let b = _mm_setr_pd(3.0, 4.0); |
5176 | let r = _mm_unpackhi_pd(a, b); |
5177 | assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0)); |
5178 | } |
5179 | |
5180 | #[simd_test(enable = "sse2")] |
5181 | unsafe fn test_mm_unpacklo_pd() { |
5182 | let a = _mm_setr_pd(1.0, 2.0); |
5183 | let b = _mm_setr_pd(3.0, 4.0); |
5184 | let r = _mm_unpacklo_pd(a, b); |
5185 | assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0)); |
5186 | } |
5187 | |
5188 | #[simd_test(enable = "sse2")] |
5189 | unsafe fn test_mm_shuffle_pd() { |
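// Only the two low immediate bits matter: bit 0 selects the lane taken
// from `a` for result lane 0, bit 1 the lane taken from `b` for result
// lane 1; 0b00 therefore yields (a[0], b[0]) = (1.0, 3.0).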
5190 | let a = _mm_setr_pd(1., 2.); |
5191 | let b = _mm_setr_pd(3., 4.); |
5192 | let expected = _mm_setr_pd(1., 3.); |
5193 | let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b); |
5194 | assert_eq_m128d(r, expected); |
5195 | } |
5196 | |
5197 | #[simd_test(enable = "sse2")] |
5198 | unsafe fn test_mm_move_sd() { |
5199 | let a = _mm_setr_pd(1., 2.); |
5200 | let b = _mm_setr_pd(3., 4.); |
5201 | let expected = _mm_setr_pd(3., 2.); |
5202 | let r = _mm_move_sd(a, b); |
5203 | assert_eq_m128d(r, expected); |
5204 | } |
5205 | |
5206 | #[simd_test(enable = "sse2")] |
5207 | unsafe fn test_mm_castpd_ps() { |
5208 | let a = _mm_set1_pd(0.); |
5209 | let expected = _mm_set1_ps(0.); |
5210 | let r = _mm_castpd_ps(a); |
5211 | assert_eq_m128(r, expected); |
5212 | } |
5213 | |
5214 | #[simd_test(enable = "sse2")] |
5215 | unsafe fn test_mm_castpd_si128() { |
5216 | let a = _mm_set1_pd(0.); |
5217 | let expected = _mm_set1_epi64x(0); |
5218 | let r = _mm_castpd_si128(a); |
5219 | assert_eq_m128i(r, expected); |
5220 | } |
5221 | |
5222 | #[simd_test(enable = "sse2")] |
5223 | unsafe fn test_mm_castps_pd() { |
5224 | let a = _mm_set1_ps(0.); |
5225 | let expected = _mm_set1_pd(0.); |
5226 | let r = _mm_castps_pd(a); |
5227 | assert_eq_m128d(r, expected); |
5228 | } |
5229 | |
5230 | #[simd_test(enable = "sse2")] |
5231 | unsafe fn test_mm_castps_si128() { |
5232 | let a = _mm_set1_ps(0.); |
5233 | let expected = _mm_set1_epi32(0); |
5234 | let r = _mm_castps_si128(a); |
5235 | assert_eq_m128i(r, expected); |
5236 | } |
5237 | |
5238 | #[simd_test(enable = "sse2")] |
5239 | unsafe fn test_mm_castsi128_pd() { |
5240 | let a = _mm_set1_epi64x(0); |
5241 | let expected = _mm_set1_pd(0.); |
5242 | let r = _mm_castsi128_pd(a); |
5243 | assert_eq_m128d(r, expected); |
5244 | } |
5245 | |
5246 | #[simd_test(enable = "sse2")] |
5247 | unsafe fn test_mm_castsi128_ps() { |
5248 | let a = _mm_set1_epi32(0); |
5249 | let expected = _mm_set1_ps(0.); |
5250 | let r = _mm_castsi128_ps(a); |
5251 | assert_eq_m128(r, expected); |
5252 | } |
5253 | } |
5254 |