//! Streaming SIMD Extensions 2 (SSE2)

#[cfg(test)]
use stdarch_test::assert_instr;

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    intrinsics::sqrtf64,
    mem, ptr,
};

/// Provides a hint to the processor that the code sequence is a spin-wait loop.
///
/// This can help improve the performance and power consumption of spin-wait
/// loops.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_pause)
#[inline]
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_pause() {
    // note: `pause` is guaranteed to be interpreted as a `nop` by CPUs without
    // the SSE2 target-feature - therefore it does not require any target features
    pause()
}

/// Invalidates and flushes the cache line that contains `p` from all levels of
/// the cache hierarchy.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clflush)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(clflush))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_clflush(p: *const u8) {
    clflush(p)
}

/// Performs a serializing operation on all load-from-memory instructions
/// that were issued prior to this instruction.
///
/// Guarantees that every load instruction that precedes, in program order, the
/// load fence instruction is globally visible before any load instruction
/// which follows the fence in program order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(lfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_lfence() {
    lfence()
}

/// Performs a serializing operation on all load-from-memory and store-to-memory
/// instructions that were issued prior to this instruction.
///
/// Guarantees that every memory access that precedes, in program order, the
/// memory fence instruction is globally visible before any memory instruction
/// which follows the fence in program order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mfence() {
    mfence()
}

/// Adds packed 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi8)
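///
/// # Examples
///
/// A minimal illustrative sketch (not part of the original source); it assumes
/// an `x86_64` target, where SSE2 is part of the baseline instruction set:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// // Addition wraps in each signed 8-bit lane: 100 + 100 = 200, i.e. -56 as i8.
/// let r = _mm_add_epi8(_mm_set1_epi8(100), _mm_set1_epi8(100));
/// assert_eq!(_mm_cvtsi128_si32(r) as i8, -56);
/// # }
/// ```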
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_add(a.as_i8x16(), b.as_i8x16())) }
}

/// Adds packed 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_add(a.as_i16x8(), b.as_i16x8())) }
}

/// Adds packed 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_add(a.as_i32x4(), b.as_i32x4())) }
}

/// Adds packed 64-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_add(a.as_i64x2(), b.as_i64x2())) }
}

/// Adds packed 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi8)
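///
/// # Examples
///
/// An illustrative sketch (not from the original source), assuming an `x86_64`
/// target where SSE2 is baseline; contrast this with the wrapping `_mm_add_epi8`:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// // Saturating: 100 + 100 clamps to i8::MAX instead of wrapping to -56.
/// let r = _mm_adds_epi8(_mm_set1_epi8(100), _mm_set1_epi8(100));
/// assert_eq!(_mm_cvtsi128_si32(r) as i8, 127);
/// # }
/// ```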
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16())) }
}

/// Adds packed 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8())) }
}

/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16())) }
}

/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8())) }
}

/// Averages packed unsigned 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu8)
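///
/// # Examples
///
/// A sketch (not from the original source) showing that the average rounds up
/// on ties; assumes an `x86_64` target where SSE2 is baseline:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// // Each lane computes (a + b + 1) >> 1, so avg(1, 2) = 2.
/// let r = _mm_avg_epu8(_mm_set1_epi8(1), _mm_set1_epi8(2));
/// assert_eq!(_mm_cvtsi128_si32(r) as u8, 2);
/// # }
/// ```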
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: u16x16 = simd_cast::<_, u16x16>(a.as_u8x16());
        let b: u16x16 = simd_cast::<_, u16x16>(b.as_u8x16());
        let r: u16x16 = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1));
        transmute(simd_cast::<_, u8x16>(r))
    }
}

/// Averages packed unsigned 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: u32x8 = simd_cast::<_, u32x8>(a.as_u16x8());
        let b: u32x8 = simd_cast::<_, u32x8>(b.as_u16x8());
        let r: u32x8 = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1));
        transmute(simd_cast::<_, u16x8>(r))
    }
}

/// Multiplies and then horizontally adds signed 16-bit integers in `a` and `b`.
///
/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Horizontally adds adjacent pairs of
/// intermediate 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd_epi16)
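///
/// # Examples
///
/// An illustrative sketch (not part of the original source; assumes an
/// `x86_64` target, where SSE2 is baseline):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// let a = _mm_setr_epi16(1, 2, 3, 4, 0, 0, 0, 0);
/// let b = _mm_setr_epi16(5, 6, 7, 8, 0, 0, 0, 0);
/// // Lane 0 of the result is 1 * 5 + 2 * 6 = 17.
/// let r = _mm_madd_epi16(a, b);
/// assert_eq!(_mm_cvtsi128_si32(r), 17);
/// # }
/// ```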
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) }
}

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// maximum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: i16x8 = a.as_i16x8();
        let b: i16x8 = b.as_i16x8();
        transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
    }
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed maximum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: u8x16 = a.as_u8x16();
        let b: u8x16 = b.as_u8x16();
        transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
    }
}

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// minimum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: i16x8 = a.as_i16x8();
        let b: i16x8 = b.as_i16x8();
        transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
    }
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed minimum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: u8x16 = a.as_u8x16();
        let b: u8x16 = b.as_u8x16();
        transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
    }
}

/// Multiplies the packed 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epi16)
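///
/// # Examples
///
/// A sketch (not from the original source) of retrieving the high half of a
/// 16 x 16 -> 32-bit product; assumes `x86_64` with baseline SSE2:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// // 0x4000 * 4 = 0x10000, whose high 16 bits are 0x0001.
/// let r = _mm_mulhi_epi16(_mm_set1_epi16(0x4000), _mm_set1_epi16(4));
/// assert_eq!(_mm_cvtsi128_si32(r) as i16, 1);
/// # }
/// ```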
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: i32x8 = simd_cast::<_, i32x8>(a.as_i16x8());
        let b: i32x8 = simd_cast::<_, i32x8>(b.as_i16x8());
        let r: i32x8 = simd_shr(simd_mul(a, b), i32x8::splat(16));
        transmute(simd_cast::<i32x8, i16x8>(r))
    }
}

/// Multiplies the packed unsigned 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: u32x8 = simd_cast::<_, u32x8>(a.as_u16x8());
        let b: u32x8 = simd_cast::<_, u32x8>(b.as_u16x8());
        let r: u32x8 = simd_shr(simd_mul(a, b), u32x8::splat(16));
        transmute(simd_cast::<u32x8, u16x8>(r))
    }
}

/// Multiplies the packed 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// low 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmullw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) }
}

/// Multiplies the low unsigned 32-bit integers from each packed 64-bit element
/// in `a` and `b`.
///
/// Returns the unsigned 64-bit results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_epu32)
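///
/// # Examples
///
/// An illustrative sketch (not part of the original source; assumes `x86_64`,
/// where SSE2 and `_mm_cvtsi128_si64` are both available):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// // Only the low 32 bits of each 64-bit lane are multiplied, but the full
/// // 64-bit product is kept: 0xFFFF_FFFF * 2 = 0x1_FFFF_FFFE.
/// let r = _mm_mul_epu32(_mm_set_epi64x(0, 0xFFFF_FFFF), _mm_set_epi64x(0, 2));
/// assert_eq!(_mm_cvtsi128_si64(r) as u64, 0x1_FFFF_FFFE);
/// # }
/// ```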
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmuludq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: u64x2 = a.as_u64x2();
        let b: u64x2 = b.as_u64x2();
        let mask: u64x2 = u64x2::splat(u32::MAX.into());
        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
    }
}

/// Sums the absolute differences of packed unsigned 8-bit integers.
///
/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
/// and `b`, then horizontally sums each consecutive 8 differences to produce
/// two unsigned 16-bit integers, and packs these unsigned 16-bit integers in
/// the low 16 bits of the 64-bit elements returned.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sad_epu8)
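///
/// # Examples
///
/// A sketch (not from the original source) of the sum-of-absolute-differences
/// reduction; assumes `x86_64` with baseline SSE2:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// // |1 - 3| = 2 per byte; eight bytes per half sum to 16 in each 64-bit lane.
/// let r = _mm_sad_epu8(_mm_set1_epi8(1), _mm_set1_epi8(3));
/// assert_eq!(_mm_cvtsi128_si64(r), 16);
/// # }
/// ```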
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psadbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(psadbw(a.as_u8x16(), b.as_u8x16())) }
}

/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) }
}

/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) }
}

/// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) }
}

/// Subtracts packed 64-bit integers in `b` from packed 64-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) }
}

/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`
/// using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16())) }
}

/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`
/// using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8())) }
}

/// Subtracts packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
/// integers in `a` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16())) }
}

/// Subtracts packed unsigned 16-bit integers in `b` from packed unsigned 16-bit
/// integers in `a` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8())) }
}

/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_si128)
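///
/// # Examples
///
/// An illustrative sketch (not part of the original source; assumes `x86_64`,
/// where SSE2 is baseline). Note the shift is by whole *bytes*, not bits:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// let a = _mm_setr_epi32(1, 2, 3, 4);
/// // Shifting left by 4 bytes zeroes lane 0 and moves the old lane 0 up.
/// let r = _mm_slli_si128(a, 4);
/// assert_eq!(_mm_cvtsi128_si32(r), 0);
/// # }
/// ```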
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { _mm_slli_si128_impl::<IMM8>(a) }
}

/// Implementation detail: converts the immediate argument of the
/// `_mm_slli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        if shift > 15 { i } else { 16 - shift + i }
    }
    transmute::<i8x16, _>(simd_shuffle!(
        i8x16::ZERO,
        a.as_i8x16(),
        [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    ))
}

/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bslli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        _mm_slli_si128_impl::<IMM8>(a)
    }
}

/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bsrli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        _mm_srli_si128_impl::<IMM8>(a)
    }
}

/// Shifts packed 16-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 16 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
        }
    }
}

/// Shifts packed 16-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psllw(a.as_i16x8(), count.as_i16x8())) }
}

/// Shifts packed 32-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 32 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
        }
    }
}

/// Shifts packed 32-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(pslld(a.as_i32x4(), count.as_i32x4())) }
}

/// Shifts packed 64-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 64 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
        }
    }
}

/// Shifts packed 64-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psllq(a.as_i64x2(), count.as_i64x2())) }
}

/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi16)
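///
/// # Examples
///
/// A sketch (not from the original source) contrasting the arithmetic shift
/// with a logical one; assumes `x86_64` with baseline SSE2:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// // The sign bit is replicated, so -16 >> 2 = -4 in every lane.
/// let r = _mm_srai_epi16(_mm_set1_epi16(-16), 2);
/// assert_eq!(_mm_cvtsi128_si32(r) as i16, -4);
/// # }
/// ```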
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16))) }
}

/// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psraw(a.as_i16x8(), count.as_i16x8())) }
}

/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31)))) }
}

/// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psrad(a.as_i32x4(), count.as_i32x4())) }
}

/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { _mm_srli_si128_impl::<IMM8>(a) }
}

/// Implementation detail: converts the immediate argument of the
/// `_mm_srli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    const fn mask(shift: i32, i: u32) -> u32 {
        if (shift as u32) > 15 {
            i + 16
        } else {
            i + (shift as u32)
        }
    }
    let x: i8x16 = simd_shuffle!(
        a.as_i8x16(),
        i8x16::ZERO,
        [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    );
    transmute(x)
}

/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 16 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
        }
    }
}

/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psrlw(a.as_i16x8(), count.as_i16x8())) }
}

/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 32 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
        }
    }
}

/// Shifts packed 32-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psrld(a.as_i32x4(), count.as_i32x4())) }
}

/// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 64 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
        }
    }
}

/// Shifts packed 64-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psrlq(a.as_i64x2(), count.as_i64x2())) }
}

/// Computes the bitwise AND of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_and(a, b) }
}

/// Computes the bitwise NOT of 128 bits (representing integer data) in `a` and
/// then AND with `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_si128)
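///
/// # Examples
///
/// An illustrative sketch (not part of the original source; assumes `x86_64`,
/// where SSE2 is baseline). `(!a) & b` clears in `b` every bit set in `a`:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// let a = _mm_set1_epi8(0b0000_1111);
/// let b = _mm_set1_epi8(0b0011_1100);
/// let r = _mm_andnot_si128(a, b);
/// assert_eq!(_mm_cvtsi128_si32(r) as u8, 0b0011_0000);
/// # }
/// ```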
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_and(simd_xor(_mm_set1_epi8(-1), a), b) }
}

/// Computes the bitwise OR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_or(a, b) }
}

/// Computes the bitwise XOR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_xor(a, b) }
}

/// Compares packed 8-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
}

/// Compares packed 16-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
}

/// Compares packed 32-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
}

/// Compares packed 8-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8)
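///
/// # Examples
///
/// A sketch (not from the original source) showing the all-ones/all-zeros mask
/// convention of the comparison intrinsics; assumes `x86_64` with baseline SSE2:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// // Lanes where the predicate holds are set to all ones (-1), others to 0.
/// let r = _mm_cmpgt_epi8(_mm_set1_epi8(5), _mm_set1_epi8(3));
/// assert_eq!(_mm_cvtsi128_si32(r), -1);
/// # }
/// ```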
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
}

/// Compares packed 16-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
}

/// Compares packed 32-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
}

/// Compares packed 8-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
}

/// Compares packed 16-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
}

/// Compares packed 32-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
}

/// Converts the lower two packed 32-bit integers in `a` to packed
/// double-precision (64-bit) floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_pd)
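///
/// # Examples
///
/// An illustrative sketch (not part of the original source; assumes `x86_64`,
/// where SSE2 is baseline). Only the two low lanes participate:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// let r = _mm_cvtepi32_pd(_mm_setr_epi32(1, 2, 3, 4));
/// // Lane 0 of the result is 1 converted to f64; the values 3 and 4 are dropped.
/// assert_eq!(_mm_cvtsd_f64(r), 1.0);
/// # }
/// ```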
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
    unsafe {
        let a: i32x4 = a.as_i32x4();
        simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1]))
    }
}

/// Returns `a` with its lower element replaced by `b` after converting it to
/// an `f64`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsi2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
    unsafe { simd_insert!(a, 0, b as f64) }
}

/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit)
/// floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
    unsafe { transmute(simd_cast::<_, f32x4>(a.as_i32x4())) }
}

/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtps_epi32(a: __m128) -> __m128i {
    unsafe { transmute(cvtps2dq(a)) }
}

/// Returns a vector whose lowest element is `a` and all higher elements are
/// `0`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsi32_si128(a: i32) -> __m128i {
    unsafe { transmute(i32x4::new(a, 0, 0, 0)) }
}

/// Returns the lowest element of `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi128_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
    unsafe { simd_extract!(a.as_i32x4(), 0) }
}

/// Sets packed 64-bit integers with the supplied values, from highest to
/// lowest.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi64x)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
    unsafe { transmute(i64x2::new(e0, e1)) }
}

/// Sets packed 32-bit integers with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    unsafe { transmute(i32x4::new(e0, e1, e2, e3)) }
}

/// Sets packed 16-bit integers with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    unsafe { transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) }
}

/// Sets packed 8-bit integers with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    unsafe {
        #[rustfmt::skip]
        transmute(i8x16::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
        ))
    }
}

/// Broadcasts 64-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi64x)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi64x(a: i64) -> __m128i {
    _mm_set_epi64x(a, a)
}

/// Broadcasts 32-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi32(a: i32) -> __m128i {
    _mm_set_epi32(a, a, a, a)
}

/// Broadcasts 16-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi16(a: i16) -> __m128i {
    _mm_set_epi16(a, a, a, a, a, a, a, a)
}

/// Broadcasts 8-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi8(a: i8) -> __m128i {
    _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
}

/// Sets packed 32-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi32)
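///
/// # Examples
///
/// A sketch (not from the original source) contrasting `setr` with `set`
/// argument order; assumes `x86_64` with baseline SSE2:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// // `setr` takes arguments in memory order: the first argument is lane 0...
/// assert_eq!(_mm_cvtsi128_si32(_mm_setr_epi32(1, 2, 3, 4)), 1);
/// // ...whereas `_mm_set_epi32` puts its *last* argument in lane 0.
/// assert_eq!(_mm_cvtsi128_si32(_mm_set_epi32(1, 2, 3, 4)), 4);
/// # }
/// ```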
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    _mm_set_epi32(e0, e1, e2, e3)
}

/// Sets packed 16-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
}

/// Sets packed 8-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    #[rustfmt::skip]
    _mm_set_epi8(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    )
}

/// Returns a vector with all elements set to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setzero_si128() -> __m128i {
    const { unsafe { mem::zeroed() } }
}

/// Loads a 64-bit integer from memory into the first element of the returned
/// vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
    _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
}

/// Loads 128-bits of integer data from memory into a new vector.
///
/// `mem_addr` must be aligned on a 16-byte boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
    *mem_addr
}

/// Loads 128-bits of integer data from memory into a new vector.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si128)
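///
/// # Examples
///
/// An illustrative sketch (not part of the original source; assumes `x86_64`,
/// where SSE2 is baseline). The load may start at any byte offset:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// let bytes = [1u8; 17];
/// // Reading 16 bytes from offset 1 is fine; no 16-byte alignment is needed.
/// let r = _mm_loadu_si128(bytes.as_ptr().add(1) as *const __m128i);
/// assert_eq!(_mm_cvtsi128_si32(r), 0x0101_0101);
/// # }
/// ```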
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
    let mut dst: __m128i = _mm_undefined_si128();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m128i>(),
    );
    dst
}
1270 | |
1271 | /// Conditionally store 8-bit integer elements from `a` into memory using |
1272 | /// `mask`. |
1273 | /// |
1274 | /// Elements are not stored when the highest bit is not set in the |
1275 | /// corresponding element. |
1276 | /// |
1277 | /// `mem_addr` should correspond to a 128-bit memory location and does not need |
1278 | /// to be aligned on any particular boundary. |
1279 | /// |
1280 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskmoveu_si128) |
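///
/// # Examples
///
/// A minimal usage sketch (x86_64 only): only bytes whose `mask` lane has the
/// high bit set are written; the remaining destination bytes are untouched.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// let data = _mm_set1_epi8(7);
/// // High bit set in the first lane only.
/// let mask = _mm_setr_epi8(-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
/// let mut buf = [1i8; 16];
/// unsafe { _mm_maskmoveu_si128(data, mask, buf.as_mut_ptr()) };
/// assert_eq!(buf[0], 7);
/// assert!(buf[1..].iter().all(|&b| b == 1));
/// # }
/// ```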
1281 | #[inline ] |
1282 | #[target_feature (enable = "sse2" )] |
1283 | #[cfg_attr (test, assert_instr(maskmovdqu))] |
1284 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1285 | pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) { |
1286 | maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr) |
1287 | } |
1288 | |
1289 | /// Stores 128-bits of integer data from `a` into memory. |
1290 | /// |
1291 | /// `mem_addr` must be aligned on a 16-byte boundary. |
1292 | /// |
1293 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_si128) |
1294 | #[inline ] |
1295 | #[target_feature (enable = "sse2" )] |
1296 | #[cfg_attr (test, assert_instr(movaps))] |
1297 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1298 | pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) { |
1299 | *mem_addr = a; |
1300 | } |
1301 | |
1302 | /// Stores 128-bits of integer data from `a` into memory. |
1303 | /// |
1304 | /// `mem_addr` does not need to be aligned on any particular boundary. |
1305 | /// |
1306 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si128) |
1307 | #[inline ] |
1308 | #[target_feature (enable = "sse2" )] |
1309 | #[cfg_attr (test, assert_instr(movups))] // FIXME movdqu expected |
1310 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1311 | pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) { |
mem_addr.write_unaligned(a);
1313 | } |
1314 | |
1315 | /// Stores the lower 64-bit integer `a` to a memory location. |
1316 | /// |
1317 | /// `mem_addr` does not need to be aligned on any particular boundary. |
1318 | /// |
1319 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_epi64) |
1320 | #[inline ] |
1321 | #[target_feature (enable = "sse2" )] |
1322 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1323 | pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) { |
ptr::copy_nonoverlapping(ptr::addr_of!(a) as *const u8, mem_addr as *mut u8, 8);
1325 | } |
1326 | |
1327 | /// Stores a 128-bit integer vector to a 128-bit aligned memory location. |
1328 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be |
1329 | /// used again soon). |
1330 | /// |
1331 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si128) |
1332 | /// |
1333 | /// # Safety of non-temporal stores |
1334 | /// |
1335 | /// After using this intrinsic, but before any other access to the memory that this intrinsic |
1336 | /// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In |
1337 | /// particular, functions that call this intrinsic should generally call `_mm_sfence` before they |
1338 | /// return. |
1339 | /// |
1340 | /// See [`_mm_sfence`] for details. |
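///
/// # Examples
///
/// A minimal sketch of the required pattern (x86_64 only): the non-temporal
/// store is followed by `_mm_sfence` before the memory is accessed again.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// #[repr(align(16))]
/// struct Aligned([i32; 4]);
///
/// let mut dst = Aligned([0; 4]);
/// let v = _mm_setr_epi32(1, 2, 3, 4);
/// unsafe {
///     _mm_stream_si128(dst.0.as_mut_ptr() as *mut __m128i, v);
///     _mm_sfence(); // required before any other access to `dst`
/// }
/// assert_eq!(dst.0, [1, 2, 3, 4]);
/// # }
/// ```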
1341 | #[inline ] |
1342 | #[target_feature (enable = "sse2" )] |
1343 | #[cfg_attr (test, assert_instr(movntdq))] |
1344 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1345 | pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) { |
1346 | crate::arch::asm!( |
1347 | vps!("movntdq" , ",{a}" ), |
1348 | p = in(reg) mem_addr, |
1349 | a = in(xmm_reg) a, |
1350 | options(nostack, preserves_flags), |
1351 | ); |
1352 | } |
1353 | |
1354 | /// Stores a 32-bit integer value in the specified memory location. |
1355 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be |
1356 | /// used again soon). |
1357 | /// |
1358 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si32) |
1359 | /// |
1360 | /// # Safety of non-temporal stores |
1361 | /// |
1362 | /// After using this intrinsic, but before any other access to the memory that this intrinsic |
1363 | /// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In |
1364 | /// particular, functions that call this intrinsic should generally call `_mm_sfence` before they |
1365 | /// return. |
1366 | /// |
1367 | /// See [`_mm_sfence`] for details. |
1368 | #[inline ] |
1369 | #[target_feature (enable = "sse2" )] |
1370 | #[cfg_attr (test, assert_instr(movnti))] |
1371 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1372 | pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) { |
1373 | crate::arch::asm!( |
1374 | vps!("movnti" , ",{a:e}" ), // `:e` for 32bit value |
1375 | p = in(reg) mem_addr, |
1376 | a = in(reg) a, |
1377 | options(nostack, preserves_flags), |
1378 | ); |
1379 | } |
1380 | |
1381 | /// Returns a vector where the low element is extracted from `a` and its upper |
1382 | /// element is zero. |
1383 | /// |
1384 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_epi64) |
1385 | #[inline ] |
1386 | #[target_feature (enable = "sse2" )] |
1387 | // FIXME movd on msvc, movd on i686 |
1388 | #[cfg_attr ( |
1389 | all(test, not(target_env = "msvc" ), target_arch = "x86_64" ), |
1390 | assert_instr(movq) |
1391 | )] |
1392 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1393 | pub fn _mm_move_epi64(a: __m128i) -> __m128i { |
1394 | unsafe { |
1395 | let r: i64x2 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 2]); |
transmute(r)
1397 | } |
1398 | } |
1399 | |
1400 | /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers |
1401 | /// using signed saturation. |
1402 | /// |
1403 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi16) |
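///
/// # Examples
///
/// A minimal usage sketch (x86_64 only): values outside the `i8` range
/// saturate instead of wrapping, and the result holds the lanes of `a`
/// followed by the lanes of `b`.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// let a = _mm_setr_epi16(1000, -1000, 5, -5, 0, 0, 0, 0);
/// let b = _mm_set1_epi16(0);
/// let packed = _mm_packs_epi16(a, b);
/// let mut out = [0i8; 16];
/// unsafe { _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, packed) };
/// assert_eq!(&out[..4], &[127, -128, 5, -5]);
/// assert!(out[4..].iter().all(|&x| x == 0));
/// # }
/// ```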
1404 | #[inline ] |
1405 | #[target_feature (enable = "sse2" )] |
1406 | #[cfg_attr (test, assert_instr(packsswb))] |
1407 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1408 | pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i { |
unsafe { transmute(packsswb(a.as_i16x8(), b.as_i16x8())) }
1410 | } |
1411 | |
1412 | /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers |
1413 | /// using signed saturation. |
1414 | /// |
1415 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi32) |
1416 | #[inline ] |
1417 | #[target_feature (enable = "sse2" )] |
1418 | #[cfg_attr (test, assert_instr(packssdw))] |
1419 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1420 | pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i { |
unsafe { transmute(packssdw(a.as_i32x4(), b.as_i32x4())) }
1422 | } |
1423 | |
1424 | /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers |
1425 | /// using unsigned saturation. |
1426 | /// |
1427 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi16) |
1428 | #[inline ] |
1429 | #[target_feature (enable = "sse2" )] |
1430 | #[cfg_attr (test, assert_instr(packuswb))] |
1431 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1432 | pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i { |
unsafe { transmute(packuswb(a.as_i16x8(), b.as_i16x8())) }
1434 | } |
1435 | |
/// Returns the 16-bit element of `a` selected by `IMM8`, zero-extended to an
/// `i32`.
1437 | /// |
1438 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi16) |
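///
/// # Examples
///
/// A minimal usage sketch (x86_64 only): the lane index is supplied as a
/// const generic.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// let a = _mm_setr_epi16(10, 11, 12, 13, 14, 15, 16, 17);
/// // Lane 3 holds the value 13.
/// assert_eq!(_mm_extract_epi16::<3>(a), 13);
/// # }
/// ```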
1439 | #[inline ] |
1440 | #[target_feature (enable = "sse2" )] |
1441 | #[cfg_attr (test, assert_instr(pextrw, IMM8 = 7))] |
1442 | #[rustc_legacy_const_generics (1)] |
1443 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1444 | pub fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 { |
1445 | static_assert_uimm_bits!(IMM8, 3); |
1446 | unsafe { simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32 } |
1447 | } |
1448 | |
/// Returns a new vector where the element of `a` selected by `IMM8` is
/// replaced with `i`.
1450 | /// |
1451 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi16) |
1452 | #[inline ] |
1453 | #[target_feature (enable = "sse2" )] |
1454 | #[cfg_attr (test, assert_instr(pinsrw, IMM8 = 7))] |
1455 | #[rustc_legacy_const_generics (2)] |
1456 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1457 | pub fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i { |
1458 | static_assert_uimm_bits!(IMM8, 3); |
unsafe { transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16)) }
1460 | } |
1461 | |
/// Returns a mask of the most significant bit of each element in `a`.
///
/// The mask is stored in the 16 least significant bits of the return value.
/// All other bits are set to `0`.
1463 | /// |
1464 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_epi8) |
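///
/// # Examples
///
/// A minimal usage sketch (x86_64 only): each byte whose sign bit is set
/// contributes a `1` to the corresponding bit of the mask.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// let a = _mm_setr_epi8(-1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
/// assert_eq!(_mm_movemask_epi8(a), 0b101);
/// # }
/// ```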
1465 | #[inline ] |
1466 | #[target_feature (enable = "sse2" )] |
1467 | #[cfg_attr (test, assert_instr(pmovmskb))] |
1468 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1469 | pub fn _mm_movemask_epi8(a: __m128i) -> i32 { |
1470 | unsafe { |
1471 | let z: i8x16 = i8x16::ZERO; |
let m: i8x16 = simd_lt(a.as_i8x16(), z);
1473 | simd_bitmask::<_, u16>(m) as u32 as i32 |
1474 | } |
1475 | } |
1476 | |
1477 | /// Shuffles 32-bit integers in `a` using the control in `IMM8`. |
1478 | /// |
1479 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi32) |
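///
/// # Examples
///
/// A minimal usage sketch (x86_64 only): each 2-bit field of `IMM8` selects a
/// source lane, from the lowest result lane to the highest, so `0b00_00_00_00`
/// broadcasts lane 0 and `0b00_01_10_11` reverses the lanes.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// let a = _mm_setr_epi32(10, 20, 30, 40);
/// let mut out = [0i32; 4];
///
/// let broadcast = _mm_shuffle_epi32::<0b00_00_00_00>(a);
/// unsafe { _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, broadcast) };
/// assert_eq!(out, [10, 10, 10, 10]);
///
/// let reversed = _mm_shuffle_epi32::<0b00_01_10_11>(a);
/// unsafe { _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, reversed) };
/// assert_eq!(out, [40, 30, 20, 10]);
/// # }
/// ```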
1480 | #[inline ] |
1481 | #[target_feature (enable = "sse2" )] |
1482 | #[cfg_attr (test, assert_instr(pshufd, IMM8 = 9))] |
1483 | #[rustc_legacy_const_generics (1)] |
1484 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1485 | pub fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i { |
1486 | static_assert_uimm_bits!(IMM8, 8); |
1487 | unsafe { |
1488 | let a: i32x4 = a.as_i32x4(); |
1489 | let x: i32x4 = simd_shuffle!( |
1490 | a, |
1491 | a, |
1492 | [ |
1493 | IMM8 as u32 & 0b11, |
1494 | (IMM8 as u32 >> 2) & 0b11, |
1495 | (IMM8 as u32 >> 4) & 0b11, |
1496 | (IMM8 as u32 >> 6) & 0b11, |
1497 | ], |
1498 | ); |
transmute(x)
1500 | } |
1501 | } |
1502 | |
1503 | /// Shuffles 16-bit integers in the high 64 bits of `a` using the control in |
1504 | /// `IMM8`. |
1505 | /// |
/// Puts the results in the high 64 bits of the returned vector, with the low
/// 64 bits copied from `a`.
1508 | /// |
1509 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflehi_epi16) |
1510 | #[inline ] |
1511 | #[target_feature (enable = "sse2" )] |
1512 | #[cfg_attr (test, assert_instr(pshufhw, IMM8 = 9))] |
1513 | #[rustc_legacy_const_generics (1)] |
1514 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1515 | pub fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i { |
1516 | static_assert_uimm_bits!(IMM8, 8); |
1517 | unsafe { |
1518 | let a: i16x8 = a.as_i16x8(); |
1519 | let x: i16x8 = simd_shuffle!( |
1520 | a, |
1521 | a, |
1522 | [ |
1523 | 0, |
1524 | 1, |
1525 | 2, |
1526 | 3, |
1527 | (IMM8 as u32 & 0b11) + 4, |
1528 | ((IMM8 as u32 >> 2) & 0b11) + 4, |
1529 | ((IMM8 as u32 >> 4) & 0b11) + 4, |
1530 | ((IMM8 as u32 >> 6) & 0b11) + 4, |
1531 | ], |
1532 | ); |
transmute(x)
1534 | } |
1535 | } |
1536 | |
1537 | /// Shuffles 16-bit integers in the low 64 bits of `a` using the control in |
1538 | /// `IMM8`. |
1539 | /// |
/// Puts the results in the low 64 bits of the returned vector, with the high
/// 64 bits copied from `a`.
1542 | /// |
1543 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflelo_epi16) |
1544 | #[inline ] |
1545 | #[target_feature (enable = "sse2" )] |
1546 | #[cfg_attr (test, assert_instr(pshuflw, IMM8 = 9))] |
1547 | #[rustc_legacy_const_generics (1)] |
1548 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1549 | pub fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i { |
1550 | static_assert_uimm_bits!(IMM8, 8); |
1551 | unsafe { |
1552 | let a: i16x8 = a.as_i16x8(); |
1553 | let x: i16x8 = simd_shuffle!( |
1554 | a, |
1555 | a, |
1556 | [ |
1557 | IMM8 as u32 & 0b11, |
1558 | (IMM8 as u32 >> 2) & 0b11, |
1559 | (IMM8 as u32 >> 4) & 0b11, |
1560 | (IMM8 as u32 >> 6) & 0b11, |
1561 | 4, |
1562 | 5, |
1563 | 6, |
1564 | 7, |
1565 | ], |
1566 | ); |
transmute(x)
1568 | } |
1569 | } |
1570 | |
/// Unpacks and interleaves 8-bit integers from the high half of `a` and `b`.
1572 | /// |
1573 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi8) |
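///
/// # Examples
///
/// A minimal usage sketch (x86_64 only): the high eight bytes of `a` and `b`
/// are interleaved, starting with the byte from `a`.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let b = _mm_set1_epi8(-1);
/// let hi = _mm_unpackhi_epi8(a, b);
/// let mut out = [0i8; 16];
/// unsafe { _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, hi) };
/// assert_eq!(out, [8, -1, 9, -1, 10, -1, 11, -1, 12, -1, 13, -1, 14, -1, 15, -1]);
/// # }
/// ```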
1574 | #[inline ] |
1575 | #[target_feature (enable = "sse2" )] |
1576 | #[cfg_attr (test, assert_instr(punpckhbw))] |
1577 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1578 | pub fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i { |
1579 | unsafe { |
transmute::<i8x16, _>(simd_shuffle!(
1581 | a.as_i8x16(), |
1582 | b.as_i8x16(), |
1583 | [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31], |
1584 | )) |
1585 | } |
1586 | } |
1587 | |
/// Unpacks and interleaves 16-bit integers from the high half of `a` and `b`.
1589 | /// |
1590 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi16) |
1591 | #[inline ] |
1592 | #[target_feature (enable = "sse2" )] |
1593 | #[cfg_attr (test, assert_instr(punpckhwd))] |
1594 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1595 | pub fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i { |
1596 | unsafe { |
1597 | let x: i16x8 = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]); |
transmute::<i16x8, _>(x)
1599 | } |
1600 | } |
1601 | |
/// Unpacks and interleaves 32-bit integers from the high half of `a` and `b`.
1603 | /// |
1604 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi32) |
1605 | #[inline ] |
1606 | #[target_feature (enable = "sse2" )] |
1607 | #[cfg_attr (test, assert_instr(unpckhps))] |
1608 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1609 | pub fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i { |
unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) }
1611 | } |
1612 | |
/// Unpacks and interleaves 64-bit integers from the high half of `a` and `b`.
1614 | /// |
1615 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi64) |
1616 | #[inline ] |
1617 | #[target_feature (enable = "sse2" )] |
1618 | #[cfg_attr (test, assert_instr(unpckhpd))] |
1619 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1620 | pub fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i { |
unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3])) }
1622 | } |
1623 | |
/// Unpacks and interleaves 8-bit integers from the low half of `a` and `b`.
1625 | /// |
1626 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi8) |
1627 | #[inline ] |
1628 | #[target_feature (enable = "sse2" )] |
1629 | #[cfg_attr (test, assert_instr(punpcklbw))] |
1630 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1631 | pub fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i { |
1632 | unsafe { |
transmute::<i8x16, _>(simd_shuffle!(
1634 | a.as_i8x16(), |
1635 | b.as_i8x16(), |
1636 | [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23], |
1637 | )) |
1638 | } |
1639 | } |
1640 | |
/// Unpacks and interleaves 16-bit integers from the low half of `a` and `b`.
1642 | /// |
1643 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi16) |
1644 | #[inline ] |
1645 | #[target_feature (enable = "sse2" )] |
1646 | #[cfg_attr (test, assert_instr(punpcklwd))] |
1647 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1648 | pub fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i { |
1649 | unsafe { |
1650 | let x: i16x8 = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]); |
transmute::<i16x8, _>(x)
1652 | } |
1653 | } |
1654 | |
/// Unpacks and interleaves 32-bit integers from the low half of `a` and `b`.
1656 | /// |
1657 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi32) |
1658 | #[inline ] |
1659 | #[target_feature (enable = "sse2" )] |
1660 | #[cfg_attr (test, assert_instr(unpcklps))] |
1661 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1662 | pub fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i { |
unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) }
1664 | } |
1665 | |
/// Unpacks and interleaves 64-bit integers from the low half of `a` and `b`.
1667 | /// |
1668 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi64) |
1669 | #[inline ] |
1670 | #[target_feature (enable = "sse2" )] |
1671 | #[cfg_attr (all(test, not(target_env = "msvc" )), assert_instr(movlhps))] |
1672 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1673 | pub fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i { |
unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2])) }
1675 | } |
1676 | |
1677 | /// Returns a new vector with the low element of `a` replaced by the sum of the |
1678 | /// low elements of `a` and `b`. |
1679 | /// |
1680 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_sd) |
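///
/// # Examples
///
/// A minimal usage sketch (x86_64 only): only the low lane is summed; the
/// high lane is copied from `a`.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// let a = _mm_setr_pd(1.0, 10.0);
/// let b = _mm_setr_pd(2.0, 20.0);
/// let r = _mm_add_sd(a, b);
/// let mut out = [0.0f64; 2];
/// unsafe { _mm_storeu_pd(out.as_mut_ptr(), r) };
/// assert_eq!(out, [3.0, 10.0]);
/// # }
/// ```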
1681 | #[inline ] |
1682 | #[target_feature (enable = "sse2" )] |
1683 | #[cfg_attr (test, assert_instr(addsd))] |
1684 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1685 | pub fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d { |
1686 | unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) } |
1687 | } |
1688 | |
1689 | /// Adds packed double-precision (64-bit) floating-point elements in `a` and |
1690 | /// `b`. |
1691 | /// |
1692 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_pd) |
1693 | #[inline ] |
1694 | #[target_feature (enable = "sse2" )] |
1695 | #[cfg_attr (test, assert_instr(addpd))] |
1696 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1697 | pub fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { simd_add(a, b) }
1699 | } |
1700 | |
/// Returns a new vector with the low element of `a` replaced by the result of
/// dividing the lower element of `a` by the lower element of `b`.
1703 | /// |
1704 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_sd) |
1705 | #[inline ] |
1706 | #[target_feature (enable = "sse2" )] |
1707 | #[cfg_attr (test, assert_instr(divsd))] |
1708 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1709 | pub fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d { |
1710 | unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) } |
1711 | } |
1712 | |
/// Divides packed double-precision (64-bit) floating-point elements in `a` by
/// the corresponding packed elements in `b`.
1715 | /// |
1716 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_pd) |
1717 | #[inline ] |
1718 | #[target_feature (enable = "sse2" )] |
1719 | #[cfg_attr (test, assert_instr(divpd))] |
1720 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1721 | pub fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { simd_div(a, b) }
1723 | } |
1724 | |
1725 | /// Returns a new vector with the low element of `a` replaced by the maximum |
1726 | /// of the lower elements of `a` and `b`. |
1727 | /// |
1728 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_sd) |
1729 | #[inline ] |
1730 | #[target_feature (enable = "sse2" )] |
1731 | #[cfg_attr (test, assert_instr(maxsd))] |
1732 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1733 | pub fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d { |
1734 | unsafe { maxsd(a, b) } |
1735 | } |
1736 | |
1737 | /// Returns a new vector with the maximum values from corresponding elements in |
1738 | /// `a` and `b`. |
1739 | /// |
1740 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_pd) |
1741 | #[inline ] |
1742 | #[target_feature (enable = "sse2" )] |
1743 | #[cfg_attr (test, assert_instr(maxpd))] |
1744 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1745 | pub fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d { |
1746 | unsafe { maxpd(a, b) } |
1747 | } |
1748 | |
1749 | /// Returns a new vector with the low element of `a` replaced by the minimum |
1750 | /// of the lower elements of `a` and `b`. |
1751 | /// |
1752 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_sd) |
1753 | #[inline ] |
1754 | #[target_feature (enable = "sse2" )] |
1755 | #[cfg_attr (test, assert_instr(minsd))] |
1756 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1757 | pub fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d { |
1758 | unsafe { minsd(a, b) } |
1759 | } |
1760 | |
1761 | /// Returns a new vector with the minimum values from corresponding elements in |
1762 | /// `a` and `b`. |
1763 | /// |
1764 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_pd) |
1765 | #[inline ] |
1766 | #[target_feature (enable = "sse2" )] |
1767 | #[cfg_attr (test, assert_instr(minpd))] |
1768 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1769 | pub fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d { |
1770 | unsafe { minpd(a, b) } |
1771 | } |
1772 | |
1773 | /// Returns a new vector with the low element of `a` replaced by multiplying the |
1774 | /// low elements of `a` and `b`. |
1775 | /// |
1776 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_sd) |
1777 | #[inline ] |
1778 | #[target_feature (enable = "sse2" )] |
1779 | #[cfg_attr (test, assert_instr(mulsd))] |
1780 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1781 | pub fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d { |
1782 | unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) } |
1783 | } |
1784 | |
1785 | /// Multiplies packed double-precision (64-bit) floating-point elements in `a` |
1786 | /// and `b`. |
1787 | /// |
1788 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_pd) |
1789 | #[inline ] |
1790 | #[target_feature (enable = "sse2" )] |
1791 | #[cfg_attr (test, assert_instr(mulpd))] |
1792 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1793 | pub fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { simd_mul(a, b) }
1795 | } |
1796 | |
/// Returns a new vector with the low element of `a` replaced by the square
/// root of the lower element of `b`.
1799 | /// |
1800 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_sd) |
1801 | #[inline ] |
1802 | #[target_feature (enable = "sse2" )] |
1803 | #[cfg_attr (test, assert_instr(sqrtsd))] |
1804 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1805 | pub fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d { |
1806 | unsafe { simd_insert!(a, 0, sqrtf64(_mm_cvtsd_f64(b))) } |
1807 | } |
1808 | |
1809 | /// Returns a new vector with the square root of each of the values in `a`. |
1810 | /// |
1811 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_pd) |
1812 | #[inline ] |
1813 | #[target_feature (enable = "sse2" )] |
1814 | #[cfg_attr (test, assert_instr(sqrtpd))] |
1815 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1816 | pub fn _mm_sqrt_pd(a: __m128d) -> __m128d { |
1817 | unsafe { simd_fsqrt(a) } |
1818 | } |
1819 | |
/// Returns a new vector with the low element of `a` replaced by subtracting
/// the low element of `b` from the low element of `a`.
1822 | /// |
1823 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_sd) |
1824 | #[inline ] |
1825 | #[target_feature (enable = "sse2" )] |
1826 | #[cfg_attr (test, assert_instr(subsd))] |
1827 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1828 | pub fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d { |
1829 | unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) } |
1830 | } |
1831 | |
/// Subtracts packed double-precision (64-bit) floating-point elements in `b`
/// from `a`.
1834 | /// |
1835 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_pd) |
1836 | #[inline ] |
1837 | #[target_feature (enable = "sse2" )] |
1838 | #[cfg_attr (test, assert_instr(subpd))] |
1839 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1840 | pub fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { simd_sub(a, b) }
1842 | } |
1843 | |
1844 | /// Computes the bitwise AND of packed double-precision (64-bit) floating-point |
1845 | /// elements in `a` and `b`. |
1846 | /// |
1847 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_pd) |
1848 | #[inline ] |
1849 | #[target_feature (enable = "sse2" )] |
1850 | #[cfg_attr (test, assert_instr(andps))] |
1851 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1852 | pub fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d { |
1853 | unsafe { |
let a: __m128i = transmute(a);
let b: __m128i = transmute(b);
transmute(_mm_and_si128(a, b))
1857 | } |
1858 | } |
1859 | |
1860 | /// Computes the bitwise NOT of `a` and then AND with `b`. |
1861 | /// |
1862 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_pd) |
1863 | #[inline ] |
1864 | #[target_feature (enable = "sse2" )] |
1865 | #[cfg_attr (test, assert_instr(andnps))] |
1866 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1867 | pub fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d { |
1868 | unsafe { |
let a: __m128i = transmute(a);
let b: __m128i = transmute(b);
transmute(_mm_andnot_si128(a, b))
1872 | } |
1873 | } |
1874 | |
1875 | /// Computes the bitwise OR of `a` and `b`. |
1876 | /// |
1877 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_pd) |
1878 | #[inline ] |
1879 | #[target_feature (enable = "sse2" )] |
1880 | #[cfg_attr (test, assert_instr(orps))] |
1881 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1882 | pub fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d { |
1883 | unsafe { |
let a: __m128i = transmute(a);
let b: __m128i = transmute(b);
transmute(_mm_or_si128(a, b))
1887 | } |
1888 | } |
1889 | |
1890 | /// Computes the bitwise XOR of `a` and `b`. |
1891 | /// |
1892 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_pd) |
1893 | #[inline ] |
1894 | #[target_feature (enable = "sse2" )] |
1895 | #[cfg_attr (test, assert_instr(xorps))] |
1896 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1897 | pub fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d { |
1898 | unsafe { |
let a: __m128i = transmute(a);
let b: __m128i = transmute(b);
transmute(_mm_xor_si128(a, b))
1902 | } |
1903 | } |
1904 | |
1905 | /// Returns a new vector with the low element of `a` replaced by the equality |
1906 | /// comparison of the lower elements of `a` and `b`. |
1907 | /// |
1908 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_sd) |
1909 | #[inline ] |
1910 | #[target_feature (enable = "sse2" )] |
1911 | #[cfg_attr (test, assert_instr(cmpeqsd))] |
1912 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1913 | pub fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmpsd(a, b, 0) }
1915 | } |
1916 | |
1917 | /// Returns a new vector with the low element of `a` replaced by the less-than |
1918 | /// comparison of the lower elements of `a` and `b`. |
1919 | /// |
1920 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_sd) |
1921 | #[inline ] |
1922 | #[target_feature (enable = "sse2" )] |
1923 | #[cfg_attr (test, assert_instr(cmpltsd))] |
1924 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1925 | pub fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmpsd(a, b, 1) }
1927 | } |
1928 | |
1929 | /// Returns a new vector with the low element of `a` replaced by the |
1930 | /// less-than-or-equal comparison of the lower elements of `a` and `b`. |
1931 | /// |
1932 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_sd) |
1933 | #[inline ] |
1934 | #[target_feature (enable = "sse2" )] |
1935 | #[cfg_attr (test, assert_instr(cmplesd))] |
1936 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1937 | pub fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmpsd(a, b, 2) }
1939 | } |
1940 | |
1941 | /// Returns a new vector with the low element of `a` replaced by the |
1942 | /// greater-than comparison of the lower elements of `a` and `b`. |
1943 | /// |
1944 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_sd) |
1945 | #[inline ] |
1946 | #[target_feature (enable = "sse2" )] |
1947 | #[cfg_attr (test, assert_instr(cmpltsd))] |
1948 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1949 | pub fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d { |
1950 | unsafe { simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64)) } |
1951 | } |
1952 | |
1953 | /// Returns a new vector with the low element of `a` replaced by the |
1954 | /// greater-than-or-equal comparison of the lower elements of `a` and `b`. |
1955 | /// |
1956 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_sd) |
1957 | #[inline ] |
1958 | #[target_feature (enable = "sse2" )] |
1959 | #[cfg_attr (test, assert_instr(cmplesd))] |
1960 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1961 | pub fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d { |
1962 | unsafe { simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64)) } |
1963 | } |
1964 | |
/// Returns a new vector with the low element of `a` replaced by the result
/// of checking the lower elements of `a` and `b` for `NaN`. The result is
/// `0xFFFFFFFFFFFFFFFF` if neither is `NaN`, and `0` otherwise.
1969 | /// |
1970 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_sd) |
1971 | #[inline ] |
1972 | #[target_feature (enable = "sse2" )] |
1973 | #[cfg_attr (test, assert_instr(cmpordsd))] |
1974 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1975 | pub fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmpsd(a, b, 7) }
1977 | } |
1978 | |
/// Returns a new vector with the low element of `a` replaced by the result
/// of checking the lower elements of `a` and `b` for `NaN`. The result is
/// `0xFFFFFFFFFFFFFFFF` if either is `NaN`, and `0` otherwise.
1982 | /// |
1983 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_sd) |
1984 | #[inline ] |
1985 | #[target_feature (enable = "sse2" )] |
1986 | #[cfg_attr (test, assert_instr(cmpunordsd))] |
1987 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1988 | pub fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmpsd(a, b, 3) }
1990 | } |
1991 | |
1992 | /// Returns a new vector with the low element of `a` replaced by the not-equal |
1993 | /// comparison of the lower elements of `a` and `b`. |
1994 | /// |
1995 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_sd) |
1996 | #[inline ] |
1997 | #[target_feature (enable = "sse2" )] |
1998 | #[cfg_attr (test, assert_instr(cmpneqsd))] |
1999 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2000 | pub fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmpsd(a, b, 4) }
2002 | } |
2003 | |
2004 | /// Returns a new vector with the low element of `a` replaced by the |
2005 | /// not-less-than comparison of the lower elements of `a` and `b`. |
2006 | /// |
2007 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_sd) |
2008 | #[inline ] |
2009 | #[target_feature (enable = "sse2" )] |
2010 | #[cfg_attr (test, assert_instr(cmpnltsd))] |
2011 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2012 | pub fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmpsd(a, b, 5) }
2014 | } |
2015 | |
2016 | /// Returns a new vector with the low element of `a` replaced by the |
2017 | /// not-less-than-or-equal comparison of the lower elements of `a` and `b`. |
2018 | /// |
2019 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_sd) |
2020 | #[inline ] |
2021 | #[target_feature (enable = "sse2" )] |
2022 | #[cfg_attr (test, assert_instr(cmpnlesd))] |
2023 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2024 | pub fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmpsd(a, b, 6) }
2026 | } |
2027 | |
2028 | /// Returns a new vector with the low element of `a` replaced by the |
2029 | /// not-greater-than comparison of the lower elements of `a` and `b`. |
2030 | /// |
2031 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_sd) |
2032 | #[inline ] |
2033 | #[target_feature (enable = "sse2" )] |
2034 | #[cfg_attr (test, assert_instr(cmpnltsd))] |
2035 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2036 | pub fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d { |
2037 | unsafe { simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64)) } |
2038 | } |
2039 | |
2040 | /// Returns a new vector with the low element of `a` replaced by the |
2041 | /// not-greater-than-or-equal comparison of the lower elements of `a` and `b`. |
2042 | /// |
2043 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_sd) |
2044 | #[inline ] |
2045 | #[target_feature (enable = "sse2" )] |
2046 | #[cfg_attr (test, assert_instr(cmpnlesd))] |
2047 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2048 | pub fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d { |
2049 | unsafe { simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64)) } |
2050 | } |
2051 | |
2052 | /// Compares corresponding elements in `a` and `b` for equality. |
2053 | /// |
2054 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_pd) |
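///
/// # Examples
///
/// A minimal usage sketch (x86_64 only): each result lane is all ones when
/// the comparison holds and all zeros otherwise, which `_mm_movemask_pd` can
/// summarize as a bitmask.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// let a = _mm_setr_pd(1.0, 2.0);
/// let b = _mm_setr_pd(1.0, 3.0);
/// let eq = _mm_cmpeq_pd(a, b);
/// // Only the low lane compared equal.
/// assert_eq!(_mm_movemask_pd(eq), 0b01);
/// # }
/// ```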
2055 | #[inline ] |
2056 | #[target_feature (enable = "sse2" )] |
2057 | #[cfg_attr (test, assert_instr(cmpeqpd))] |
2058 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2059 | pub fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmppd(a, b, 0) }
2061 | } |
2062 | |
2063 | /// Compares corresponding elements in `a` and `b` for less-than. |
2064 | /// |
2065 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_pd) |
2066 | #[inline ] |
2067 | #[target_feature (enable = "sse2" )] |
2068 | #[cfg_attr (test, assert_instr(cmpltpd))] |
2069 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2070 | pub fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmppd(a, b, 1) }
2072 | } |
2073 | |
/// Compares corresponding elements in `a` and `b` for less-than-or-equal.
2075 | /// |
2076 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_pd) |
2077 | #[inline ] |
2078 | #[target_feature (enable = "sse2" )] |
2079 | #[cfg_attr (test, assert_instr(cmplepd))] |
2080 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2081 | pub fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmppd(a, b, 2) }
2083 | } |
2084 | |
2085 | /// Compares corresponding elements in `a` and `b` for greater-than. |
2086 | /// |
2087 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_pd) |
2088 | #[inline ] |
2089 | #[target_feature (enable = "sse2" )] |
2090 | #[cfg_attr (test, assert_instr(cmpltpd))] |
2091 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2092 | pub fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d { |
_mm_cmplt_pd(b, a)
2094 | } |
2095 | |
2096 | /// Compares corresponding elements in `a` and `b` for greater-than-or-equal. |
2097 | /// |
2098 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_pd) |
2099 | #[inline ] |
2100 | #[target_feature (enable = "sse2" )] |
2101 | #[cfg_attr (test, assert_instr(cmplepd))] |
2102 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2103 | pub fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d { |
_mm_cmple_pd(b, a)
2105 | } |
2106 | |
2107 | /// Compares corresponding elements in `a` and `b` to see if neither is `NaN`. |
2108 | /// |
2109 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_pd) |
2110 | #[inline ] |
2111 | #[target_feature (enable = "sse2" )] |
2112 | #[cfg_attr (test, assert_instr(cmpordpd))] |
2113 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2114 | pub fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmppd(a, b, 7) }
2116 | } |
2117 | |
2118 | /// Compares corresponding elements in `a` and `b` to see if either is `NaN`. |
2119 | /// |
2120 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_pd) |
2121 | #[inline ] |
2122 | #[target_feature (enable = "sse2" )] |
2123 | #[cfg_attr (test, assert_instr(cmpunordpd))] |
2124 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2125 | pub fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmppd(a, b, 3) }
2127 | } |
2128 | |
2129 | /// Compares corresponding elements in `a` and `b` for not-equal. |
2130 | /// |
2131 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_pd) |
2132 | #[inline ] |
2133 | #[target_feature (enable = "sse2" )] |
2134 | #[cfg_attr (test, assert_instr(cmpneqpd))] |
2135 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2136 | pub fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmppd(a, b, 4) }
2138 | } |
2139 | |
2140 | /// Compares corresponding elements in `a` and `b` for not-less-than. |
2141 | /// |
2142 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_pd) |
2143 | #[inline ] |
2144 | #[target_feature (enable = "sse2" )] |
2145 | #[cfg_attr (test, assert_instr(cmpnltpd))] |
2146 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2147 | pub fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmppd(a, b, 5) }
2149 | } |
2150 | |
2151 | /// Compares corresponding elements in `a` and `b` for not-less-than-or-equal. |
2152 | /// |
2153 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_pd) |
2154 | #[inline ] |
2155 | #[target_feature (enable = "sse2" )] |
2156 | #[cfg_attr (test, assert_instr(cmpnlepd))] |
2157 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2158 | pub fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmppd(a, b, 6) }
2160 | } |
2161 | |
2162 | /// Compares corresponding elements in `a` and `b` for not-greater-than. |
2163 | /// |
2164 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_pd) |
2165 | #[inline ] |
2166 | #[target_feature (enable = "sse2" )] |
2167 | #[cfg_attr (test, assert_instr(cmpnltpd))] |
2168 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2169 | pub fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d { |
_mm_cmpnlt_pd(b, a)
2171 | } |
2172 | |
2173 | /// Compares corresponding elements in `a` and `b` for |
2174 | /// not-greater-than-or-equal. |
2175 | /// |
2176 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_pd) |
2177 | #[inline ] |
2178 | #[target_feature (enable = "sse2" )] |
2179 | #[cfg_attr (test, assert_instr(cmpnlepd))] |
2180 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2181 | pub fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d { |
_mm_cmpnle_pd(b, a)
2183 | } |
2184 | |
2185 | /// Compares the lower element of `a` and `b` for equality. |
2186 | /// |
2187 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comieq_sd) |
2188 | #[inline ] |
2189 | #[target_feature (enable = "sse2" )] |
2190 | #[cfg_attr (test, assert_instr(comisd))] |
2191 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2192 | pub fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 { |
2193 | unsafe { comieqsd(a, b) } |
2194 | } |
2195 | |
2196 | /// Compares the lower element of `a` and `b` for less-than. |
2197 | /// |
2198 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comilt_sd) |
2199 | #[inline ] |
2200 | #[target_feature (enable = "sse2" )] |
2201 | #[cfg_attr (test, assert_instr(comisd))] |
2202 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2203 | pub fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 { |
2204 | unsafe { comiltsd(a, b) } |
2205 | } |
2206 | |
2207 | /// Compares the lower element of `a` and `b` for less-than-or-equal. |
2208 | /// |
2209 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comile_sd) |
2210 | #[inline ] |
2211 | #[target_feature (enable = "sse2" )] |
2212 | #[cfg_attr (test, assert_instr(comisd))] |
2213 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2214 | pub fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 { |
2215 | unsafe { comilesd(a, b) } |
2216 | } |
2217 | |
2218 | /// Compares the lower element of `a` and `b` for greater-than. |
2219 | /// |
2220 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comigt_sd) |
2221 | #[inline ] |
2222 | #[target_feature (enable = "sse2" )] |
2223 | #[cfg_attr (test, assert_instr(comisd))] |
2224 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2225 | pub fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 { |
2226 | unsafe { comigtsd(a, b) } |
2227 | } |
2228 | |
2229 | /// Compares the lower element of `a` and `b` for greater-than-or-equal. |
2230 | /// |
2231 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comige_sd) |
2232 | #[inline ] |
2233 | #[target_feature (enable = "sse2" )] |
2234 | #[cfg_attr (test, assert_instr(comisd))] |
2235 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2236 | pub fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 { |
2237 | unsafe { comigesd(a, b) } |
2238 | } |
2239 | |
2240 | /// Compares the lower element of `a` and `b` for not-equal. |
2241 | /// |
2242 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comineq_sd) |
2243 | #[inline ] |
2244 | #[target_feature (enable = "sse2" )] |
2245 | #[cfg_attr (test, assert_instr(comisd))] |
2246 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2247 | pub fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 { |
2248 | unsafe { comineqsd(a, b) } |
2249 | } |
2250 | |
2251 | /// Compares the lower element of `a` and `b` for equality. |
2252 | /// |
2253 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomieq_sd) |
2254 | #[inline ] |
2255 | #[target_feature (enable = "sse2" )] |
2256 | #[cfg_attr (test, assert_instr(ucomisd))] |
2257 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2258 | pub fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 { |
2259 | unsafe { ucomieqsd(a, b) } |
2260 | } |
2261 | |
2262 | /// Compares the lower element of `a` and `b` for less-than. |
2263 | /// |
2264 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomilt_sd) |
2265 | #[inline ] |
2266 | #[target_feature (enable = "sse2" )] |
2267 | #[cfg_attr (test, assert_instr(ucomisd))] |
2268 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2269 | pub fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 { |
2270 | unsafe { ucomiltsd(a, b) } |
2271 | } |
2272 | |
2273 | /// Compares the lower element of `a` and `b` for less-than-or-equal. |
2274 | /// |
2275 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomile_sd) |
2276 | #[inline ] |
2277 | #[target_feature (enable = "sse2" )] |
2278 | #[cfg_attr (test, assert_instr(ucomisd))] |
2279 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2280 | pub fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 { |
2281 | unsafe { ucomilesd(a, b) } |
2282 | } |
2283 | |
2284 | /// Compares the lower element of `a` and `b` for greater-than. |
2285 | /// |
2286 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomigt_sd) |
2287 | #[inline ] |
2288 | #[target_feature (enable = "sse2" )] |
2289 | #[cfg_attr (test, assert_instr(ucomisd))] |
2290 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2291 | pub fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 { |
2292 | unsafe { ucomigtsd(a, b) } |
2293 | } |
2294 | |
2295 | /// Compares the lower element of `a` and `b` for greater-than-or-equal. |
2296 | /// |
2297 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomige_sd) |
2298 | #[inline ] |
2299 | #[target_feature (enable = "sse2" )] |
2300 | #[cfg_attr (test, assert_instr(ucomisd))] |
2301 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2302 | pub fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 { |
2303 | unsafe { ucomigesd(a, b) } |
2304 | } |
2305 | |
2306 | /// Compares the lower element of `a` and `b` for not-equal. |
2307 | /// |
2308 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomineq_sd) |
2309 | #[inline ] |
2310 | #[target_feature (enable = "sse2" )] |
2311 | #[cfg_attr (test, assert_instr(ucomisd))] |
2312 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2313 | pub fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 { |
2314 | unsafe { ucomineqsd(a, b) } |
2315 | } |
2316 | |
/// Converts packed double-precision (64-bit) floating-point elements in `a` to
/// packed single-precision (32-bit) floating-point elements. The upper two
/// elements of the result are zeroed.
2319 | /// |
2320 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_ps) |
2321 | #[inline ] |
2322 | #[target_feature (enable = "sse2" )] |
2323 | #[cfg_attr (test, assert_instr(cvtpd2ps))] |
2324 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2325 | pub fn _mm_cvtpd_ps(a: __m128d) -> __m128 { |
2326 | unsafe { |
2327 | let r: f32x2 = simd_cast::<_, f32x2>(a.as_f64x2()); |
2328 | let zero: f32x2 = f32x2::ZERO; |
transmute::<f32x4, _>(simd_shuffle!(r, zero, [0, 1, 2, 3]))
2330 | } |
2331 | } |
2332 | |
/// Converts the lower two packed single-precision (32-bit) floating-point
/// elements in `a` to packed double-precision (64-bit) floating-point
/// elements.
2336 | /// |
2337 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_pd) |
2338 | #[inline ] |
2339 | #[target_feature (enable = "sse2" )] |
2340 | #[cfg_attr (test, assert_instr(cvtps2pd))] |
2341 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2342 | pub fn _mm_cvtps_pd(a: __m128) -> __m128d { |
2343 | unsafe { |
2344 | let a: f32x4 = a.as_f32x4(); |
transmute(simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
2346 | } |
2347 | } |
2348 | |
2349 | /// Converts packed double-precision (64-bit) floating-point elements in `a` to |
2350 | /// packed 32-bit integers. |
2351 | /// |
2352 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epi32) |
2353 | #[inline ] |
2354 | #[target_feature (enable = "sse2" )] |
2355 | #[cfg_attr (test, assert_instr(cvtpd2dq))] |
2356 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2357 | pub fn _mm_cvtpd_epi32(a: __m128d) -> __m128i { |
unsafe { transmute(cvtpd2dq(a)) }
2359 | } |
2360 | |
/// Converts the lower double-precision (64-bit) floating-point element in `a`
/// to a 32-bit integer.
2363 | /// |
2364 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_si32) |
2365 | #[inline ] |
2366 | #[target_feature (enable = "sse2" )] |
2367 | #[cfg_attr (test, assert_instr(cvtsd2si))] |
2368 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2369 | pub fn _mm_cvtsd_si32(a: __m128d) -> i32 { |
2370 | unsafe { cvtsd2si(a) } |
2371 | } |
2372 | |
/// Converts the lower double-precision (64-bit) floating-point element in `b`
/// to a single-precision (32-bit) floating-point element, stores the result in
/// the lower element of the return value, and copies the upper elements from
/// `a` to the upper elements of the return value.
2377 | /// |
2378 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_ss) |
2379 | #[inline ] |
2380 | #[target_feature (enable = "sse2" )] |
2381 | #[cfg_attr (test, assert_instr(cvtsd2ss))] |
2382 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2383 | pub fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 { |
2384 | unsafe { cvtsd2ss(a, b) } |
2385 | } |
2386 | |
2387 | /// Returns the lower double-precision (64-bit) floating-point element of `a`. |
2388 | /// |
2389 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_f64) |
2390 | #[inline ] |
2391 | #[target_feature (enable = "sse2" )] |
2392 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2393 | pub fn _mm_cvtsd_f64(a: __m128d) -> f64 { |
2394 | unsafe { simd_extract!(a, 0) } |
2395 | } |
2396 | |
/// Converts the lower single-precision (32-bit) floating-point element in `b`
/// to a double-precision (64-bit) floating-point element, stores the result in
/// the lower element of the return value, and copies the upper element from
/// `a` to the upper element of the return value.
2401 | /// |
2402 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_sd) |
2403 | #[inline ] |
2404 | #[target_feature (enable = "sse2" )] |
2405 | #[cfg_attr (test, assert_instr(cvtss2sd))] |
2406 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2407 | pub fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d { |
2408 | unsafe { cvtss2sd(a, b) } |
2409 | } |
2410 | |
2411 | /// Converts packed double-precision (64-bit) floating-point elements in `a` to |
2412 | /// packed 32-bit integers with truncation. |
2413 | /// |
2414 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epi32) |
2415 | #[inline ] |
2416 | #[target_feature (enable = "sse2" )] |
2417 | #[cfg_attr (test, assert_instr(cvttpd2dq))] |
2418 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2419 | pub fn _mm_cvttpd_epi32(a: __m128d) -> __m128i { |
unsafe { transmute(cvttpd2dq(a)) }
2421 | } |
2422 | |
2423 | /// Converts the lower double-precision (64-bit) floating-point element in `a` |
2424 | /// to a 32-bit integer with truncation. |
2425 | /// |
2426 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_si32) |
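///
/// # Examples
///
/// A minimal usage sketch (x86_64 only): the conversion truncates toward zero
/// rather than rounding to nearest.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_sd(-1.9);
/// assert_eq!(_mm_cvttsd_si32(a), -1); // truncated, not rounded to -2
/// # }
/// ```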
2427 | #[inline ] |
2428 | #[target_feature (enable = "sse2" )] |
2429 | #[cfg_attr (test, assert_instr(cvttsd2si))] |
2430 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2431 | pub fn _mm_cvttsd_si32(a: __m128d) -> i32 { |
2432 | unsafe { cvttsd2si(a) } |
2433 | } |
2434 | |
2435 | /// Converts packed single-precision (32-bit) floating-point elements in `a` to |
2436 | /// packed 32-bit integers with truncation. |
2437 | /// |
2438 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi32) |
2439 | #[inline ] |
2440 | #[target_feature (enable = "sse2" )] |
2441 | #[cfg_attr (test, assert_instr(cvttps2dq))] |
2442 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2443 | pub fn _mm_cvttps_epi32(a: __m128) -> __m128i { |
unsafe { transmute(cvttps2dq(a)) }
2445 | } |
2446 | |
/// Copies the double-precision (64-bit) floating-point value `a` to the lower
/// element of the return value, and zeroes the upper element.
2449 | /// |
2450 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_sd) |
2451 | #[inline ] |
2452 | #[target_feature (enable = "sse2" )] |
2453 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2454 | pub fn _mm_set_sd(a: f64) -> __m128d { |
_mm_set_pd(0.0, a)
2456 | } |
2457 | |
/// Broadcasts the double-precision (64-bit) floating-point value `a` to all
/// elements of the return value.
2460 | /// |
2461 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_pd) |
2462 | #[inline ] |
2463 | #[target_feature (enable = "sse2" )] |
2464 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2465 | pub fn _mm_set1_pd(a: f64) -> __m128d { |
_mm_set_pd(a, a)
2467 | } |
2468 | |
/// Broadcasts the double-precision (64-bit) floating-point value `a` to all
/// elements of the return value.
2471 | /// |
2472 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd1) |
2473 | #[inline ] |
2474 | #[target_feature (enable = "sse2" )] |
2475 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2476 | pub fn _mm_set_pd1(a: f64) -> __m128d { |
_mm_set_pd(a, a)
2478 | } |
2479 | |
2480 | /// Sets packed double-precision (64-bit) floating-point elements in the return |
2481 | /// value with the supplied values. |
2482 | /// |
2483 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd) |
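///
/// # Examples
///
/// A minimal sketch (added for illustration), assuming SSE2 is available:
/// the first argument ends up in the upper lane, mirroring the
/// `__m128d([b, a])` construction below:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let v = _mm_set_pd(2.0, 1.0);
///             let mut out = [0.0f64; 2];
///             _mm_storeu_pd(out.as_mut_ptr(), v);
///             // In memory order the low element is `b`, the high element is `a`.
///             assert_eq!(out, [1.0, 2.0]);
///         }
///     }
/// }
/// ```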
2484 | #[inline ] |
2485 | #[target_feature (enable = "sse2" )] |
2486 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2487 | pub fn _mm_set_pd(a: f64, b: f64) -> __m128d { |
2488 | __m128d([b, a]) |
2489 | } |
2490 | |
2491 | /// Sets packed double-precision (64-bit) floating-point elements in the return |
2492 | /// value with the supplied values in reverse order. |
2493 | /// |
2494 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_pd) |
2495 | #[inline ] |
2496 | #[target_feature (enable = "sse2" )] |
2497 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2498 | pub fn _mm_setr_pd(a: f64, b: f64) -> __m128d { |
_mm_set_pd(b, a)
2500 | } |
2501 | |
2502 | /// Returns packed double-precision (64-bit) floating-point elements with all |
2503 | /// zeros. |
2504 | /// |
2505 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_pd) |
2506 | #[inline ] |
2507 | #[target_feature (enable = "sse2" )] |
2508 | #[cfg_attr (test, assert_instr(xorp))] |
2509 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2510 | pub fn _mm_setzero_pd() -> __m128d { |
2511 | const { unsafe { mem::zeroed() } } |
2512 | } |
2513 | |
2514 | /// Returns a mask of the most significant bit of each element in `a`. |
2515 | /// |
2516 | /// The mask is stored in the 2 least significant bits of the return value. |
2517 | /// All other bits are set to `0`. |
2518 | /// |
2519 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_pd) |
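///
/// # Examples
///
/// A minimal sketch, assuming SSE2: the mask collects the sign bits of the
/// two lanes:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let v = _mm_setr_pd(-1.0, 1.0);
///             // Bit 0 is the sign of the low lane, bit 1 the sign of the high lane.
///             assert_eq!(_mm_movemask_pd(v), 0b01);
///         }
///     }
/// }
/// ```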
2520 | #[inline ] |
2521 | #[target_feature (enable = "sse2" )] |
2522 | #[cfg_attr (test, assert_instr(movmskpd))] |
2523 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2524 | pub fn _mm_movemask_pd(a: __m128d) -> i32 { |
2525 | // Propagate the highest bit to the rest, because simd_bitmask |
2526 | // requires all-1 or all-0. |
2527 | unsafe { |
let mask: i64x2 = simd_lt(transmute(a), i64x2::ZERO);
2529 | simd_bitmask::<i64x2, u8>(mask).into() |
2530 | } |
2531 | } |
2532 | |
/// Loads 128 bits (composed of 2 packed double-precision (64-bit)
2534 | /// floating-point elements) from memory into the returned vector. |
2535 | /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection |
2536 | /// exception may be generated. |
2537 | /// |
2538 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd) |
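///
/// # Examples
///
/// A minimal sketch, assuming SSE2; the 16-byte alignment requirement is met
/// here with an illustrative `repr(align(16))` wrapper (any suitably aligned
/// storage works):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         // Hypothetical aligned buffer used only for this example.
///         #[repr(align(16))]
///         struct Aligned([f64; 2]);
///         let data = Aligned([1.0, 2.0]);
///         unsafe {
///             let v = _mm_load_pd(data.0.as_ptr());
///             let mut out = [0.0f64; 2];
///             _mm_storeu_pd(out.as_mut_ptr(), v);
///             assert_eq!(out, [1.0, 2.0]);
///         }
///     }
/// }
/// ```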
2539 | #[inline ] |
2540 | #[target_feature (enable = "sse2" )] |
2541 | #[cfg_attr (test, assert_instr(movaps))] |
2542 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2543 | #[allow (clippy::cast_ptr_alignment)] |
2544 | pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d { |
2545 | *(mem_addr as *const __m128d) |
2546 | } |
2547 | |
/// Loads a 64-bit double-precision value into the low element of a
/// 128-bit vector of `[2 x double]` and clears the upper element.
2550 | /// |
2551 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_sd) |
2552 | #[inline ] |
2553 | #[target_feature (enable = "sse2" )] |
2554 | #[cfg_attr (test, assert_instr(movsd))] |
2555 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2556 | pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d { |
_mm_setr_pd(*mem_addr, 0.)
2558 | } |
2559 | |
2560 | /// Loads a double-precision value into the high-order bits of a 128-bit |
2561 | /// vector of `[2 x double]`. The low-order bits are copied from the low-order |
2562 | /// bits of the first operand. |
2563 | /// |
2564 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadh_pd) |
2565 | #[inline ] |
2566 | #[target_feature (enable = "sse2" )] |
2567 | #[cfg_attr (test, assert_instr(movhps))] |
2568 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2569 | pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d { |
_mm_setr_pd(simd_extract!(a, 0), *mem_addr)
2571 | } |
2572 | |
2573 | /// Loads a double-precision value into the low-order bits of a 128-bit |
2574 | /// vector of `[2 x double]`. The high-order bits are copied from the |
2575 | /// high-order bits of the first operand. |
2576 | /// |
2577 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_pd) |
2578 | #[inline ] |
2579 | #[target_feature (enable = "sse2" )] |
2580 | #[cfg_attr (test, assert_instr(movlps))] |
2581 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2582 | pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d { |
_mm_setr_pd(*mem_addr, simd_extract!(a, 1))
2584 | } |
2585 | |
/// Stores a 128-bit floating-point vector of `[2 x double]` to a 128-bit
2587 | /// aligned memory location. |
2588 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be |
2589 | /// used again soon). |
2590 | /// |
2591 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_pd) |
2592 | /// |
2593 | /// # Safety of non-temporal stores |
2594 | /// |
2595 | /// After using this intrinsic, but before any other access to the memory that this intrinsic |
2596 | /// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In |
2597 | /// particular, functions that call this intrinsic should generally call `_mm_sfence` before they |
2598 | /// return. |
2599 | /// |
2600 | /// See [`_mm_sfence`] for details. |
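///
/// # Examples
///
/// A minimal sketch of the required store-then-fence pattern, assuming SSE2
/// and a 16-byte-aligned destination (the `Aligned` wrapper is illustrative):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         #[repr(align(16))]
///         struct Aligned([f64; 2]);
///         let mut dst = Aligned([0.0; 2]);
///         unsafe {
///             _mm_stream_pd(dst.0.as_mut_ptr(), _mm_set1_pd(3.5));
///             // Fence before the streamed memory is read or written again.
///             _mm_sfence();
///         }
///         assert_eq!(dst.0, [3.5, 3.5]);
///     }
/// }
/// ```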
2601 | #[inline ] |
2602 | #[target_feature (enable = "sse2" )] |
2603 | #[cfg_attr (test, assert_instr(movntpd))] |
2604 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2605 | #[allow (clippy::cast_ptr_alignment)] |
2606 | pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) { |
2607 | crate::arch::asm!( |
2608 | vps!("movntpd" , ",{a}" ), |
2609 | p = in(reg) mem_addr, |
2610 | a = in(xmm_reg) a, |
2611 | options(nostack, preserves_flags), |
2612 | ); |
2613 | } |
2614 | |
2615 | /// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a |
2616 | /// memory location. |
2617 | /// |
2618 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_sd) |
2619 | #[inline ] |
2620 | #[target_feature (enable = "sse2" )] |
2621 | #[cfg_attr (all(test, not(target_env = "msvc" )), assert_instr(movlps))] |
2622 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2623 | pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) { |
2624 | *mem_addr = simd_extract!(a, 0) |
2625 | } |
2626 | |
/// Stores 128 bits (composed of 2 packed double-precision (64-bit)
2628 | /// floating-point elements) from `a` into memory. `mem_addr` must be aligned |
2629 | /// on a 16-byte boundary or a general-protection exception may be generated. |
2630 | /// |
2631 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd) |
2632 | #[inline ] |
2633 | #[target_feature (enable = "sse2" )] |
2634 | #[cfg_attr (test, assert_instr(movaps))] |
2635 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2636 | #[allow (clippy::cast_ptr_alignment)] |
2637 | pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) { |
2638 | *(mem_addr as *mut __m128d) = a; |
2639 | } |
2640 | |
/// Stores 128 bits (composed of 2 packed double-precision (64-bit)
2642 | /// floating-point elements) from `a` into memory. |
2643 | /// `mem_addr` does not need to be aligned on any particular boundary. |
2644 | /// |
2645 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_pd) |
2646 | #[inline ] |
2647 | #[target_feature (enable = "sse2" )] |
2648 | #[cfg_attr (test, assert_instr(movups))] // FIXME movupd expected |
2649 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2650 | pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) { |
mem_addr.cast::<__m128d>().write_unaligned(a);
2652 | } |
2653 | |
/// Stores a 16-bit integer from the first element of `a` into memory.
2655 | /// |
2656 | /// `mem_addr` does not need to be aligned on any particular boundary. |
2657 | /// |
2658 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si16) |
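///
/// # Examples
///
/// A minimal sketch, assuming SSE2: only the low 16 bits of the vector are
/// written, in the target's little-endian byte order:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let v = _mm_set1_epi16(0x1234);
///             let mut out = [0u8; 2];
///             _mm_storeu_si16(out.as_mut_ptr(), v);
///             assert_eq!(out, [0x34, 0x12]);
///         }
///     }
/// }
/// ```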
2659 | #[inline ] |
2660 | #[target_feature (enable = "sse2" )] |
2661 | #[stable (feature = "simd_x86_updates" , since = "1.82.0" )] |
2662 | pub unsafe fn _mm_storeu_si16(mem_addr: *mut u8, a: __m128i) { |
ptr::write_unaligned(mem_addr as *mut i16, simd_extract(a.as_i16x8(), 0))
2664 | } |
2665 | |
/// Stores a 32-bit integer from the first element of `a` into memory.
2667 | /// |
2668 | /// `mem_addr` does not need to be aligned on any particular boundary. |
2669 | /// |
2670 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si32) |
2671 | #[inline ] |
2672 | #[target_feature (enable = "sse2" )] |
2673 | #[stable (feature = "simd_x86_updates" , since = "1.82.0" )] |
2674 | pub unsafe fn _mm_storeu_si32(mem_addr: *mut u8, a: __m128i) { |
ptr::write_unaligned(mem_addr as *mut i32, simd_extract(a.as_i32x4(), 0))
2676 | } |
2677 | |
/// Stores a 64-bit integer from the first element of `a` into memory.
2679 | /// |
2680 | /// `mem_addr` does not need to be aligned on any particular boundary. |
2681 | /// |
2682 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si64) |
2683 | #[inline ] |
2684 | #[target_feature (enable = "sse2" )] |
2685 | #[stable (feature = "simd_x86_updates" , since = "1.82.0" )] |
2686 | pub unsafe fn _mm_storeu_si64(mem_addr: *mut u8, a: __m128i) { |
ptr::write_unaligned(mem_addr as *mut i64, simd_extract(a.as_i64x2(), 0))
2688 | } |
2689 | |
2690 | /// Stores the lower double-precision (64-bit) floating-point element from `a` |
2691 | /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a |
2692 | /// 16-byte boundary or a general-protection exception may be generated. |
2693 | /// |
2694 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store1_pd) |
2695 | #[inline ] |
2696 | #[target_feature (enable = "sse2" )] |
2697 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2698 | #[allow (clippy::cast_ptr_alignment)] |
2699 | pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) { |
2700 | let b: __m128d = simd_shuffle!(a, a, [0, 0]); |
2701 | *(mem_addr as *mut __m128d) = b; |
2702 | } |
2703 | |
2704 | /// Stores the lower double-precision (64-bit) floating-point element from `a` |
2705 | /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a |
2706 | /// 16-byte boundary or a general-protection exception may be generated. |
2707 | /// |
2708 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd1) |
2709 | #[inline ] |
2710 | #[target_feature (enable = "sse2" )] |
2711 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2712 | #[allow (clippy::cast_ptr_alignment)] |
2713 | pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) { |
2714 | let b: __m128d = simd_shuffle!(a, a, [0, 0]); |
2715 | *(mem_addr as *mut __m128d) = b; |
2716 | } |
2717 | |
2718 | /// Stores 2 double-precision (64-bit) floating-point elements from `a` into |
2719 | /// memory in reverse order. |
2720 | /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection |
2721 | /// exception may be generated. |
2722 | /// |
2723 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storer_pd) |
2724 | #[inline ] |
2725 | #[target_feature (enable = "sse2" )] |
2726 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2727 | #[allow (clippy::cast_ptr_alignment)] |
2728 | pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) { |
2729 | let b: __m128d = simd_shuffle!(a, a, [1, 0]); |
2730 | *(mem_addr as *mut __m128d) = b; |
2731 | } |
2732 | |
2733 | /// Stores the upper 64 bits of a 128-bit vector of `[2 x double]` to a |
2734 | /// memory location. |
2735 | /// |
2736 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeh_pd) |
2737 | #[inline ] |
2738 | #[target_feature (enable = "sse2" )] |
2739 | #[cfg_attr (all(test, not(target_env = "msvc" )), assert_instr(movhps))] |
2740 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2741 | pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) { |
2742 | *mem_addr = simd_extract!(a, 1); |
2743 | } |
2744 | |
2745 | /// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a |
2746 | /// memory location. |
2747 | /// |
2748 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_pd) |
2749 | #[inline ] |
2750 | #[target_feature (enable = "sse2" )] |
2751 | #[cfg_attr (all(test, not(target_env = "msvc" )), assert_instr(movlps))] |
2752 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2753 | pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) { |
2754 | *mem_addr = simd_extract!(a, 0); |
2755 | } |
2756 | |
2757 | /// Loads a double-precision (64-bit) floating-point element from memory |
/// into both elements of the returned vector.
2759 | /// |
2760 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load1_pd) |
2761 | #[inline ] |
2762 | #[target_feature (enable = "sse2" )] |
2763 | // #[cfg_attr(test, assert_instr(movapd))] // FIXME LLVM uses different codegen |
2764 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2765 | pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d { |
2766 | let d: f64 = *mem_addr; |
_mm_setr_pd(d, d)
2768 | } |
2769 | |
2770 | /// Loads a double-precision (64-bit) floating-point element from memory |
/// into both elements of the returned vector.
2772 | /// |
2773 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd1) |
2774 | #[inline ] |
2775 | #[target_feature (enable = "sse2" )] |
2776 | // #[cfg_attr(test, assert_instr(movapd))] // FIXME same as _mm_load1_pd |
2777 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2778 | pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d { |
2779 | _mm_load1_pd(mem_addr) |
2780 | } |
2781 | |
2782 | /// Loads 2 double-precision (64-bit) floating-point elements from memory into |
2783 | /// the returned vector in reverse order. `mem_addr` must be aligned on a |
2784 | /// 16-byte boundary or a general-protection exception may be generated. |
2785 | /// |
2786 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadr_pd) |
2787 | #[inline ] |
2788 | #[target_feature (enable = "sse2" )] |
2789 | #[cfg_attr (test, assert_instr(movaps))] |
2790 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2791 | pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d { |
2792 | let a: __m128d = _mm_load_pd(mem_addr); |
2793 | simd_shuffle!(a, a, [1, 0]) |
2794 | } |
2795 | |
/// Loads 128 bits (composed of 2 packed double-precision (64-bit)
2797 | /// floating-point elements) from memory into the returned vector. |
2798 | /// `mem_addr` does not need to be aligned on any particular boundary. |
2799 | /// |
2800 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_pd) |
2801 | #[inline ] |
2802 | #[target_feature (enable = "sse2" )] |
2803 | #[cfg_attr (test, assert_instr(movups))] |
2804 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2805 | pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d { |
2806 | let mut dst: __m128d = _mm_undefined_pd(); |
ptr::copy_nonoverlapping(
mem_addr as *const u8,
ptr::addr_of_mut!(dst) as *mut u8,
mem::size_of::<__m128d>(),
);
2812 | dst |
2813 | } |
2814 | |
/// Loads unaligned 16 bits of integer data from memory into a new vector.
2816 | /// |
2817 | /// `mem_addr` does not need to be aligned on any particular boundary. |
2818 | /// |
2819 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si16) |
2820 | #[inline ] |
2821 | #[target_feature (enable = "sse2" )] |
2822 | #[stable (feature = "simd_x86_updates" , since = "1.82.0" )] |
2823 | pub unsafe fn _mm_loadu_si16(mem_addr: *const u8) -> __m128i { |
transmute(i16x8::new(
ptr::read_unaligned(mem_addr as *const i16),
0,
0,
0,
0,
0,
0,
0,
))
2834 | } |
2835 | |
/// Loads unaligned 32 bits of integer data from memory into a new vector.
2837 | /// |
2838 | /// `mem_addr` does not need to be aligned on any particular boundary. |
2839 | /// |
2840 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si32) |
2841 | #[inline ] |
2842 | #[target_feature (enable = "sse2" )] |
2843 | #[stable (feature = "simd_x86_updates" , since = "1.82.0" )] |
2844 | pub unsafe fn _mm_loadu_si32(mem_addr: *const u8) -> __m128i { |
transmute(i32x4::new(
ptr::read_unaligned(mem_addr as *const i32),
0,
0,
0,
))
2851 | } |
2852 | |
/// Loads unaligned 64 bits of integer data from memory into a new vector.
2854 | /// |
2855 | /// `mem_addr` does not need to be aligned on any particular boundary. |
2856 | /// |
2857 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si64) |
2858 | #[inline ] |
2859 | #[target_feature (enable = "sse2" )] |
2860 | #[stable (feature = "simd_x86_mm_loadu_si64" , since = "1.46.0" )] |
2861 | pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i { |
transmute(i64x2::new(ptr::read_unaligned(mem_addr as *const i64), 0))
2863 | } |
2864 | |
2865 | /// Constructs a 128-bit floating-point vector of `[2 x double]` from two |
2866 | /// 128-bit vector parameters of `[2 x double]`, using the immediate-value |
2867 | /// parameter as a specifier. |
2868 | /// |
2869 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_pd) |
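///
/// # Examples
///
/// A minimal sketch of the mask semantics, assuming SSE2: bit 0 of `MASK`
/// selects the low lane from `a`, bit 1 the high lane from `b`:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let a = _mm_setr_pd(1.0, 2.0);
///             let b = _mm_setr_pd(3.0, 4.0);
///             let r = _mm_shuffle_pd::<0b01>(a, b);
///             let mut out = [0.0f64; 2];
///             _mm_storeu_pd(out.as_mut_ptr(), r);
///             // Low lane = a[1], high lane = b[0].
///             assert_eq!(out, [2.0, 3.0]);
///         }
///     }
/// }
/// ```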
2870 | #[inline ] |
2871 | #[target_feature (enable = "sse2" )] |
2872 | #[cfg_attr (test, assert_instr(shufps, MASK = 2))] |
2873 | #[rustc_legacy_const_generics (2)] |
2874 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2875 | pub fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d { |
2876 | static_assert_uimm_bits!(MASK, 8); |
2877 | unsafe { simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) } |
2878 | } |
2879 | |
2880 | /// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower |
2881 | /// 64 bits are set to the lower 64 bits of the second parameter. The upper |
2882 | /// 64 bits are set to the upper 64 bits of the first parameter. |
2883 | /// |
2884 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_sd) |
2885 | #[inline ] |
2886 | #[target_feature (enable = "sse2" )] |
2887 | #[cfg_attr (test, assert_instr(movsd))] |
2888 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2889 | pub fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { _mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1)) }
2891 | } |
2892 | |
2893 | /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit |
2894 | /// floating-point vector of `[4 x float]`. |
2895 | /// |
2896 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_ps) |
2897 | #[inline ] |
2898 | #[target_feature (enable = "sse2" )] |
2899 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2900 | pub fn _mm_castpd_ps(a: __m128d) -> __m128 { |
unsafe { transmute(a) }
2902 | } |
2903 | |
2904 | /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit |
2905 | /// integer vector. |
2906 | /// |
2907 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_si128) |
2908 | #[inline ] |
2909 | #[target_feature (enable = "sse2" )] |
2910 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2911 | pub fn _mm_castpd_si128(a: __m128d) -> __m128i { |
unsafe { transmute(a) }
2913 | } |
2914 | |
2915 | /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit |
2916 | /// floating-point vector of `[2 x double]`. |
2917 | /// |
2918 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_pd) |
2919 | #[inline ] |
2920 | #[target_feature (enable = "sse2" )] |
2921 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2922 | pub fn _mm_castps_pd(a: __m128) -> __m128d { |
unsafe { transmute(a) }
2924 | } |
2925 | |
2926 | /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit |
2927 | /// integer vector. |
2928 | /// |
2929 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_si128) |
2930 | #[inline ] |
2931 | #[target_feature (enable = "sse2" )] |
2932 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2933 | pub fn _mm_castps_si128(a: __m128) -> __m128i { |
unsafe { transmute(a) }
2935 | } |
2936 | |
2937 | /// Casts a 128-bit integer vector into a 128-bit floating-point vector |
2938 | /// of `[2 x double]`. |
2939 | /// |
2940 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_pd) |
2941 | #[inline ] |
2942 | #[target_feature (enable = "sse2" )] |
2943 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2944 | pub fn _mm_castsi128_pd(a: __m128i) -> __m128d { |
unsafe { transmute(a) }
2946 | } |
2947 | |
2948 | /// Casts a 128-bit integer vector into a 128-bit floating-point vector |
2949 | /// of `[4 x float]`. |
2950 | /// |
2951 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_ps) |
2952 | #[inline ] |
2953 | #[target_feature (enable = "sse2" )] |
2954 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2955 | pub fn _mm_castsi128_ps(a: __m128i) -> __m128 { |
unsafe { transmute(a) }
2957 | } |
2958 | |
/// Returns a vector of type `__m128d` with indeterminate elements.
2960 | /// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`]. |
2961 | /// In practice, this is equivalent to [`mem::zeroed`]. |
2962 | /// |
2963 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_pd) |
2964 | #[inline ] |
2965 | #[target_feature (enable = "sse2" )] |
2966 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2967 | pub fn _mm_undefined_pd() -> __m128d { |
2968 | const { unsafe { mem::zeroed() } } |
2969 | } |
2970 | |
/// Returns a vector of type `__m128i` with indeterminate elements.
2972 | /// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`]. |
2973 | /// In practice, this is equivalent to [`mem::zeroed`]. |
2974 | /// |
2975 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_si128) |
2976 | #[inline ] |
2977 | #[target_feature (enable = "sse2" )] |
2978 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2979 | pub fn _mm_undefined_si128() -> __m128i { |
2980 | const { unsafe { mem::zeroed() } } |
2981 | } |
2982 | |
/// The resulting `__m128d` is composed of the high-order values of the two
/// `__m128d` inputs, interleaved as follows:
2985 | /// |
2986 | /// * The `[127:64]` bits are copied from the `[127:64]` bits of the second input |
2987 | /// * The `[63:0]` bits are copied from the `[127:64]` bits of the first input |
2988 | /// |
2989 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_pd) |
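///
/// # Examples
///
/// A minimal sketch, assuming SSE2: the high lanes of the two inputs are
/// interleaved:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let a = _mm_setr_pd(1.0, 2.0);
///             let b = _mm_setr_pd(3.0, 4.0);
///             let r = _mm_unpackhi_pd(a, b);
///             let mut out = [0.0f64; 2];
///             _mm_storeu_pd(out.as_mut_ptr(), r);
///             // Result is [a[1], b[1]].
///             assert_eq!(out, [2.0, 4.0]);
///         }
///     }
/// }
/// ```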
2990 | #[inline ] |
2991 | #[target_feature (enable = "sse2" )] |
2992 | #[cfg_attr (test, assert_instr(unpckhpd))] |
2993 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2994 | pub fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d { |
2995 | unsafe { simd_shuffle!(a, b, [1, 3]) } |
2996 | } |
2997 | |
/// The resulting `__m128d` is composed of the low-order values of the two
/// `__m128d` inputs, interleaved as follows:
3000 | /// |
3001 | /// * The `[127:64]` bits are copied from the `[63:0]` bits of the second input |
3002 | /// * The `[63:0]` bits are copied from the `[63:0]` bits of the first input |
3003 | /// |
3004 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_pd) |
3005 | #[inline ] |
3006 | #[target_feature (enable = "sse2" )] |
3007 | #[cfg_attr (all(test, not(target_env = "msvc" )), assert_instr(movlhps))] |
3008 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3009 | pub fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d { |
3010 | unsafe { simd_shuffle!(a, b, [0, 2]) } |
3011 | } |
3012 | |
3013 | #[allow (improper_ctypes)] |
3014 | unsafe extern "C" { |
3015 | #[link_name = "llvm.x86.sse2.pause" ] |
3016 | unsafefn pause(); |
3017 | #[link_name = "llvm.x86.sse2.clflush" ] |
3018 | unsafefn clflush(p: *const u8); |
3019 | #[link_name = "llvm.x86.sse2.lfence" ] |
3020 | unsafefn lfence(); |
3021 | #[link_name = "llvm.x86.sse2.mfence" ] |
3022 | unsafefn mfence(); |
3023 | #[link_name = "llvm.x86.sse2.pmadd.wd" ] |
3024 | unsafefn pmaddwd(a: i16x8, b: i16x8) -> i32x4; |
3025 | #[link_name = "llvm.x86.sse2.psad.bw" ] |
3026 | unsafefn psadbw(a: u8x16, b: u8x16) -> u64x2; |
3027 | #[link_name = "llvm.x86.sse2.psll.w" ] |
3028 | unsafefn psllw(a: i16x8, count: i16x8) -> i16x8; |
3029 | #[link_name = "llvm.x86.sse2.psll.d" ] |
3030 | unsafefn pslld(a: i32x4, count: i32x4) -> i32x4; |
3031 | #[link_name = "llvm.x86.sse2.psll.q" ] |
3032 | unsafefn psllq(a: i64x2, count: i64x2) -> i64x2; |
3033 | #[link_name = "llvm.x86.sse2.psra.w" ] |
3034 | unsafefn psraw(a: i16x8, count: i16x8) -> i16x8; |
3035 | #[link_name = "llvm.x86.sse2.psra.d" ] |
3036 | unsafefn psrad(a: i32x4, count: i32x4) -> i32x4; |
3037 | #[link_name = "llvm.x86.sse2.psrl.w" ] |
3038 | unsafefn psrlw(a: i16x8, count: i16x8) -> i16x8; |
3039 | #[link_name = "llvm.x86.sse2.psrl.d" ] |
3040 | unsafefn psrld(a: i32x4, count: i32x4) -> i32x4; |
3041 | #[link_name = "llvm.x86.sse2.psrl.q" ] |
3042 | unsafefn psrlq(a: i64x2, count: i64x2) -> i64x2; |
3043 | #[link_name = "llvm.x86.sse2.cvtps2dq" ] |
3044 | unsafefn cvtps2dq(a: __m128) -> i32x4; |
3045 | #[link_name = "llvm.x86.sse2.maskmov.dqu" ] |
3046 | unsafefn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8); |
3047 | #[link_name = "llvm.x86.sse2.packsswb.128" ] |
3048 | unsafefn packsswb(a: i16x8, b: i16x8) -> i8x16; |
3049 | #[link_name = "llvm.x86.sse2.packssdw.128" ] |
3050 | unsafefn packssdw(a: i32x4, b: i32x4) -> i16x8; |
3051 | #[link_name = "llvm.x86.sse2.packuswb.128" ] |
3052 | unsafefn packuswb(a: i16x8, b: i16x8) -> u8x16; |
3053 | #[link_name = "llvm.x86.sse2.max.sd" ] |
3054 | unsafefn maxsd(a: __m128d, b: __m128d) -> __m128d; |
3055 | #[link_name = "llvm.x86.sse2.max.pd" ] |
3056 | unsafefn maxpd(a: __m128d, b: __m128d) -> __m128d; |
3057 | #[link_name = "llvm.x86.sse2.min.sd" ] |
3058 | unsafefn minsd(a: __m128d, b: __m128d) -> __m128d; |
3059 | #[link_name = "llvm.x86.sse2.min.pd" ] |
3060 | unsafefn minpd(a: __m128d, b: __m128d) -> __m128d; |
3061 | #[link_name = "llvm.x86.sse2.cmp.sd" ] |
3062 | unsafefn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d; |
3063 | #[link_name = "llvm.x86.sse2.cmp.pd" ] |
3064 | unsafefn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d; |
3065 | #[link_name = "llvm.x86.sse2.comieq.sd" ] |
3066 | unsafefn comieqsd(a: __m128d, b: __m128d) -> i32; |
3067 | #[link_name = "llvm.x86.sse2.comilt.sd" ] |
3068 | unsafefn comiltsd(a: __m128d, b: __m128d) -> i32; |
3069 | #[link_name = "llvm.x86.sse2.comile.sd" ] |
3070 | unsafefn comilesd(a: __m128d, b: __m128d) -> i32; |
3071 | #[link_name = "llvm.x86.sse2.comigt.sd" ] |
3072 | unsafefn comigtsd(a: __m128d, b: __m128d) -> i32; |
3073 | #[link_name = "llvm.x86.sse2.comige.sd" ] |
3074 | unsafefn comigesd(a: __m128d, b: __m128d) -> i32; |
3075 | #[link_name = "llvm.x86.sse2.comineq.sd" ] |
3076 | unsafefn comineqsd(a: __m128d, b: __m128d) -> i32; |
3077 | #[link_name = "llvm.x86.sse2.ucomieq.sd" ] |
3078 | unsafefn ucomieqsd(a: __m128d, b: __m128d) -> i32; |
3079 | #[link_name = "llvm.x86.sse2.ucomilt.sd" ] |
3080 | unsafefn ucomiltsd(a: __m128d, b: __m128d) -> i32; |
3081 | #[link_name = "llvm.x86.sse2.ucomile.sd" ] |
3082 | unsafefn ucomilesd(a: __m128d, b: __m128d) -> i32; |
3083 | #[link_name = "llvm.x86.sse2.ucomigt.sd" ] |
3084 | unsafefn ucomigtsd(a: __m128d, b: __m128d) -> i32; |
3085 | #[link_name = "llvm.x86.sse2.ucomige.sd" ] |
3086 | unsafefn ucomigesd(a: __m128d, b: __m128d) -> i32; |
3087 | #[link_name = "llvm.x86.sse2.ucomineq.sd" ] |
3088 | unsafefn ucomineqsd(a: __m128d, b: __m128d) -> i32; |
3089 | #[link_name = "llvm.x86.sse2.cvtpd2dq" ] |
3090 | unsafefn cvtpd2dq(a: __m128d) -> i32x4; |
3091 | #[link_name = "llvm.x86.sse2.cvtsd2si" ] |
3092 | unsafefn cvtsd2si(a: __m128d) -> i32; |
3093 | #[link_name = "llvm.x86.sse2.cvtsd2ss" ] |
3094 | unsafefn cvtsd2ss(a: __m128, b: __m128d) -> __m128; |
3095 | #[link_name = "llvm.x86.sse2.cvtss2sd" ] |
3096 | unsafefn cvtss2sd(a: __m128d, b: __m128) -> __m128d; |
3097 | #[link_name = "llvm.x86.sse2.cvttpd2dq" ] |
3098 | unsafefn cvttpd2dq(a: __m128d) -> i32x4; |
3099 | #[link_name = "llvm.x86.sse2.cvttsd2si" ] |
3100 | unsafefn cvttsd2si(a: __m128d) -> i32; |
3101 | #[link_name = "llvm.x86.sse2.cvttps2dq" ] |
3102 | unsafefn cvttps2dq(a: __m128) -> i32x4; |
3103 | } |
3104 | |
3105 | #[cfg (test)] |
3106 | mod tests { |
3107 | use crate::{ |
3108 | core_arch::{simd::*, x86::*}, |
3109 | hint::black_box, |
3110 | }; |
3111 | use std::{ |
3112 | boxed, f32, f64, |
3113 | mem::{self, transmute}, |
3114 | ptr, |
3115 | }; |
3116 | use stdarch_test::simd_test; |
3117 | |
3118 | const NAN: f64 = f64::NAN; |
3119 | |
3120 | #[test ] |
3121 | fn test_mm_pause() { |
3122 | unsafe { _mm_pause() } |
3123 | } |
3124 | |
3125 | #[simd_test(enable = "sse2" )] |
3126 | unsafe fn test_mm_clflush() { |
3127 | let x = 0_u8; |
3128 | _mm_clflush(ptr::addr_of!(x)); |
3129 | } |
3130 | |
3131 | #[simd_test(enable = "sse2" )] |
3132 | // Miri cannot support this until it is clear how it fits in the Rust memory model |
3133 | #[cfg_attr (miri, ignore)] |
3134 | unsafe fn test_mm_lfence() { |
3135 | _mm_lfence(); |
3136 | } |
3137 | |
3138 | #[simd_test(enable = "sse2" )] |
3139 | // Miri cannot support this until it is clear how it fits in the Rust memory model |
3140 | #[cfg_attr (miri, ignore)] |
3141 | unsafe fn test_mm_mfence() { |
3142 | _mm_mfence(); |
3143 | } |
3144 | |
3145 | #[simd_test(enable = "sse2" )] |
3146 | unsafe fn test_mm_add_epi8() { |
3147 | let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3148 | #[rustfmt::skip] |
3149 | let b = _mm_setr_epi8( |
3150 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
3151 | ); |
3152 | let r = _mm_add_epi8(a, b); |
3153 | #[rustfmt::skip] |
3154 | let e = _mm_setr_epi8( |
3155 | 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, |
3156 | ); |
3157 | assert_eq_m128i(r, e); |
3158 | } |
3159 | |
3160 | #[simd_test(enable = "sse2" )] |
3161 | unsafe fn test_mm_add_epi8_overflow() { |
3162 | let a = _mm_set1_epi8(0x7F); |
3163 | let b = _mm_set1_epi8(1); |
3164 | let r = _mm_add_epi8(a, b); |
3165 | assert_eq_m128i(r, _mm_set1_epi8(-128)); |
3166 | } |
3167 | |
3168 | #[simd_test(enable = "sse2" )] |
3169 | unsafe fn test_mm_add_epi16() { |
3170 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
3171 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
3172 | let r = _mm_add_epi16(a, b); |
3173 | let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); |
3174 | assert_eq_m128i(r, e); |
3175 | } |
3176 | |
3177 | #[simd_test(enable = "sse2" )] |
3178 | unsafe fn test_mm_add_epi32() { |
3179 | let a = _mm_setr_epi32(0, 1, 2, 3); |
3180 | let b = _mm_setr_epi32(4, 5, 6, 7); |
3181 | let r = _mm_add_epi32(a, b); |
3182 | let e = _mm_setr_epi32(4, 6, 8, 10); |
3183 | assert_eq_m128i(r, e); |
3184 | } |
3185 | |
3186 | #[simd_test(enable = "sse2" )] |
3187 | unsafe fn test_mm_add_epi64() { |
3188 | let a = _mm_setr_epi64x(0, 1); |
3189 | let b = _mm_setr_epi64x(2, 3); |
3190 | let r = _mm_add_epi64(a, b); |
3191 | let e = _mm_setr_epi64x(2, 4); |
3192 | assert_eq_m128i(r, e); |
3193 | } |
3194 | |
3195 | #[simd_test(enable = "sse2" )] |
3196 | unsafe fn test_mm_adds_epi8() { |
3197 | let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3198 | #[rustfmt::skip] |
3199 | let b = _mm_setr_epi8( |
3200 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
3201 | ); |
3202 | let r = _mm_adds_epi8(a, b); |
3203 | #[rustfmt::skip] |
3204 | let e = _mm_setr_epi8( |
3205 | 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, |
3206 | ); |
3207 | assert_eq_m128i(r, e); |
3208 | } |
3209 | |
3210 | #[simd_test(enable = "sse2" )] |
3211 | unsafe fn test_mm_adds_epi8_saturate_positive() { |
3212 | let a = _mm_set1_epi8(0x7F); |
3213 | let b = _mm_set1_epi8(1); |
3214 | let r = _mm_adds_epi8(a, b); |
3215 | assert_eq_m128i(r, a); |
3216 | } |
3217 | |
3218 | #[simd_test(enable = "sse2" )] |
3219 | unsafe fn test_mm_adds_epi8_saturate_negative() { |
3220 | let a = _mm_set1_epi8(-0x80); |
3221 | let b = _mm_set1_epi8(-1); |
3222 | let r = _mm_adds_epi8(a, b); |
3223 | assert_eq_m128i(r, a); |
3224 | } |
3225 | |
3226 | #[simd_test(enable = "sse2" )] |
3227 | unsafe fn test_mm_adds_epi16() { |
3228 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
3229 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
3230 | let r = _mm_adds_epi16(a, b); |
3231 | let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); |
3232 | assert_eq_m128i(r, e); |
3233 | } |
3234 | |
3235 | #[simd_test(enable = "sse2" )] |
3236 | unsafe fn test_mm_adds_epi16_saturate_positive() { |
3237 | let a = _mm_set1_epi16(0x7FFF); |
3238 | let b = _mm_set1_epi16(1); |
3239 | let r = _mm_adds_epi16(a, b); |
3240 | assert_eq_m128i(r, a); |
3241 | } |
3242 | |
3243 | #[simd_test(enable = "sse2" )] |
3244 | unsafe fn test_mm_adds_epi16_saturate_negative() { |
3245 | let a = _mm_set1_epi16(-0x8000); |
3246 | let b = _mm_set1_epi16(-1); |
3247 | let r = _mm_adds_epi16(a, b); |
3248 | assert_eq_m128i(r, a); |
3249 | } |
3250 | |
3251 | #[simd_test(enable = "sse2" )] |
3252 | unsafe fn test_mm_adds_epu8() { |
3253 | let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3254 | #[rustfmt::skip] |
3255 | let b = _mm_setr_epi8( |
3256 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
3257 | ); |
3258 | let r = _mm_adds_epu8(a, b); |
3259 | #[rustfmt::skip] |
3260 | let e = _mm_setr_epi8( |
3261 | 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, |
3262 | ); |
3263 | assert_eq_m128i(r, e); |
3264 | } |
3265 | |
3266 | #[simd_test(enable = "sse2" )] |
3267 | unsafe fn test_mm_adds_epu8_saturate() { |
3268 | let a = _mm_set1_epi8(!0); |
3269 | let b = _mm_set1_epi8(1); |
3270 | let r = _mm_adds_epu8(a, b); |
3271 | assert_eq_m128i(r, a); |
3272 | } |
3273 | |
3274 | #[simd_test(enable = "sse2" )] |
3275 | unsafe fn test_mm_adds_epu16() { |
3276 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
3277 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
3278 | let r = _mm_adds_epu16(a, b); |
3279 | let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); |
3280 | assert_eq_m128i(r, e); |
3281 | } |
3282 | |
3283 | #[simd_test(enable = "sse2" )] |
3284 | unsafe fn test_mm_adds_epu16_saturate() { |
3285 | let a = _mm_set1_epi16(!0); |
3286 | let b = _mm_set1_epi16(1); |
3287 | let r = _mm_adds_epu16(a, b); |
3288 | assert_eq_m128i(r, a); |
3289 | } |
3290 | |
3291 | #[simd_test(enable = "sse2" )] |
3292 | unsafe fn test_mm_avg_epu8() { |
3293 | let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9)); |
3294 | let r = _mm_avg_epu8(a, b); |
3295 | assert_eq_m128i(r, _mm_set1_epi8(6)); |
3296 | } |
3297 | |
3298 | #[simd_test(enable = "sse2" )] |
3299 | unsafe fn test_mm_avg_epu16() { |
3300 | let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9)); |
3301 | let r = _mm_avg_epu16(a, b); |
3302 | assert_eq_m128i(r, _mm_set1_epi16(6)); |
3303 | } |
3304 | |
3305 | #[simd_test(enable = "sse2" )] |
3306 | unsafe fn test_mm_madd_epi16() { |
3307 | let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
3308 | let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16); |
3309 | let r = _mm_madd_epi16(a, b); |
3310 | let e = _mm_setr_epi32(29, 81, 149, 233); |
3311 | assert_eq_m128i(r, e); |
3312 | |
3313 | // Test large values. |
3314 | // MIN*MIN+MIN*MIN will overflow into i32::MIN. |
3315 | let a = _mm_setr_epi16( |
3316 | i16::MAX, |
3317 | i16::MAX, |
3318 | i16::MIN, |
3319 | i16::MIN, |
3320 | i16::MIN, |
3321 | i16::MAX, |
3322 | 0, |
3323 | 0, |
3324 | ); |
3325 | let b = _mm_setr_epi16( |
3326 | i16::MAX, |
3327 | i16::MAX, |
3328 | i16::MIN, |
3329 | i16::MIN, |
3330 | i16::MAX, |
3331 | i16::MIN, |
3332 | 0, |
3333 | 0, |
3334 | ); |
3335 | let r = _mm_madd_epi16(a, b); |
3336 | let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0); |
3337 | assert_eq_m128i(r, e); |
3338 | } |
3339 | |
3340 | #[simd_test(enable = "sse2" )] |
3341 | unsafe fn test_mm_max_epi16() { |
3342 | let a = _mm_set1_epi16(1); |
3343 | let b = _mm_set1_epi16(-1); |
3344 | let r = _mm_max_epi16(a, b); |
3345 | assert_eq_m128i(r, a); |
3346 | } |
3347 | |
3348 | #[simd_test(enable = "sse2" )] |
3349 | unsafe fn test_mm_max_epu8() { |
3350 | let a = _mm_set1_epi8(1); |
3351 | let b = _mm_set1_epi8(!0); |
3352 | let r = _mm_max_epu8(a, b); |
3353 | assert_eq_m128i(r, b); |
3354 | } |
3355 | |
3356 | #[simd_test(enable = "sse2" )] |
3357 | unsafe fn test_mm_min_epi16() { |
3358 | let a = _mm_set1_epi16(1); |
3359 | let b = _mm_set1_epi16(-1); |
3360 | let r = _mm_min_epi16(a, b); |
3361 | assert_eq_m128i(r, b); |
3362 | } |
3363 | |
3364 | #[simd_test(enable = "sse2" )] |
3365 | unsafe fn test_mm_min_epu8() { |
3366 | let a = _mm_set1_epi8(1); |
3367 | let b = _mm_set1_epi8(!0); |
3368 | let r = _mm_min_epu8(a, b); |
3369 | assert_eq_m128i(r, a); |
3370 | } |
3371 | |
3372 | #[simd_test(enable = "sse2" )] |
3373 | unsafe fn test_mm_mulhi_epi16() { |
3374 | let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001)); |
3375 | let r = _mm_mulhi_epi16(a, b); |
3376 | assert_eq_m128i(r, _mm_set1_epi16(-16)); |
3377 | } |
3378 | |
3379 | #[simd_test(enable = "sse2" )] |
3380 | unsafe fn test_mm_mulhi_epu16() { |
3381 | let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001)); |
3382 | let r = _mm_mulhi_epu16(a, b); |
3383 | assert_eq_m128i(r, _mm_set1_epi16(15)); |
3384 | } |
3385 | |
3386 | #[simd_test(enable = "sse2" )] |
3387 | unsafe fn test_mm_mullo_epi16() { |
3388 | let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001)); |
3389 | let r = _mm_mullo_epi16(a, b); |
3390 | assert_eq_m128i(r, _mm_set1_epi16(-17960)); |
3391 | } |
3392 | |
3393 | #[simd_test(enable = "sse2" )] |
3394 | unsafe fn test_mm_mul_epu32() { |
3395 | let a = _mm_setr_epi64x(1_000_000_000, 1 << 34); |
3396 | let b = _mm_setr_epi64x(1_000_000_000, 1 << 35); |
3397 | let r = _mm_mul_epu32(a, b); |
3398 | let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0); |
3399 | assert_eq_m128i(r, e); |
3400 | } |
3401 | |
3402 | #[simd_test(enable = "sse2" )] |
3403 | unsafe fn test_mm_sad_epu8() { |
3404 | #[rustfmt::skip] |
3405 | let a = _mm_setr_epi8( |
3406 | 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8, |
3407 | 1, 2, 3, 4, |
3408 | 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8, |
3409 | 1, 2, 3, 4, |
3410 | ); |
3411 | let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2); |
3412 | let r = _mm_sad_epu8(a, b); |
3413 | let e = _mm_setr_epi64x(1020, 614); |
3414 | assert_eq_m128i(r, e); |
3415 | } |
3416 | |
3417 | #[simd_test(enable = "sse2" )] |
3418 | unsafe fn test_mm_sub_epi8() { |
3419 | let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6)); |
3420 | let r = _mm_sub_epi8(a, b); |
3421 | assert_eq_m128i(r, _mm_set1_epi8(-1)); |
3422 | } |
3423 | |
3424 | #[simd_test(enable = "sse2" )] |
3425 | unsafe fn test_mm_sub_epi16() { |
3426 | let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6)); |
3427 | let r = _mm_sub_epi16(a, b); |
3428 | assert_eq_m128i(r, _mm_set1_epi16(-1)); |
3429 | } |
3430 | |
3431 | #[simd_test(enable = "sse2" )] |
3432 | unsafe fn test_mm_sub_epi32() { |
3433 | let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6)); |
3434 | let r = _mm_sub_epi32(a, b); |
3435 | assert_eq_m128i(r, _mm_set1_epi32(-1)); |
3436 | } |
3437 | |
3438 | #[simd_test(enable = "sse2" )] |
3439 | unsafe fn test_mm_sub_epi64() { |
3440 | let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6)); |
3441 | let r = _mm_sub_epi64(a, b); |
3442 | assert_eq_m128i(r, _mm_set1_epi64x(-1)); |
3443 | } |
3444 | |
3445 | #[simd_test(enable = "sse2" )] |
3446 | unsafe fn test_mm_subs_epi8() { |
3447 | let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2)); |
3448 | let r = _mm_subs_epi8(a, b); |
3449 | assert_eq_m128i(r, _mm_set1_epi8(3)); |
3450 | } |
3451 | |
3452 | #[simd_test(enable = "sse2" )] |
3453 | unsafe fn test_mm_subs_epi8_saturate_positive() { |
3454 | let a = _mm_set1_epi8(0x7F); |
3455 | let b = _mm_set1_epi8(-1); |
3456 | let r = _mm_subs_epi8(a, b); |
3457 | assert_eq_m128i(r, a); |
3458 | } |
3459 | |
3460 | #[simd_test(enable = "sse2" )] |
3461 | unsafe fn test_mm_subs_epi8_saturate_negative() { |
3462 | let a = _mm_set1_epi8(-0x80); |
3463 | let b = _mm_set1_epi8(1); |
3464 | let r = _mm_subs_epi8(a, b); |
3465 | assert_eq_m128i(r, a); |
3466 | } |
3467 | |
3468 | #[simd_test(enable = "sse2" )] |
3469 | unsafe fn test_mm_subs_epi16() { |
3470 | let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2)); |
3471 | let r = _mm_subs_epi16(a, b); |
3472 | assert_eq_m128i(r, _mm_set1_epi16(3)); |
3473 | } |
3474 | |
3475 | #[simd_test(enable = "sse2" )] |
3476 | unsafe fn test_mm_subs_epi16_saturate_positive() { |
3477 | let a = _mm_set1_epi16(0x7FFF); |
3478 | let b = _mm_set1_epi16(-1); |
3479 | let r = _mm_subs_epi16(a, b); |
3480 | assert_eq_m128i(r, a); |
3481 | } |
3482 | |
3483 | #[simd_test(enable = "sse2" )] |
3484 | unsafe fn test_mm_subs_epi16_saturate_negative() { |
3485 | let a = _mm_set1_epi16(-0x8000); |
3486 | let b = _mm_set1_epi16(1); |
3487 | let r = _mm_subs_epi16(a, b); |
3488 | assert_eq_m128i(r, a); |
3489 | } |
3490 | |
3491 | #[simd_test(enable = "sse2" )] |
3492 | unsafe fn test_mm_subs_epu8() { |
3493 | let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2)); |
3494 | let r = _mm_subs_epu8(a, b); |
3495 | assert_eq_m128i(r, _mm_set1_epi8(3)); |
3496 | } |
3497 | |
3498 | #[simd_test(enable = "sse2" )] |
3499 | unsafe fn test_mm_subs_epu8_saturate() { |
3500 | let a = _mm_set1_epi8(0); |
3501 | let b = _mm_set1_epi8(1); |
3502 | let r = _mm_subs_epu8(a, b); |
3503 | assert_eq_m128i(r, a); |
3504 | } |
3505 | |
3506 | #[simd_test(enable = "sse2" )] |
3507 | unsafe fn test_mm_subs_epu16() { |
3508 | let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2)); |
3509 | let r = _mm_subs_epu16(a, b); |
3510 | assert_eq_m128i(r, _mm_set1_epi16(3)); |
3511 | } |
3512 | |
3513 | #[simd_test(enable = "sse2" )] |
3514 | unsafe fn test_mm_subs_epu16_saturate() { |
3515 | let a = _mm_set1_epi16(0); |
3516 | let b = _mm_set1_epi16(1); |
3517 | let r = _mm_subs_epu16(a, b); |
3518 | assert_eq_m128i(r, a); |
3519 | } |
3520 | |
3521 | #[simd_test(enable = "sse2" )] |
3522 | unsafe fn test_mm_slli_si128() { |
3523 | #[rustfmt::skip] |
3524 | let a = _mm_setr_epi8( |
3525 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
3526 | ); |
3527 | let r = _mm_slli_si128::<1>(a); |
3528 | let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3529 | assert_eq_m128i(r, e); |
3530 | |
3531 | #[rustfmt::skip] |
3532 | let a = _mm_setr_epi8( |
3533 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
3534 | ); |
3535 | let r = _mm_slli_si128::<15>(a); |
3536 | let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); |
3537 | assert_eq_m128i(r, e); |
3538 | |
3539 | #[rustfmt::skip] |
3540 | let a = _mm_setr_epi8( |
3541 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
3542 | ); |
3543 | let r = _mm_slli_si128::<16>(a); |
3544 | assert_eq_m128i(r, _mm_set1_epi8(0)); |
3545 | } |
3546 | |
3547 | #[simd_test(enable = "sse2" )] |
3548 | unsafe fn test_mm_slli_epi16() { |
3549 | let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); |
3550 | let r = _mm_slli_epi16::<4>(a); |
3551 | assert_eq_m128i( |
3552 | r, |
3553 | _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0), |
3554 | ); |
3555 | let r = _mm_slli_epi16::<16>(a); |
3556 | assert_eq_m128i(r, _mm_set1_epi16(0)); |
3557 | } |
3558 | |
3559 | #[simd_test(enable = "sse2" )] |
3560 | unsafe fn test_mm_sll_epi16() { |
3561 | let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); |
3562 | let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4)); |
3563 | assert_eq_m128i( |
3564 | r, |
3565 | _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0), |
3566 | ); |
3567 | let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0)); |
3568 | assert_eq_m128i(r, a); |
3569 | let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16)); |
3570 | assert_eq_m128i(r, _mm_set1_epi16(0)); |
3571 | let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX)); |
3572 | assert_eq_m128i(r, _mm_set1_epi16(0)); |
3573 | } |
3574 | |
3575 | #[simd_test(enable = "sse2" )] |
3576 | unsafe fn test_mm_slli_epi32() { |
3577 | let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); |
3578 | let r = _mm_slli_epi32::<4>(a); |
3579 | assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0)); |
3580 | let r = _mm_slli_epi32::<32>(a); |
3581 | assert_eq_m128i(r, _mm_set1_epi32(0)); |
3582 | } |
3583 | |
3584 | #[simd_test(enable = "sse2" )] |
3585 | unsafe fn test_mm_sll_epi32() { |
3586 | let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); |
3587 | let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4)); |
3588 | assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0)); |
3589 | let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0)); |
3590 | assert_eq_m128i(r, a); |
3591 | let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32)); |
3592 | assert_eq_m128i(r, _mm_set1_epi32(0)); |
3593 | let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX)); |
3594 | assert_eq_m128i(r, _mm_set1_epi32(0)); |
3595 | } |
3596 | |
3597 | #[simd_test(enable = "sse2" )] |
3598 | unsafe fn test_mm_slli_epi64() { |
3599 | let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); |
3600 | let r = _mm_slli_epi64::<4>(a); |
3601 | assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0)); |
3602 | let r = _mm_slli_epi64::<64>(a); |
3603 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3604 | } |
3605 | |
3606 | #[simd_test(enable = "sse2" )] |
3607 | unsafe fn test_mm_sll_epi64() { |
3608 | let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); |
3609 | let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4)); |
3610 | assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0)); |
3611 | let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0)); |
3612 | assert_eq_m128i(r, a); |
3613 | let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64)); |
3614 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3615 | let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX)); |
3616 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3617 | } |
3618 | |
3619 | #[simd_test(enable = "sse2" )] |
3620 | unsafe fn test_mm_srai_epi16() { |
3621 | let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); |
3622 | let r = _mm_srai_epi16::<4>(a); |
3623 | assert_eq_m128i( |
3624 | r, |
3625 | _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10), |
3626 | ); |
3627 | let r = _mm_srai_epi16::<16>(a); |
3628 | assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1)); |
3629 | } |
3630 | |
3631 | #[simd_test(enable = "sse2" )] |
3632 | unsafe fn test_mm_sra_epi16() { |
3633 | let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); |
3634 | let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4)); |
3635 | assert_eq_m128i( |
3636 | r, |
3637 | _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10), |
3638 | ); |
3639 | let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0)); |
3640 | assert_eq_m128i(r, a); |
3641 | let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16)); |
3642 | assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1)); |
3643 | let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX)); |
3644 | assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1)); |
3645 | } |
3646 | |
3647 | #[simd_test(enable = "sse2" )] |
3648 | unsafe fn test_mm_srai_epi32() { |
3649 | let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); |
3650 | let r = _mm_srai_epi32::<4>(a); |
3651 | assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000)); |
3652 | let r = _mm_srai_epi32::<32>(a); |
3653 | assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1)); |
3654 | } |
3655 | |
3656 | #[simd_test(enable = "sse2" )] |
3657 | unsafe fn test_mm_sra_epi32() { |
3658 | let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); |
3659 | let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4)); |
3660 | assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000)); |
3661 | let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0)); |
3662 | assert_eq_m128i(r, a); |
3663 | let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32)); |
3664 | assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1)); |
3665 | let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX)); |
3666 | assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1)); |
3667 | } |
3668 | |
3669 | #[simd_test(enable = "sse2" )] |
3670 | unsafe fn test_mm_srli_si128() { |
3671 | #[rustfmt::skip] |
3672 | let a = _mm_setr_epi8( |
3673 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
3674 | ); |
3675 | let r = _mm_srli_si128::<1>(a); |
3676 | #[rustfmt::skip] |
3677 | let e = _mm_setr_epi8( |
3678 | 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, |
3679 | ); |
3680 | assert_eq_m128i(r, e); |
3681 | |
3682 | #[rustfmt::skip] |
3683 | let a = _mm_setr_epi8( |
3684 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
3685 | ); |
3686 | let r = _mm_srli_si128::<15>(a); |
3687 | let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
3688 | assert_eq_m128i(r, e); |
3689 | |
3690 | #[rustfmt::skip] |
3691 | let a = _mm_setr_epi8( |
3692 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
3693 | ); |
3694 | let r = _mm_srli_si128::<16>(a); |
3695 | assert_eq_m128i(r, _mm_set1_epi8(0)); |
3696 | } |
3697 | |
3698 | #[simd_test(enable = "sse2" )] |
3699 | unsafe fn test_mm_srli_epi16() { |
3700 | let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); |
3701 | let r = _mm_srli_epi16::<4>(a); |
3702 | assert_eq_m128i( |
3703 | r, |
3704 | _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0), |
3705 | ); |
3706 | let r = _mm_srli_epi16::<16>(a); |
3707 | assert_eq_m128i(r, _mm_set1_epi16(0)); |
3708 | } |
3709 | |
3710 | #[simd_test(enable = "sse2" )] |
3711 | unsafe fn test_mm_srl_epi16() { |
3712 | let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); |
3713 | let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4)); |
3714 | assert_eq_m128i( |
3715 | r, |
3716 | _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0), |
3717 | ); |
3718 | let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0)); |
3719 | assert_eq_m128i(r, a); |
3720 | let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16)); |
3721 | assert_eq_m128i(r, _mm_set1_epi16(0)); |
3722 | let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX)); |
3723 | assert_eq_m128i(r, _mm_set1_epi16(0)); |
3724 | } |
3725 | |
3726 | #[simd_test(enable = "sse2" )] |
3727 | unsafe fn test_mm_srli_epi32() { |
3728 | let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); |
3729 | let r = _mm_srli_epi32::<4>(a); |
3730 | assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000)); |
3731 | let r = _mm_srli_epi32::<32>(a); |
3732 | assert_eq_m128i(r, _mm_set1_epi32(0)); |
3733 | } |
3734 | |
3735 | #[simd_test(enable = "sse2" )] |
3736 | unsafe fn test_mm_srl_epi32() { |
3737 | let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); |
3738 | let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4)); |
3739 | assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000)); |
3740 | let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0)); |
3741 | assert_eq_m128i(r, a); |
3742 | let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32)); |
3743 | assert_eq_m128i(r, _mm_set1_epi32(0)); |
3744 | let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX)); |
3745 | assert_eq_m128i(r, _mm_set1_epi32(0)); |
3746 | } |
3747 | |
3748 | #[simd_test(enable = "sse2" )] |
3749 | unsafe fn test_mm_srli_epi64() { |
3750 | let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); |
3751 | let r = _mm_srli_epi64::<4>(a); |
3752 | assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000)); |
3753 | let r = _mm_srli_epi64::<64>(a); |
3754 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3755 | } |
3756 | |
3757 | #[simd_test(enable = "sse2" )] |
3758 | unsafe fn test_mm_srl_epi64() { |
3759 | let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); |
3760 | let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4)); |
3761 | assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000)); |
3762 | let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0)); |
3763 | assert_eq_m128i(r, a); |
3764 | let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64)); |
3765 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3766 | let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX)); |
3767 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3768 | } |
3769 | |
3770 | #[simd_test(enable = "sse2" )] |
3771 | unsafe fn test_mm_and_si128() { |
3772 | let a = _mm_set1_epi8(5); |
3773 | let b = _mm_set1_epi8(3); |
3774 | let r = _mm_and_si128(a, b); |
3775 | assert_eq_m128i(r, _mm_set1_epi8(1)); |
3776 | } |
3777 | |
3778 | #[simd_test(enable = "sse2" )] |
3779 | unsafe fn test_mm_andnot_si128() { |
3780 | let a = _mm_set1_epi8(5); |
3781 | let b = _mm_set1_epi8(3); |
3782 | let r = _mm_andnot_si128(a, b); |
3783 | assert_eq_m128i(r, _mm_set1_epi8(2)); |
3784 | } |
3785 | |
3786 | #[simd_test(enable = "sse2" )] |
3787 | unsafe fn test_mm_or_si128() { |
3788 | let a = _mm_set1_epi8(5); |
3789 | let b = _mm_set1_epi8(3); |
3790 | let r = _mm_or_si128(a, b); |
3791 | assert_eq_m128i(r, _mm_set1_epi8(7)); |
3792 | } |
3793 | |
3794 | #[simd_test(enable = "sse2" )] |
3795 | unsafe fn test_mm_xor_si128() { |
3796 | let a = _mm_set1_epi8(5); |
3797 | let b = _mm_set1_epi8(3); |
3798 | let r = _mm_xor_si128(a, b); |
3799 | assert_eq_m128i(r, _mm_set1_epi8(6)); |
3800 | } |
3801 | |
3802 | #[simd_test(enable = "sse2" )] |
3803 | unsafe fn test_mm_cmpeq_epi8() { |
3804 | let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3805 | let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
3806 | let r = _mm_cmpeq_epi8(a, b); |
3807 | #[rustfmt::skip] |
3808 | assert_eq_m128i( |
3809 | r, |
3810 | _mm_setr_epi8( |
3811 | 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
3812 | ) |
3813 | ); |
3814 | } |
3815 | |
3816 | #[simd_test(enable = "sse2" )] |
3817 | unsafe fn test_mm_cmpeq_epi16() { |
3818 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
3819 | let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0); |
3820 | let r = _mm_cmpeq_epi16(a, b); |
3821 | assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0)); |
3822 | } |
3823 | |
3824 | #[simd_test(enable = "sse2" )] |
3825 | unsafe fn test_mm_cmpeq_epi32() { |
3826 | let a = _mm_setr_epi32(0, 1, 2, 3); |
3827 | let b = _mm_setr_epi32(3, 2, 2, 0); |
3828 | let r = _mm_cmpeq_epi32(a, b); |
3829 | assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0)); |
3830 | } |
3831 | |
3832 | #[simd_test(enable = "sse2" )] |
3833 | unsafe fn test_mm_cmpgt_epi8() { |
3834 | let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
3835 | let b = _mm_set1_epi8(0); |
3836 | let r = _mm_cmpgt_epi8(a, b); |
3837 | let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
3838 | assert_eq_m128i(r, e); |
3839 | } |
3840 | |
3841 | #[simd_test(enable = "sse2" )] |
3842 | unsafe fn test_mm_cmpgt_epi16() { |
3843 | let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0); |
3844 | let b = _mm_set1_epi16(0); |
3845 | let r = _mm_cmpgt_epi16(a, b); |
3846 | let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0); |
3847 | assert_eq_m128i(r, e); |
3848 | } |
3849 | |
3850 | #[simd_test(enable = "sse2" )] |
3851 | unsafe fn test_mm_cmpgt_epi32() { |
3852 | let a = _mm_set_epi32(5, 0, 0, 0); |
3853 | let b = _mm_set1_epi32(0); |
3854 | let r = _mm_cmpgt_epi32(a, b); |
3855 | assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0)); |
3856 | } |
3857 | |
3858 | #[simd_test(enable = "sse2" )] |
3859 | unsafe fn test_mm_cmplt_epi8() { |
3860 | let a = _mm_set1_epi8(0); |
3861 | let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
3862 | let r = _mm_cmplt_epi8(a, b); |
3863 | let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
3864 | assert_eq_m128i(r, e); |
3865 | } |
3866 | |
3867 | #[simd_test(enable = "sse2" )] |
3868 | unsafe fn test_mm_cmplt_epi16() { |
3869 | let a = _mm_set1_epi16(0); |
3870 | let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0); |
3871 | let r = _mm_cmplt_epi16(a, b); |
3872 | let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0); |
3873 | assert_eq_m128i(r, e); |
3874 | } |
3875 | |
3876 | #[simd_test(enable = "sse2" )] |
3877 | unsafe fn test_mm_cmplt_epi32() { |
3878 | let a = _mm_set1_epi32(0); |
3879 | let b = _mm_set_epi32(5, 0, 0, 0); |
3880 | let r = _mm_cmplt_epi32(a, b); |
3881 | assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0)); |
3882 | } |
3883 | |
3884 | #[simd_test(enable = "sse2" )] |
3885 | unsafe fn test_mm_cvtepi32_pd() { |
3886 | let a = _mm_set_epi32(35, 25, 15, 5); |
3887 | let r = _mm_cvtepi32_pd(a); |
3888 | assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0)); |
3889 | } |
3890 | |
3891 | #[simd_test(enable = "sse2" )] |
3892 | unsafe fn test_mm_cvtsi32_sd() { |
3893 | let a = _mm_set1_pd(3.5); |
3894 | let r = _mm_cvtsi32_sd(a, 5); |
3895 | assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5)); |
3896 | } |
3897 | |
3898 | #[simd_test(enable = "sse2" )] |
3899 | unsafe fn test_mm_cvtepi32_ps() { |
3900 | let a = _mm_setr_epi32(1, 2, 3, 4); |
3901 | let r = _mm_cvtepi32_ps(a); |
3902 | assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0)); |
3903 | } |
3904 | |
3905 | #[simd_test(enable = "sse2" )] |
3906 | unsafe fn test_mm_cvtps_epi32() { |
3907 | let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
3908 | let r = _mm_cvtps_epi32(a); |
3909 | assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4)); |
3910 | } |
3911 | |
3912 | #[simd_test(enable = "sse2" )] |
3913 | unsafe fn test_mm_cvtsi32_si128() { |
3914 | let r = _mm_cvtsi32_si128(5); |
3915 | assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0)); |
3916 | } |
3917 | |
3918 | #[simd_test(enable = "sse2" )] |
3919 | unsafe fn test_mm_cvtsi128_si32() { |
3920 | let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0)); |
3921 | assert_eq!(r, 5); |
3922 | } |
3923 | |
3924 | #[simd_test(enable = "sse2" )] |
3925 | unsafe fn test_mm_set_epi64x() { |
3926 | let r = _mm_set_epi64x(0, 1); |
3927 | assert_eq_m128i(r, _mm_setr_epi64x(1, 0)); |
3928 | } |
3929 | |
3930 | #[simd_test(enable = "sse2" )] |
3931 | unsafe fn test_mm_set_epi32() { |
3932 | let r = _mm_set_epi32(0, 1, 2, 3); |
3933 | assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0)); |
3934 | } |
3935 | |
3936 | #[simd_test(enable = "sse2" )] |
3937 | unsafe fn test_mm_set_epi16() { |
3938 | let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
3939 | assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0)); |
3940 | } |
3941 | |
3942 | #[simd_test(enable = "sse2" )] |
3943 | unsafe fn test_mm_set_epi8() { |
3944 | #[rustfmt::skip] |
3945 | let r = _mm_set_epi8( |
3946 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
3947 | ); |
3948 | #[rustfmt::skip] |
3949 | let e = _mm_setr_epi8( |
3950 | 15, 14, 13, 12, 11, 10, 9, 8, |
3951 | 7, 6, 5, 4, 3, 2, 1, 0, |
3952 | ); |
3953 | assert_eq_m128i(r, e); |
3954 | } |
3955 | |
3956 | #[simd_test(enable = "sse2" )] |
3957 | unsafe fn test_mm_set1_epi64x() { |
        let r = _mm_set1_epi64x(1);
        assert_eq_m128i(r, _mm_set_epi64x(1, 1));
3960 | } |
3961 | |
3962 | #[simd_test(enable = "sse2" )] |
3963 | unsafe fn test_mm_set1_epi32() { |
        let r = _mm_set1_epi32(1);
        assert_eq_m128i(r, _mm_setr_epi32(1, 1, 1, 1));
3966 | } |
3967 | |
3968 | #[simd_test(enable = "sse2" )] |
3969 | unsafe fn test_mm_set1_epi16() { |
        let r = _mm_set1_epi16(1);
        assert_eq_m128i(r, _mm_setr_epi16(1, 1, 1, 1, 1, 1, 1, 1));
3972 | } |
3973 | |
3974 | #[simd_test(enable = "sse2" )] |
3975 | unsafe fn test_mm_set1_epi8() { |
        let r = _mm_set1_epi8(1);
        assert_eq_m128i(r, _mm_setr_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1));
3978 | } |
3979 | |
3980 | #[simd_test(enable = "sse2" )] |
3981 | unsafe fn test_mm_setr_epi32() { |
        let r = _mm_setr_epi32(0, 1, 2, 3);
        assert_eq_m128i(r, _mm_set_epi32(3, 2, 1, 0));
3984 | } |
3985 | |
3986 | #[simd_test(enable = "sse2" )] |
3987 | unsafe fn test_mm_setr_epi16() { |
        let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0));
3990 | } |
3991 | |
3992 | #[simd_test(enable = "sse2" )] |
3993 | unsafe fn test_mm_setr_epi8() { |
3994 | #[rustfmt::skip] |
3995 | let r = _mm_setr_epi8( |
3996 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
3997 | ); |
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            15, 14, 13, 12, 11, 10, 9, 8,
            7, 6, 5, 4, 3, 2, 1, 0,
        );
4003 | assert_eq_m128i(r, e); |
4004 | } |
4005 | |
4006 | #[simd_test(enable = "sse2" )] |
4007 | unsafe fn test_mm_setzero_si128() { |
4008 | let r = _mm_setzero_si128(); |
4009 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
4010 | } |
4011 | |
4012 | #[simd_test(enable = "sse2" )] |
4013 | unsafe fn test_mm_loadl_epi64() { |
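        // Loads 64 bits into the low half of the result and zeroes the upper half.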
4014 | let a = _mm_setr_epi64x(6, 5); |
4015 | let r = _mm_loadl_epi64(ptr::addr_of!(a)); |
4016 | assert_eq_m128i(r, _mm_setr_epi64x(6, 0)); |
4017 | } |
4018 | |
4019 | #[simd_test(enable = "sse2" )] |
4020 | unsafe fn test_mm_load_si128() { |
4021 | let a = _mm_set_epi64x(5, 6); |
4022 | let r = _mm_load_si128(ptr::addr_of!(a) as *const _); |
4023 | assert_eq_m128i(a, r); |
4024 | } |
4025 | |
4026 | #[simd_test(enable = "sse2" )] |
4027 | unsafe fn test_mm_loadu_si128() { |
4028 | let a = _mm_set_epi64x(5, 6); |
4029 | let r = _mm_loadu_si128(ptr::addr_of!(a) as *const _); |
4030 | assert_eq_m128i(a, r); |
4031 | } |
4032 | |
4033 | #[simd_test(enable = "sse2" )] |
    // Miri cannot support this until it is clear how non-temporal stores fit
    // into the Rust memory model.
    #[cfg_attr(miri, ignore)]
4037 | unsafe fn test_mm_maskmoveu_si128() { |
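        // Only bytes whose mask byte has its most significant bit set are
        // stored; the destination bytes for all other lanes are left untouched.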
4038 | let a = _mm_set1_epi8(9); |
4039 | #[rustfmt::skip] |
4040 | let mask = _mm_set_epi8( |
4041 | 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0, |
4042 | 0, 0, 0, 0, 0, 0, 0, 0, |
4043 | ); |
4044 | let mut r = _mm_set1_epi8(0); |
4045 | _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8); |
4046 | let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
4047 | assert_eq_m128i(r, e); |
4048 | } |
4049 | |
4050 | #[simd_test(enable = "sse2" )] |
4051 | unsafe fn test_mm_store_si128() { |
4052 | let a = _mm_set1_epi8(9); |
4053 | let mut r = _mm_set1_epi8(0); |
4054 | _mm_store_si128(&mut r, a); |
4055 | assert_eq_m128i(r, a); |
4056 | } |
4057 | |
4058 | #[simd_test(enable = "sse2" )] |
4059 | unsafe fn test_mm_storeu_si128() { |
4060 | let a = _mm_set1_epi8(9); |
4061 | let mut r = _mm_set1_epi8(0); |
4062 | _mm_storeu_si128(&mut r, a); |
4063 | assert_eq_m128i(r, a); |
4064 | } |
4065 | |
4066 | #[simd_test(enable = "sse2" )] |
4067 | unsafe fn test_mm_storel_epi64() { |
4068 | let a = _mm_setr_epi64x(2, 9); |
4069 | let mut r = _mm_set1_epi8(0); |
4070 | _mm_storel_epi64(&mut r, a); |
4071 | assert_eq_m128i(r, _mm_setr_epi64x(2, 0)); |
4072 | } |
4073 | |
4074 | #[simd_test(enable = "sse2" )] |
    // Miri cannot support this until it is clear how non-temporal stores fit
    // into the Rust memory model.
    #[cfg_attr(miri, ignore)]
4078 | unsafe fn test_mm_stream_si128() { |
4079 | let a = _mm_setr_epi32(1, 2, 3, 4); |
4080 | let mut r = _mm_undefined_si128(); |
4081 | _mm_stream_si128(ptr::addr_of_mut!(r), a); |
4082 | assert_eq_m128i(r, a); |
4083 | } |
4084 | |
4085 | #[simd_test(enable = "sse2" )] |
    // Miri cannot support this until it is clear how non-temporal stores fit
    // into the Rust memory model.
    #[cfg_attr(miri, ignore)]
4089 | unsafe fn test_mm_stream_si32() { |
4090 | let a: i32 = 7; |
4091 | let mut mem = boxed::Box::<i32>::new(-1); |
4092 | _mm_stream_si32(ptr::addr_of_mut!(*mem), a); |
4093 | assert_eq!(a, *mem); |
4094 | } |
4095 | |
4096 | #[simd_test(enable = "sse2" )] |
4097 | unsafe fn test_mm_move_epi64() { |
4098 | let a = _mm_setr_epi64x(5, 6); |
4099 | let r = _mm_move_epi64(a); |
4100 | assert_eq_m128i(r, _mm_setr_epi64x(5, 0)); |
4101 | } |
4102 | |
4103 | #[simd_test(enable = "sse2" )] |
4104 | unsafe fn test_mm_packs_epi16() { |
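        // Packs with signed saturation: 0x80 and -0x81 are just outside the
        // i8 range, so they clamp to 0x7F and -0x80. `a` fills lanes 0..8,
        // `b` fills lanes 8..16.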
4105 | let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0); |
4106 | let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80); |
4107 | let r = _mm_packs_epi16(a, b); |
4108 | #[rustfmt::skip] |
4109 | assert_eq_m128i( |
4110 | r, |
4111 | _mm_setr_epi8( |
4112 | 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F |
4113 | ) |
4114 | ); |
4115 | } |
4116 | |
4117 | #[simd_test(enable = "sse2" )] |
4118 | unsafe fn test_mm_packs_epi32() { |
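        // Same saturating pack, but i32 -> i16: 0x8000 and -0x8001 clamp to
        // i16::MAX and i16::MIN.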
4119 | let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0); |
4120 | let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000); |
4121 | let r = _mm_packs_epi32(a, b); |
4122 | assert_eq_m128i( |
4123 | r, |
4124 | _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF), |
4125 | ); |
4126 | } |
4127 | |
4128 | #[simd_test(enable = "sse2" )] |
4129 | unsafe fn test_mm_packus_epi16() { |
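        // Packs with *unsigned* saturation: -1 clamps to 0 and 0x100 clamps
        // to 0xFF (`!0` as an i8).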
4130 | let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0); |
4131 | let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100); |
4132 | let r = _mm_packus_epi16(a, b); |
4133 | assert_eq_m128i( |
4134 | r, |
4135 | _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0), |
4136 | ); |
4137 | } |
4138 | |
4139 | #[simd_test(enable = "sse2" )] |
4140 | unsafe fn test_mm_extract_epi16() { |
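        // The extracted lane is zero-extended into the i32 result, so the -1
        // in lane 0 reads back as 0xFFFF rather than -1.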
4141 | let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7); |
4142 | let r1 = _mm_extract_epi16::<0>(a); |
4143 | let r2 = _mm_extract_epi16::<3>(a); |
4144 | assert_eq!(r1, 0xFFFF); |
4145 | assert_eq!(r2, 3); |
4146 | } |
4147 | |
4148 | #[simd_test(enable = "sse2" )] |
4149 | unsafe fn test_mm_insert_epi16() { |
4150 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
4151 | let r = _mm_insert_epi16::<0>(a, 9); |
4152 | let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7); |
4153 | assert_eq_m128i(r, e); |
4154 | } |
4155 | |
4156 | #[simd_test(enable = "sse2" )] |
4157 | unsafe fn test_mm_movemask_epi8() { |
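        // Collects the most significant bit of each byte; byte 0 maps to
        // bit 0 of the result.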
4158 | #[rustfmt::skip] |
4159 | let a = _mm_setr_epi8( |
4160 | 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01, |
4161 | 0b0101, 0b1111_0000u8 as i8, 0, 0, |
4162 | 0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101, |
4163 | 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, |
4164 | ); |
4165 | let r = _mm_movemask_epi8(a); |
4166 | assert_eq!(r, 0b10100110_00100101); |
4167 | } |
4168 | |
4169 | #[simd_test(enable = "sse2" )] |
4170 | unsafe fn test_mm_shuffle_epi32() { |
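        // The immediate encodes two bits per destination lane, lowest bits
        // first: 0b00_01_01_11 selects source lanes 3, 1, 1, 0.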
4171 | let a = _mm_setr_epi32(5, 10, 15, 20); |
4172 | let r = _mm_shuffle_epi32::<0b00_01_01_11>(a); |
4173 | let e = _mm_setr_epi32(20, 10, 10, 5); |
4174 | assert_eq_m128i(r, e); |
4175 | } |
4176 | |
4177 | #[simd_test(enable = "sse2" )] |
4178 | unsafe fn test_mm_shufflehi_epi16() { |
4179 | let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20); |
4180 | let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a); |
4181 | let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5); |
4182 | assert_eq_m128i(r, e); |
4183 | } |
4184 | |
4185 | #[simd_test(enable = "sse2" )] |
4186 | unsafe fn test_mm_shufflelo_epi16() { |
4187 | let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4); |
4188 | let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a); |
4189 | let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4); |
4190 | assert_eq_m128i(r, e); |
4191 | } |
4192 | |
4193 | #[simd_test(enable = "sse2" )] |
4194 | unsafe fn test_mm_unpackhi_epi8() { |
4195 | #[rustfmt::skip] |
4196 | let a = _mm_setr_epi8( |
4197 | 0, 1, 2, 3, 4, 5, 6, 7, |
4198 | 8, 9, 10, 11, 12, 13, 14, 15, |
4199 | ); |
4200 | #[rustfmt::skip] |
4201 | let b = _mm_setr_epi8( |
4202 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
4203 | ); |
4204 | let r = _mm_unpackhi_epi8(a, b); |
4205 | #[rustfmt::skip] |
4206 | let e = _mm_setr_epi8( |
4207 | 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, |
4208 | ); |
4209 | assert_eq_m128i(r, e); |
4210 | } |
4211 | |
4212 | #[simd_test(enable = "sse2" )] |
4213 | unsafe fn test_mm_unpackhi_epi16() { |
4214 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
4215 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
4216 | let r = _mm_unpackhi_epi16(a, b); |
4217 | let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15); |
4218 | assert_eq_m128i(r, e); |
4219 | } |
4220 | |
4221 | #[simd_test(enable = "sse2" )] |
4222 | unsafe fn test_mm_unpackhi_epi32() { |
4223 | let a = _mm_setr_epi32(0, 1, 2, 3); |
4224 | let b = _mm_setr_epi32(4, 5, 6, 7); |
4225 | let r = _mm_unpackhi_epi32(a, b); |
4226 | let e = _mm_setr_epi32(2, 6, 3, 7); |
4227 | assert_eq_m128i(r, e); |
4228 | } |
4229 | |
4230 | #[simd_test(enable = "sse2" )] |
4231 | unsafe fn test_mm_unpackhi_epi64() { |
4232 | let a = _mm_setr_epi64x(0, 1); |
4233 | let b = _mm_setr_epi64x(2, 3); |
4234 | let r = _mm_unpackhi_epi64(a, b); |
4235 | let e = _mm_setr_epi64x(1, 3); |
4236 | assert_eq_m128i(r, e); |
4237 | } |
4238 | |
4239 | #[simd_test(enable = "sse2" )] |
4240 | unsafe fn test_mm_unpacklo_epi8() { |
4241 | #[rustfmt::skip] |
4242 | let a = _mm_setr_epi8( |
4243 | 0, 1, 2, 3, 4, 5, 6, 7, |
4244 | 8, 9, 10, 11, 12, 13, 14, 15, |
4245 | ); |
4246 | #[rustfmt::skip] |
4247 | let b = _mm_setr_epi8( |
4248 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
4249 | ); |
4250 | let r = _mm_unpacklo_epi8(a, b); |
4251 | #[rustfmt::skip] |
4252 | let e = _mm_setr_epi8( |
4253 | 0, 16, 1, 17, 2, 18, 3, 19, |
4254 | 4, 20, 5, 21, 6, 22, 7, 23, |
4255 | ); |
4256 | assert_eq_m128i(r, e); |
4257 | } |
4258 | |
4259 | #[simd_test(enable = "sse2" )] |
4260 | unsafe fn test_mm_unpacklo_epi16() { |
4261 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
4262 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
4263 | let r = _mm_unpacklo_epi16(a, b); |
4264 | let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11); |
4265 | assert_eq_m128i(r, e); |
4266 | } |
4267 | |
4268 | #[simd_test(enable = "sse2" )] |
4269 | unsafe fn test_mm_unpacklo_epi32() { |
4270 | let a = _mm_setr_epi32(0, 1, 2, 3); |
4271 | let b = _mm_setr_epi32(4, 5, 6, 7); |
4272 | let r = _mm_unpacklo_epi32(a, b); |
4273 | let e = _mm_setr_epi32(0, 4, 1, 5); |
4274 | assert_eq_m128i(r, e); |
4275 | } |
4276 | |
4277 | #[simd_test(enable = "sse2" )] |
4278 | unsafe fn test_mm_unpacklo_epi64() { |
4279 | let a = _mm_setr_epi64x(0, 1); |
4280 | let b = _mm_setr_epi64x(2, 3); |
4281 | let r = _mm_unpacklo_epi64(a, b); |
4282 | let e = _mm_setr_epi64x(0, 2); |
4283 | assert_eq_m128i(r, e); |
4284 | } |
4285 | |
4286 | #[simd_test(enable = "sse2" )] |
4287 | unsafe fn test_mm_add_sd() { |
4288 | let a = _mm_setr_pd(1.0, 2.0); |
4289 | let b = _mm_setr_pd(5.0, 10.0); |
4290 | let r = _mm_add_sd(a, b); |
4291 | assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0)); |
4292 | } |
4293 | |
4294 | #[simd_test(enable = "sse2" )] |
4295 | unsafe fn test_mm_add_pd() { |
4296 | let a = _mm_setr_pd(1.0, 2.0); |
4297 | let b = _mm_setr_pd(5.0, 10.0); |
4298 | let r = _mm_add_pd(a, b); |
4299 | assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0)); |
4300 | } |
4301 | |
4302 | #[simd_test(enable = "sse2" )] |
4303 | unsafe fn test_mm_div_sd() { |
4304 | let a = _mm_setr_pd(1.0, 2.0); |
4305 | let b = _mm_setr_pd(5.0, 10.0); |
4306 | let r = _mm_div_sd(a, b); |
4307 | assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0)); |
4308 | } |
4309 | |
4310 | #[simd_test(enable = "sse2" )] |
4311 | unsafe fn test_mm_div_pd() { |
4312 | let a = _mm_setr_pd(1.0, 2.0); |
4313 | let b = _mm_setr_pd(5.0, 10.0); |
4314 | let r = _mm_div_pd(a, b); |
4315 | assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2)); |
4316 | } |
4317 | |
4318 | #[simd_test(enable = "sse2" )] |
4319 | unsafe fn test_mm_max_sd() { |
4320 | let a = _mm_setr_pd(1.0, 2.0); |
4321 | let b = _mm_setr_pd(5.0, 10.0); |
4322 | let r = _mm_max_sd(a, b); |
4323 | assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0)); |
4324 | } |
4325 | |
4326 | #[simd_test(enable = "sse2" )] |
4327 | unsafe fn test_mm_max_pd() { |
4328 | let a = _mm_setr_pd(1.0, 2.0); |
4329 | let b = _mm_setr_pd(5.0, 10.0); |
4330 | let r = _mm_max_pd(a, b); |
4331 | assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0)); |
4332 | |
4333 | // Check SSE(2)-specific semantics for -0.0 handling. |
4334 | let a = _mm_setr_pd(-0.0, 0.0); |
4335 | let b = _mm_setr_pd(0.0, 0.0); |
4336 | let r1: [u8; 16] = transmute(_mm_max_pd(a, b)); |
4337 | let r2: [u8; 16] = transmute(_mm_max_pd(b, a)); |
4338 | let a: [u8; 16] = transmute(a); |
4339 | let b: [u8; 16] = transmute(b); |
4340 | assert_eq!(r1, b); |
4341 | assert_eq!(r2, a); |
4342 | assert_ne!(a, b); // sanity check that -0.0 is actually present |
4343 | } |
4344 | |
4345 | #[simd_test(enable = "sse2" )] |
4346 | unsafe fn test_mm_min_sd() { |
4347 | let a = _mm_setr_pd(1.0, 2.0); |
4348 | let b = _mm_setr_pd(5.0, 10.0); |
4349 | let r = _mm_min_sd(a, b); |
4350 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); |
4351 | } |
4352 | |
4353 | #[simd_test(enable = "sse2" )] |
4354 | unsafe fn test_mm_min_pd() { |
4355 | let a = _mm_setr_pd(1.0, 2.0); |
4356 | let b = _mm_setr_pd(5.0, 10.0); |
4357 | let r = _mm_min_pd(a, b); |
4358 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); |
4359 | |
4360 | // Check SSE(2)-specific semantics for -0.0 handling. |
4361 | let a = _mm_setr_pd(-0.0, 0.0); |
4362 | let b = _mm_setr_pd(0.0, 0.0); |
4363 | let r1: [u8; 16] = transmute(_mm_min_pd(a, b)); |
4364 | let r2: [u8; 16] = transmute(_mm_min_pd(b, a)); |
4365 | let a: [u8; 16] = transmute(a); |
4366 | let b: [u8; 16] = transmute(b); |
4367 | assert_eq!(r1, b); |
4368 | assert_eq!(r2, a); |
4369 | assert_ne!(a, b); // sanity check that -0.0 is actually present |
4370 | } |
4371 | |
4372 | #[simd_test(enable = "sse2" )] |
4373 | unsafe fn test_mm_mul_sd() { |
4374 | let a = _mm_setr_pd(1.0, 2.0); |
4375 | let b = _mm_setr_pd(5.0, 10.0); |
4376 | let r = _mm_mul_sd(a, b); |
4377 | assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0)); |
4378 | } |
4379 | |
4380 | #[simd_test(enable = "sse2" )] |
4381 | unsafe fn test_mm_mul_pd() { |
4382 | let a = _mm_setr_pd(1.0, 2.0); |
4383 | let b = _mm_setr_pd(5.0, 10.0); |
4384 | let r = _mm_mul_pd(a, b); |
4385 | assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0)); |
4386 | } |
4387 | |
4388 | #[simd_test(enable = "sse2" )] |
4389 | unsafe fn test_mm_sqrt_sd() { |
4390 | let a = _mm_setr_pd(1.0, 2.0); |
4391 | let b = _mm_setr_pd(5.0, 10.0); |
4392 | let r = _mm_sqrt_sd(a, b); |
4393 | assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0)); |
4394 | } |
4395 | |
4396 | #[simd_test(enable = "sse2" )] |
4397 | unsafe fn test_mm_sqrt_pd() { |
4398 | let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0)); |
4399 | assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt())); |
4400 | } |
4401 | |
4402 | #[simd_test(enable = "sse2" )] |
4403 | unsafe fn test_mm_sub_sd() { |
4404 | let a = _mm_setr_pd(1.0, 2.0); |
4405 | let b = _mm_setr_pd(5.0, 10.0); |
4406 | let r = _mm_sub_sd(a, b); |
4407 | assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0)); |
4408 | } |
4409 | |
4410 | #[simd_test(enable = "sse2" )] |
4411 | unsafe fn test_mm_sub_pd() { |
4412 | let a = _mm_setr_pd(1.0, 2.0); |
4413 | let b = _mm_setr_pd(5.0, 10.0); |
4414 | let r = _mm_sub_pd(a, b); |
4415 | assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0)); |
4416 | } |
4417 | |
4418 | #[simd_test(enable = "sse2" )] |
4419 | unsafe fn test_mm_and_pd() { |
4420 | let a = transmute(u64x2::splat(5)); |
4421 | let b = transmute(u64x2::splat(3)); |
4422 | let r = _mm_and_pd(a, b); |
4423 | let e = transmute(u64x2::splat(1)); |
4424 | assert_eq_m128d(r, e); |
4425 | } |
4426 | |
4427 | #[simd_test(enable = "sse2" )] |
4428 | unsafe fn test_mm_andnot_pd() { |
4429 | let a = transmute(u64x2::splat(5)); |
4430 | let b = transmute(u64x2::splat(3)); |
4431 | let r = _mm_andnot_pd(a, b); |
4432 | let e = transmute(u64x2::splat(2)); |
4433 | assert_eq_m128d(r, e); |
4434 | } |
4435 | |
4436 | #[simd_test(enable = "sse2" )] |
4437 | unsafe fn test_mm_or_pd() { |
4438 | let a = transmute(u64x2::splat(5)); |
4439 | let b = transmute(u64x2::splat(3)); |
4440 | let r = _mm_or_pd(a, b); |
4441 | let e = transmute(u64x2::splat(7)); |
4442 | assert_eq_m128d(r, e); |
4443 | } |
4444 | |
4445 | #[simd_test(enable = "sse2" )] |
4446 | unsafe fn test_mm_xor_pd() { |
4447 | let a = transmute(u64x2::splat(5)); |
4448 | let b = transmute(u64x2::splat(3)); |
4449 | let r = _mm_xor_pd(a, b); |
4450 | let e = transmute(u64x2::splat(6)); |
4451 | assert_eq_m128d(r, e); |
4452 | } |
4453 | |
4454 | #[simd_test(enable = "sse2" )] |
4455 | unsafe fn test_mm_cmpeq_sd() { |
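        // The `_sd` comparisons test only the low lane and copy the upper lane
        // of `a` through, hence the bits of 2.0 in the expected upper half.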
4456 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4457 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4458 | let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b)); |
4459 | assert_eq_m128i(r, e); |
4460 | } |
4461 | |
4462 | #[simd_test(enable = "sse2" )] |
4463 | unsafe fn test_mm_cmplt_sd() { |
4464 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
4465 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4466 | let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b)); |
4467 | assert_eq_m128i(r, e); |
4468 | } |
4469 | |
4470 | #[simd_test(enable = "sse2" )] |
4471 | unsafe fn test_mm_cmple_sd() { |
4472 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4473 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4474 | let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b)); |
4475 | assert_eq_m128i(r, e); |
4476 | } |
4477 | |
4478 | #[simd_test(enable = "sse2" )] |
4479 | unsafe fn test_mm_cmpgt_sd() { |
4480 | let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4481 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4482 | let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b)); |
4483 | assert_eq_m128i(r, e); |
4484 | } |
4485 | |
4486 | #[simd_test(enable = "sse2" )] |
4487 | unsafe fn test_mm_cmpge_sd() { |
4488 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4489 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4490 | let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b)); |
4491 | assert_eq_m128i(r, e); |
4492 | } |
4493 | |
4494 | #[simd_test(enable = "sse2" )] |
4495 | unsafe fn test_mm_cmpord_sd() { |
4496 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); |
4497 | let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); |
4498 | let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b)); |
4499 | assert_eq_m128i(r, e); |
4500 | } |
4501 | |
4502 | #[simd_test(enable = "sse2" )] |
4503 | unsafe fn test_mm_cmpunord_sd() { |
4504 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); |
4505 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4506 | let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b)); |
4507 | assert_eq_m128i(r, e); |
4508 | } |
4509 | |
4510 | #[simd_test(enable = "sse2" )] |
4511 | unsafe fn test_mm_cmpneq_sd() { |
4512 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
4513 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4514 | let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b)); |
4515 | assert_eq_m128i(r, e); |
4516 | } |
4517 | |
4518 | #[simd_test(enable = "sse2" )] |
4519 | unsafe fn test_mm_cmpnlt_sd() { |
4520 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
4521 | let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); |
4522 | let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b)); |
4523 | assert_eq_m128i(r, e); |
4524 | } |
4525 | |
4526 | #[simd_test(enable = "sse2" )] |
4527 | unsafe fn test_mm_cmpnle_sd() { |
4528 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4529 | let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); |
4530 | let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b)); |
4531 | assert_eq_m128i(r, e); |
4532 | } |
4533 | |
4534 | #[simd_test(enable = "sse2" )] |
4535 | unsafe fn test_mm_cmpngt_sd() { |
4536 | let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4537 | let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); |
4538 | let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b)); |
4539 | assert_eq_m128i(r, e); |
4540 | } |
4541 | |
4542 | #[simd_test(enable = "sse2" )] |
4543 | unsafe fn test_mm_cmpnge_sd() { |
4544 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4545 | let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); |
4546 | let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b)); |
4547 | assert_eq_m128i(r, e); |
4548 | } |
4549 | |
4550 | #[simd_test(enable = "sse2" )] |
4551 | unsafe fn test_mm_cmpeq_pd() { |
4552 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4553 | let e = _mm_setr_epi64x(!0, 0); |
4554 | let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b)); |
4555 | assert_eq_m128i(r, e); |
4556 | } |
4557 | |
4558 | #[simd_test(enable = "sse2" )] |
4559 | unsafe fn test_mm_cmplt_pd() { |
4560 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4561 | let e = _mm_setr_epi64x(0, !0); |
4562 | let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b)); |
4563 | assert_eq_m128i(r, e); |
4564 | } |
4565 | |
4566 | #[simd_test(enable = "sse2" )] |
4567 | unsafe fn test_mm_cmple_pd() { |
4568 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4569 | let e = _mm_setr_epi64x(!0, !0); |
4570 | let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b)); |
4571 | assert_eq_m128i(r, e); |
4572 | } |
4573 | |
4574 | #[simd_test(enable = "sse2" )] |
4575 | unsafe fn test_mm_cmpgt_pd() { |
4576 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4577 | let e = _mm_setr_epi64x(0, 0); |
4578 | let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b)); |
4579 | assert_eq_m128i(r, e); |
4580 | } |
4581 | |
4582 | #[simd_test(enable = "sse2" )] |
4583 | unsafe fn test_mm_cmpge_pd() { |
4584 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4585 | let e = _mm_setr_epi64x(!0, 0); |
4586 | let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b)); |
4587 | assert_eq_m128i(r, e); |
4588 | } |
4589 | |
4590 | #[simd_test(enable = "sse2" )] |
4591 | unsafe fn test_mm_cmpord_pd() { |
4592 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); |
4593 | let e = _mm_setr_epi64x(0, !0); |
4594 | let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b)); |
4595 | assert_eq_m128i(r, e); |
4596 | } |
4597 | |
4598 | #[simd_test(enable = "sse2" )] |
4599 | unsafe fn test_mm_cmpunord_pd() { |
4600 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); |
4601 | let e = _mm_setr_epi64x(!0, 0); |
4602 | let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b)); |
4603 | assert_eq_m128i(r, e); |
4604 | } |
4605 | |
4606 | #[simd_test(enable = "sse2" )] |
4607 | unsafe fn test_mm_cmpneq_pd() { |
4608 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
4609 | let e = _mm_setr_epi64x(!0, !0); |
4610 | let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b)); |
4611 | assert_eq_m128i(r, e); |
4612 | } |
4613 | |
4614 | #[simd_test(enable = "sse2" )] |
4615 | unsafe fn test_mm_cmpnlt_pd() { |
4616 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
4617 | let e = _mm_setr_epi64x(0, 0); |
4618 | let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b)); |
4619 | assert_eq_m128i(r, e); |
4620 | } |
4621 | |
4622 | #[simd_test(enable = "sse2" )] |
4623 | unsafe fn test_mm_cmpnle_pd() { |
4624 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4625 | let e = _mm_setr_epi64x(0, 0); |
4626 | let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b)); |
4627 | assert_eq_m128i(r, e); |
4628 | } |
4629 | |
4630 | #[simd_test(enable = "sse2" )] |
4631 | unsafe fn test_mm_cmpngt_pd() { |
4632 | let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4633 | let e = _mm_setr_epi64x(0, !0); |
4634 | let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b)); |
4635 | assert_eq_m128i(r, e); |
4636 | } |
4637 | |
4638 | #[simd_test(enable = "sse2" )] |
4639 | unsafe fn test_mm_cmpnge_pd() { |
4640 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4641 | let e = _mm_setr_epi64x(0, !0); |
4642 | let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b)); |
4643 | assert_eq_m128i(r, e); |
4644 | } |
4645 | |
4646 | #[simd_test(enable = "sse2" )] |
4647 | unsafe fn test_mm_comieq_sd() { |
4648 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4649 | assert!(_mm_comieq_sd(a, b) != 0); |
4650 | |
4651 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0)); |
4652 | assert!(_mm_comieq_sd(a, b) == 0); |
4653 | } |
4654 | |
4655 | #[simd_test(enable = "sse2" )] |
4656 | unsafe fn test_mm_comilt_sd() { |
4657 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4658 | assert!(_mm_comilt_sd(a, b) == 0); |
4659 | } |
4660 | |
4661 | #[simd_test(enable = "sse2" )] |
4662 | unsafe fn test_mm_comile_sd() { |
4663 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4664 | assert!(_mm_comile_sd(a, b) != 0); |
4665 | } |
4666 | |
4667 | #[simd_test(enable = "sse2" )] |
4668 | unsafe fn test_mm_comigt_sd() { |
4669 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4670 | assert!(_mm_comigt_sd(a, b) == 0); |
4671 | } |
4672 | |
4673 | #[simd_test(enable = "sse2" )] |
4674 | unsafe fn test_mm_comige_sd() { |
4675 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4676 | assert!(_mm_comige_sd(a, b) != 0); |
4677 | } |
4678 | |
4679 | #[simd_test(enable = "sse2" )] |
4680 | unsafe fn test_mm_comineq_sd() { |
4681 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4682 | assert!(_mm_comineq_sd(a, b) == 0); |
4683 | } |
4684 | |
4685 | #[simd_test(enable = "sse2" )] |
4686 | unsafe fn test_mm_ucomieq_sd() { |
4687 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4688 | assert!(_mm_ucomieq_sd(a, b) != 0); |
4689 | |
4690 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0)); |
4691 | assert!(_mm_ucomieq_sd(a, b) == 0); |
4692 | } |
4693 | |
4694 | #[simd_test(enable = "sse2" )] |
4695 | unsafe fn test_mm_ucomilt_sd() { |
4696 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4697 | assert!(_mm_ucomilt_sd(a, b) == 0); |
4698 | } |
4699 | |
4700 | #[simd_test(enable = "sse2" )] |
4701 | unsafe fn test_mm_ucomile_sd() { |
4702 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4703 | assert!(_mm_ucomile_sd(a, b) != 0); |
4704 | } |
4705 | |
4706 | #[simd_test(enable = "sse2" )] |
4707 | unsafe fn test_mm_ucomigt_sd() { |
4708 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4709 | assert!(_mm_ucomigt_sd(a, b) == 0); |
4710 | } |
4711 | |
4712 | #[simd_test(enable = "sse2" )] |
4713 | unsafe fn test_mm_ucomige_sd() { |
4714 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4715 | assert!(_mm_ucomige_sd(a, b) != 0); |
4716 | } |
4717 | |
4718 | #[simd_test(enable = "sse2" )] |
4719 | unsafe fn test_mm_ucomineq_sd() { |
4720 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4721 | assert!(_mm_ucomineq_sd(a, b) == 0); |
4722 | } |
4723 | |
4724 | #[simd_test(enable = "sse2" )] |
4725 | unsafe fn test_mm_movemask_pd() { |
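        // Collects the sign bit of each f64 lane; lane 0 maps to bit 0.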
4726 | let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0)); |
4727 | assert_eq!(r, 0b01); |
4728 | |
4729 | let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0)); |
4730 | assert_eq!(r, 0b11); |
4731 | } |
4732 | |
    #[repr(align(16))]
4734 | struct Memory { |
4735 | data: [f64; 4], |
4736 | } |
4737 | |
4738 | #[simd_test(enable = "sse2" )] |
4739 | unsafe fn test_mm_load_pd() { |
4740 | let mem = Memory { |
4741 | data: [1.0f64, 2.0, 3.0, 4.0], |
4742 | }; |
4743 | let vals = &mem.data; |
4744 | let d = vals.as_ptr(); |
4745 | |
4746 | let r = _mm_load_pd(d); |
4747 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); |
4748 | } |
4749 | |
4750 | #[simd_test(enable = "sse2" )] |
4751 | unsafe fn test_mm_load_sd() { |
4752 | let a = 1.; |
4753 | let expected = _mm_setr_pd(a, 0.); |
4754 | let r = _mm_load_sd(&a); |
4755 | assert_eq_m128d(r, expected); |
4756 | } |
4757 | |
4758 | #[simd_test(enable = "sse2" )] |
4759 | unsafe fn test_mm_loadh_pd() { |
4760 | let a = _mm_setr_pd(1., 2.); |
4761 | let b = 3.; |
4762 | let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.); |
4763 | let r = _mm_loadh_pd(a, &b); |
4764 | assert_eq_m128d(r, expected); |
4765 | } |
4766 | |
4767 | #[simd_test(enable = "sse2" )] |
4768 | unsafe fn test_mm_loadl_pd() { |
4769 | let a = _mm_setr_pd(1., 2.); |
4770 | let b = 3.; |
4771 | let expected = _mm_setr_pd(3., get_m128d(a, 1)); |
4772 | let r = _mm_loadl_pd(a, &b); |
4773 | assert_eq_m128d(r, expected); |
4774 | } |
4775 | |
4776 | #[simd_test(enable = "sse2" )] |
    // Miri cannot support this until it is clear how non-temporal stores fit
    // into the Rust memory model.
    #[cfg_attr(miri, ignore)]
4780 | unsafe fn test_mm_stream_pd() { |
        #[repr(align(128))]
4782 | struct Memory { |
4783 | pub data: [f64; 2], |
4784 | } |
4785 | let a = _mm_set1_pd(7.0); |
4786 | let mut mem = Memory { data: [-1.0; 2] }; |
4787 | |
4788 | _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a); |
4789 | for i in 0..2 { |
4790 | assert_eq!(mem.data[i], get_m128d(a, i)); |
4791 | } |
4792 | } |
4793 | |
4794 | #[simd_test(enable = "sse2" )] |
4795 | unsafe fn test_mm_store_sd() { |
4796 | let mut dest = 0.; |
4797 | let a = _mm_setr_pd(1., 2.); |
4798 | _mm_store_sd(&mut dest, a); |
4799 | assert_eq!(dest, _mm_cvtsd_f64(a)); |
4800 | } |
4801 | |
4802 | #[simd_test(enable = "sse2" )] |
4803 | unsafe fn test_mm_store_pd() { |
4804 | let mut mem = Memory { data: [0.0f64; 4] }; |
4805 | let vals = &mut mem.data; |
4806 | let a = _mm_setr_pd(1.0, 2.0); |
4807 | let d = vals.as_mut_ptr(); |
4808 | |
4809 | _mm_store_pd(d, *black_box(&a)); |
4810 | assert_eq!(vals[0], 1.0); |
4811 | assert_eq!(vals[1], 2.0); |
4812 | } |
4813 | |
4814 | #[simd_test(enable = "sse2" )] |
4815 | unsafe fn test_mm_storeu_pd() { |
4816 | let mut mem = Memory { data: [0.0f64; 4] }; |
4817 | let vals = &mut mem.data; |
4818 | let a = _mm_setr_pd(1.0, 2.0); |
4819 | |
4820 | let mut ofs = 0; |
4821 | let mut p = vals.as_mut_ptr(); |
4822 | |
4823 | // Make sure p is **not** aligned to 16-byte boundary |
4824 | if (p as usize) & 0xf == 0 { |
4825 | ofs = 1; |
4826 | p = p.add(1); |
4827 | } |
4828 | |
4829 | _mm_storeu_pd(p, *black_box(&a)); |
4830 | |
4831 | if ofs > 0 { |
4832 | assert_eq!(vals[ofs - 1], 0.0); |
4833 | } |
4834 | assert_eq!(vals[ofs + 0], 1.0); |
4835 | assert_eq!(vals[ofs + 1], 2.0); |
4836 | } |
4837 | |
4838 | #[simd_test(enable = "sse2" )] |
4839 | unsafe fn test_mm_storeu_si16() { |
4840 | let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
4841 | let mut r = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16); |
4842 | _mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a); |
4843 | let e = _mm_setr_epi16(1, 10, 11, 12, 13, 14, 15, 16); |
4844 | assert_eq_m128i(r, e); |
4845 | } |
4846 | |
4847 | #[simd_test(enable = "sse2" )] |
4848 | unsafe fn test_mm_storeu_si32() { |
4849 | let a = _mm_setr_epi32(1, 2, 3, 4); |
4850 | let mut r = _mm_setr_epi32(5, 6, 7, 8); |
4851 | _mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a); |
4852 | let e = _mm_setr_epi32(1, 6, 7, 8); |
4853 | assert_eq_m128i(r, e); |
4854 | } |
4855 | |
4856 | #[simd_test(enable = "sse2" )] |
4857 | unsafe fn test_mm_storeu_si64() { |
4858 | let a = _mm_setr_epi64x(1, 2); |
4859 | let mut r = _mm_setr_epi64x(3, 4); |
4860 | _mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a); |
4861 | let e = _mm_setr_epi64x(1, 4); |
4862 | assert_eq_m128i(r, e); |
4863 | } |
4864 | |
4865 | #[simd_test(enable = "sse2" )] |
4866 | unsafe fn test_mm_store1_pd() { |
4867 | let mut mem = Memory { data: [0.0f64; 4] }; |
4868 | let vals = &mut mem.data; |
4869 | let a = _mm_setr_pd(1.0, 2.0); |
4870 | let d = vals.as_mut_ptr(); |
4871 | |
4872 | _mm_store1_pd(d, *black_box(&a)); |
4873 | assert_eq!(vals[0], 1.0); |
4874 | assert_eq!(vals[1], 1.0); |
4875 | } |
4876 | |
4877 | #[simd_test(enable = "sse2" )] |
4878 | unsafe fn test_mm_store_pd1() { |
4879 | let mut mem = Memory { data: [0.0f64; 4] }; |
4880 | let vals = &mut mem.data; |
4881 | let a = _mm_setr_pd(1.0, 2.0); |
4882 | let d = vals.as_mut_ptr(); |
4883 | |
4884 | _mm_store_pd1(d, *black_box(&a)); |
4885 | assert_eq!(vals[0], 1.0); |
4886 | assert_eq!(vals[1], 1.0); |
4887 | } |
4888 | |
4889 | #[simd_test(enable = "sse2" )] |
4890 | unsafe fn test_mm_storer_pd() { |
4891 | let mut mem = Memory { data: [0.0f64; 4] }; |
4892 | let vals = &mut mem.data; |
4893 | let a = _mm_setr_pd(1.0, 2.0); |
4894 | let d = vals.as_mut_ptr(); |
4895 | |
4896 | _mm_storer_pd(d, *black_box(&a)); |
4897 | assert_eq!(vals[0], 2.0); |
4898 | assert_eq!(vals[1], 1.0); |
4899 | } |
4900 | |
4901 | #[simd_test(enable = "sse2" )] |
4902 | unsafe fn test_mm_storeh_pd() { |
4903 | let mut dest = 0.; |
4904 | let a = _mm_setr_pd(1., 2.); |
4905 | _mm_storeh_pd(&mut dest, a); |
4906 | assert_eq!(dest, get_m128d(a, 1)); |
4907 | } |
4908 | |
4909 | #[simd_test(enable = "sse2" )] |
4910 | unsafe fn test_mm_storel_pd() { |
4911 | let mut dest = 0.; |
4912 | let a = _mm_setr_pd(1., 2.); |
4913 | _mm_storel_pd(&mut dest, a); |
4914 | assert_eq!(dest, _mm_cvtsd_f64(a)); |
4915 | } |
4916 | |
4917 | #[simd_test(enable = "sse2" )] |
4918 | unsafe fn test_mm_loadr_pd() { |
4919 | let mut mem = Memory { |
4920 | data: [1.0f64, 2.0, 3.0, 4.0], |
4921 | }; |
4922 | let vals = &mut mem.data; |
4923 | let d = vals.as_ptr(); |
4924 | |
4925 | let r = _mm_loadr_pd(d); |
4926 | assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0)); |
4927 | } |
4928 | |
4929 | #[simd_test(enable = "sse2" )] |
4930 | unsafe fn test_mm_loadu_pd() { |
4931 | let mut mem = Memory { |
4932 | data: [1.0f64, 2.0, 3.0, 4.0], |
4933 | }; |
4934 | let vals = &mut mem.data; |
4935 | let mut d = vals.as_ptr(); |
4936 | |
4937 | // make sure d is not aligned to 16-byte boundary |
4938 | let mut offset = 0; |
4939 | if (d as usize) & 0xf == 0 { |
4940 | offset = 1; |
4941 | d = d.add(offset); |
4942 | } |
4943 | |
4944 | let r = _mm_loadu_pd(d); |
4945 | let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64)); |
4946 | assert_eq_m128d(r, e); |
4947 | } |
4948 | |
4949 | #[simd_test(enable = "sse2" )] |
4950 | unsafe fn test_mm_loadu_si16() { |
4951 | let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
4952 | let r = _mm_loadu_si16(ptr::addr_of!(a) as *const _); |
4953 | assert_eq_m128i(r, _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0)); |
4954 | } |
4955 | |
4956 | #[simd_test(enable = "sse2" )] |
4957 | unsafe fn test_mm_loadu_si32() { |
4958 | let a = _mm_setr_epi32(1, 2, 3, 4); |
4959 | let r = _mm_loadu_si32(ptr::addr_of!(a) as *const _); |
4960 | assert_eq_m128i(r, _mm_setr_epi32(1, 0, 0, 0)); |
4961 | } |
4962 | |
4963 | #[simd_test(enable = "sse2" )] |
4964 | unsafe fn test_mm_loadu_si64() { |
4965 | let a = _mm_setr_epi64x(5, 6); |
4966 | let r = _mm_loadu_si64(ptr::addr_of!(a) as *const _); |
4967 | assert_eq_m128i(r, _mm_setr_epi64x(5, 0)); |
4968 | } |
4969 | |
4970 | #[simd_test(enable = "sse2" )] |
4971 | unsafe fn test_mm_cvtpd_ps() { |
4972 | let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0)); |
4973 | assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0)); |
4974 | |
4975 | let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0)); |
4976 | assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0)); |
4977 | |
4978 | let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN)); |
4979 | assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0)); |
4980 | |
4981 | let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64)); |
4982 | assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0)); |
4983 | } |
4984 | |
4985 | #[simd_test(enable = "sse2" )] |
4986 | unsafe fn test_mm_cvtps_pd() { |
4987 | let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0)); |
4988 | assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0)); |
4989 | |
4990 | let r = _mm_cvtps_pd(_mm_setr_ps( |
4991 | f32::MAX, |
4992 | f32::INFINITY, |
4993 | f32::NEG_INFINITY, |
4994 | f32::MIN, |
4995 | )); |
4996 | assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY)); |
4997 | } |
4998 | |
4999 | #[simd_test(enable = "sse2" )] |
5000 | unsafe fn test_mm_cvtpd_epi32() { |
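        // Conversions that overflow i32 or see NaN produce the "integer
        // indefinite" value 0x8000_0000, i.e. i32::MIN.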
5001 | let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0)); |
5002 | assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0)); |
5003 | |
5004 | let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0)); |
5005 | assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0)); |
5006 | |
5007 | let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN)); |
5008 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); |
5009 | |
5010 | let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY)); |
5011 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); |
5012 | |
5013 | let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN)); |
5014 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); |
5015 | } |
5016 | |
5017 | #[simd_test(enable = "sse2" )] |
5018 | unsafe fn test_mm_cvtsd_si32() { |
5019 | let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0)); |
5020 | assert_eq!(r, -2); |
5021 | |
5022 | let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN)); |
5023 | assert_eq!(r, i32::MIN); |
5024 | |
5025 | let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN)); |
5026 | assert_eq!(r, i32::MIN); |
5027 | } |
5028 | |
5029 | #[simd_test(enable = "sse2" )] |
5030 | unsafe fn test_mm_cvtsd_ss() { |
5031 | let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4); |
5032 | let b = _mm_setr_pd(2.0, -5.0); |
5033 | |
5034 | let r = _mm_cvtsd_ss(a, b); |
5035 | |
5036 | assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4)); |
5037 | |
5038 | let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY); |
5039 | let b = _mm_setr_pd(f64::INFINITY, -5.0); |
5040 | |
5041 | let r = _mm_cvtsd_ss(a, b); |
5042 | |
5043 | assert_eq_m128( |
5044 | r, |
5045 | _mm_setr_ps( |
5046 | f32::INFINITY, |
5047 | f32::NEG_INFINITY, |
5048 | f32::MAX, |
5049 | f32::NEG_INFINITY, |
5050 | ), |
5051 | ); |
5052 | } |
5053 | |
5054 | #[simd_test(enable = "sse2" )] |
5055 | unsafe fn test_mm_cvtsd_f64() { |
5056 | let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2)); |
5057 | assert_eq!(r, -1.1); |
5058 | } |
5059 | |
5060 | #[simd_test(enable = "sse2" )] |
5061 | unsafe fn test_mm_cvtss_sd() { |
5062 | let a = _mm_setr_pd(-1.1, 2.2); |
5063 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
5064 | |
5065 | let r = _mm_cvtss_sd(a, b); |
5066 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2)); |
5067 | |
5068 | let a = _mm_setr_pd(-1.1, f64::INFINITY); |
5069 | let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0); |
5070 | |
5071 | let r = _mm_cvtss_sd(a, b); |
5072 | assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY)); |
5073 | } |
5074 | |
5075 | #[simd_test(enable = "sse2" )] |
5076 | unsafe fn test_mm_cvttpd_epi32() { |
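        // The extra `t` stands for "truncate": these conversions round toward
        // zero instead of using the current rounding mode.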
5077 | let a = _mm_setr_pd(-1.1, 2.2); |
5078 | let r = _mm_cvttpd_epi32(a); |
5079 | assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0)); |
5080 | |
5081 | let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); |
5082 | let r = _mm_cvttpd_epi32(a); |
5083 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); |
5084 | } |
5085 | |
5086 | #[simd_test(enable = "sse2" )] |
5087 | unsafe fn test_mm_cvttsd_si32() { |
5088 | let a = _mm_setr_pd(-1.1, 2.2); |
5089 | let r = _mm_cvttsd_si32(a); |
5090 | assert_eq!(r, -1); |
5091 | |
5092 | let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); |
5093 | let r = _mm_cvttsd_si32(a); |
5094 | assert_eq!(r, i32::MIN); |
5095 | } |
5096 | |
5097 | #[simd_test(enable = "sse2" )] |
5098 | unsafe fn test_mm_cvttps_epi32() { |
5099 | let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6); |
5100 | let r = _mm_cvttps_epi32(a); |
5101 | assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6)); |
5102 | |
5103 | let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX); |
5104 | let r = _mm_cvttps_epi32(a); |
5105 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN)); |
5106 | } |
5107 | |
5108 | #[simd_test(enable = "sse2" )] |
5109 | unsafe fn test_mm_set_sd() { |
5110 | let r = _mm_set_sd(-1.0_f64); |
5111 | assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64)); |
5112 | } |
5113 | |
5114 | #[simd_test(enable = "sse2" )] |
5115 | unsafe fn test_mm_set1_pd() { |
5116 | let r = _mm_set1_pd(-1.0_f64); |
5117 | assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64)); |
5118 | } |
5119 | |
5120 | #[simd_test(enable = "sse2" )] |
5121 | unsafe fn test_mm_set_pd1() { |
5122 | let r = _mm_set_pd1(-2.0_f64); |
5123 | assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64)); |
5124 | } |
5125 | |
5126 | #[simd_test(enable = "sse2" )] |
5127 | unsafe fn test_mm_set_pd() { |
5128 | let r = _mm_set_pd(1.0_f64, 5.0_f64); |
5129 | assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64)); |
5130 | } |
5131 | |
5132 | #[simd_test(enable = "sse2" )] |
5133 | unsafe fn test_mm_setr_pd() { |
5134 | let r = _mm_setr_pd(1.0_f64, -5.0_f64); |
5135 | assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64)); |
5136 | } |
5137 | |
5138 | #[simd_test(enable = "sse2" )] |
5139 | unsafe fn test_mm_setzero_pd() { |
5140 | let r = _mm_setzero_pd(); |
5141 | assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64)); |
5142 | } |
5143 | |
5144 | #[simd_test(enable = "sse2" )] |
5145 | unsafe fn test_mm_load1_pd() { |
5146 | let d = -5.0; |
5147 | let r = _mm_load1_pd(&d); |
5148 | assert_eq_m128d(r, _mm_setr_pd(d, d)); |
5149 | } |
5150 | |
5151 | #[simd_test(enable = "sse2" )] |
5152 | unsafe fn test_mm_load_pd1() { |
5153 | let d = -5.0; |
5154 | let r = _mm_load_pd1(&d); |
5155 | assert_eq_m128d(r, _mm_setr_pd(d, d)); |
5156 | } |
5157 | |
5158 | #[simd_test(enable = "sse2" )] |
5159 | unsafe fn test_mm_unpackhi_pd() { |
5160 | let a = _mm_setr_pd(1.0, 2.0); |
5161 | let b = _mm_setr_pd(3.0, 4.0); |
5162 | let r = _mm_unpackhi_pd(a, b); |
5163 | assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0)); |
5164 | } |
5165 | |
5166 | #[simd_test(enable = "sse2" )] |
5167 | unsafe fn test_mm_unpacklo_pd() { |
5168 | let a = _mm_setr_pd(1.0, 2.0); |
5169 | let b = _mm_setr_pd(3.0, 4.0); |
5170 | let r = _mm_unpacklo_pd(a, b); |
5171 | assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0)); |
5172 | } |
5173 | |
5174 | #[simd_test(enable = "sse2" )] |
5175 | unsafe fn test_mm_shuffle_pd() { |
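        // Only the low two immediate bits matter: bit 0 picks the lane of `a`
        // for the low half, bit 1 picks the lane of `b` for the high half.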
5176 | let a = _mm_setr_pd(1., 2.); |
5177 | let b = _mm_setr_pd(3., 4.); |
5178 | let expected = _mm_setr_pd(1., 3.); |
5179 | let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b); |
5180 | assert_eq_m128d(r, expected); |
5181 | } |
5182 | |
5183 | #[simd_test(enable = "sse2" )] |
5184 | unsafe fn test_mm_move_sd() { |
5185 | let a = _mm_setr_pd(1., 2.); |
5186 | let b = _mm_setr_pd(3., 4.); |
5187 | let expected = _mm_setr_pd(3., 2.); |
5188 | let r = _mm_move_sd(a, b); |
5189 | assert_eq_m128d(r, expected); |
5190 | } |
5191 | |
5192 | #[simd_test(enable = "sse2" )] |
5193 | unsafe fn test_mm_castpd_ps() { |
5194 | let a = _mm_set1_pd(0.); |
5195 | let expected = _mm_set1_ps(0.); |
5196 | let r = _mm_castpd_ps(a); |
5197 | assert_eq_m128(r, expected); |
5198 | } |
5199 | |
5200 | #[simd_test(enable = "sse2" )] |
5201 | unsafe fn test_mm_castpd_si128() { |
5202 | let a = _mm_set1_pd(0.); |
5203 | let expected = _mm_set1_epi64x(0); |
5204 | let r = _mm_castpd_si128(a); |
5205 | assert_eq_m128i(r, expected); |
5206 | } |
5207 | |
5208 | #[simd_test(enable = "sse2" )] |
5209 | unsafe fn test_mm_castps_pd() { |
5210 | let a = _mm_set1_ps(0.); |
5211 | let expected = _mm_set1_pd(0.); |
5212 | let r = _mm_castps_pd(a); |
5213 | assert_eq_m128d(r, expected); |
5214 | } |
5215 | |
5216 | #[simd_test(enable = "sse2" )] |
5217 | unsafe fn test_mm_castps_si128() { |
5218 | let a = _mm_set1_ps(0.); |
5219 | let expected = _mm_set1_epi32(0); |
5220 | let r = _mm_castps_si128(a); |
5221 | assert_eq_m128i(r, expected); |
5222 | } |
5223 | |
5224 | #[simd_test(enable = "sse2" )] |
5225 | unsafe fn test_mm_castsi128_pd() { |
5226 | let a = _mm_set1_epi64x(0); |
5227 | let expected = _mm_set1_pd(0.); |
5228 | let r = _mm_castsi128_pd(a); |
5229 | assert_eq_m128d(r, expected); |
5230 | } |
5231 | |
5232 | #[simd_test(enable = "sse2" )] |
5233 | unsafe fn test_mm_castsi128_ps() { |
5234 | let a = _mm_set1_epi32(0); |
5235 | let expected = _mm_set1_ps(0.); |
5236 | let r = _mm_castsi128_ps(a); |
5237 | assert_eq_m128(r, expected); |
5238 | } |
5239 | } |
5240 | |