//! Streaming SIMD Extensions 4.1 (SSE4.1)

use crate::core_arch::{simd::*, x86::*};
use crate::intrinsics::simd::*;

#[cfg(test)]
use stdarch_test::assert_instr;

// SSE4 rounding constants
/// round to nearest
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_NEAREST_INT: i32 = 0x00;
/// round down
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_NEG_INF: i32 = 0x01;
/// round up
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_POS_INF: i32 = 0x02;
/// truncate
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_ZERO: i32 = 0x03;
/// use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_CUR_DIRECTION: i32 = 0x04;
/// do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_RAISE_EXC: i32 = 0x00;
/// suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NO_EXC: i32 = 0x08;
/// round to nearest and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NINT: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEAREST_INT;
/// round down and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_FLOOR: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF;
/// round up and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_CEIL: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF;
/// truncate and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TRUNC: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO;
/// use MXCSR.RC and do not suppress exceptions; see
/// `vendor::_MM_SET_ROUNDING_MODE`
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_RINT: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION;
/// use MXCSR.RC and suppress exceptions; see `vendor::_MM_SET_ROUNDING_MODE`
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NEARBYINT: i32 = _MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION;

/// Blend packed 8-bit integers from `a` and `b` using `mask`
///
/// The high bit of each corresponding mask byte determines the selection.
/// If the high bit is set, the element of `b` is selected.
/// Otherwise, the element of `a` is selected.
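///
/// # Example
///
/// A small sketch of masked byte selection; only the sign bit of each mask
/// byte matters:
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # #[allow(unused_unsafe)] // FIXME remove after stdarch bump in rustc
/// # unsafe fn worker() { unsafe {
/// let a = _mm_set1_epi8(1);
/// let b = _mm_set1_epi8(2);
/// // Bytes with the high bit set pick `b`; all others pick `a`.
/// let mask = _mm_setr_epi8(0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1);
/// let r = _mm_blendv_epi8(a, b, mask);
/// let mut out = [0i8; 16];
/// _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
/// assert_eq!(out, [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2]);
/// # }}
/// # unsafe { worker() }
/// # }
/// # }
/// ```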
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blendv_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i {
    unsafe {
        let mask: i8x16 = simd_lt(mask.as_i8x16(), i8x16::ZERO);
        transmute(simd_select(mask, b.as_i8x16(), a.as_i8x16()))
    }
}

/// Blend packed 16-bit integers from `a` and `b` using the mask `IMM8`.
///
/// The mask bits determine the selection. A clear bit selects the
/// corresponding element of `a`, and a set bit the corresponding
/// element of `b`.
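///
/// # Example
///
/// A small sketch; bit `n` of `IMM8` chooses lane `n` from `b`:
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # #[allow(unused_unsafe)] // FIXME remove after stdarch bump in rustc
/// # unsafe fn worker() { unsafe {
/// let a = _mm_set1_epi16(10);
/// let b = _mm_set1_epi16(20);
/// // 0b0000_0011 takes lanes 0 and 1 from `b`, the rest from `a`.
/// let r = _mm_blend_epi16::<0b0000_0011>(a, b);
/// let mut out = [0i16; 8];
/// _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
/// assert_eq!(out, [20, 20, 10, 10, 10, 10, 10, 10]);
/// # }}
/// # unsafe { worker() }
/// # }
/// # }
/// ```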
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_epi16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pblendw, IMM8 = 0xB1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_blend_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        transmute::<i16x8, _>(simd_shuffle!(
            a.as_i16x8(),
            b.as_i16x8(),
            [
                [0, 8][IMM8 as usize & 1],
                [1, 9][(IMM8 >> 1) as usize & 1],
                [2, 10][(IMM8 >> 2) as usize & 1],
                [3, 11][(IMM8 >> 3) as usize & 1],
                [4, 12][(IMM8 >> 4) as usize & 1],
                [5, 13][(IMM8 >> 5) as usize & 1],
                [6, 14][(IMM8 >> 6) as usize & 1],
                [7, 15][(IMM8 >> 7) as usize & 1],
            ]
        ))
    }
}

/// Blend packed double-precision (64-bit) floating-point elements from `a`
/// and `b` using `mask`
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blendv_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendvpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d {
    unsafe {
        let mask: i64x2 = simd_lt(transmute::<_, i64x2>(mask), i64x2::ZERO);
        transmute(simd_select(mask, b.as_f64x2(), a.as_f64x2()))
    }
}

/// Blend packed single-precision (32-bit) floating-point elements from `a`
/// and `b` using `mask`
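///
/// # Example
///
/// A small sketch; each lane is chosen by the sign bit of the corresponding
/// mask lane:
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # #[allow(unused_unsafe)] // FIXME remove after stdarch bump in rustc
/// # unsafe fn worker() { unsafe {
/// let a = _mm_set1_ps(0.0);
/// let b = _mm_set1_ps(1.0);
/// // Lanes 1 and 3 have their sign bit set, so they come from `b`.
/// let mask = _mm_castsi128_ps(_mm_setr_epi32(0, -1, 0, -1));
/// let r = _mm_blendv_ps(a, b, mask);
/// let mut out = [0.0f32; 4];
/// _mm_storeu_ps(out.as_mut_ptr(), r);
/// assert_eq!(out, [0.0, 1.0, 0.0, 1.0]);
/// # }}
/// # unsafe { worker() }
/// # }
/// # }
/// ```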
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blendv_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendvps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
    unsafe {
        let mask: i32x4 = simd_lt(transmute::<_, i32x4>(mask), i32x4::ZERO);
        transmute(simd_select(mask, b.as_f32x4(), a.as_f32x4()))
    }
}

/// Blend packed double-precision (64-bit) floating-point elements from `a`
/// and `b` using control mask `IMM2`
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
// Note: LLVM7 prefers the single-precision floating-point domain when possible
// see https://bugs.llvm.org/show_bug.cgi?id=38195
// #[cfg_attr(test, assert_instr(blendpd, IMM2 = 0b10))]
#[cfg_attr(test, assert_instr(blendps, IMM2 = 0b10))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_blend_pd<const IMM2: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM2, 2);
    unsafe {
        transmute::<f64x2, _>(simd_shuffle!(
            a.as_f64x2(),
            b.as_f64x2(),
            [[0, 2][IMM2 as usize & 1], [1, 3][(IMM2 >> 1) as usize & 1]]
        ))
    }
}

/// Blend packed single-precision (32-bit) floating-point elements from `a`
/// and `b` using mask `IMM4`
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendps, IMM4 = 0b0101))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_blend_ps<const IMM4: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM4, 4);
    unsafe {
        transmute::<f32x4, _>(simd_shuffle!(
            a.as_f32x4(),
            b.as_f32x4(),
            [
                [0, 4][IMM4 as usize & 1],
                [1, 5][(IMM4 >> 1) as usize & 1],
                [2, 6][(IMM4 >> 2) as usize & 1],
                [3, 7][(IMM4 >> 3) as usize & 1],
            ]
        ))
    }
}

/// Extracts a single-precision (32-bit) floating-point element from `a`,
/// selected with `IMM8`. The returned `i32` stores the float's bit-pattern,
/// and may be converted back to a floating-point number via `f32::from_bits`.
///
/// # Example
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # #[allow(unused_unsafe)] // FIXME remove after stdarch bump in rustc
/// # unsafe fn worker() { unsafe {
/// let mut float_store = vec![1.0, 1.0, 2.0, 3.0];
/// let simd_floats = _mm_set_ps(2.5, 5.0, 7.5, 10.0);
/// let x: i32 = _mm_extract_ps::<2>(simd_floats);
/// float_store.push(f32::from_bits(x as u32));
/// assert_eq!(float_store, vec![1.0, 1.0, 2.0, 3.0, 5.0]);
/// # }}
/// # unsafe { worker() }
/// # }
/// # }
/// ```
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(extractps, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_extract_ps<const IMM8: i32>(a: __m128) -> i32 {
    static_assert_uimm_bits!(IMM8, 2);
    unsafe { simd_extract!(a, IMM8 as u32, f32).to_bits() as i32 }
}

/// Extracts an 8-bit integer from `a`, selected with `IMM8`. Returns a 32-bit
/// integer containing the zero-extended integer data.
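///
/// # Example
///
/// A small sketch of the zero extension; a negative byte comes back as its
/// unsigned value:
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # #[allow(unused_unsafe)] // FIXME remove after stdarch bump in rustc
/// # unsafe fn worker() { unsafe {
/// let a = _mm_set1_epi8(-1);
/// // The byte 0xFF is zero-extended, not sign-extended, into the `i32`.
/// assert_eq!(_mm_extract_epi8::<0>(a), 0xFF);
/// assert_eq!(_mm_extract_epi8::<0>(a) as i8, -1);
/// # }}
/// # unsafe { worker() }
/// # }
/// # }
/// ```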
///
/// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pextrb, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_extract_epi8<const IMM8: i32>(a: __m128i) -> i32 {
    static_assert_uimm_bits!(IMM8, 4);
    unsafe { simd_extract!(a.as_u8x16(), IMM8 as u32, u8) as i32 }
}

/// Extracts a 32-bit integer from `a`, selected with `IMM8`
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(extractps, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_extract_epi32<const IMM8: i32>(a: __m128i) -> i32 {
    static_assert_uimm_bits!(IMM8, 2);
    unsafe { simd_extract!(a.as_i32x4(), IMM8 as u32, i32) }
}

/// Selects a single value from `b` to store at some position in `a`, then
/// zeroes elements according to `IMM8`; the example below decodes one mask.
///
/// `IMM8` specifies which bits from operand `b` will be copied, which bits in
/// the result they will be copied to, and which bits in the result will be
/// cleared. The following assignments are made:
///
/// * Bits `[7:6]` specify the bits to copy from operand `b`:
///     - `00`: Selects bits `[31:0]` from operand `b`.
///     - `01`: Selects bits `[63:32]` from operand `b`.
///     - `10`: Selects bits `[95:64]` from operand `b`.
///     - `11`: Selects bits `[127:96]` from operand `b`.
///
/// * Bits `[5:4]` specify the bits in the result to which the selected bits
///   from operand `b` are copied:
///     - `00`: Copies the selected bits from `b` to result bits `[31:0]`.
///     - `01`: Copies the selected bits from `b` to result bits `[63:32]`.
///     - `10`: Copies the selected bits from `b` to result bits `[95:64]`.
///     - `11`: Copies the selected bits from `b` to result bits `[127:96]`.
///
/// * Bits `[3:0]`: If any of these bits are set, the corresponding result
///   element is cleared.
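///
/// # Example
///
/// A small sketch decoding one `IMM8` value; `0b01_10_0001` copies lane 1 of
/// `b` into lane 2 of the result and zeroes lane 0:
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # #[allow(unused_unsafe)] // FIXME remove after stdarch bump in rustc
/// # unsafe fn worker() { unsafe {
/// let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
/// let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
/// let r = _mm_insert_ps::<0b01_10_0001>(a, b);
/// let mut out = [0.0f32; 4];
/// _mm_storeu_ps(out.as_mut_ptr(), r);
/// // b[1] == 6.0 landed in lane 2; lane 0 was cleared by the zero mask.
/// assert_eq!(out, [0.0, 2.0, 6.0, 4.0]);
/// # }}
/// # unsafe { worker() }
/// # }
/// # }
/// ```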
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(insertps, IMM8 = 0b1010))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_insert_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { insertps(a, b, IMM8 as u8) }
}

/// Returns a copy of `a` with the 8-bit integer from `i` inserted at a
/// location specified by `IMM8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pinsrb, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_insert_epi8<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
    static_assert_uimm_bits!(IMM8, 4);
    unsafe { transmute(simd_insert!(a.as_i8x16(), IMM8 as u32, i as i8)) }
}

/// Returns a copy of `a` with the 32-bit integer from `i` inserted at a
/// location specified by `IMM8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pinsrd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_insert_epi32<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
    static_assert_uimm_bits!(IMM8, 2);
    unsafe { transmute(simd_insert!(a.as_i32x4(), IMM8 as u32, i)) }
}

/// Compares packed 8-bit integers in `a` and `b` and returns packed maximum
/// values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_imax(a.as_i8x16(), b.as_i8x16()).as_m128i() }
}

/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed
/// maximum.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_imax(a.as_u16x8(), b.as_u16x8()).as_m128i() }
}

/// Compares packed 32-bit integers in `a` and `b`, and returns packed maximum
/// values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_imax(a.as_i32x4(), b.as_i32x4()).as_m128i() }
}

/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed
/// maximum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_imax(a.as_u32x4(), b.as_u32x4()).as_m128i() }
}

/// Compares packed 8-bit integers in `a` and `b` and returns packed minimum
/// values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_imin(a.as_i8x16(), b.as_i8x16()).as_m128i() }
}

/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed
/// minimum.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_imin(a.as_u16x8(), b.as_u16x8()).as_m128i() }
}

/// Compares packed 32-bit integers in `a` and `b`, and returns packed minimum
/// values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_imin(a.as_i32x4(), b.as_i32x4()).as_m128i() }
}

/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed
/// minimum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_imin(a.as_u32x4(), b.as_u32x4()).as_m128i() }
}

/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using unsigned saturation
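///
/// # Example
///
/// A small sketch of the saturation: values are clamped to the `u16` range
/// `0..=65535` before packing:
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # #[allow(unused_unsafe)] // FIXME remove after stdarch bump in rustc
/// # unsafe fn worker() { unsafe {
/// let a = _mm_setr_epi32(-1, 0, 40_000, 70_000);
/// let b = _mm_setr_epi32(1, 2, 3, 4);
/// let r = _mm_packus_epi32(a, b);
/// let mut out = [0u16; 8];
/// _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
/// // -1 saturates to 0 and 70_000 saturates to 65_535.
/// assert_eq!(out, [0, 0, 40_000, 65_535, 1, 2, 3, 4]);
/// # }}
/// # unsafe { worker() }
/// # }
/// # }
/// ```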
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(packusdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(packusdw(a.as_i32x4(), b.as_i32x4())) }
}

/// Compares packed 64-bit integers in `a` and `b` for equality
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pcmpeqq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_eq::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) }
}

/// Sign extend packed 8-bit integers in `a` to packed 16-bit integers
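///
/// # Example
///
/// A small sketch contrasting sign extension with the zero-extending
/// `_mm_cvtepu8_epi16`:
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # #[allow(unused_unsafe)] // FIXME remove after stdarch bump in rustc
/// # unsafe fn worker() { unsafe {
/// let a = _mm_set1_epi8(-5);
/// let r = _mm_cvtepi8_epi16(a);
/// let mut out = [0i16; 8];
/// _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
/// // Each of the low 8 bytes keeps its value; `_mm_cvtepu8_epi16` would
/// // instead produce 251 (the unsigned reading of 0xFB).
/// assert_eq!(out, [-5; 8]);
/// # }}
/// # unsafe { worker() }
/// # }
/// # }
/// ```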
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi8_epi16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i8x16();
        let a: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute(simd_cast::<_, i16x8>(a))
    }
}

/// Sign extend packed 8-bit integers in `a` to packed 32-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi8_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i8x16();
        let a: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute(simd_cast::<_, i32x4>(a))
    }
}

/// Sign extend packed 8-bit integers in the low 8 bytes of `a` to packed
/// 64-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi8_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i8x16();
        let a: i8x2 = simd_shuffle!(a, a, [0, 1]);
        transmute(simd_cast::<_, i64x2>(a))
    }
}

/// Sign extend packed 16-bit integers in `a` to packed 32-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i16x8();
        let a: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute(simd_cast::<_, i32x4>(a))
    }
}

/// Sign extend packed 16-bit integers in `a` to packed 64-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i16x8();
        let a: i16x2 = simd_shuffle!(a, a, [0, 1]);
        transmute(simd_cast::<_, i64x2>(a))
    }
}

/// Sign extend packed 32-bit integers in `a` to packed 64-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i32x4();
        let a: i32x2 = simd_shuffle!(a, a, [0, 1]);
        transmute(simd_cast::<_, i64x2>(a))
    }
}

/// Zero extend packed unsigned 8-bit integers in `a` to packed 16-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu8_epi16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_u8x16();
        let a: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute(simd_cast::<_, i16x8>(a))
    }
}

/// Zero extend packed unsigned 8-bit integers in `a` to packed 32-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu8_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_u8x16();
        let a: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute(simd_cast::<_, i32x4>(a))
    }
}

/// Zero extend packed unsigned 8-bit integers in `a` to packed 64-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu8_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_u8x16();
        let a: u8x2 = simd_shuffle!(a, a, [0, 1]);
        transmute(simd_cast::<_, i64x2>(a))
    }
}

/// Zero extend packed unsigned 16-bit integers in `a`
/// to packed 32-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu16_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_u16x8();
        let a: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute(simd_cast::<_, i32x4>(a))
    }
}

/// Zero extend packed unsigned 16-bit integers in `a`
/// to packed 64-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu16_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_u16x8();
        let a: u16x2 = simd_shuffle!(a, a, [0, 1]);
        transmute(simd_cast::<_, i64x2>(a))
    }
}

/// Zero extend packed unsigned 32-bit integers in `a`
/// to packed 64-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu32_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_u32x4();
        let a: u32x2 = simd_shuffle!(a, a, [0, 1]);
        transmute(simd_cast::<_, i64x2>(a))
    }
}

/// Returns the dot product of two `__m128d` vectors.
///
/// `IMM8[1:0]` is the broadcast mask, and `IMM8[5:4]` is the condition mask.
/// If a condition mask bit is zero, the corresponding multiplication is
/// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of
/// the dot product will be stored in the return value component. Otherwise if
/// the broadcast mask bit is zero then the return component will be zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dp_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(dppd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_dp_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { dppd(a, b, IMM8 as u8) }
}

/// Returns the dot product of two `__m128` vectors.
///
/// `IMM8[3:0]` is the broadcast mask, and `IMM8[7:4]` is the condition mask.
/// If a condition mask bit is zero, the corresponding multiplication is
/// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of
/// the dot product will be stored in the return value component. Otherwise if
/// the broadcast mask bit is zero then the return component will be zero.
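///
/// # Example
///
/// A small sketch of a full four-element dot product, with all condition and
/// broadcast bits set:
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # #[allow(unused_unsafe)] // FIXME remove after stdarch bump in rustc
/// # unsafe fn worker() { unsafe {
/// let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
/// let b = _mm_setr_ps(1.0, 1.0, 1.0, 1.0);
/// // 0xFF: multiply all four lanes and broadcast the sum to every lane.
/// let r = _mm_dp_ps::<0xFF>(a, b);
/// let mut out = [0.0f32; 4];
/// _mm_storeu_ps(out.as_mut_ptr(), r);
/// assert_eq!(out, [10.0; 4]);
/// # }}
/// # unsafe { worker() }
/// # }
/// # }
/// ```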
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dp_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(dpps, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_dp_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { dpps(a, b, IMM8 as u8) }
}

/// Round the packed double-precision (64-bit) floating-point elements in `a`
/// down to an integer value, and stores the results as packed double-precision
/// floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_floor_pd(a: __m128d) -> __m128d {
    unsafe { simd_floor(a) }
}

/// Round the packed single-precision (32-bit) floating-point elements in `a`
/// down to an integer value, and stores the results as packed single-precision
/// floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_floor_ps(a: __m128) -> __m128 {
    unsafe { simd_floor(a) }
}

/// Round the lower double-precision (64-bit) floating-point element in `b`
/// down to an integer value, store the result as a double-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copies the upper element from `a` to the upper element of the intrinsic
/// result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_sd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { roundsd(a, b, _MM_FROUND_FLOOR) }
}

/// Round the lower single-precision (32-bit) floating-point element in `b`
/// down to an integer value, store the result as a single-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copies the upper 3 packed elements from `a` to the upper elements
/// of the intrinsic result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_ss)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { roundss(a, b, _MM_FROUND_FLOOR) }
}

/// Round the packed double-precision (64-bit) floating-point elements in `a`
/// up to an integer value, and stores the results as packed double-precision
/// floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_ceil_pd(a: __m128d) -> __m128d {
    unsafe { simd_ceil(a) }
}

/// Round the packed single-precision (32-bit) floating-point elements in `a`
/// up to an integer value, and stores the results as packed single-precision
/// floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_ceil_ps(a: __m128) -> __m128 {
    unsafe { simd_ceil(a) }
}

/// Round the lower double-precision (64-bit) floating-point element in `b`
/// up to an integer value, store the result as a double-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copies the upper element from `a` to the upper element
/// of the intrinsic result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_sd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { roundsd(a, b, _MM_FROUND_CEIL) }
}

/// Round the lower single-precision (32-bit) floating-point element in `b`
/// up to an integer value, store the result as a single-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copies the upper 3 packed elements from `a` to the upper elements
/// of the intrinsic result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_ss)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { roundss(a, b, _MM_FROUND_CEIL) }
}

/// Round the packed double-precision (64-bit) floating-point elements in `a`
/// using the `ROUNDING` parameter, and stores the results as packed
/// double-precision floating-point elements.
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundpd, ROUNDING = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_round_pd<const ROUNDING: i32>(a: __m128d) -> __m128d {
    static_assert_uimm_bits!(ROUNDING, 4);
    unsafe { roundpd(a, ROUNDING) }
}

/// Round the packed single-precision (32-bit) floating-point elements in `a`
/// using the `ROUNDING` parameter, and stores the results as packed
/// single-precision floating-point elements.
/// Rounding is done according to the rounding parameter, which can be one of
/// the following (combined as in the example below):
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
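///
/// # Example
///
/// A small sketch combining a rounding direction with exception suppression;
/// note that ties round to the nearest even integer:
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # #[allow(unused_unsafe)] // FIXME remove after stdarch bump in rustc
/// # unsafe fn worker() { unsafe {
/// let a = _mm_setr_ps(1.1, 1.9, -2.6, 3.5);
/// let r = _mm_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
/// let mut out = [0.0f32; 4];
/// _mm_storeu_ps(out.as_mut_ptr(), r);
/// // 3.5 rounds to 4.0 under round-to-nearest-even.
/// assert_eq!(out, [1.0, 2.0, -3.0, 4.0]);
/// # }}
/// # unsafe { worker() }
/// # }
/// # }
/// ```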
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundps, ROUNDING = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_round_ps<const ROUNDING: i32>(a: __m128) -> __m128 {
    static_assert_uimm_bits!(ROUNDING, 4);
    unsafe { roundps(a, ROUNDING) }
}

/// Round the lower double-precision (64-bit) floating-point element in `b`
/// using the `ROUNDING` parameter, store the result as a double-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copies the upper element from `a` to the upper element of the intrinsic
/// result.
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_sd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundsd, ROUNDING = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(ROUNDING, 4);
    unsafe { roundsd(a, b, ROUNDING) }
}

/// Round the lower single-precision (32-bit) floating-point element in `b`
/// using the `ROUNDING` parameter, store the result as a single-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copies the upper 3 packed elements from `a` to the upper elements
/// of the intrinsic result.
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_ss)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundss, ROUNDING = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(ROUNDING, 4);
    unsafe { roundss(a, b, ROUNDING) }
}

/// Finds the minimum unsigned 16-bit element in the 128-bit `__m128i` vector,
/// returning a vector containing its value in its first position, and its
/// index in its second position; all other elements are set to zero (see the
/// example below).
///
/// This intrinsic corresponds to the `VPHMINPOSUW` / `PHMINPOSUW`
/// instruction.
///
/// Arguments:
///
/// * `a` - A 128-bit vector of type `__m128i`.
///
/// Returns:
///
/// A 128-bit value where:
///
/// * bits `[15:0]` - contain the minimum value found in parameter `a`,
/// * bits `[18:16]` - contain the index of the minimum value
/// * remaining bits are set to `0`.
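///
/// # Example
///
/// A small sketch reading back the value and index with `_mm_extract_epi16`:
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # #[allow(unused_unsafe)] // FIXME remove after stdarch bump in rustc
/// # unsafe fn worker() { unsafe {
/// let a = _mm_setr_epi16(23, 18, 44, 7, 9, 7, 100, 2);
/// let r = _mm_minpos_epu16(a);
/// // Lane 0 holds the minimum value, lane 1 its index.
/// assert_eq!(_mm_extract_epi16::<0>(r), 2);
/// assert_eq!(_mm_extract_epi16::<1>(r), 7);
/// # }}
/// # unsafe { worker() }
/// # }
/// # }
/// ```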
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_minpos_epu16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(phminposuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_minpos_epu16(a: __m128i) -> __m128i {
    unsafe { transmute(phminposuw(a.as_u16x8())) }
}

/// Multiplies the low 32-bit integers from each packed 64-bit
/// element in `a` and `b`, and returns the signed 64-bit result.
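///
/// # Example
///
/// A small sketch; only the low (even-indexed) 32-bit lanes take part:
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # #[allow(unused_unsafe)] // FIXME remove after stdarch bump in rustc
/// # unsafe fn worker() { unsafe {
/// // Lanes 1 and 3 (99) are ignored; lanes 0 and 2 are widened and multiplied.
/// let a = _mm_setr_epi32(1, 99, -2, 99);
/// let b = _mm_setr_epi32(3, 99, 4, 99);
/// let r = _mm_mul_epi32(a, b);
/// let mut out = [0i64; 2];
/// _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
/// assert_eq!(out, [3, -8]);
/// # }}
/// # unsafe { worker() }
/// # }
/// # }
/// ```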
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmuldq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(a.as_i64x2()));
        let b = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(b.as_i64x2()));
        transmute(simd_mul(a, b))
    }
}

/// Multiplies the packed 32-bit integers in `a` and `b`, producing
/// intermediate 64-bit integers, and returns the low 32 bits of each
/// intermediate, reinterpreted as a signed integer. While `pmulld` on
/// `__m128i::splat(2)` and `__m128i::splat(2)` returns the obvious
/// `__m128i::splat(4)`, due to wrapping arithmetic `pmulld` on
/// `__m128i::splat(i32::MAX)` and `__m128i::splat(2)` would return a
/// negative number.
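///
/// # Example
///
/// A small sketch of the wrapping behavior described above:
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # #[allow(unused_unsafe)] // FIXME remove after stdarch bump in rustc
/// # unsafe fn worker() { unsafe {
/// let a = _mm_set1_epi32(i32::MAX);
/// let b = _mm_set1_epi32(2);
/// let r = _mm_mullo_epi32(a, b);
/// let mut out = [0i32; 4];
/// _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
/// // The low 32 bits of i32::MAX * 2 reinterpret as -2.
/// assert_eq!(out, [-2; 4]);
/// # }}
/// # unsafe { worker() }
/// # }
/// # }
/// ```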
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmulld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_mul(a.as_i32x4(), b.as_i32x4())) }
}

/// Subtracts 8-bit unsigned integer values and computes the absolute
/// values of the differences. Sums of four absolute differences at a time
/// are then returned, with the compared blocks selected by the bit fields
/// in the immediate operand; see the example below.
///
/// The following algorithm is performed:
///
/// ```ignore
/// i = IMM8[2] * 4
/// j = IMM8[1:0] * 4
/// for k := 0 to 7
///     d0 = abs(a[i + k + 0] - b[j + 0])
///     d1 = abs(a[i + k + 1] - b[j + 1])
///     d2 = abs(a[i + k + 2] - b[j + 2])
///     d3 = abs(a[i + k + 3] - b[j + 3])
///     r[k] = d0 + d1 + d2 + d3
/// ```
///
/// Arguments:
///
/// * `a` - A 128-bit vector of type `__m128i`.
/// * `b` - A 128-bit vector of type `__m128i`.
/// * `IMM8` - An 8-bit immediate operand specifying how the absolute
///   differences are to be calculated
///     * Bit `[2]` specifies the offset for operand `a`
///     * Bits `[1:0]` specify the offset for operand `b`
///
/// Returns:
///
/// * A `__m128i` vector containing the sums of the sets of absolute
///   differences between both operands.
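///
/// # Example
///
/// A small sketch with `IMM8 = 0`, so both offsets are zero; every byte
/// difference is 1, and each sum covers four of them:
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # #[allow(unused_unsafe)] // FIXME remove after stdarch bump in rustc
/// # unsafe fn worker() { unsafe {
/// let a = _mm_set1_epi8(0);
/// let b = _mm_set1_epi8(1);
/// let r = _mm_mpsadbw_epu8::<0>(a, b);
/// let mut out = [0u16; 8];
/// _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
/// // Each of the 8 sums is |0 - 1| added four times.
/// assert_eq!(out, [4; 8]);
/// # }}
/// # unsafe { worker() }
/// # }
/// # }
/// ```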
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mpsadbw_epu8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(mpsadbw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mpsadbw_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 3);
    unsafe { transmute(mpsadbw(a.as_u8x16(), b.as_u8x16(), IMM8 as u8)) }
}

/// Tests whether the specified bits in a 128-bit integer vector are all
/// zeros; see the example below.
///
/// Arguments:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
///   operand `a`.
///
/// Returns:
///
/// * `1` - if the specified bits are all zeros,
/// * `0` - otherwise.
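///
/// # Example
///
/// A small sketch; the result is `1` exactly when `a & mask` is all zeros:
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # #[allow(unused_unsafe)] // FIXME remove after stdarch bump in rustc
/// # unsafe fn worker() { unsafe {
/// let a = _mm_set1_epi32(0b0101);
/// // No selected bit is set in `a`, so the result is 1.
/// assert_eq!(_mm_testz_si128(a, _mm_set1_epi32(0b1010)), 1);
/// // Bit 2 is set in both, so the result is 0.
/// assert_eq!(_mm_testz_si128(a, _mm_set1_epi32(0b0100)), 0);
/// # }}
/// # unsafe { worker() }
/// # }
/// # }
/// ```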
1008///
1009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testz_si128)
1010#[inline]
1011#[target_feature(enable = "sse4.1")]
1012#[cfg_attr(test, assert_instr(ptest))]
1013#[stable(feature = "simd_x86", since = "1.27.0")]
1014#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1015pub const fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
1016 unsafe {
1017 let r: i64 = simd_reduce_or(simd_and(x:a.as_i64x2(), y:mask.as_i64x2()));
1018 (0i64 == r) as i32
1019 }
1020}
1021
1022/// Tests whether the specified bits in a 128-bit integer vector are all
1023/// ones.
1024///
1025/// Arguments:
1026///
1027/// * `a` - A 128-bit integer vector containing the bits to be tested.
1028/// * `mask` - A 128-bit integer vector selecting which bits to test in
1029/// operand `a`.
1030///
1031/// Returns:
1032///
1033/// * `1` - if the specified bits are all ones,
1034/// * `0` - otherwise.
1035///
1036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testc_si128)
1037#[inline]
1038#[target_feature(enable = "sse4.1")]
1039#[cfg_attr(test, assert_instr(ptest))]
1040#[stable(feature = "simd_x86", since = "1.27.0")]
1041#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1042pub const fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
1043 unsafe {
1044 let r: i64 = simd_reduce_or(simd_and(
1045 x:simd_xor(a.as_i64x2(), i64x2::splat(!0)),
1046 y:mask.as_i64x2(),
1047 ));
1048 (0i64 == r) as i32
1049 }
1050}
1051
1052/// Tests whether the specified bits in a 128-bit integer vector are
1053/// neither all zeros nor all ones.
1054///
1055/// Arguments:
1056///
1057/// * `a` - A 128-bit integer vector containing the bits to be tested.
1058/// * `mask` - A 128-bit integer vector selecting which bits to test in
1059/// operand `a`.
1060///
1061/// Returns:
1062///
1063/// * `1` - if the specified bits are neither all zeros nor all ones,
1064/// * `0` - otherwise.
1065///
1066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testnzc_si128)
1067#[inline]
1068#[target_feature(enable = "sse4.1")]
1069#[cfg_attr(test, assert_instr(ptest))]
1070#[stable(feature = "simd_x86", since = "1.27.0")]
1071pub fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
1072 unsafe { ptestnzc(a.as_i64x2(), mask.as_i64x2()) }
1073}
1074
1075/// Tests whether the specified bits in a 128-bit integer vector are all
1076/// zeros.
1077///
1078/// Arguments:
1079///
1080/// * `a` - A 128-bit integer vector containing the bits to be tested.
1081/// * `mask` - A 128-bit integer vector selecting which bits to test in
1082/// operand `a`.
1083///
1084/// Returns:
1085///
1086/// * `1` - if the specified bits are all zeros,
1087/// * `0` - otherwise.
1088///
1089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_all_zeros)
1090#[inline]
1091#[target_feature(enable = "sse4.1")]
1092#[cfg_attr(test, assert_instr(ptest))]
1093#[stable(feature = "simd_x86", since = "1.27.0")]
1094#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1095pub const fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
1096 _mm_testz_si128(a, mask)
1097}
1098
1099/// Tests whether the specified bits in `a` 128-bit integer vector are all
1100/// ones.
1101///
1102/// Argument:
1103///
1104/// * `a` - A 128-bit integer vector containing the bits to be tested.
1105///
1106/// Returns:
1107///
1108/// * `1` - if the bits specified in the operand are all set to 1,
1109/// * `0` - otherwise.
1110///
1111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_all_ones)
1112#[inline]
1113#[target_feature(enable = "sse4.1")]
1114#[cfg_attr(test, assert_instr(pcmpeqd))]
1115#[cfg_attr(test, assert_instr(ptest))]
1116#[stable(feature = "simd_x86", since = "1.27.0")]
1117#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1118pub const fn _mm_test_all_ones(a: __m128i) -> i32 {
1119 _mm_testc_si128(a, mask:_mm_cmpeq_epi32(a, b:a))
1120}
1121
1122/// Tests whether the specified bits in a 128-bit integer vector are
1123/// neither all zeros nor all ones.
1124///
1125/// Arguments:
1126///
1127/// * `a` - A 128-bit integer vector containing the bits to be tested.
1128/// * `mask` - A 128-bit integer vector selecting which bits to test in
1129/// operand `a`.
1130///
1131/// Returns:
1132///
1133/// * `1` - if the specified bits are neither all zeros nor all ones,
1134/// * `0` - otherwise.
1135///
1136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_mix_ones_zeros)
1137#[inline]
1138#[target_feature(enable = "sse4.1")]
1139#[cfg_attr(test, assert_instr(ptest))]
1140#[stable(feature = "simd_x86", since = "1.27.0")]
1141pub fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
1142 _mm_testnzc_si128(a, mask)
1143}
1144
1145/// Load 128-bits of integer data from memory into dst. mem_addr must be aligned on a 16-byte
1146/// boundary or a general-protection exception may be generated. To minimize caching, the data
1147/// is flagged as non-temporal (unlikely to be used again soon)
1148///
1149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_load_si128)
1150#[inline]
1151#[target_feature(enable = "sse4.1")]
1152#[cfg_attr(test, assert_instr(movntdqa))]
1153#[stable(feature = "simd_x86_updates", since = "1.82.0")]
1154pub unsafe fn _mm_stream_load_si128(mem_addr: *const __m128i) -> __m128i {
1155 let dst: __m128i;
1156 crate::arch::asm!(
1157 vpl!("movntdqa {a}"),
1158 a = out(xmm_reg) dst,
1159 p = in(reg) mem_addr,
1160 options(pure, readonly, nostack, preserves_flags),
1161 );
1162 dst
1163}
1164
1165#[allow(improper_ctypes)]
1166unsafe extern "C" {
1167 #[link_name = "llvm.x86.sse41.insertps"]
1168 unsafefn insertps(a: __m128, b: __m128, imm8: u8) -> __m128;
1169 #[link_name = "llvm.x86.sse41.packusdw"]
1170 unsafefn packusdw(a: i32x4, b: i32x4) -> u16x8;
1171 #[link_name = "llvm.x86.sse41.dppd"]
1172 unsafefn dppd(a: __m128d, b: __m128d, imm8: u8) -> __m128d;
1173 #[link_name = "llvm.x86.sse41.dpps"]
1174 unsafefn dpps(a: __m128, b: __m128, imm8: u8) -> __m128;
1175 #[link_name = "llvm.x86.sse41.round.pd"]
1176 unsafefn roundpd(a: __m128d, rounding: i32) -> __m128d;
1177 #[link_name = "llvm.x86.sse41.round.ps"]
1178 unsafefn roundps(a: __m128, rounding: i32) -> __m128;
1179 #[link_name = "llvm.x86.sse41.round.sd"]
1180 unsafefn roundsd(a: __m128d, b: __m128d, rounding: i32) -> __m128d;
1181 #[link_name = "llvm.x86.sse41.round.ss"]
1182 unsafefn roundss(a: __m128, b: __m128, rounding: i32) -> __m128;
1183 #[link_name = "llvm.x86.sse41.phminposuw"]
1184 unsafefn phminposuw(a: u16x8) -> u16x8;
1185 #[link_name = "llvm.x86.sse41.mpsadbw"]
1186 unsafefn mpsadbw(a: u8x16, b: u8x16, imm8: u8) -> u16x8;
1187 #[link_name = "llvm.x86.sse41.ptestnzc"]
1188 unsafefn ptestnzc(a: i64x2, mask: i64x2) -> i32;
1189}
1190
1191#[cfg(test)]
1192mod tests {
1193 use crate::core_arch::assert_eq_const as assert_eq;
1194 use crate::core_arch::x86::*;
1195 use std::mem;
1196 use stdarch_test::simd_test;
1197
1198 #[simd_test(enable = "sse4.1")]
1199 const fn test_mm_blendv_epi8() {
1200 #[rustfmt::skip]
1201 let a = _mm_setr_epi8(
1202 0, 1, 2, 3, 4, 5, 6, 7,
1203 8, 9, 10, 11, 12, 13, 14, 15,
1204 );
1205 #[rustfmt::skip]
1206 let b = _mm_setr_epi8(
1207 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
1208 );
1209 #[rustfmt::skip]
1210 let mask = _mm_setr_epi8(
1211 0, -1, 0, -1, 0, -1, 0, -1,
1212 0, -1, 0, -1, 0, -1, 0, -1,
1213 );
1214 #[rustfmt::skip]
1215 let e = _mm_setr_epi8(
1216 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31,
1217 );
1218 assert_eq_m128i(_mm_blendv_epi8(a, b, mask), e);
1219 }
1220
1221 #[simd_test(enable = "sse4.1")]
1222 const fn test_mm_blendv_pd() {
1223 let a = _mm_set1_pd(0.0);
1224 let b = _mm_set1_pd(1.0);
1225 let mask = _mm_castsi128_pd(_mm_setr_epi64x(0, -1));
1226 let r = _mm_blendv_pd(a, b, mask);
1227 let e = _mm_setr_pd(0.0, 1.0);
1228 assert_eq_m128d(r, e);
1229 }
1230
1231 #[simd_test(enable = "sse4.1")]
1232 const fn test_mm_blendv_ps() {
1233 let a = _mm_set1_ps(0.0);
1234 let b = _mm_set1_ps(1.0);
1235 let mask = _mm_castsi128_ps(_mm_setr_epi32(0, -1, 0, -1));
1236 let r = _mm_blendv_ps(a, b, mask);
1237 let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0);
1238 assert_eq_m128(r, e);
1239 }
1240
1241 #[simd_test(enable = "sse4.1")]
1242 const fn test_mm_blend_pd() {
1243 let a = _mm_set1_pd(0.0);
1244 let b = _mm_set1_pd(1.0);
1245 let r = _mm_blend_pd::<0b10>(a, b);
1246 let e = _mm_setr_pd(0.0, 1.0);
1247 assert_eq_m128d(r, e);
1248 }
1249
1250 #[simd_test(enable = "sse4.1")]
1251 const fn test_mm_blend_ps() {
1252 let a = _mm_set1_ps(0.0);
1253 let b = _mm_set1_ps(1.0);
1254 let r = _mm_blend_ps::<0b1010>(a, b);
1255 let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0);
1256 assert_eq_m128(r, e);
1257 }
1258
1259 #[simd_test(enable = "sse4.1")]
1260 const fn test_mm_blend_epi16() {
1261 let a = _mm_set1_epi16(0);
1262 let b = _mm_set1_epi16(1);
1263 let r = _mm_blend_epi16::<0b1010_1100>(a, b);
1264 let e = _mm_setr_epi16(0, 0, 1, 1, 0, 1, 0, 1);
1265 assert_eq_m128i(r, e);
1266 }
1267
1268 #[simd_test(enable = "sse4.1")]
1269 const fn test_mm_extract_ps() {
1270 let a = _mm_setr_ps(0.0, 1.0, 2.0, 3.0);
1271 let r: f32 = f32::from_bits(_mm_extract_ps::<1>(a) as u32);
1272 assert_eq!(r, 1.0);
1273 let r: f32 = f32::from_bits(_mm_extract_ps::<3>(a) as u32);
1274 assert_eq!(r, 3.0);
1275 }
1276
1277 #[simd_test(enable = "sse4.1")]
1278 const fn test_mm_extract_epi8() {
1279 #[rustfmt::skip]
1280 let a = _mm_setr_epi8(
1281 -1, 1, 2, 3, 4, 5, 6, 7,
1282 8, 9, 10, 11, 12, 13, 14, 15
1283 );
1284 let r1 = _mm_extract_epi8::<0>(a);
1285 let r2 = _mm_extract_epi8::<3>(a);
1286 assert_eq!(r1, 0xFF);
1287 assert_eq!(r2, 3);
1288 }
1289
1290 #[simd_test(enable = "sse4.1")]
1291 const fn test_mm_extract_epi32() {
1292 let a = _mm_setr_epi32(0, 1, 2, 3);
1293 let r = _mm_extract_epi32::<1>(a);
1294 assert_eq!(r, 1);
1295 let r = _mm_extract_epi32::<3>(a);
1296 assert_eq!(r, 3);
1297 }
1298
1299 #[simd_test(enable = "sse4.1")]
1300 fn test_mm_insert_ps() {
1301 let a = _mm_set1_ps(1.0);
1302 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
1303 let r = _mm_insert_ps::<0b11_00_1100>(a, b);
1304 let e = _mm_setr_ps(4.0, 1.0, 0.0, 0.0);
1305 assert_eq_m128(r, e);
1306
1307 // Zeroing takes precedence over copied value
1308 let a = _mm_set1_ps(1.0);
1309 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
1310 let r = _mm_insert_ps::<0b11_00_0001>(a, b);
1311 let e = _mm_setr_ps(0.0, 1.0, 1.0, 1.0);
1312 assert_eq_m128(r, e);
1313 }
1314
1315 #[simd_test(enable = "sse4.1")]
1316 const fn test_mm_insert_epi8() {
1317 let a = _mm_set1_epi8(0);
1318 let e = _mm_setr_epi8(0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1319 let r = _mm_insert_epi8::<1>(a, 32);
1320 assert_eq_m128i(r, e);
1321 let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0);
1322 let r = _mm_insert_epi8::<14>(a, 32);
1323 assert_eq_m128i(r, e);
1324 }
1325
1326 #[simd_test(enable = "sse4.1")]
1327 const fn test_mm_insert_epi32() {
1328 let a = _mm_set1_epi32(0);
1329 let e = _mm_setr_epi32(0, 32, 0, 0);
1330 let r = _mm_insert_epi32::<1>(a, 32);
1331 assert_eq_m128i(r, e);
1332 let e = _mm_setr_epi32(0, 0, 0, 32);
1333 let r = _mm_insert_epi32::<3>(a, 32);
1334 assert_eq_m128i(r, e);
1335 }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_max_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 4, 5, 8, 9, 12, 13, 16,
            17, 20, 21, 24, 25, 28, 29, 32,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            2, 3, 6, 7, 10, 11, 14, 15,
            18, 19, 22, 23, 26, 27, 30, 31,
        );
        let r = _mm_max_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            2, 4, 6, 8, 10, 12, 14, 16,
            18, 20, 22, 24, 26, 28, 30, 32,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_max_epu16() {
        let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm_max_epu16(a, b);
        let e = _mm_setr_epi16(2, 4, 6, 8, 10, 12, 14, 16);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_max_epi32() {
        let a = _mm_setr_epi32(1, 4, 5, 8);
        let b = _mm_setr_epi32(2, 3, 6, 7);
        let r = _mm_max_epi32(a, b);
        let e = _mm_setr_epi32(2, 4, 6, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_max_epu32() {
        let a = _mm_setr_epi32(1, 4, 5, 8);
        let b = _mm_setr_epi32(2, 3, 6, 7);
        let r = _mm_max_epu32(a, b);
        let e = _mm_setr_epi32(2, 4, 6, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_min_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 4, 5, 8, 9, 12, 13, 16,
            17, 20, 21, 24, 25, 28, 29, 32,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            2, 3, 6, 7, 10, 11, 14, 15,
            18, 19, 22, 23, 26, 27, 30, 31,
        );
        let r = _mm_min_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            1, 3, 5, 7, 9, 11, 13, 15,
            17, 19, 21, 23, 25, 27, 29, 31,
        );
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, -4, -5, 8, -9, -12, 13, -16,
            17, 20, 21, 24, 25, 28, 29, 32,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            2, -3, -6, 7, -10, -11, 14, -15,
            18, 19, 22, 23, 26, 27, 30, 31,
        );
        let r = _mm_min_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            1, -4, -6, 7, -10, -12, 13, -16,
            17, 19, 21, 23, 25, 27, 29, 31,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_min_epu16() {
        let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm_min_epu16(a, b);
        let e = _mm_setr_epi16(1, 3, 5, 7, 9, 11, 13, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_min_epi32() {
        let a = _mm_setr_epi32(1, 4, 5, 8);
        let b = _mm_setr_epi32(2, 3, 6, 7);
        let r = _mm_min_epi32(a, b);
        let e = _mm_setr_epi32(1, 3, 5, 7);
        assert_eq_m128i(r, e);

        let a = _mm_setr_epi32(-1, 4, 5, -7);
        let b = _mm_setr_epi32(-2, 3, -6, 8);
        let r = _mm_min_epi32(a, b);
        let e = _mm_setr_epi32(-2, 3, -6, -7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_min_epu32() {
        let a = _mm_setr_epi32(1, 4, 5, 8);
        let b = _mm_setr_epi32(2, 3, 6, 7);
        let r = _mm_min_epu32(a, b);
        let e = _mm_setr_epi32(1, 3, 5, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    fn test_mm_packus_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(-1, -2, -3, -4);
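        // Each signed 32-bit value saturates into the unsigned 16-bit range,
        // so the negative lanes of b clamp to 0.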
        let r = _mm_packus_epi32(a, b);
        let e = _mm_setr_epi16(1, 2, 3, 4, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_cmpeq_epi64() {
        let a = _mm_setr_epi64x(0, 1);
        let b = _mm_setr_epi64x(0, 0);
        let r = _mm_cmpeq_epi64(a, b);
        let e = _mm_setr_epi64x(-1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_cvtepi8_epi16() {
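        // The cvtepi* conversions sign-extend each lane, so negative inputs
        // survive the widening; the unsigned cvtepu* variants zero-extend.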
        let a = _mm_set1_epi8(10);
        let r = _mm_cvtepi8_epi16(a);
        let e = _mm_set1_epi16(10);
        assert_eq_m128i(r, e);
        let a = _mm_set1_epi8(-10);
        let r = _mm_cvtepi8_epi16(a);
        let e = _mm_set1_epi16(-10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_cvtepi8_epi32() {
        let a = _mm_set1_epi8(10);
        let r = _mm_cvtepi8_epi32(a);
        let e = _mm_set1_epi32(10);
        assert_eq_m128i(r, e);
        let a = _mm_set1_epi8(-10);
        let r = _mm_cvtepi8_epi32(a);
        let e = _mm_set1_epi32(-10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_cvtepi8_epi64() {
        let a = _mm_set1_epi8(10);
        let r = _mm_cvtepi8_epi64(a);
        let e = _mm_set1_epi64x(10);
        assert_eq_m128i(r, e);
        let a = _mm_set1_epi8(-10);
        let r = _mm_cvtepi8_epi64(a);
        let e = _mm_set1_epi64x(-10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_cvtepi16_epi32() {
        let a = _mm_set1_epi16(10);
        let r = _mm_cvtepi16_epi32(a);
        let e = _mm_set1_epi32(10);
        assert_eq_m128i(r, e);
        let a = _mm_set1_epi16(-10);
        let r = _mm_cvtepi16_epi32(a);
        let e = _mm_set1_epi32(-10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_cvtepi16_epi64() {
        let a = _mm_set1_epi16(10);
        let r = _mm_cvtepi16_epi64(a);
        let e = _mm_set1_epi64x(10);
        assert_eq_m128i(r, e);
        let a = _mm_set1_epi16(-10);
        let r = _mm_cvtepi16_epi64(a);
        let e = _mm_set1_epi64x(-10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_cvtepi32_epi64() {
        let a = _mm_set1_epi32(10);
        let r = _mm_cvtepi32_epi64(a);
        let e = _mm_set1_epi64x(10);
        assert_eq_m128i(r, e);
        let a = _mm_set1_epi32(-10);
        let r = _mm_cvtepi32_epi64(a);
        let e = _mm_set1_epi64x(-10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_cvtepu8_epi16() {
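        // cvtepu* zero-extends each lane; only non-negative inputs are
        // exercised here.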
        let a = _mm_set1_epi8(10);
        let r = _mm_cvtepu8_epi16(a);
        let e = _mm_set1_epi16(10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_cvtepu8_epi32() {
        let a = _mm_set1_epi8(10);
        let r = _mm_cvtepu8_epi32(a);
        let e = _mm_set1_epi32(10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_cvtepu8_epi64() {
        let a = _mm_set1_epi8(10);
        let r = _mm_cvtepu8_epi64(a);
        let e = _mm_set1_epi64x(10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_cvtepu16_epi32() {
        let a = _mm_set1_epi16(10);
        let r = _mm_cvtepu16_epi32(a);
        let e = _mm_set1_epi32(10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_cvtepu16_epi64() {
        let a = _mm_set1_epi16(10);
        let r = _mm_cvtepu16_epi64(a);
        let e = _mm_set1_epi64x(10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_cvtepu32_epi64() {
        let a = _mm_set1_epi32(10);
        let r = _mm_cvtepu32_epi64(a);
        let e = _mm_set1_epi64x(10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    fn test_mm_dp_pd() {
        let a = _mm_setr_pd(2.0, 3.0);
        let b = _mm_setr_pd(1.0, 4.0);
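        // IMM8 bits [5:4] pick the input lanes, bits [1:0] the output lanes:
        // 0b0011_0001 sums 2.0*1.0 + 3.0*4.0 = 14.0 into lane 0 only.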
        let e = _mm_setr_pd(14.0, 0.0);
        assert_eq_m128d(_mm_dp_pd::<0b00110001>(a, b), e);
    }

    #[simd_test(enable = "sse4.1")]
    fn test_mm_dp_ps() {
        let a = _mm_setr_ps(2.0, 3.0, 1.0, 10.0);
        let b = _mm_setr_ps(1.0, 4.0, 0.5, 10.0);
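        // IMM8 bits [7:4] pick the input lanes, bits [3:0] the output lanes:
        // 0b0111_0101 sums 2.0*1.0 + 3.0*4.0 + 1.0*0.5 = 14.5 into lanes 0 and 2.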
        let e = _mm_setr_ps(14.5, 0.0, 14.5, 0.0);
        assert_eq_m128(_mm_dp_ps::<0b01110101>(a, b), e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_floor_pd() {
        let a = _mm_setr_pd(2.5, 4.5);
        let r = _mm_floor_pd(a);
        let e = _mm_setr_pd(2.0, 4.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_floor_ps() {
        let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5);
        let r = _mm_floor_ps(a);
        let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    fn test_mm_floor_sd() {
        let a = _mm_setr_pd(2.5, 4.5);
        let b = _mm_setr_pd(-1.5, -3.5);
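        // Lane 0 is floor(b[0]); the upper lane is copied unchanged from a.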
        let r = _mm_floor_sd(a, b);
        let e = _mm_setr_pd(-2.0, 4.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    fn test_mm_floor_ss() {
        let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5);
        let b = _mm_setr_ps(-1.5, -3.5, -7.5, -15.5);
        let r = _mm_floor_ss(a, b);
        let e = _mm_setr_ps(-2.0, 4.5, 8.5, 16.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_ceil_pd() {
        let a = _mm_setr_pd(1.5, 3.5);
        let r = _mm_ceil_pd(a);
        let e = _mm_setr_pd(2.0, 4.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_ceil_ps() {
        let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
        let r = _mm_ceil_ps(a);
        let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    fn test_mm_ceil_sd() {
        let a = _mm_setr_pd(1.5, 3.5);
        let b = _mm_setr_pd(-2.5, -4.5);
        let r = _mm_ceil_sd(a, b);
        let e = _mm_setr_pd(-2.0, 3.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    fn test_mm_ceil_ss() {
        let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
        let b = _mm_setr_ps(-2.5, -4.5, -8.5, -16.5);
        let r = _mm_ceil_ss(a, b);
        let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    fn test_mm_round_pd() {
        let a = _mm_setr_pd(1.25, 3.75);
        let r = _mm_round_pd::<_MM_FROUND_TO_NEAREST_INT>(a);
        let e = _mm_setr_pd(1.0, 4.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    fn test_mm_round_ps() {
        let a = _mm_setr_ps(2.25, 4.75, -1.75, -4.25);
        let r = _mm_round_ps::<_MM_FROUND_TO_ZERO>(a);
        let e = _mm_setr_ps(2.0, 4.0, -1.0, -4.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    fn test_mm_round_sd() {
        let a = _mm_setr_pd(1.5, 3.5);
        let b = _mm_setr_pd(-2.5, -4.5);
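        // Only lane 0 of b is rounded; the upper lane is copied from a.
        // Rounding to nearest is round-half-to-even, so -2.5 gives -2.0.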
        let r = _mm_round_sd::<_MM_FROUND_TO_NEAREST_INT>(a, b);
        let e = _mm_setr_pd(-2.0, 3.5);
        assert_eq_m128d(r, e);

        let a = _mm_setr_pd(1.5, 3.5);
        let b = _mm_setr_pd(-2.5, -4.5);
        let r = _mm_round_sd::<_MM_FROUND_TO_NEG_INF>(a, b);
        let e = _mm_setr_pd(-3.0, 3.5);
        assert_eq_m128d(r, e);

        let a = _mm_setr_pd(1.5, 3.5);
        let b = _mm_setr_pd(-2.5, -4.5);
        let r = _mm_round_sd::<_MM_FROUND_TO_POS_INF>(a, b);
        let e = _mm_setr_pd(-2.0, 3.5);
        assert_eq_m128d(r, e);

        let a = _mm_setr_pd(1.5, 3.5);
        let b = _mm_setr_pd(-2.5, -4.5);
        let r = _mm_round_sd::<_MM_FROUND_TO_ZERO>(a, b);
        let e = _mm_setr_pd(-2.0, 3.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    fn test_mm_round_ss() {
        let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
        let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
        let r = _mm_round_ss::<_MM_FROUND_TO_NEAREST_INT>(a, b);
        let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
        assert_eq_m128(r, e);

        let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
        let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
        let r = _mm_round_ss::<_MM_FROUND_TO_NEG_INF>(a, b);
        let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
        assert_eq_m128(r, e);

        let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
        let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
        let r = _mm_round_ss::<_MM_FROUND_TO_POS_INF>(a, b);
        let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5);
        assert_eq_m128(r, e);

        let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
        let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
        let r = _mm_round_ss::<_MM_FROUND_TO_ZERO>(a, b);
        let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    fn test_mm_minpos_epu16_1() {
        let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 66);
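        // The result packs the minimum value into lane 0 and its index into
        // lane 1; the remaining lanes are zeroed.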
        let r = _mm_minpos_epu16(a);
        let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    fn test_mm_minpos_epu16_2() {
        let a = _mm_setr_epi16(0, 18, 44, 97, 50, 13, 67, 66);
        let r = _mm_minpos_epu16(a);
        let e = _mm_setr_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    fn test_mm_minpos_epu16_3() {
        // When the minimum value is repeated, the lowest index (here 5) is reported.
        let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 13);
        let r = _mm_minpos_epu16(a);
        let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_mul_epi32() {
        {
            let a = _mm_setr_epi32(1, 1, 1, 1);
            let b = _mm_setr_epi32(1, 2, 3, 4);
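            // Only the even lanes (0 and 2) are multiplied, and each product
            // is sign-extended to 64 bits.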
            let r = _mm_mul_epi32(a, b);
            let e = _mm_setr_epi64x(1, 3);
            assert_eq_m128i(r, e);
        }
        {
            let a = _mm_setr_epi32(15, 2 /* ignored */, 1234567, 4 /* ignored */);
            let b = _mm_setr_epi32(-20, -256 /* ignored */, 666666, 666666 /* ignored */);
            let r = _mm_mul_epi32(a, b);
            let e = _mm_setr_epi64x(-300, 823043843622);
            assert_eq_m128i(r, e);
        }
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_mullo_epi32() {
        {
            let a = _mm_setr_epi32(1, 1, 1, 1);
            let b = _mm_setr_epi32(1, 2, 3, 4);
            let r = _mm_mullo_epi32(a, b);
            let e = _mm_setr_epi32(1, 2, 3, 4);
            assert_eq_m128i(r, e);
        }
        {
            let a = _mm_setr_epi32(15, -2, 1234567, 99999);
            let b = _mm_setr_epi32(-20, -256, 666666, -99999);
            let r = _mm_mullo_epi32(a, b);
            // Note: only the low 32 bits of each product are kept, so the most
            // significant bit of r[2] becomes a sign bit: the low half of
            // 1234567 * 666666 (= 823043843622) is -1589877210.
            let e = _mm_setr_epi32(-300, 512, -1589877210, -1409865409);
            assert_eq_m128i(r, e);
        }
    }

    #[simd_test(enable = "sse4.1")]
    fn test_mm_minpos_epu16() {
        let a = _mm_setr_epi16(8, 7, 6, 5, 4, 1, 2, 3);
        let r = _mm_minpos_epu16(a);
        let e = _mm_setr_epi16(1, 5, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    fn test_mm_mpsadbw_epu8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );

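        // IMM8 bit 2 selects a's starting offset and bits [1:0] select b's
        // 4-byte block (both in 32-bit steps); each output lane is the sum of
        // absolute differences for one sliding 4-byte window of a.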
        let r = _mm_mpsadbw_epu8::<0b000>(a, a);
        let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
        assert_eq_m128i(r, e);

        let r = _mm_mpsadbw_epu8::<0b001>(a, a);
        let e = _mm_setr_epi16(16, 12, 8, 4, 0, 4, 8, 12);
        assert_eq_m128i(r, e);

        let r = _mm_mpsadbw_epu8::<0b100>(a, a);
        let e = _mm_setr_epi16(16, 20, 24, 28, 32, 36, 40, 44);
        assert_eq_m128i(r, e);

        let r = _mm_mpsadbw_epu8::<0b101>(a, a);
        let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
        assert_eq_m128i(r, e);

        let r = _mm_mpsadbw_epu8::<0b111>(a, a);
        let e = _mm_setr_epi16(32, 28, 24, 20, 16, 12, 8, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_testz_si128() {
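        // testz returns 1 iff (a & mask) is all zeros (PTEST's ZF).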
        let a = _mm_set1_epi8(1);
        let mask = _mm_set1_epi8(0);
        let r = _mm_testz_si128(a, mask);
        assert_eq!(r, 1);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b110);
        let r = _mm_testz_si128(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(0b011);
        let mask = _mm_set1_epi8(0b100);
        let r = _mm_testz_si128(a, mask);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_testc_si128() {
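        // testc returns 1 iff (!a & mask) is all zeros (PTEST's CF).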
        let a = _mm_set1_epi8(-1);
        let mask = _mm_set1_epi8(0);
        let r = _mm_testc_si128(a, mask);
        assert_eq!(r, 1);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b110);
        let r = _mm_testc_si128(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b100);
        let r = _mm_testc_si128(a, mask);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "sse4.1")]
    fn test_mm_testnzc_si128() {
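        // testnzc returns 1 iff (a & mask) and (!a & mask) are both nonzero,
        // i.e. mask selects a mix of ones and zeros from a.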
        let a = _mm_set1_epi8(0);
        let mask = _mm_set1_epi8(1);
        let r = _mm_testnzc_si128(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(-1);
        let mask = _mm_set1_epi8(0);
        let r = _mm_testnzc_si128(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b110);
        let r = _mm_testnzc_si128(a, mask);
        assert_eq!(r, 1);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b101);
        let r = _mm_testnzc_si128(a, mask);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_test_all_zeros() {
        let a = _mm_set1_epi8(1);
        let mask = _mm_set1_epi8(0);
        let r = _mm_test_all_zeros(a, mask);
        assert_eq!(r, 1);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b110);
        let r = _mm_test_all_zeros(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(0b011);
        let mask = _mm_set1_epi8(0b100);
        let r = _mm_test_all_zeros(a, mask);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "sse4.1")]
    const fn test_mm_test_all_ones() {
        let a = _mm_set1_epi8(-1);
        let r = _mm_test_all_ones(a);
        assert_eq!(r, 1);
        let a = _mm_set1_epi8(0b101);
        let r = _mm_test_all_ones(a);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "sse4.1")]
    fn test_mm_test_mix_ones_zeros() {
        let a = _mm_set1_epi8(0);
        let mask = _mm_set1_epi8(1);
        let r = _mm_test_mix_ones_zeros(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(-1);
        let mask = _mm_set1_epi8(0);
        let r = _mm_test_mix_ones_zeros(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b110);
        let r = _mm_test_mix_ones_zeros(a, mask);
        assert_eq!(r, 1);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b101);
        let r = _mm_test_mix_ones_zeros(a, mask);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "sse4.1")]
    fn test_mm_stream_load_si128() {
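        // MOVNTDQA is a (potentially non-temporal) 16-byte aligned load; here
        // it should simply read back the value stored to memory.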
        let a = _mm_set_epi64x(5, 6);
        let r = unsafe { _mm_stream_load_si128(core::ptr::addr_of!(a) as *const _) };
        assert_eq_m128i(a, r);
    }
}