//! Streaming SIMD Extensions 4.1 (SSE4.1)

use crate::core_arch::{simd::*, x86::*};
use crate::intrinsics::simd::*;

#[cfg(test)]
use stdarch_test::assert_instr;

// SSE4 rounding constants
/// round to nearest
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_NEAREST_INT: i32 = 0x00;
/// round down
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_NEG_INF: i32 = 0x01;
/// round up
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_POS_INF: i32 = 0x02;
/// truncate
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_ZERO: i32 = 0x03;
/// use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_CUR_DIRECTION: i32 = 0x04;
/// do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_RAISE_EXC: i32 = 0x00;
/// suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NO_EXC: i32 = 0x08;
/// round to nearest and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NINT: i32 = 0x00;
/// round down and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_FLOOR: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF;
/// round up and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_CEIL: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF;
/// truncate and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TRUNC: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO;
/// use MXCSR.RC and do not suppress exceptions; see
/// `vendor::_MM_SET_ROUNDING_MODE`
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_RINT: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION;
/// use MXCSR.RC and suppress exceptions; see `vendor::_MM_SET_ROUNDING_MODE`
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NEARBYINT: i32 = _MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION;

/// Blend packed 8-bit integers from `a` and `b` using `mask`
///
/// The high bit of each corresponding mask byte determines the selection.
/// If the high bit is set the element of `b` is selected. The element
/// of `a` is selected otherwise.
///
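/// # Example
///
/// A minimal sketch of the selection rule, with arbitrary illustrative
/// values: only lane 0 of the mask has its high bit set, so only lane 0
/// comes from `b`.
///
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # unsafe fn worker() {
/// let a = _mm_set1_epi8(1);
/// let b = _mm_set1_epi8(2);
/// // -128 is 0x80: high bit set; every other mask byte is clear.
/// let mask = _mm_setr_epi8(-128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
/// let r = _mm_blendv_epi8(a, b, mask);
/// assert_eq!(_mm_extract_epi8::<0>(r), 2);
/// assert_eq!(_mm_extract_epi8::<1>(r), 1);
/// # }
/// # unsafe { worker() }
/// # }
/// # }
/// ```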
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blendv_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i {
    let mask: i8x16 = simd_lt(mask.as_i8x16(), i8x16::splat(0));
    transmute(simd_select(mask, b.as_i8x16(), a.as_i8x16()))
}

/// Blend packed 16-bit integers from `a` and `b` using the mask `IMM8`.
///
/// The mask bits determine the selection. A clear bit selects the
/// corresponding element of `a`, and a set bit the corresponding
/// element of `b`.
///
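/// # Example
///
/// A minimal sketch (arbitrary values): bit `i` of the immediate picks
/// lane `i` from `b`, here only lane 2.
///
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # unsafe fn worker() {
/// let a = _mm_set1_epi16(0);
/// let b = _mm_set1_epi16(1);
/// let r = _mm_blend_epi16::<0b0000_0100>(a, b);
/// assert_eq!(_mm_extract_epi16::<2>(r), 1);
/// assert_eq!(_mm_extract_epi16::<0>(r), 0);
/// # }
/// # unsafe { worker() }
/// # }
/// # }
/// ```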
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_epi16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pblendw, IMM8 = 0xB1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blend_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    transmute::<i16x8, _>(simd_shuffle!(
        a.as_i16x8(),
        b.as_i16x8(),
        [
            [0, 8][IMM8 as usize & 1],
            [1, 9][(IMM8 >> 1) as usize & 1],
            [2, 10][(IMM8 >> 2) as usize & 1],
            [3, 11][(IMM8 >> 3) as usize & 1],
            [4, 12][(IMM8 >> 4) as usize & 1],
            [5, 13][(IMM8 >> 5) as usize & 1],
            [6, 14][(IMM8 >> 6) as usize & 1],
            [7, 15][(IMM8 >> 7) as usize & 1],
        ]
    ))
}

/// Blend packed double-precision (64-bit) floating-point elements from `a`
/// and `b` using `mask`
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blendv_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendvpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d {
    let mask: i64x2 = simd_lt(transmute::<_, i64x2>(mask), i64x2::splat(0));
    transmute(simd_select(mask, b.as_f64x2(), a.as_f64x2()))
}

/// Blend packed single-precision (32-bit) floating-point elements from `a`
/// and `b` using `mask`
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blendv_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendvps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
    let mask: i32x4 = simd_lt(transmute::<_, i32x4>(mask), i32x4::splat(0));
    transmute(simd_select(mask, b.as_f32x4(), a.as_f32x4()))
}

/// Blend packed double-precision (64-bit) floating-point elements from `a`
/// and `b` using control mask `IMM2`
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
// Note: LLVM7 prefers the single-precision floating-point domain when possible
// see https://bugs.llvm.org/show_bug.cgi?id=38195
// #[cfg_attr(test, assert_instr(blendpd, IMM2 = 0b10))]
#[cfg_attr(test, assert_instr(blendps, IMM2 = 0b10))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blend_pd<const IMM2: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM2, 2);
    transmute::<f64x2, _>(simd_shuffle!(
        a.as_f64x2(),
        b.as_f64x2(),
        [[0, 2][IMM2 as usize & 1], [1, 3][(IMM2 >> 1) as usize & 1]]
    ))
}

/// Blend packed single-precision (32-bit) floating-point elements from `a`
/// and `b` using mask `IMM4`
///
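/// # Example
///
/// A minimal sketch (arbitrary values): bits 0 and 2 of the immediate are
/// set, so lanes 0 and 2 come from `b`.
///
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # unsafe fn worker() {
/// let a = _mm_set1_ps(0.0);
/// let b = _mm_set1_ps(1.0);
/// let r = _mm_blend_ps::<0b0101>(a, b);
/// assert_eq!(f32::from_bits(_mm_extract_ps::<0>(r) as u32), 1.0);
/// assert_eq!(f32::from_bits(_mm_extract_ps::<1>(r) as u32), 0.0);
/// # }
/// # unsafe { worker() }
/// # }
/// # }
/// ```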
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendps, IMM4 = 0b0101))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blend_ps<const IMM4: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM4, 4);
    transmute::<f32x4, _>(simd_shuffle!(
        a.as_f32x4(),
        b.as_f32x4(),
        [
            [0, 4][IMM4 as usize & 1],
            [1, 5][(IMM4 >> 1) as usize & 1],
            [2, 6][(IMM4 >> 2) as usize & 1],
            [3, 7][(IMM4 >> 3) as usize & 1],
        ]
    ))
}

/// Extracts a single-precision (32-bit) floating-point element from `a`,
/// selected with `IMM8`. The returned `i32` stores the float's bit-pattern,
/// and may be converted back to a floating-point number via `f32::from_bits`.
///
/// # Example
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # unsafe fn worker() {
/// let mut float_store = vec![1.0, 1.0, 2.0, 3.0];
/// let simd_floats = _mm_set_ps(2.5, 5.0, 7.5, 10.0);
/// let x: i32 = _mm_extract_ps::<2>(simd_floats);
/// float_store.push(f32::from_bits(x as u32));
/// assert_eq!(float_store, vec![1.0, 1.0, 2.0, 3.0, 5.0]);
/// # }
/// # unsafe { worker() }
/// # }
/// # }
/// ```
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(extractps, IMM8 = 0)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_ps<const IMM8: i32>(a: __m128) -> i32 {
    static_assert_uimm_bits!(IMM8, 2);
    simd_extract!(a, IMM8 as u32, f32).to_bits() as i32
}

/// Extracts an 8-bit integer from `a`, selected with `IMM8`. Returns a 32-bit
/// integer containing the zero-extended integer data.
///
/// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pextrb, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_epi8<const IMM8: i32>(a: __m128i) -> i32 {
    static_assert_uimm_bits!(IMM8, 4);
    simd_extract!(a.as_u8x16(), IMM8 as u32, u8) as i32
}

/// Extracts a 32-bit integer from `a`, selected with `IMM8`
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(extractps, IMM8 = 1)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_epi32<const IMM8: i32>(a: __m128i) -> i32 {
    static_assert_uimm_bits!(IMM8, 2);
    simd_extract!(a.as_i32x4(), IMM8 as u32, i32)
}

/// Selects a single value in `b` to store at some position in `a`,
/// then zeroes elements according to `IMM8`.
///
/// `IMM8` specifies which bits from operand `b` will be copied, which bits in
/// the result they will be copied to, and which bits in the result will be
/// cleared. The following assignments are made:
///
/// * Bits `[7:6]` specify the bits to copy from operand `b`:
///     - `00`: Selects bits `[31:0]` from operand `b`.
///     - `01`: Selects bits `[63:32]` from operand `b`.
///     - `10`: Selects bits `[95:64]` from operand `b`.
///     - `11`: Selects bits `[127:96]` from operand `b`.
///
/// * Bits `[5:4]` specify the bits in the result to which the selected bits
///   from operand `b` are copied:
///     - `00`: Copies the selected bits from `b` to result bits `[31:0]`.
///     - `01`: Copies the selected bits from `b` to result bits `[63:32]`.
///     - `10`: Copies the selected bits from `b` to result bits `[95:64]`.
///     - `11`: Copies the selected bits from `b` to result bits `[127:96]`.
///
/// * Bits `[3:0]`: If any of these bits are set, the corresponding result
///   element is cleared.
///
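/// # Example
///
/// A minimal sketch of the three fields (arbitrary values): the immediate
/// `0b01_10_0001` selects `b[1]`, copies it to result element 2, and zeroes
/// result element 0.
///
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # unsafe fn worker() {
/// let a = _mm_set1_ps(1.0);
/// let b = _mm_setr_ps(10.0, 20.0, 30.0, 40.0);
/// let r = _mm_insert_ps::<0b01_10_0001>(a, b);
/// assert_eq!(f32::from_bits(_mm_extract_ps::<0>(r) as u32), 0.0);
/// assert_eq!(f32::from_bits(_mm_extract_ps::<2>(r) as u32), 20.0);
/// assert_eq!(f32::from_bits(_mm_extract_ps::<3>(r) as u32), 1.0);
/// # }
/// # unsafe { worker() }
/// # }
/// # }
/// ```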
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(insertps, IMM8 = 0b1010))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    insertps(a, b, IMM8 as u8)
}

/// Returns a copy of `a` with the 8-bit integer from `i` inserted at a
/// location specified by `IMM8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pinsrb, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi8<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
    static_assert_uimm_bits!(IMM8, 4);
    transmute(simd_insert!(a.as_i8x16(), IMM8 as u32, i as i8))
}

/// Returns a copy of `a` with the 32-bit integer from `i` inserted at a
/// location specified by `IMM8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pinsrd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi32<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
    static_assert_uimm_bits!(IMM8, 2);
    transmute(simd_insert!(a.as_i32x4(), IMM8 as u32, i))
}

/// Compares packed 8-bit integers in `a` and `b` and returns packed maximum
/// values.
///
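/// # Example
///
/// A minimal sketch (arbitrary values); note the comparison is signed:
///
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # unsafe fn worker() {
/// let a = _mm_set1_epi8(-1);
/// let b = _mm_set1_epi8(3);
/// let r = _mm_max_epi8(a, b);
/// // Signed comparison: 3 > -1 in every lane.
/// assert_eq!(_mm_extract_epi8::<0>(r), 3);
/// # }
/// # unsafe { worker() }
/// # }
/// # }
/// ```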
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i {
    let a: i8x16 = a.as_i8x16();
    let b: i8x16 = b.as_i8x16();
    transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
}

/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed
/// maximum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i {
    let a: u16x8 = a.as_u16x8();
    let b: u16x8 = b.as_u16x8();
    transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
}

/// Compares packed 32-bit integers in `a` and `b`, and returns packed maximum
/// values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i {
    let a: i32x4 = a.as_i32x4();
    let b: i32x4 = b.as_i32x4();
    transmute(simd_select::<i32x4, _>(simd_gt(a, b), a, b))
}

/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed
/// maximum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i {
    let a: u32x4 = a.as_u32x4();
    let b: u32x4 = b.as_u32x4();
    transmute(simd_select::<i32x4, _>(simd_gt(a, b), a, b))
}

/// Compares packed 8-bit integers in `a` and `b` and returns packed minimum
/// values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i {
    let a: i8x16 = a.as_i8x16();
    let b: i8x16 = b.as_i8x16();
    transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
}

/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed
/// minimum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i {
    let a: u16x8 = a.as_u16x8();
    let b: u16x8 = b.as_u16x8();
    transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
}

/// Compares packed 32-bit integers in `a` and `b`, and returns packed minimum
/// values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i {
    let a: i32x4 = a.as_i32x4();
    let b: i32x4 = b.as_i32x4();
    transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b))
}

/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed
/// minimum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
    let a: u32x4 = a.as_u32x4();
    let b: u32x4 = b.as_u32x4();
    transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b))
}

/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using unsigned saturation
///
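/// # Example
///
/// A minimal sketch of the saturation behaviour (arbitrary values):
///
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # unsafe fn worker() {
/// let a = _mm_setr_epi32(-1, 128, 70_000, 0);
/// let b = _mm_set1_epi32(0);
/// let r = _mm_packus_epi32(a, b);
/// // -1 saturates to 0 and 70_000 saturates to u16::MAX.
/// assert_eq!(_mm_extract_epi16::<0>(r), 0);
/// assert_eq!(_mm_extract_epi16::<1>(r), 128);
/// assert_eq!(_mm_extract_epi16::<2>(r), 65535);
/// # }
/// # unsafe { worker() }
/// # }
/// # }
/// ```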
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(packusdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(packusdw(a.as_i32x4(), b.as_i32x4()))
}

/// Compares packed 64-bit integers in `a` and `b` for equality
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pcmpeqq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_eq::<_, i64x2>(a.as_i64x2(), b.as_i64x2()))
}

/// Sign extend packed 8-bit integers in `a` to packed 16-bit integers
///
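/// # Example
///
/// A minimal sketch (arbitrary values) showing that the sign is preserved:
///
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # unsafe fn worker() {
/// let a = _mm_setr_epi8(-5, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
/// let r = _mm_cvtepi8_epi16(a);
/// // `_mm_extract_epi16` zero-extends, so cast back to i16 to compare.
/// assert_eq!(_mm_extract_epi16::<0>(r) as i16, -5);
/// assert_eq!(_mm_extract_epi16::<1>(r), 4);
/// # }
/// # unsafe { worker() }
/// # }
/// # }
/// ```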
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi8_epi16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
    let a: i8x16 = a.as_i8x16();
    let a: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute(simd_cast::<_, i16x8>(a))
}

/// Sign extend packed 8-bit integers in `a` to packed 32-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi8_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
    let a: i8x16 = a.as_i8x16();
    let a: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
    transmute(simd_cast::<_, i32x4>(a))
}

/// Sign extend packed 8-bit integers in the low 8 bytes of `a` to packed
/// 64-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi8_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
    let a: i8x16 = a.as_i8x16();
    let a: i8x2 = simd_shuffle!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}

/// Sign extend packed 16-bit integers in `a` to packed 32-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
    let a: i16x8 = a.as_i16x8();
    let a: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
    transmute(simd_cast::<_, i32x4>(a))
}

/// Sign extend packed 16-bit integers in `a` to packed 64-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
    let a: i16x8 = a.as_i16x8();
    let a: i16x2 = simd_shuffle!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}

/// Sign extend packed 32-bit integers in `a` to packed 64-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
    let a: i32x4 = a.as_i32x4();
    let a: i32x2 = simd_shuffle!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}

/// Zero extend packed unsigned 8-bit integers in `a` to packed 16-bit integers
///
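/// # Example
///
/// A minimal sketch (arbitrary values): the byte `0xFF` widens to `255`,
/// not `-1`, because the extension is unsigned:
///
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # unsafe fn worker() {
/// let a = _mm_set1_epi8(-1); // every byte is 0xFF
/// let r = _mm_cvtepu8_epi16(a);
/// assert_eq!(_mm_extract_epi16::<0>(r), 255);
/// # }
/// # unsafe { worker() }
/// # }
/// # }
/// ```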
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu8_epi16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
    let a: u8x16 = a.as_u8x16();
    let a: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute(simd_cast::<_, i16x8>(a))
}

/// Zero extend packed unsigned 8-bit integers in `a` to packed 32-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu8_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
    let a: u8x16 = a.as_u8x16();
    let a: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
    transmute(simd_cast::<_, i32x4>(a))
}

/// Zero extend packed unsigned 8-bit integers in `a` to packed 64-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu8_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
    let a: u8x16 = a.as_u8x16();
    let a: u8x2 = simd_shuffle!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}

/// Zero extend packed unsigned 16-bit integers in `a`
/// to packed 32-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu16_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
    let a: u16x8 = a.as_u16x8();
    let a: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
    transmute(simd_cast::<_, i32x4>(a))
}

/// Zero extend packed unsigned 16-bit integers in `a`
/// to packed 64-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu16_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
    let a: u16x8 = a.as_u16x8();
    let a: u16x2 = simd_shuffle!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}

/// Zero extend packed unsigned 32-bit integers in `a`
/// to packed 64-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu32_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
    let a: u32x4 = a.as_u32x4();
    let a: u32x2 = simd_shuffle!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}

/// Returns the dot product of two `__m128d` vectors.
///
/// `IMM8[1:0]` is the broadcast mask, and `IMM8[5:4]` is the condition mask.
/// If a condition mask bit is zero, the corresponding multiplication is
/// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of
/// the dot product will be stored in the return value component. Otherwise if
/// the broadcast mask bit is zero then the return component will be zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dp_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(dppd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_dp_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM8, 8);
    dppd(a, b, IMM8 as u8)
}

/// Returns the dot product of two `__m128` vectors.
///
/// `IMM8[3:0]` is the broadcast mask, and `IMM8[7:4]` is the condition mask.
/// If a condition mask bit is zero, the corresponding multiplication is
/// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of
/// the dot product will be stored in the return value component. Otherwise if
/// the broadcast mask bit is zero then the return component will be zero.
///
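/// # Example
///
/// A minimal sketch (arbitrary values): condition mask `0xF` multiplies all
/// four lanes, broadcast mask `0x1` stores the sum only in lane 0.
///
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # unsafe fn worker() {
/// let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
/// let b = _mm_set1_ps(10.0);
/// let r = _mm_dp_ps::<0xF1>(a, b);
/// // 1*10 + 2*10 + 3*10 + 4*10 = 100
/// assert_eq!(f32::from_bits(_mm_extract_ps::<0>(r) as u32), 100.0);
/// assert_eq!(f32::from_bits(_mm_extract_ps::<1>(r) as u32), 0.0);
/// # }
/// # unsafe { worker() }
/// # }
/// # }
/// ```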
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dp_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(dpps, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_dp_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    dpps(a, b, IMM8 as u8)
}

/// Round the packed double-precision (64-bit) floating-point elements in `a`
/// down to an integer value, and store the results as packed double-precision
/// floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_floor_pd(a: __m128d) -> __m128d {
    simd_floor(a)
}

/// Round the packed single-precision (32-bit) floating-point elements in `a`
/// down to an integer value, and store the results as packed single-precision
/// floating-point elements.
///
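/// # Example
///
/// A minimal sketch (arbitrary values): rounding is toward negative
/// infinity, so `-1.5` becomes `-2.0`.
///
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # unsafe fn worker() {
/// let a = _mm_setr_ps(1.5, -1.5, 2.9, -0.1);
/// let r = _mm_floor_ps(a);
/// assert_eq!(f32::from_bits(_mm_extract_ps::<1>(r) as u32), -2.0);
/// # }
/// # unsafe { worker() }
/// # }
/// # }
/// ```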
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_floor_ps(a: __m128) -> __m128 {
    simd_floor(a)
}

/// Round the lower double-precision (64-bit) floating-point element in `b`
/// down to an integer value, store the result as a double-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copy the upper element from `a` to the upper element of the intrinsic
/// result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_sd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d {
    roundsd(a, b, _MM_FROUND_FLOOR)
}

/// Round the lower single-precision (32-bit) floating-point element in `b`
/// down to an integer value, store the result as a single-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copy the upper 3 packed elements from `a` to the upper elements
/// of the intrinsic result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_ss)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 {
    roundss(a, b, _MM_FROUND_FLOOR)
}

/// Round the packed double-precision (64-bit) floating-point elements in `a`
/// up to an integer value, and store the results as packed double-precision
/// floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ceil_pd(a: __m128d) -> __m128d {
    simd_ceil(a)
}

/// Round the packed single-precision (32-bit) floating-point elements in `a`
/// up to an integer value, and store the results as packed single-precision
/// floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ceil_ps(a: __m128) -> __m128 {
    simd_ceil(a)
}

/// Round the lower double-precision (64-bit) floating-point element in `b`
/// up to an integer value, store the result as a double-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copy the upper element from `a` to the upper element
/// of the intrinsic result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_sd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d {
    roundsd(a, b, _MM_FROUND_CEIL)
}

/// Round the lower single-precision (32-bit) floating-point element in `b`
/// up to an integer value, store the result as a single-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copy the upper 3 packed elements from `a` to the upper elements
/// of the intrinsic result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_ss)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 {
    roundss(a, b, _MM_FROUND_CEIL)
}

/// Round the packed double-precision (64-bit) floating-point elements in `a`
/// using the `ROUNDING` parameter, and store the results as packed
/// double-precision floating-point elements.
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// // round to nearest, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
/// // round down, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC;
/// // round up, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
/// // truncate, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
/// # let _x =
/// _MM_FROUND_CUR_DIRECTION;
/// # }
/// ```
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundpd, ROUNDING = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_round_pd<const ROUNDING: i32>(a: __m128d) -> __m128d {
    static_assert_uimm_bits!(ROUNDING, 4);
    roundpd(a, ROUNDING)
}

/// Round the packed single-precision (32-bit) floating-point elements in `a`
/// using the `ROUNDING` parameter, and store the results as packed
/// single-precision floating-point elements.
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// // round to nearest, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
/// // round down, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC;
/// // round up, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
/// // truncate, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
/// # let _x =
/// _MM_FROUND_CUR_DIRECTION;
/// # }
/// ```
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundps, ROUNDING = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_round_ps<const ROUNDING: i32>(a: __m128) -> __m128 {
    static_assert_uimm_bits!(ROUNDING, 4);
    roundps(a, ROUNDING)
}

/// Round the lower double-precision (64-bit) floating-point element in `b`
/// using the `ROUNDING` parameter, store the result as a double-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copy the upper element from `a` to the upper element of the intrinsic
/// result.
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// // round to nearest, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
/// // round down, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC;
/// // round up, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
/// // truncate, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
/// # let _x =
/// _MM_FROUND_CUR_DIRECTION;
/// # }
/// ```
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_sd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundsd, ROUNDING = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(ROUNDING, 4);
    roundsd(a, b, ROUNDING)
}

/// Round the lower single-precision (32-bit) floating-point element in `b`
/// using the `ROUNDING` parameter, store the result as a single-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copy the upper 3 packed elements from `a` to the upper elements
/// of the intrinsic result.
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// // round to nearest, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
/// // round down, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC;
/// // round up, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
/// // truncate, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
/// # let _x =
/// _MM_FROUND_CUR_DIRECTION;
/// # }
/// ```
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_ss)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundss, ROUNDING = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(ROUNDING, 4);
    roundss(a, b, ROUNDING)
}

/// Finds the minimum unsigned 16-bit element in the 128-bit `__m128i` vector,
/// returning a vector containing its value in its first position, and its
/// index in its second position; all other elements are set to zero.
///
/// This intrinsic corresponds to the `VPHMINPOSUW` / `PHMINPOSUW`
/// instruction.
///
/// Arguments:
///
/// * `a` - A 128-bit vector of type `__m128i`.
///
/// Returns:
///
/// A 128-bit value where:
///
/// * bits `[15:0]` - contain the minimum value found in parameter `a`,
/// * bits `[18:16]` - contain the index of the minimum value,
/// * remaining bits are set to `0`.
///
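/// # Example
///
/// A minimal sketch (arbitrary values); on a tie, the lowest index wins:
///
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # unsafe fn worker() {
/// let a = _mm_setr_epi16(9, 4, 22, 4, 50, 7, 8, 1000);
/// let r = _mm_minpos_epu16(a);
/// assert_eq!(_mm_extract_epi16::<0>(r), 4); // the minimum value
/// assert_eq!(_mm_extract_epi16::<1>(r), 1); // index of its first occurrence
/// assert_eq!(_mm_extract_epi16::<2>(r), 0); // remaining lanes are zeroed
/// # }
/// # unsafe { worker() }
/// # }
/// # }
/// ```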
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_minpos_epu16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(phminposuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_minpos_epu16(a: __m128i) -> __m128i {
    transmute(phminposuw(a.as_u16x8()))
}

/// Multiplies the low 32-bit integers from each packed 64-bit
/// element in `a` and `b`, and returns the signed 64-bit result.
///
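/// # Example
///
/// A minimal sketch (arbitrary values): only the even 32-bit lanes
/// participate; the asserts read each 64-bit product as two 32-bit halves.
///
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # unsafe fn worker() {
/// let a = _mm_setr_epi32(-2, 777, 3, 777); // lanes 1 and 3 are ignored
/// let b = _mm_setr_epi32(100_000, 777, 100_000, 777);
/// let r = _mm_mul_epi32(a, b);
/// // -2 * 100_000 = -200_000, sign-extended into the upper half.
/// assert_eq!(_mm_extract_epi32::<0>(r), -200_000);
/// assert_eq!(_mm_extract_epi32::<1>(r), -1);
/// // 3 * 100_000 = 300_000, upper half zero.
/// assert_eq!(_mm_extract_epi32::<2>(r), 300_000);
/// assert_eq!(_mm_extract_epi32::<3>(r), 0);
/// # }
/// # unsafe { worker() }
/// # }
/// # }
/// ```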
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmuldq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i {
    let a: i64x2 = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(a.as_i64x2()));
    let b: i64x2 = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(b.as_i64x2()));
    transmute(simd_mul(a, b))
}

/// Multiplies the packed 32-bit integers in `a` and `b`, producing
/// intermediate 64-bit integers, and returns the low 32 bits of each
/// intermediate, reinterpreted as a signed integer. While
/// `pmulld __m128i::splat(2), __m128i::splat(2)` returns the obvious
/// `__m128i::splat(4)`, due to wrapping arithmetic
/// `pmulld __m128i::splat(i32::MAX), __m128i::splat(2)` would return a
/// negative number.
///
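/// # Example
///
/// A minimal sketch of the wrapping behaviour described above:
///
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # unsafe fn worker() {
/// let a = _mm_set1_epi32(i32::MAX);
/// let b = _mm_set1_epi32(2);
/// let r = _mm_mullo_epi32(a, b);
/// // i32::MAX * 2 wraps around to -2.
/// assert_eq!(_mm_extract_epi32::<0>(r), -2);
/// # }
/// # unsafe { worker() }
/// # }
/// # }
/// ```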
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmulld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_mul(a.as_i32x4(), b.as_i32x4()))
}

/// Subtracts 8-bit unsigned integer values and writes the absolute
/// values of the differences to the destination. Sums of the absolute
/// differences are then returned according to the bit fields in the
/// immediate operand.
///
/// The following algorithm is performed:
///
/// ```ignore
/// i = IMM8[2] * 4
/// j = IMM8[1:0] * 4
/// for k := 0 to 7
///     d0 = abs(a[i + k + 0] - b[j + 0])
///     d1 = abs(a[i + k + 1] - b[j + 1])
///     d2 = abs(a[i + k + 2] - b[j + 2])
///     d3 = abs(a[i + k + 3] - b[j + 3])
///     r[k] = d0 + d1 + d2 + d3
/// ```
///
/// Arguments:
///
/// * `a` - A 128-bit vector of type `__m128i`.
/// * `b` - A 128-bit vector of type `__m128i`.
/// * `IMM8` - An 8-bit immediate operand specifying how the absolute
///   differences are to be calculated:
///     * Bit `[2]` specifies the offset for operand `a`
///     * Bits `[1:0]` specify the offset for operand `b`
///
/// Returns:
///
/// * A `__m128i` vector containing the sums of the sets of absolute
///   differences between both operands.
///
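/// # Example
///
/// A minimal sketch with uniform inputs so the result is easy to check by
/// hand: every absolute difference is `|2 - 4| = 2`, and each output lane
/// sums four of them.
///
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # unsafe fn worker() {
/// let a = _mm_set1_epi8(2);
/// let b = _mm_set1_epi8(4);
/// let r = _mm_mpsadbw_epu8::<0>(a, b);
/// assert_eq!(_mm_extract_epi16::<0>(r), 8);
/// # }
/// # unsafe { worker() }
/// # }
/// # }
/// ```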
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mpsadbw_epu8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(mpsadbw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mpsadbw_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 3);
    transmute(mpsadbw(a.as_u8x16(), b.as_u8x16(), IMM8 as u8))
}

/// Tests whether the specified bits in a 128-bit integer vector are all
/// zeros.
///
/// Arguments:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
///   operand `a`.
///
/// Returns:
///
/// * `1` - if the specified bits are all zeros,
/// * `0` - otherwise.
///
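/// # Example
///
/// A minimal sketch (arbitrary values): the two bit patterns below do not
/// overlap, so `a AND mask` is zero.
///
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # unsafe fn worker() {
/// let a = _mm_setr_epi32(0b0101, 0, 0, 0);
/// let mask = _mm_setr_epi32(0b1010, 0, 0, 0);
/// assert_eq!(_mm_testz_si128(a, mask), 1);
/// // Testing `a` against itself finds non-zero bits.
/// assert_eq!(_mm_testz_si128(a, a), 0);
/// # }
/// # unsafe { worker() }
/// # }
/// # }
/// ```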
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testz_si128)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
    ptestz(a.as_i64x2(), mask.as_i64x2())
}

/// Tests whether the specified bits in a 128-bit integer vector are all
/// ones.
///
/// Arguments:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
///   operand `a`.
///
/// Returns:
///
/// * `1` - if the specified bits are all ones,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testc_si128)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
    ptestc(a.as_i64x2(), mask.as_i64x2())
}

/// Tests whether the specified bits in a 128-bit integer vector are
/// neither all zeros nor all ones.
///
/// Arguments:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
///   operand `a`.
///
/// Returns:
///
/// * `1` - if the specified bits are neither all zeros nor all ones,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testnzc_si128)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
    ptestnzc(a.as_i64x2(), mask.as_i64x2())
}

/// Tests whether the specified bits in a 128-bit integer vector are all
/// zeros.
///
/// Arguments:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
///   operand `a`.
///
/// Returns:
///
/// * `1` - if the specified bits are all zeros,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_all_zeros)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
    _mm_testz_si128(a, mask)
}

/// Tests whether the specified bits in a 128-bit integer vector are all
/// ones.
///
/// Argument:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
///
/// Returns:
///
/// * `1` - if the bits specified in the operand are all set to 1,
/// * `0` - otherwise.
///
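/// # Example
///
/// A minimal sketch (arbitrary values):
///
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # unsafe fn worker() {
/// let ones = _mm_set1_epi8(-1); // every bit set
/// assert_eq!(_mm_test_all_ones(ones), 1);
/// let mixed = _mm_setr_epi32(-1, -1, 0, -1);
/// assert_eq!(_mm_test_all_ones(mixed), 0);
/// # }
/// # unsafe { worker() }
/// # }
/// # }
/// ```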
1118 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_all_ones) |
1119 | #[inline ] |
1120 | #[target_feature (enable = "sse4.1" )] |
1121 | #[cfg_attr (test, assert_instr(pcmpeqd))] |
1122 | #[cfg_attr (test, assert_instr(ptest))] |
1123 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1124 | pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 { |
1125 | _mm_testc_si128(a, mask:_mm_cmpeq_epi32(a, b:a)) |
1126 | } |
1127 | |
1128 | /// Tests whether the specified bits in a 128-bit integer vector are |
1129 | /// neither all zeros nor all ones. |
1130 | /// |
1131 | /// Arguments: |
1132 | /// |
1133 | /// * `a` - A 128-bit integer vector containing the bits to be tested. |
1134 | /// * `mask` - A 128-bit integer vector selecting which bits to test in |
1135 | /// operand `a`. |
1136 | /// |
1137 | /// Returns: |
1138 | /// |
1139 | /// * `1` - if the specified bits are neither all zeros nor all ones, |
1140 | /// * `0` - otherwise. |
1141 | /// |
1142 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_mix_ones_zeros) |
1143 | #[inline ] |
1144 | #[target_feature (enable = "sse4.1" )] |
1145 | #[cfg_attr (test, assert_instr(ptest))] |
1146 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1147 | pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 { |
1148 | _mm_testnzc_si128(a, mask) |
1149 | } |
1150 | |
1151 | #[allow (improper_ctypes)] |
1152 | extern "C" { |
1153 | #[link_name = "llvm.x86.sse41.insertps" ] |
1154 | fn insertps(a: __m128, b: __m128, imm8: u8) -> __m128; |
1155 | #[link_name = "llvm.x86.sse41.packusdw" ] |
1156 | fn packusdw(a: i32x4, b: i32x4) -> u16x8; |
1157 | #[link_name = "llvm.x86.sse41.dppd" ] |
1158 | fn dppd(a: __m128d, b: __m128d, imm8: u8) -> __m128d; |
1159 | #[link_name = "llvm.x86.sse41.dpps" ] |
1160 | fn dpps(a: __m128, b: __m128, imm8: u8) -> __m128; |
1161 | #[link_name = "llvm.x86.sse41.round.pd" ] |
1162 | fn roundpd(a: __m128d, rounding: i32) -> __m128d; |
1163 | #[link_name = "llvm.x86.sse41.round.ps" ] |
1164 | fn roundps(a: __m128, rounding: i32) -> __m128; |
1165 | #[link_name = "llvm.x86.sse41.round.sd" ] |
1166 | fn roundsd(a: __m128d, b: __m128d, rounding: i32) -> __m128d; |
1167 | #[link_name = "llvm.x86.sse41.round.ss" ] |
1168 | fn roundss(a: __m128, b: __m128, rounding: i32) -> __m128; |
1169 | #[link_name = "llvm.x86.sse41.phminposuw" ] |
1170 | fn phminposuw(a: u16x8) -> u16x8; |
1171 | #[link_name = "llvm.x86.sse41.mpsadbw" ] |
1172 | fn mpsadbw(a: u8x16, b: u8x16, imm8: u8) -> u16x8; |
1173 | #[link_name = "llvm.x86.sse41.ptestz" ] |
1174 | fn ptestz(a: i64x2, mask: i64x2) -> i32; |
1175 | #[link_name = "llvm.x86.sse41.ptestc" ] |
1176 | fn ptestc(a: i64x2, mask: i64x2) -> i32; |
1177 | #[link_name = "llvm.x86.sse41.ptestnzc" ] |
1178 | fn ptestnzc(a: i64x2, mask: i64x2) -> i32; |
1179 | } |
1180 | |
1181 | #[cfg (test)] |
1182 | mod tests { |
1183 | use crate::core_arch::x86::*; |
1184 | use std::mem; |
1185 | use stdarch_test::simd_test; |
1186 | |
1187 | #[simd_test(enable = "sse4.1" )] |
1188 | unsafe fn test_mm_blendv_epi8() { |
1189 | #[rustfmt::skip] |
1190 | let a = _mm_setr_epi8( |
1191 | 0, 1, 2, 3, 4, 5, 6, 7, |
1192 | 8, 9, 10, 11, 12, 13, 14, 15, |
1193 | ); |
1194 | #[rustfmt::skip] |
1195 | let b = _mm_setr_epi8( |
1196 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
1197 | ); |
1198 | #[rustfmt::skip] |
1199 | let mask = _mm_setr_epi8( |
1200 | 0, -1, 0, -1, 0, -1, 0, -1, |
1201 | 0, -1, 0, -1, 0, -1, 0, -1, |
1202 | ); |
1203 | #[rustfmt::skip] |
1204 | let e = _mm_setr_epi8( |
1205 | 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31, |
1206 | ); |
1207 | assert_eq_m128i(_mm_blendv_epi8(a, b, mask), e); |
1208 | } |
1209 | |
1210 | #[simd_test(enable = "sse4.1" )] |
1211 | unsafe fn test_mm_blendv_pd() { |
1212 | let a = _mm_set1_pd(0.0); |
1213 | let b = _mm_set1_pd(1.0); |
1214 | let mask = transmute(_mm_setr_epi64x(0, -1)); |
1215 | let r = _mm_blendv_pd(a, b, mask); |
1216 | let e = _mm_setr_pd(0.0, 1.0); |
1217 | assert_eq_m128d(r, e); |
1218 | } |
1219 | |
1220 | #[simd_test(enable = "sse4.1" )] |
1221 | unsafe fn test_mm_blendv_ps() { |
1222 | let a = _mm_set1_ps(0.0); |
1223 | let b = _mm_set1_ps(1.0); |
1224 | let mask = transmute(_mm_setr_epi32(0, -1, 0, -1)); |
1225 | let r = _mm_blendv_ps(a, b, mask); |
1226 | let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0); |
1227 | assert_eq_m128(r, e); |
1228 | } |
1229 | |
1230 | #[simd_test(enable = "sse4.1" )] |
1231 | unsafe fn test_mm_blend_pd() { |
1232 | let a = _mm_set1_pd(0.0); |
1233 | let b = _mm_set1_pd(1.0); |
1234 | let r = _mm_blend_pd::<0b10>(a, b); |
1235 | let e = _mm_setr_pd(0.0, 1.0); |
1236 | assert_eq_m128d(r, e); |
1237 | } |
1238 | |
1239 | #[simd_test(enable = "sse4.1" )] |
1240 | unsafe fn test_mm_blend_ps() { |
1241 | let a = _mm_set1_ps(0.0); |
1242 | let b = _mm_set1_ps(1.0); |
1243 | let r = _mm_blend_ps::<0b1010>(a, b); |
1244 | let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0); |
1245 | assert_eq_m128(r, e); |
1246 | } |
1247 | |
1248 | #[simd_test(enable = "sse4.1" )] |
1249 | unsafe fn test_mm_blend_epi16() { |
1250 | let a = _mm_set1_epi16(0); |
1251 | let b = _mm_set1_epi16(1); |
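        // Each bit of the immediate selects the corresponding 16-bit lane:
        // 0 takes it from `a`, 1 from `b`, so 0b1010_1100 takes lanes
        // 2, 3, 5, and 7 from `b`.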
1252 | let r = _mm_blend_epi16::<0b1010_1100>(a, b); |
1253 | let e = _mm_setr_epi16(0, 0, 1, 1, 0, 1, 0, 1); |
1254 | assert_eq_m128i(r, e); |
1255 | } |
1256 | |
1257 | #[simd_test(enable = "sse4.1" )] |
1258 | unsafe fn test_mm_extract_ps() { |
1259 | let a = _mm_setr_ps(0.0, 1.0, 2.0, 3.0); |
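        // `extractps` returns the raw bits of the selected lane as an `i32`,
        // hence the `f32::from_bits` round-trips below.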
1260 | let r: f32 = f32::from_bits(_mm_extract_ps::<1>(a) as u32); |
1261 | assert_eq!(r, 1.0); |
1262 | let r: f32 = f32::from_bits(_mm_extract_ps::<3>(a) as u32); |
1263 | assert_eq!(r, 3.0); |
1264 | } |
1265 | |
1266 | #[simd_test(enable = "sse4.1" )] |
1267 | unsafe fn test_mm_extract_epi8() { |
1268 | #[rustfmt::skip] |
1269 | let a = _mm_setr_epi8( |
1270 | -1, 1, 2, 3, 4, 5, 6, 7, |
1271 | 8, 9, 10, 11, 12, 13, 14, 15 |
1272 | ); |
1273 | let r1 = _mm_extract_epi8::<0>(a); |
1274 | let r2 = _mm_extract_epi8::<3>(a); |
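        // `pextrb` zero-extends the extracted byte, so the -1 in lane 0
        // reads back as 0xFF.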
1275 | assert_eq!(r1, 0xFF); |
1276 | assert_eq!(r2, 3); |
1277 | } |
1278 | |
1279 | #[simd_test(enable = "sse4.1" )] |
1280 | unsafe fn test_mm_extract_epi32() { |
1281 | let a = _mm_setr_epi32(0, 1, 2, 3); |
1282 | let r = _mm_extract_epi32::<1>(a); |
1283 | assert_eq!(r, 1); |
1284 | let r = _mm_extract_epi32::<3>(a); |
1285 | assert_eq!(r, 3); |
1286 | } |
1287 | |
1288 | #[simd_test(enable = "sse4.1" )] |
1289 | unsafe fn test_mm_insert_ps() { |
1290 | let a = _mm_set1_ps(1.0); |
1291 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
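        // `insertps` immediate: bits 7:6 select the source lane of `b`,
        // bits 5:4 the destination lane in `a`, and bits 3:0 zero result
        // lanes. Here b[3] = 4.0 lands in lane 0, and lanes 2 and 3 are
        // zeroed.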
1292 | let r = _mm_insert_ps::<0b11_00_1100>(a, b); |
1293 | let e = _mm_setr_ps(4.0, 1.0, 0.0, 0.0); |
1294 | assert_eq_m128(r, e); |
1295 | |
1296 | // Zeroing takes precedence over copied value |
1297 | let a = _mm_set1_ps(1.0); |
1298 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
1299 | let r = _mm_insert_ps::<0b11_00_0001>(a, b); |
1300 | let e = _mm_setr_ps(0.0, 1.0, 1.0, 1.0); |
1301 | assert_eq_m128(r, e); |
1302 | } |
1303 | |
1304 | #[simd_test(enable = "sse4.1" )] |
1305 | unsafe fn test_mm_insert_epi8() { |
1306 | let a = _mm_set1_epi8(0); |
1307 | let e = _mm_setr_epi8(0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
1308 | let r = _mm_insert_epi8::<1>(a, 32); |
1309 | assert_eq_m128i(r, e); |
1310 | let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0); |
1311 | let r = _mm_insert_epi8::<14>(a, 32); |
1312 | assert_eq_m128i(r, e); |
1313 | } |
1314 | |
1315 | #[simd_test(enable = "sse4.1" )] |
1316 | unsafe fn test_mm_insert_epi32() { |
1317 | let a = _mm_set1_epi32(0); |
1318 | let e = _mm_setr_epi32(0, 32, 0, 0); |
1319 | let r = _mm_insert_epi32::<1>(a, 32); |
1320 | assert_eq_m128i(r, e); |
1321 | let e = _mm_setr_epi32(0, 0, 0, 32); |
1322 | let r = _mm_insert_epi32::<3>(a, 32); |
1323 | assert_eq_m128i(r, e); |
1324 | } |
1325 | |
1326 | #[simd_test(enable = "sse4.1" )] |
1327 | unsafe fn test_mm_max_epi8() { |
1328 | #[rustfmt::skip] |
1329 | let a = _mm_setr_epi8( |
1330 | 1, 4, 5, 8, 9, 12, 13, 16, |
1331 | 17, 20, 21, 24, 25, 28, 29, 32, |
1332 | ); |
1333 | #[rustfmt::skip] |
1334 | let b = _mm_setr_epi8( |
1335 | 2, 3, 6, 7, 10, 11, 14, 15, |
1336 | 18, 19, 22, 23, 26, 27, 30, 31, |
1337 | ); |
1338 | let r = _mm_max_epi8(a, b); |
1339 | #[rustfmt::skip] |
1340 | let e = _mm_setr_epi8( |
1341 | 2, 4, 6, 8, 10, 12, 14, 16, |
1342 | 18, 20, 22, 24, 26, 28, 30, 32, |
1343 | ); |
1344 | assert_eq_m128i(r, e); |
1345 | } |
1346 | |
1347 | #[simd_test(enable = "sse4.1" )] |
1348 | unsafe fn test_mm_max_epu16() { |
1349 | let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16); |
1350 | let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15); |
1351 | let r = _mm_max_epu16(a, b); |
1352 | let e = _mm_setr_epi16(2, 4, 6, 8, 10, 12, 14, 16); |
1353 | assert_eq_m128i(r, e); |
1354 | } |
1355 | |
1356 | #[simd_test(enable = "sse4.1" )] |
1357 | unsafe fn test_mm_max_epi32() { |
1358 | let a = _mm_setr_epi32(1, 4, 5, 8); |
1359 | let b = _mm_setr_epi32(2, 3, 6, 7); |
1360 | let r = _mm_max_epi32(a, b); |
1361 | let e = _mm_setr_epi32(2, 4, 6, 8); |
1362 | assert_eq_m128i(r, e); |
1363 | } |
1364 | |
1365 | #[simd_test(enable = "sse4.1" )] |
1366 | unsafe fn test_mm_max_epu32() { |
1367 | let a = _mm_setr_epi32(1, 4, 5, 8); |
1368 | let b = _mm_setr_epi32(2, 3, 6, 7); |
1369 | let r = _mm_max_epu32(a, b); |
1370 | let e = _mm_setr_epi32(2, 4, 6, 8); |
1371 | assert_eq_m128i(r, e); |
1372 | } |
1373 | |
1374 | #[simd_test(enable = "sse4.1" )] |
1375 | unsafe fn test_mm_min_epi8_1() { |
1376 | #[rustfmt::skip] |
1377 | let a = _mm_setr_epi8( |
1378 | 1, 4, 5, 8, 9, 12, 13, 16, |
1379 | 17, 20, 21, 24, 25, 28, 29, 32, |
1380 | ); |
1381 | #[rustfmt::skip] |
1382 | let b = _mm_setr_epi8( |
1383 | 2, 3, 6, 7, 10, 11, 14, 15, |
1384 | 18, 19, 22, 23, 26, 27, 30, 31, |
1385 | ); |
1386 | let r = _mm_min_epi8(a, b); |
1387 | #[rustfmt::skip] |
1388 | let e = _mm_setr_epi8( |
1389 | 1, 3, 5, 7, 9, 11, 13, 15, |
1390 | 17, 19, 21, 23, 25, 27, 29, 31, |
1391 | ); |
1392 | assert_eq_m128i(r, e); |
1393 | } |
1394 | |
1395 | #[simd_test(enable = "sse4.1" )] |
1396 | unsafe fn test_mm_min_epi8_2() { |
1397 | #[rustfmt::skip] |
1398 | let a = _mm_setr_epi8( |
1399 | 1, -4, -5, 8, -9, -12, 13, -16, |
1400 | 17, 20, 21, 24, 25, 28, 29, 32, |
1401 | ); |
1402 | #[rustfmt::skip] |
1403 | let b = _mm_setr_epi8( |
1404 | 2, -3, -6, 7, -10, -11, 14, -15, |
1405 | 18, 19, 22, 23, 26, 27, 30, 31, |
1406 | ); |
1407 | let r = _mm_min_epi8(a, b); |
1408 | #[rustfmt::skip] |
1409 | let e = _mm_setr_epi8( |
1410 | 1, -4, -6, 7, -10, -12, 13, -16, |
1411 | 17, 19, 21, 23, 25, 27, 29, 31, |
1412 | ); |
1413 | assert_eq_m128i(r, e); |
1414 | } |
1415 | |
1416 | #[simd_test(enable = "sse4.1" )] |
1417 | unsafe fn test_mm_min_epu16() { |
1418 | let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16); |
1419 | let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15); |
1420 | let r = _mm_min_epu16(a, b); |
1421 | let e = _mm_setr_epi16(1, 3, 5, 7, 9, 11, 13, 15); |
1422 | assert_eq_m128i(r, e); |
1423 | } |
1424 | |
1425 | #[simd_test(enable = "sse4.1" )] |
1426 | unsafe fn test_mm_min_epi32_1() { |
1427 | let a = _mm_setr_epi32(1, 4, 5, 8); |
1428 | let b = _mm_setr_epi32(2, 3, 6, 7); |
1429 | let r = _mm_min_epi32(a, b); |
1430 | let e = _mm_setr_epi32(1, 3, 5, 7); |
1431 | assert_eq_m128i(r, e); |
1432 | } |
1433 | |
1434 | #[simd_test(enable = "sse4.1" )] |
1435 | unsafe fn test_mm_min_epi32_2() { |
1436 | let a = _mm_setr_epi32(-1, 4, 5, -7); |
1437 | let b = _mm_setr_epi32(-2, 3, -6, 8); |
1438 | let r = _mm_min_epi32(a, b); |
1439 | let e = _mm_setr_epi32(-2, 3, -6, -7); |
1440 | assert_eq_m128i(r, e); |
1441 | } |
1442 | |
1443 | #[simd_test(enable = "sse4.1" )] |
1444 | unsafe fn test_mm_min_epu32() { |
1445 | let a = _mm_setr_epi32(1, 4, 5, 8); |
1446 | let b = _mm_setr_epi32(2, 3, 6, 7); |
1447 | let r = _mm_min_epu32(a, b); |
1448 | let e = _mm_setr_epi32(1, 3, 5, 7); |
1449 | assert_eq_m128i(r, e); |
1450 | } |
1451 | |
1452 | #[simd_test(enable = "sse4.1" )] |
1453 | unsafe fn test_mm_packus_epi32() { |
1454 | let a = _mm_setr_epi32(1, 2, 3, 4); |
1455 | let b = _mm_setr_epi32(-1, -2, -3, -4); |
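        // `packusdw` saturates each signed 32-bit lane to the unsigned
        // 16-bit range, so the negative lanes of `b` clamp to 0.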
1456 | let r = _mm_packus_epi32(a, b); |
1457 | let e = _mm_setr_epi16(1, 2, 3, 4, 0, 0, 0, 0); |
1458 | assert_eq_m128i(r, e); |
1459 | } |
1460 | |
1461 | #[simd_test(enable = "sse4.1" )] |
1462 | unsafe fn test_mm_cmpeq_epi64() { |
1463 | let a = _mm_setr_epi64x(0, 1); |
1464 | let b = _mm_setr_epi64x(0, 0); |
1465 | let r = _mm_cmpeq_epi64(a, b); |
1466 | let e = _mm_setr_epi64x(-1, 0); |
1467 | assert_eq_m128i(r, e); |
1468 | } |
1469 | |
1470 | #[simd_test(enable = "sse4.1" )] |
1471 | unsafe fn test_mm_cvtepi8_epi16() { |
1472 | let a = _mm_set1_epi8(10); |
1473 | let r = _mm_cvtepi8_epi16(a); |
1474 | let e = _mm_set1_epi16(10); |
1475 | assert_eq_m128i(r, e); |
1476 | let a = _mm_set1_epi8(-10); |
1477 | let r = _mm_cvtepi8_epi16(a); |
1478 | let e = _mm_set1_epi16(-10); |
1479 | assert_eq_m128i(r, e); |
1480 | } |
1481 | |
1482 | #[simd_test(enable = "sse4.1" )] |
1483 | unsafe fn test_mm_cvtepi8_epi32() { |
1484 | let a = _mm_set1_epi8(10); |
1485 | let r = _mm_cvtepi8_epi32(a); |
1486 | let e = _mm_set1_epi32(10); |
1487 | assert_eq_m128i(r, e); |
1488 | let a = _mm_set1_epi8(-10); |
1489 | let r = _mm_cvtepi8_epi32(a); |
1490 | let e = _mm_set1_epi32(-10); |
1491 | assert_eq_m128i(r, e); |
1492 | } |
1493 | |
1494 | #[simd_test(enable = "sse4.1" )] |
1495 | unsafe fn test_mm_cvtepi8_epi64() { |
1496 | let a = _mm_set1_epi8(10); |
1497 | let r = _mm_cvtepi8_epi64(a); |
1498 | let e = _mm_set1_epi64x(10); |
1499 | assert_eq_m128i(r, e); |
1500 | let a = _mm_set1_epi8(-10); |
1501 | let r = _mm_cvtepi8_epi64(a); |
1502 | let e = _mm_set1_epi64x(-10); |
1503 | assert_eq_m128i(r, e); |
1504 | } |
1505 | |
1506 | #[simd_test(enable = "sse4.1" )] |
1507 | unsafe fn test_mm_cvtepi16_epi32() { |
1508 | let a = _mm_set1_epi16(10); |
1509 | let r = _mm_cvtepi16_epi32(a); |
1510 | let e = _mm_set1_epi32(10); |
1511 | assert_eq_m128i(r, e); |
1512 | let a = _mm_set1_epi16(-10); |
1513 | let r = _mm_cvtepi16_epi32(a); |
1514 | let e = _mm_set1_epi32(-10); |
1515 | assert_eq_m128i(r, e); |
1516 | } |
1517 | |
1518 | #[simd_test(enable = "sse4.1" )] |
1519 | unsafe fn test_mm_cvtepi16_epi64() { |
1520 | let a = _mm_set1_epi16(10); |
1521 | let r = _mm_cvtepi16_epi64(a); |
1522 | let e = _mm_set1_epi64x(10); |
1523 | assert_eq_m128i(r, e); |
1524 | let a = _mm_set1_epi16(-10); |
1525 | let r = _mm_cvtepi16_epi64(a); |
1526 | let e = _mm_set1_epi64x(-10); |
1527 | assert_eq_m128i(r, e); |
1528 | } |
1529 | |
1530 | #[simd_test(enable = "sse4.1" )] |
1531 | unsafe fn test_mm_cvtepi32_epi64() { |
1532 | let a = _mm_set1_epi32(10); |
1533 | let r = _mm_cvtepi32_epi64(a); |
1534 | let e = _mm_set1_epi64x(10); |
1535 | assert_eq_m128i(r, e); |
1536 | let a = _mm_set1_epi32(-10); |
1537 | let r = _mm_cvtepi32_epi64(a); |
1538 | let e = _mm_set1_epi64x(-10); |
1539 | assert_eq_m128i(r, e); |
1540 | } |
1541 | |
1542 | #[simd_test(enable = "sse4.1" )] |
1543 | unsafe fn test_mm_cvtepu8_epi16() { |
1544 | let a = _mm_set1_epi8(10); |
1545 | let r = _mm_cvtepu8_epi16(a); |
1546 | let e = _mm_set1_epi16(10); |
1547 | assert_eq_m128i(r, e); |
1548 | } |
1549 | |
1550 | #[simd_test(enable = "sse4.1" )] |
1551 | unsafe fn test_mm_cvtepu8_epi32() { |
1552 | let a = _mm_set1_epi8(10); |
1553 | let r = _mm_cvtepu8_epi32(a); |
1554 | let e = _mm_set1_epi32(10); |
1555 | assert_eq_m128i(r, e); |
1556 | } |
1557 | |
1558 | #[simd_test(enable = "sse4.1" )] |
1559 | unsafe fn test_mm_cvtepu8_epi64() { |
1560 | let a = _mm_set1_epi8(10); |
1561 | let r = _mm_cvtepu8_epi64(a); |
1562 | let e = _mm_set1_epi64x(10); |
1563 | assert_eq_m128i(r, e); |
1564 | } |
1565 | |
1566 | #[simd_test(enable = "sse4.1" )] |
1567 | unsafe fn test_mm_cvtepu16_epi32() { |
1568 | let a = _mm_set1_epi16(10); |
1569 | let r = _mm_cvtepu16_epi32(a); |
1570 | let e = _mm_set1_epi32(10); |
1571 | assert_eq_m128i(r, e); |
1572 | } |
1573 | |
1574 | #[simd_test(enable = "sse4.1" )] |
1575 | unsafe fn test_mm_cvtepu16_epi64() { |
1576 | let a = _mm_set1_epi16(10); |
1577 | let r = _mm_cvtepu16_epi64(a); |
1578 | let e = _mm_set1_epi64x(10); |
1579 | assert_eq_m128i(r, e); |
1580 | } |
1581 | |
1582 | #[simd_test(enable = "sse4.1" )] |
1583 | unsafe fn test_mm_cvtepu32_epi64() { |
1584 | let a = _mm_set1_epi32(10); |
1585 | let r = _mm_cvtepu32_epi64(a); |
1586 | let e = _mm_set1_epi64x(10); |
1587 | assert_eq_m128i(r, e); |
1588 | } |
1589 | |
1590 | #[simd_test(enable = "sse4.1" )] |
1591 | unsafe fn test_mm_dp_pd() { |
1592 | let a = _mm_setr_pd(2.0, 3.0); |
1593 | let b = _mm_setr_pd(1.0, 4.0); |
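        // Immediate 0b0011_0001: bits 5:4 enable both products
        // (2*1 + 3*4 = 14), bits 1:0 broadcast the sum into lane 0 only.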
1594 | let e = _mm_setr_pd(14.0, 0.0); |
1595 | assert_eq_m128d(_mm_dp_pd::<0b00110001>(a, b), e); |
1596 | } |
1597 | |
1598 | #[simd_test(enable = "sse4.1" )] |
1599 | unsafe fn test_mm_dp_ps() { |
1600 | let a = _mm_setr_ps(2.0, 3.0, 1.0, 10.0); |
1601 | let b = _mm_setr_ps(1.0, 4.0, 0.5, 10.0); |
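        // High nibble 0b0111 sums the first three products
        // (2*1 + 3*4 + 1*0.5 = 14.5); low nibble 0b0101 writes the sum to
        // lanes 0 and 2 and zeroes the rest.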
1602 | let e = _mm_setr_ps(14.5, 0.0, 14.5, 0.0); |
1603 | assert_eq_m128(_mm_dp_ps::<0b01110101>(a, b), e); |
1604 | } |
1605 | |
1606 | #[simd_test(enable = "sse4.1" )] |
1607 | unsafe fn test_mm_floor_pd() { |
1608 | let a = _mm_setr_pd(2.5, 4.5); |
1609 | let r = _mm_floor_pd(a); |
1610 | let e = _mm_setr_pd(2.0, 4.0); |
1611 | assert_eq_m128d(r, e); |
1612 | } |
1613 | |
1614 | #[simd_test(enable = "sse4.1" )] |
1615 | unsafe fn test_mm_floor_ps() { |
1616 | let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5); |
1617 | let r = _mm_floor_ps(a); |
1618 | let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0); |
1619 | assert_eq_m128(r, e); |
1620 | } |
1621 | |
1622 | #[simd_test(enable = "sse4.1" )] |
1623 | unsafe fn test_mm_floor_sd() { |
1624 | let a = _mm_setr_pd(2.5, 4.5); |
1625 | let b = _mm_setr_pd(-1.5, -3.5); |
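        // Lane 0 is floor(b[0]); the upper lane is copied from `a`.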
1626 | let r = _mm_floor_sd(a, b); |
1627 | let e = _mm_setr_pd(-2.0, 4.5); |
1628 | assert_eq_m128d(r, e); |
1629 | } |
1630 | |
1631 | #[simd_test(enable = "sse4.1" )] |
1632 | unsafe fn test_mm_floor_ss() { |
1633 | let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5); |
1634 | let b = _mm_setr_ps(-1.5, -3.5, -7.5, -15.5); |
1635 | let r = _mm_floor_ss(a, b); |
1636 | let e = _mm_setr_ps(-2.0, 4.5, 8.5, 16.5); |
1637 | assert_eq_m128(r, e); |
1638 | } |
1639 | |
1640 | #[simd_test(enable = "sse4.1" )] |
1641 | unsafe fn test_mm_ceil_pd() { |
1642 | let a = _mm_setr_pd(1.5, 3.5); |
1643 | let r = _mm_ceil_pd(a); |
1644 | let e = _mm_setr_pd(2.0, 4.0); |
1645 | assert_eq_m128d(r, e); |
1646 | } |
1647 | |
1648 | #[simd_test(enable = "sse4.1" )] |
1649 | unsafe fn test_mm_ceil_ps() { |
1650 | let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); |
1651 | let r = _mm_ceil_ps(a); |
1652 | let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0); |
1653 | assert_eq_m128(r, e); |
1654 | } |
1655 | |
1656 | #[simd_test(enable = "sse4.1" )] |
1657 | unsafe fn test_mm_ceil_sd() { |
1658 | let a = _mm_setr_pd(1.5, 3.5); |
1659 | let b = _mm_setr_pd(-2.5, -4.5); |
1660 | let r = _mm_ceil_sd(a, b); |
1661 | let e = _mm_setr_pd(-2.0, 3.5); |
1662 | assert_eq_m128d(r, e); |
1663 | } |
1664 | |
1665 | #[simd_test(enable = "sse4.1" )] |
1666 | unsafe fn test_mm_ceil_ss() { |
1667 | let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); |
1668 | let b = _mm_setr_ps(-2.5, -4.5, -8.5, -16.5); |
1669 | let r = _mm_ceil_ss(a, b); |
1670 | let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5); |
1671 | assert_eq_m128(r, e); |
1672 | } |
1673 | |
1674 | #[simd_test(enable = "sse4.1" )] |
1675 | unsafe fn test_mm_round_pd() { |
1676 | let a = _mm_setr_pd(1.25, 3.75); |
1677 | let r = _mm_round_pd::<_MM_FROUND_TO_NEAREST_INT>(a); |
1678 | let e = _mm_setr_pd(1.0, 4.0); |
1679 | assert_eq_m128d(r, e); |
1680 | } |
1681 | |
1682 | #[simd_test(enable = "sse4.1" )] |
1683 | unsafe fn test_mm_round_ps() { |
1684 | let a = _mm_setr_ps(2.25, 4.75, -1.75, -4.25); |
1685 | let r = _mm_round_ps::<_MM_FROUND_TO_ZERO>(a); |
1686 | let e = _mm_setr_ps(2.0, 4.0, -1.0, -4.0); |
1687 | assert_eq_m128(r, e); |
1688 | } |
1689 | |
1690 | #[simd_test(enable = "sse4.1" )] |
1691 | unsafe fn test_mm_round_sd() { |
1692 | let a = _mm_setr_pd(1.5, 3.5); |
1693 | let b = _mm_setr_pd(-2.5, -4.5); |
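        // `_MM_FROUND_TO_NEAREST_INT` rounds halfway cases to even, so
        // -2.5 rounds to -2.0 rather than -3.0.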
1694 | let r = _mm_round_sd::<_MM_FROUND_TO_NEAREST_INT>(a, b); |
1695 | let e = _mm_setr_pd(-2.0, 3.5); |
1696 | assert_eq_m128d(r, e); |
1697 | |
1698 | let a = _mm_setr_pd(1.5, 3.5); |
1699 | let b = _mm_setr_pd(-2.5, -4.5); |
1700 | let r = _mm_round_sd::<_MM_FROUND_TO_NEG_INF>(a, b); |
1701 | let e = _mm_setr_pd(-3.0, 3.5); |
1702 | assert_eq_m128d(r, e); |
1703 | |
1704 | let a = _mm_setr_pd(1.5, 3.5); |
1705 | let b = _mm_setr_pd(-2.5, -4.5); |
1706 | let r = _mm_round_sd::<_MM_FROUND_TO_POS_INF>(a, b); |
1707 | let e = _mm_setr_pd(-2.0, 3.5); |
1708 | assert_eq_m128d(r, e); |
1709 | |
1710 | let a = _mm_setr_pd(1.5, 3.5); |
1711 | let b = _mm_setr_pd(-2.5, -4.5); |
1712 | let r = _mm_round_sd::<_MM_FROUND_TO_ZERO>(a, b); |
1713 | let e = _mm_setr_pd(-2.0, 3.5); |
1714 | assert_eq_m128d(r, e); |
1715 | } |
1716 | |
1717 | #[simd_test(enable = "sse4.1" )] |
1718 | unsafe fn test_mm_round_ss() { |
1719 | let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); |
1720 | let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5); |
1721 | let r = _mm_round_ss::<_MM_FROUND_TO_NEAREST_INT>(a, b); |
1722 | let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5); |
1723 | assert_eq_m128(r, e); |
1724 | |
1725 | let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); |
1726 | let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5); |
1727 | let r = _mm_round_ss::<_MM_FROUND_TO_NEG_INF>(a, b); |
1728 | let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5); |
1729 | assert_eq_m128(r, e); |
1730 | |
1731 | let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); |
1732 | let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5); |
1733 | let r = _mm_round_ss::<_MM_FROUND_TO_POS_INF>(a, b); |
1734 | let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5); |
1735 | assert_eq_m128(r, e); |
1736 | |
1737 | let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); |
1738 | let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5); |
1739 | let r = _mm_round_ss::<_MM_FROUND_TO_ZERO>(a, b); |
1740 | let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5); |
1741 | assert_eq_m128(r, e); |
1742 | } |
1743 | |
1744 | #[simd_test(enable = "sse4.1" )] |
1745 | unsafe fn test_mm_minpos_epu16_1() { |
1746 | let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 66); |
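        // `phminposuw` returns the minimum value in lane 0 and its index in
        // lane 1; the remaining lanes are zeroed.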
1747 | let r = _mm_minpos_epu16(a); |
1748 | let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0); |
1749 | assert_eq_m128i(r, e); |
1750 | } |
1751 | |
1752 | #[simd_test(enable = "sse4.1" )] |
1753 | unsafe fn test_mm_minpos_epu16_2() { |
1754 | let a = _mm_setr_epi16(0, 18, 44, 97, 50, 13, 67, 66); |
1755 | let r = _mm_minpos_epu16(a); |
1756 | let e = _mm_setr_epi16(0, 0, 0, 0, 0, 0, 0, 0); |
1757 | assert_eq_m128i(r, e); |
1758 | } |
1759 | |
1760 | #[simd_test(enable = "sse4.1" )] |
1761 | unsafe fn test_mm_minpos_epu16_3() { |
1762 | // Case where the minimum value is repeated |
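        // `phminposuw` breaks the tie in favor of the lowest index
        // (5 here, not 7).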
1763 | let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 13); |
1764 | let r = _mm_minpos_epu16(a); |
1765 | let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0); |
1766 | assert_eq_m128i(r, e); |
1767 | } |
1768 | |
1769 | #[simd_test(enable = "sse4.1" )] |
1770 | unsafe fn test_mm_mul_epi32() { |
1771 | { |
1772 | let a = _mm_setr_epi32(1, 1, 1, 1); |
1773 | let b = _mm_setr_epi32(1, 2, 3, 4); |
1774 | let r = _mm_mul_epi32(a, b); |
1775 | let e = _mm_setr_epi64x(1, 3); |
1776 | assert_eq_m128i(r, e); |
1777 | } |
1778 | { |
1779 | let a = _mm_setr_epi32(15, 2 /* ignored */, 1234567, 4 /* ignored */); |
1780 | let b = _mm_setr_epi32( |
1781 | -20, -256, /* ignored */ |
1782 | 666666, 666666, /* ignored */ |
1783 | ); |
1784 | let r = _mm_mul_epi32(a, b); |
1785 | let e = _mm_setr_epi64x(-300, 823043843622); |
1786 | assert_eq_m128i(r, e); |
1787 | } |
1788 | } |
1789 | |
1790 | #[simd_test(enable = "sse4.1" )] |
1791 | unsafe fn test_mm_mullo_epi32() { |
1792 | { |
1793 | let a = _mm_setr_epi32(1, 1, 1, 1); |
1794 | let b = _mm_setr_epi32(1, 2, 3, 4); |
1795 | let r = _mm_mullo_epi32(a, b); |
1796 | let e = _mm_setr_epi32(1, 2, 3, 4); |
1797 | assert_eq_m128i(r, e); |
1798 | } |
1799 | { |
1800 | let a = _mm_setr_epi32(15, -2, 1234567, 99999); |
1801 | let b = _mm_setr_epi32(-20, -256, 666666, -99999); |
1802 | let r = _mm_mullo_epi32(a, b); |
            // The full product is truncated to its low 32 bits, whose most
            // significant bit is then treated as a sign bit:
            // 1234567 * 666666 = 823043843622, truncating to -1589877210
1806 | let e = _mm_setr_epi32(-300, 512, -1589877210, -1409865409); |
1807 | assert_eq_m128i(r, e); |
1808 | } |
1809 | } |
1810 | |
1811 | #[simd_test(enable = "sse4.1" )] |
1812 | unsafe fn test_mm_minpos_epu16() { |
1813 | let a = _mm_setr_epi16(8, 7, 6, 5, 4, 1, 2, 3); |
1814 | let r = _mm_minpos_epu16(a); |
1815 | let e = _mm_setr_epi16(1, 5, 0, 0, 0, 0, 0, 0); |
1816 | assert_eq_m128i(r, e); |
1817 | } |
1818 | |
1819 | #[simd_test(enable = "sse4.1" )] |
1820 | unsafe fn test_mm_mpsadbw_epu8() { |
1821 | #[rustfmt::skip] |
1822 | let a = _mm_setr_epi8( |
1823 | 0, 1, 2, 3, 4, 5, 6, 7, |
1824 | 8, 9, 10, 11, 12, 13, 14, 15, |
1825 | ); |
1826 | |
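        // The immediate controls the operands: bit 2 selects `a`'s starting
        // byte offset (0 or 4), bits 1:0 select which aligned 4-byte block
        // of `b` is compared. Result lane i is the sum of absolute
        // differences between that block of `b` and the four bytes of `a`
        // starting at offset + i.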
1827 | let r = _mm_mpsadbw_epu8::<0b000>(a, a); |
1828 | let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28); |
1829 | assert_eq_m128i(r, e); |
1830 | |
1831 | let r = _mm_mpsadbw_epu8::<0b001>(a, a); |
1832 | let e = _mm_setr_epi16(16, 12, 8, 4, 0, 4, 8, 12); |
1833 | assert_eq_m128i(r, e); |
1834 | |
1835 | let r = _mm_mpsadbw_epu8::<0b100>(a, a); |
1836 | let e = _mm_setr_epi16(16, 20, 24, 28, 32, 36, 40, 44); |
1837 | assert_eq_m128i(r, e); |
1838 | |
1839 | let r = _mm_mpsadbw_epu8::<0b101>(a, a); |
1840 | let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28); |
1841 | assert_eq_m128i(r, e); |
1842 | |
1843 | let r = _mm_mpsadbw_epu8::<0b111>(a, a); |
1844 | let e = _mm_setr_epi16(32, 28, 24, 20, 16, 12, 8, 4); |
1845 | assert_eq_m128i(r, e); |
1846 | } |
1847 | |
1848 | #[simd_test(enable = "sse4.1" )] |
1849 | unsafe fn test_mm_testz_si128() { |
1850 | let a = _mm_set1_epi8(1); |
1851 | let mask = _mm_set1_epi8(0); |
1852 | let r = _mm_testz_si128(a, mask); |
1853 | assert_eq!(r, 1); |
1854 | let a = _mm_set1_epi8(0b101); |
1855 | let mask = _mm_set1_epi8(0b110); |
1856 | let r = _mm_testz_si128(a, mask); |
1857 | assert_eq!(r, 0); |
1858 | let a = _mm_set1_epi8(0b011); |
1859 | let mask = _mm_set1_epi8(0b100); |
1860 | let r = _mm_testz_si128(a, mask); |
1861 | assert_eq!(r, 1); |
1862 | } |
1863 | |
1864 | #[simd_test(enable = "sse4.1" )] |
1865 | unsafe fn test_mm_testc_si128() { |
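        // `testc` reports the CF output: 1 exactly when every bit set in
        // `mask` is also set in `a` (i.e. `!a & mask == 0`).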
1866 | let a = _mm_set1_epi8(-1); |
1867 | let mask = _mm_set1_epi8(0); |
1868 | let r = _mm_testc_si128(a, mask); |
1869 | assert_eq!(r, 1); |
1870 | let a = _mm_set1_epi8(0b101); |
1871 | let mask = _mm_set1_epi8(0b110); |
1872 | let r = _mm_testc_si128(a, mask); |
1873 | assert_eq!(r, 0); |
1874 | let a = _mm_set1_epi8(0b101); |
1875 | let mask = _mm_set1_epi8(0b100); |
1876 | let r = _mm_testc_si128(a, mask); |
1877 | assert_eq!(r, 1); |
1878 | } |
1879 | |
1880 | #[simd_test(enable = "sse4.1" )] |
1881 | unsafe fn test_mm_testnzc_si128() { |
1882 | let a = _mm_set1_epi8(0); |
1883 | let mask = _mm_set1_epi8(1); |
1884 | let r = _mm_testnzc_si128(a, mask); |
1885 | assert_eq!(r, 0); |
1886 | let a = _mm_set1_epi8(-1); |
1887 | let mask = _mm_set1_epi8(0); |
1888 | let r = _mm_testnzc_si128(a, mask); |
1889 | assert_eq!(r, 0); |
1890 | let a = _mm_set1_epi8(0b101); |
1891 | let mask = _mm_set1_epi8(0b110); |
1892 | let r = _mm_testnzc_si128(a, mask); |
1893 | assert_eq!(r, 1); |
1894 | let a = _mm_set1_epi8(0b101); |
1895 | let mask = _mm_set1_epi8(0b101); |
1896 | let r = _mm_testnzc_si128(a, mask); |
1897 | assert_eq!(r, 0); |
1898 | } |
1899 | |
1900 | #[simd_test(enable = "sse4.1" )] |
1901 | unsafe fn test_mm_test_all_zeros() { |
1902 | let a = _mm_set1_epi8(1); |
1903 | let mask = _mm_set1_epi8(0); |
1904 | let r = _mm_test_all_zeros(a, mask); |
1905 | assert_eq!(r, 1); |
1906 | let a = _mm_set1_epi8(0b101); |
1907 | let mask = _mm_set1_epi8(0b110); |
1908 | let r = _mm_test_all_zeros(a, mask); |
1909 | assert_eq!(r, 0); |
1910 | let a = _mm_set1_epi8(0b011); |
1911 | let mask = _mm_set1_epi8(0b100); |
1912 | let r = _mm_test_all_zeros(a, mask); |
1913 | assert_eq!(r, 1); |
1914 | } |
1915 | |
1916 | #[simd_test(enable = "sse4.1" )] |
1917 | unsafe fn test_mm_test_all_ones() { |
1918 | let a = _mm_set1_epi8(-1); |
1919 | let r = _mm_test_all_ones(a); |
1920 | assert_eq!(r, 1); |
1921 | let a = _mm_set1_epi8(0b101); |
1922 | let r = _mm_test_all_ones(a); |
1923 | assert_eq!(r, 0); |
1924 | } |
1925 | |
1926 | #[simd_test(enable = "sse4.1" )] |
1927 | unsafe fn test_mm_test_mix_ones_zeros() { |
1928 | let a = _mm_set1_epi8(0); |
1929 | let mask = _mm_set1_epi8(1); |
1930 | let r = _mm_test_mix_ones_zeros(a, mask); |
1931 | assert_eq!(r, 0); |
1932 | let a = _mm_set1_epi8(-1); |
1933 | let mask = _mm_set1_epi8(0); |
1934 | let r = _mm_test_mix_ones_zeros(a, mask); |
1935 | assert_eq!(r, 0); |
1936 | let a = _mm_set1_epi8(0b101); |
1937 | let mask = _mm_set1_epi8(0b110); |
1938 | let r = _mm_test_mix_ones_zeros(a, mask); |
1939 | assert_eq!(r, 1); |
1940 | let a = _mm_set1_epi8(0b101); |
1941 | let mask = _mm_set1_epi8(0b101); |
1942 | let r = _mm_test_mix_ones_zeros(a, mask); |
1943 | assert_eq!(r, 0); |
1944 | } |
1945 | } |
1946 | |