//! Streaming SIMD Extensions 2 (SSE2)

#[cfg(test)]
use stdarch_test::assert_instr;

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    intrinsics::sqrtf64,
    mem, ptr,
};

/// Provides a hint to the processor that the code sequence is a spin-wait loop.
///
/// This can help improve the performance and power consumption of spin-wait
/// loops.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_pause)
#[inline]
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_pause() {
    // note: `pause` is guaranteed to be interpreted as a `nop` by CPUs without
    // the SSE2 target-feature - therefore it does not require any target features
    pause()
}

/// Invalidates and flushes the cache line that contains `p` from all levels of
/// the cache hierarchy.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clflush)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(clflush))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_clflush(p: *const u8) {
    clflush(p)
}

/// Performs a serializing operation on all load-from-memory instructions
/// that were issued prior to this instruction.
///
/// Guarantees that every load instruction that precedes, in program order, the
/// load fence instruction is globally visible before any load instruction
/// which follows the fence in program order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(lfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_lfence() {
    lfence()
}

/// Performs a serializing operation on all load-from-memory and store-to-memory
/// instructions that were issued prior to this instruction.
///
/// Guarantees that every memory access that precedes, in program order, the
/// memory fence instruction is globally visible before any memory instruction
/// which follows the fence in program order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mfence() {
    mfence()
}

/// Adds packed 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi8)
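///
/// # Examples
///
/// A minimal illustrative sketch (not part of the original source); it assumes
/// an `x86_64` target, where SSE2 is part of the baseline instruction set:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// // Addition wraps in each signed 8-bit lane: 100 + 100 = 200, i.e. -56 as i8.
/// let r = _mm_add_epi8(_mm_set1_epi8(100), _mm_set1_epi8(100));
/// assert_eq!(_mm_cvtsi128_si32(r) as i8, -56);
/// # }
/// ```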
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_add(a.as_i8x16(), b.as_i8x16())) }
}

/// Adds packed 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_add(a.as_i16x8(), b.as_i16x8())) }
}

/// Adds packed 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_add(a.as_i32x4(), b.as_i32x4())) }
}

/// Adds packed 64-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_add(a.as_i64x2(), b.as_i64x2())) }
}

/// Adds packed 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi8)
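///
/// # Examples
///
/// An illustrative sketch (not from the original source), assuming an `x86_64`
/// target where SSE2 is baseline; contrast this with the wrapping `_mm_add_epi8`:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// // Saturating: 100 + 100 clamps to i8::MAX instead of wrapping to -56.
/// let r = _mm_adds_epi8(_mm_set1_epi8(100), _mm_set1_epi8(100));
/// assert_eq!(_mm_cvtsi128_si32(r) as i8, 127);
/// # }
/// ```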
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16())) }
}

/// Adds packed 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8())) }
}

/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16())) }
}

/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8())) }
}

/// Averages packed unsigned 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu8)
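///
/// # Examples
///
/// A sketch (not from the original source) showing that the average rounds up
/// on ties; assumes an `x86_64` target where SSE2 is baseline:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// // Each lane computes (a + b + 1) >> 1, so avg(1, 2) = 2.
/// let r = _mm_avg_epu8(_mm_set1_epi8(1), _mm_set1_epi8(2));
/// assert_eq!(_mm_cvtsi128_si32(r) as u8, 2);
/// # }
/// ```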
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: u16x16 = simd_cast::<_, u16x16>(a.as_u8x16());
        let b: u16x16 = simd_cast::<_, u16x16>(b.as_u8x16());
        let r: u16x16 = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1));
        transmute(simd_cast::<_, u8x16>(r))
    }
}

/// Averages packed unsigned 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: u32x8 = simd_cast::<_, u32x8>(a.as_u16x8());
        let b: u32x8 = simd_cast::<_, u32x8>(b.as_u16x8());
        let r: u32x8 = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1));
        transmute(simd_cast::<_, u16x8>(r))
    }
}

/// Multiplies and then horizontally adds signed 16-bit integers in `a` and `b`.
///
/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Horizontally adds adjacent pairs of
/// intermediate 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd_epi16)
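///
/// # Examples
///
/// An illustrative sketch (not part of the original source; assumes an
/// `x86_64` target, where SSE2 is baseline):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// let a = _mm_setr_epi16(1, 2, 3, 4, 0, 0, 0, 0);
/// let b = _mm_setr_epi16(5, 6, 7, 8, 0, 0, 0, 0);
/// // Lane 0 of the result is 1 * 5 + 2 * 6 = 17.
/// let r = _mm_madd_epi16(a, b);
/// assert_eq!(_mm_cvtsi128_si32(r), 17);
/// # }
/// ```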
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) }
}

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// maximum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: i16x8 = a.as_i16x8();
        let b: i16x8 = b.as_i16x8();
        transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
    }
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed maximum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: u8x16 = a.as_u8x16();
        let b: u8x16 = b.as_u8x16();
        transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
    }
}

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// minimum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: i16x8 = a.as_i16x8();
        let b: i16x8 = b.as_i16x8();
        transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
    }
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed minimum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: u8x16 = a.as_u8x16();
        let b: u8x16 = b.as_u8x16();
        transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
    }
}

/// Multiplies the packed 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epi16)
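///
/// # Examples
///
/// A sketch (not from the original source) of retrieving the high half of a
/// 16 x 16 -> 32-bit product; assumes `x86_64` with baseline SSE2:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// // 0x4000 * 4 = 0x10000, whose high 16 bits are 0x0001.
/// let r = _mm_mulhi_epi16(_mm_set1_epi16(0x4000), _mm_set1_epi16(4));
/// assert_eq!(_mm_cvtsi128_si32(r) as i16, 1);
/// # }
/// ```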
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: i32x8 = simd_cast::<_, i32x8>(a.as_i16x8());
        let b: i32x8 = simd_cast::<_, i32x8>(b.as_i16x8());
        let r: i32x8 = simd_shr(simd_mul(a, b), i32x8::splat(16));
        transmute(simd_cast::<i32x8, i16x8>(r))
    }
}

/// Multiplies the packed unsigned 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: u32x8 = simd_cast::<_, u32x8>(a.as_u16x8());
        let b: u32x8 = simd_cast::<_, u32x8>(b.as_u16x8());
        let r: u32x8 = simd_shr(simd_mul(a, b), u32x8::splat(16));
        transmute(simd_cast::<u32x8, u16x8>(r))
    }
}

/// Multiplies the packed 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// low 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmullw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) }
}

/// Multiplies the low unsigned 32-bit integers from each packed 64-bit element
/// in `a` and `b`.
///
/// Returns the unsigned 64-bit results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_epu32)
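///
/// # Examples
///
/// An illustrative sketch (not part of the original source; assumes `x86_64`,
/// where SSE2 and `_mm_cvtsi128_si64` are both available):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// // Only the low 32 bits of each 64-bit lane are multiplied, but the full
/// // 64-bit product is kept: 0xFFFF_FFFF * 2 = 0x1_FFFF_FFFE.
/// let r = _mm_mul_epu32(_mm_set_epi64x(0, 0xFFFF_FFFF), _mm_set_epi64x(0, 2));
/// assert_eq!(_mm_cvtsi128_si64(r) as u64, 0x1_FFFF_FFFE);
/// # }
/// ```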
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmuludq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a: u64x2 = a.as_u64x2();
        let b: u64x2 = b.as_u64x2();
        let mask: u64x2 = u64x2::splat(u32::MAX.into());
        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
    }
}

/// Sums the absolute differences of packed unsigned 8-bit integers.
///
/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
/// and `b`, then horizontally sums each consecutive 8 differences to produce
/// two unsigned 16-bit integers, and packs these unsigned 16-bit integers in
/// the low 16 bits of the 64-bit elements returned.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sad_epu8)
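///
/// # Examples
///
/// A sketch (not from the original source) of the sum-of-absolute-differences
/// reduction; assumes `x86_64` with baseline SSE2:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// // |1 - 3| = 2 per byte; eight bytes per half sum to 16 in each 64-bit lane.
/// let r = _mm_sad_epu8(_mm_set1_epi8(1), _mm_set1_epi8(3));
/// assert_eq!(_mm_cvtsi128_si64(r), 16);
/// # }
/// ```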
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psadbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(psadbw(a.as_u8x16(), b.as_u8x16())) }
}

/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) }
}

/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) }
}

/// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) }
}

/// Subtracts packed 64-bit integers in `b` from packed 64-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) }
}

/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`
/// using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16())) }
}

/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`
/// using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8())) }
}

/// Subtracts packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
/// integers in `a` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16())) }
}

/// Subtracts packed unsigned 16-bit integers in `b` from packed unsigned 16-bit
/// integers in `a` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8())) }
}

/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_si128)
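///
/// # Examples
///
/// An illustrative sketch (not part of the original source; assumes `x86_64`,
/// where SSE2 is baseline). Note the shift is by whole *bytes*, not bits:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// let a = _mm_setr_epi32(1, 2, 3, 4);
/// // Shifting left by 4 bytes zeroes lane 0 and moves the old lane 0 up.
/// let r = _mm_slli_si128(a, 4);
/// assert_eq!(_mm_cvtsi128_si32(r), 0);
/// # }
/// ```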
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { _mm_slli_si128_impl::<IMM8>(a) }
}

/// Implementation detail: converts the immediate argument of the
/// `_mm_slli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        if shift > 15 { i } else { 16 - shift + i }
    }
    transmute::<i8x16, _>(simd_shuffle!(
        i8x16::ZERO,
        a.as_i8x16(),
        [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    ))
}

/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bslli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        _mm_slli_si128_impl::<IMM8>(a)
    }
}

/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bsrli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        _mm_srli_si128_impl::<IMM8>(a)
    }
}

/// Shifts packed 16-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 16 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
        }
    }
}

/// Shifts packed 16-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psllw(a.as_i16x8(), count.as_i16x8())) }
}

/// Shifts packed 32-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 32 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
        }
    }
}

/// Shifts packed 32-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(pslld(a.as_i32x4(), count.as_i32x4())) }
}

/// Shifts packed 64-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 64 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
        }
    }
}

/// Shifts packed 64-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psllq(a.as_i64x2(), count.as_i64x2())) }
}

/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi16)
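///
/// # Examples
///
/// A sketch (not from the original source) contrasting the arithmetic shift
/// with a logical one; assumes `x86_64` with baseline SSE2:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// // The sign bit is replicated, so -16 >> 2 = -4 in every lane.
/// let r = _mm_srai_epi16(_mm_set1_epi16(-16), 2);
/// assert_eq!(_mm_cvtsi128_si32(r) as i16, -4);
/// # }
/// ```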
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16))) }
}

/// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psraw(a.as_i16x8(), count.as_i16x8())) }
}

/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31)))) }
}

/// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psrad(a.as_i32x4(), count.as_i32x4())) }
}

/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { _mm_srli_si128_impl::<IMM8>(a) }
}

/// Implementation detail: converts the immediate argument of the
/// `_mm_srli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    const fn mask(shift: i32, i: u32) -> u32 {
        if (shift as u32) > 15 {
            i + 16
        } else {
            i + (shift as u32)
        }
    }
    let x: i8x16 = simd_shuffle!(
        a.as_i8x16(),
        i8x16::ZERO,
        [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    );
    transmute(x)
}

/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 16 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
        }
    }
}

/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psrlw(a.as_i16x8(), count.as_i16x8())) }
}

/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 32 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
        }
    }
}

/// Shifts packed 32-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psrld(a.as_i32x4(), count.as_i32x4())) }
}

/// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 64 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
        }
    }
}

/// Shifts packed 64-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psrlq(a.as_i64x2(), count.as_i64x2())) }
}

/// Computes the bitwise AND of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_and(a, b) }
}

/// Computes the bitwise NOT of 128 bits (representing integer data) in `a` and
/// then AND with `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_si128)
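///
/// # Examples
///
/// An illustrative sketch (not part of the original source; assumes `x86_64`,
/// where SSE2 is baseline). `(!a) & b` clears in `b` every bit set in `a`:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// let a = _mm_set1_epi8(0b0000_1111);
/// let b = _mm_set1_epi8(0b0011_1100);
/// let r = _mm_andnot_si128(a, b);
/// assert_eq!(_mm_cvtsi128_si32(r) as u8, 0b0011_0000);
/// # }
/// ```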
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_and(simd_xor(_mm_set1_epi8(-1), a), b) }
}

/// Computes the bitwise OR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_or(a, b) }
}

/// Computes the bitwise XOR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_xor(a, b) }
}

/// Compares packed 8-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
}

/// Compares packed 16-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
}

/// Compares packed 32-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
}

/// Compares packed 8-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8)
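///
/// # Examples
///
/// A sketch (not from the original source) showing the all-ones/all-zeros mask
/// convention of the comparison intrinsics; assumes `x86_64` with baseline SSE2:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// // Lanes where the predicate holds are set to all ones (-1), others to 0.
/// let r = _mm_cmpgt_epi8(_mm_set1_epi8(5), _mm_set1_epi8(3));
/// assert_eq!(_mm_cvtsi128_si32(r), -1);
/// # }
/// ```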
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
}

/// Compares packed 16-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
}

/// Compares packed 32-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
}

/// Compares packed 8-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
}

/// Compares packed 16-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
}

/// Compares packed 32-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
}

/// Converts the lower two packed 32-bit integers in `a` to packed
/// double-precision (64-bit) floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_pd)
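///
/// # Examples
///
/// An illustrative sketch (not part of the original source; assumes `x86_64`,
/// where SSE2 is baseline). Only the two low lanes participate:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// let r = _mm_cvtepi32_pd(_mm_setr_epi32(1, 2, 3, 4));
/// // Lane 0 of the result is 1 converted to f64; the values 3 and 4 are dropped.
/// assert_eq!(_mm_cvtsd_f64(r), 1.0);
/// # }
/// ```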
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
    unsafe {
        let a: i32x4 = a.as_i32x4();
        simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1]))
    }
}

/// Returns `a` with its lower element replaced by `b` after converting it to
/// an `f64`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsi2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
    unsafe { simd_insert!(a, 0, b as f64) }
}

/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit)
/// floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
    unsafe { transmute(simd_cast::<_, f32x4>(a.as_i32x4())) }
}

/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtps_epi32(a: __m128) -> __m128i {
    unsafe { transmute(cvtps2dq(a)) }
}

/// Returns a vector whose lowest element is `a` and all higher elements are
/// `0`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsi32_si128(a: i32) -> __m128i {
    unsafe { transmute(i32x4::new(a, 0, 0, 0)) }
}

/// Returns the lowest element of `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi128_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
    unsafe { simd_extract!(a.as_i32x4(), 0) }
}

/// Sets packed 64-bit integers with the supplied values, from highest to
/// lowest.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi64x)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
    unsafe { transmute(i64x2::new(e0, e1)) }
}

/// Sets packed 32-bit integers with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    unsafe { transmute(i32x4::new(e0, e1, e2, e3)) }
}

/// Sets packed 16-bit integers with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    unsafe { transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) }
}

/// Sets packed 8-bit integers with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    unsafe {
        #[rustfmt::skip]
        transmute(i8x16::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
        ))
    }
}

/// Broadcasts 64-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi64x)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi64x(a: i64) -> __m128i {
    _mm_set_epi64x(a, a)
}

/// Broadcasts 32-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi32(a: i32) -> __m128i {
    _mm_set_epi32(a, a, a, a)
}

/// Broadcasts 16-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi16(a: i16) -> __m128i {
    _mm_set_epi16(a, a, a, a, a, a, a, a)
}

/// Broadcasts 8-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi8(a: i8) -> __m128i {
    _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
}

/// Sets packed 32-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi32)
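///
/// # Examples
///
/// A sketch (not from the original source) contrasting `setr` with `set`
/// argument order; assumes `x86_64` with baseline SSE2:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// // `setr` takes arguments in memory order: the first argument is lane 0...
/// assert_eq!(_mm_cvtsi128_si32(_mm_setr_epi32(1, 2, 3, 4)), 1);
/// // ...whereas `_mm_set_epi32` puts its *last* argument in lane 0.
/// assert_eq!(_mm_cvtsi128_si32(_mm_set_epi32(1, 2, 3, 4)), 4);
/// # }
/// ```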
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    _mm_set_epi32(e0, e1, e2, e3)
}

/// Sets packed 16-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
}

/// Sets packed 8-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    #[rustfmt::skip]
    _mm_set_epi8(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    )
}

/// Returns a vector with all elements set to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setzero_si128() -> __m128i {
    const { unsafe { mem::zeroed() } }
}

/// Loads a 64-bit integer from memory into the first element of the returned
/// vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
    _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
}

/// Loads 128-bits of integer data from memory into a new vector.
///
/// `mem_addr` must be aligned on a 16-byte boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
    *mem_addr
}

/// Loads 128-bits of integer data from memory into a new vector.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si128)
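///
/// # Examples
///
/// An illustrative sketch (not part of the original source; assumes `x86_64`,
/// where SSE2 is baseline). The load may start at any byte offset:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # unsafe {
/// use core::arch::x86_64::*;
/// let bytes = [1u8; 17];
/// // Reading 16 bytes from offset 1 is fine; no 16-byte alignment is needed.
/// let r = _mm_loadu_si128(bytes.as_ptr().add(1) as *const __m128i);
/// assert_eq!(_mm_cvtsi128_si32(r), 0x0101_0101);
/// # }
/// ```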
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
    let mut dst: __m128i = _mm_undefined_si128();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m128i>(),
    );
    dst
}
1270 | |
1271 | /// Conditionally store 8-bit integer elements from `a` into memory using |
1272 | /// `mask`. |
1273 | /// |
1274 | /// Elements are not stored when the highest bit is not set in the |
1275 | /// corresponding element. |
1276 | /// |
1277 | /// `mem_addr` should correspond to a 128-bit memory location and does not need |
1278 | /// to be aligned on any particular boundary. |
1279 | /// |
1280 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskmoveu_si128) |
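///
/// # Examples
///
/// A minimal usage sketch (x86_64 only): only bytes whose `mask` lane has the
/// high bit set are written; the remaining destination bytes are untouched.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// let data = _mm_set1_epi8(7);
/// // High bit set in the first lane only.
/// let mask = _mm_setr_epi8(-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
/// let mut buf = [1i8; 16];
/// unsafe { _mm_maskmoveu_si128(data, mask, buf.as_mut_ptr()) };
/// assert_eq!(buf[0], 7);
/// assert!(buf[1..].iter().all(|&b| b == 1));
/// # }
/// ```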
1281 | #[inline ] |
1282 | #[target_feature (enable = "sse2" )] |
1283 | #[cfg_attr (test, assert_instr(maskmovdqu))] |
1284 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1285 | pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) { |
1286 | maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr) |
1287 | } |
1288 | |
1289 | /// Stores 128-bits of integer data from `a` into memory. |
1290 | /// |
1291 | /// `mem_addr` must be aligned on a 16-byte boundary. |
1292 | /// |
1293 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_si128) |
1294 | #[inline ] |
1295 | #[target_feature (enable = "sse2" )] |
1296 | #[cfg_attr (test, assert_instr(movaps))] |
1297 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1298 | pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) { |
1299 | *mem_addr = a; |
1300 | } |
1301 | |
1302 | /// Stores 128-bits of integer data from `a` into memory. |
1303 | /// |
1304 | /// `mem_addr` does not need to be aligned on any particular boundary. |
1305 | /// |
1306 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si128) |
1307 | #[inline ] |
1308 | #[target_feature (enable = "sse2" )] |
1309 | #[cfg_attr (test, assert_instr(movups))] // FIXME movdqu expected |
1310 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1311 | pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) { |
mem_addr.write_unaligned(a);
1313 | } |
1314 | |
1315 | /// Stores the lower 64-bit integer `a` to a memory location. |
1316 | /// |
1317 | /// `mem_addr` does not need to be aligned on any particular boundary. |
1318 | /// |
1319 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_epi64) |
1320 | #[inline ] |
1321 | #[target_feature (enable = "sse2" )] |
1322 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1323 | pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) { |
ptr::copy_nonoverlapping(ptr::addr_of!(a) as *const u8, mem_addr as *mut u8, 8);
1325 | } |
1326 | |
1327 | /// Stores a 128-bit integer vector to a 128-bit aligned memory location. |
1328 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be |
1329 | /// used again soon). |
1330 | /// |
1331 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si128) |
1332 | /// |
1333 | /// # Safety of non-temporal stores |
1334 | /// |
1335 | /// After using this intrinsic, but before any other access to the memory that this intrinsic |
1336 | /// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In |
1337 | /// particular, functions that call this intrinsic should generally call `_mm_sfence` before they |
1338 | /// return. |
1339 | /// |
1340 | /// See [`_mm_sfence`] for details. |
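///
/// # Examples
///
/// A minimal sketch of the required pattern (x86_64 only): the non-temporal
/// store is followed by `_mm_sfence` before the memory is accessed again.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// #[repr(align(16))]
/// struct Aligned([i32; 4]);
///
/// let mut dst = Aligned([0; 4]);
/// let v = _mm_setr_epi32(1, 2, 3, 4);
/// unsafe {
///     _mm_stream_si128(dst.0.as_mut_ptr() as *mut __m128i, v);
///     _mm_sfence(); // required before any other access to `dst`
/// }
/// assert_eq!(dst.0, [1, 2, 3, 4]);
/// # }
/// ```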
1341 | #[inline ] |
1342 | #[target_feature (enable = "sse2" )] |
1343 | #[cfg_attr (test, assert_instr(movntdq))] |
1344 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1345 | pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) { |
1346 | crate::arch::asm!( |
1347 | vps!("movntdq" , ",{a}" ), |
1348 | p = in(reg) mem_addr, |
1349 | a = in(xmm_reg) a, |
1350 | options(nostack, preserves_flags), |
1351 | ); |
1352 | } |
1353 | |
1354 | /// Stores a 32-bit integer value in the specified memory location. |
1355 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be |
1356 | /// used again soon). |
1357 | /// |
1358 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si32) |
1359 | /// |
1360 | /// # Safety of non-temporal stores |
1361 | /// |
1362 | /// After using this intrinsic, but before any other access to the memory that this intrinsic |
1363 | /// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In |
1364 | /// particular, functions that call this intrinsic should generally call `_mm_sfence` before they |
1365 | /// return. |
1366 | /// |
1367 | /// See [`_mm_sfence`] for details. |
1368 | #[inline ] |
1369 | #[target_feature (enable = "sse2" )] |
1370 | #[cfg_attr (test, assert_instr(movnti))] |
1371 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1372 | pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) { |
1373 | crate::arch::asm!( |
1374 | vps!("movnti" , ",{a:e}" ), // `:e` for 32bit value |
1375 | p = in(reg) mem_addr, |
1376 | a = in(reg) a, |
1377 | options(nostack, preserves_flags), |
1378 | ); |
1379 | } |
1380 | |
1381 | /// Returns a vector where the low element is extracted from `a` and its upper |
1382 | /// element is zero. |
1383 | /// |
1384 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_epi64) |
1385 | #[inline ] |
1386 | #[target_feature (enable = "sse2" )] |
1387 | // FIXME movd on msvc, movd on i686 |
1388 | #[cfg_attr ( |
1389 | all(test, not(target_env = "msvc" ), target_arch = "x86_64" ), |
1390 | assert_instr(movq) |
1391 | )] |
1392 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1393 | pub fn _mm_move_epi64(a: __m128i) -> __m128i { |
1394 | unsafe { |
1395 | let r: i64x2 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 2]); |
transmute(r)
1397 | } |
1398 | } |
1399 | |
1400 | /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers |
1401 | /// using signed saturation. |
1402 | /// |
1403 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi16) |
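///
/// # Examples
///
/// A minimal usage sketch (x86_64 only): values outside the `i8` range
/// saturate instead of wrapping, and the result holds the lanes of `a`
/// followed by the lanes of `b`.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// let a = _mm_setr_epi16(1000, -1000, 5, -5, 0, 0, 0, 0);
/// let b = _mm_set1_epi16(0);
/// let packed = _mm_packs_epi16(a, b);
/// let mut out = [0i8; 16];
/// unsafe { _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, packed) };
/// assert_eq!(&out[..4], &[127, -128, 5, -5]);
/// assert!(out[4..].iter().all(|&x| x == 0));
/// # }
/// ```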
1404 | #[inline ] |
1405 | #[target_feature (enable = "sse2" )] |
1406 | #[cfg_attr (test, assert_instr(packsswb))] |
1407 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1408 | pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i { |
unsafe { transmute(packsswb(a.as_i16x8(), b.as_i16x8())) }
1410 | } |
1411 | |
1412 | /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers |
1413 | /// using signed saturation. |
1414 | /// |
1415 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi32) |
1416 | #[inline ] |
1417 | #[target_feature (enable = "sse2" )] |
1418 | #[cfg_attr (test, assert_instr(packssdw))] |
1419 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1420 | pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i { |
unsafe { transmute(packssdw(a.as_i32x4(), b.as_i32x4())) }
1422 | } |
1423 | |
1424 | /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers |
1425 | /// using unsigned saturation. |
1426 | /// |
1427 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi16) |
1428 | #[inline ] |
1429 | #[target_feature (enable = "sse2" )] |
1430 | #[cfg_attr (test, assert_instr(packuswb))] |
1431 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1432 | pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i { |
unsafe { transmute(packuswb(a.as_i16x8(), b.as_i16x8())) }
1434 | } |
1435 | |
/// Returns the 16-bit element of `a` selected by `IMM8`, zero-extended to an
/// `i32`.
1437 | /// |
1438 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi16) |
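///
/// # Examples
///
/// A minimal usage sketch (x86_64 only): the lane index is supplied as a
/// const generic.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// let a = _mm_setr_epi16(10, 11, 12, 13, 14, 15, 16, 17);
/// // Lane 3 holds the value 13.
/// assert_eq!(_mm_extract_epi16::<3>(a), 13);
/// # }
/// ```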
1439 | #[inline ] |
1440 | #[target_feature (enable = "sse2" )] |
1441 | #[cfg_attr (test, assert_instr(pextrw, IMM8 = 7))] |
1442 | #[rustc_legacy_const_generics (1)] |
1443 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1444 | pub fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 { |
1445 | static_assert_uimm_bits!(IMM8, 3); |
1446 | unsafe { simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32 } |
1447 | } |
1448 | |
/// Returns a new vector where the element of `a` selected by `IMM8` is
/// replaced with `i`.
1450 | /// |
1451 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi16) |
1452 | #[inline ] |
1453 | #[target_feature (enable = "sse2" )] |
1454 | #[cfg_attr (test, assert_instr(pinsrw, IMM8 = 7))] |
1455 | #[rustc_legacy_const_generics (2)] |
1456 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1457 | pub fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i { |
1458 | static_assert_uimm_bits!(IMM8, 3); |
unsafe { transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16)) }
1460 | } |
1461 | |
/// Returns a mask of the most significant bit of each element in `a`.
///
/// The mask is stored in the 16 least significant bits of the return value.
/// All other bits are set to `0`.
1463 | /// |
1464 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_epi8) |
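///
/// # Examples
///
/// A minimal usage sketch (x86_64 only): each byte whose sign bit is set
/// contributes a `1` to the corresponding bit of the mask.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// let a = _mm_setr_epi8(-1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
/// assert_eq!(_mm_movemask_epi8(a), 0b101);
/// # }
/// ```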
1465 | #[inline ] |
1466 | #[target_feature (enable = "sse2" )] |
1467 | #[cfg_attr (test, assert_instr(pmovmskb))] |
1468 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1469 | pub fn _mm_movemask_epi8(a: __m128i) -> i32 { |
1470 | unsafe { |
1471 | let z: i8x16 = i8x16::ZERO; |
let m: i8x16 = simd_lt(a.as_i8x16(), z);
1473 | simd_bitmask::<_, u16>(m) as u32 as i32 |
1474 | } |
1475 | } |
1476 | |
1477 | /// Shuffles 32-bit integers in `a` using the control in `IMM8`. |
1478 | /// |
1479 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi32) |
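///
/// # Examples
///
/// A minimal usage sketch (x86_64 only): each 2-bit field of `IMM8` selects a
/// source lane, from the lowest result lane to the highest, so `0b00_00_00_00`
/// broadcasts lane 0 and `0b00_01_10_11` reverses the lanes.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// let a = _mm_setr_epi32(10, 20, 30, 40);
/// let mut out = [0i32; 4];
///
/// let broadcast = _mm_shuffle_epi32::<0b00_00_00_00>(a);
/// unsafe { _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, broadcast) };
/// assert_eq!(out, [10, 10, 10, 10]);
///
/// let reversed = _mm_shuffle_epi32::<0b00_01_10_11>(a);
/// unsafe { _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, reversed) };
/// assert_eq!(out, [40, 30, 20, 10]);
/// # }
/// ```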
1480 | #[inline ] |
1481 | #[target_feature (enable = "sse2" )] |
1482 | #[cfg_attr (test, assert_instr(pshufd, IMM8 = 9))] |
1483 | #[rustc_legacy_const_generics (1)] |
1484 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1485 | pub fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i { |
1486 | static_assert_uimm_bits!(IMM8, 8); |
1487 | unsafe { |
1488 | let a: i32x4 = a.as_i32x4(); |
1489 | let x: i32x4 = simd_shuffle!( |
1490 | a, |
1491 | a, |
1492 | [ |
1493 | IMM8 as u32 & 0b11, |
1494 | (IMM8 as u32 >> 2) & 0b11, |
1495 | (IMM8 as u32 >> 4) & 0b11, |
1496 | (IMM8 as u32 >> 6) & 0b11, |
1497 | ], |
1498 | ); |
transmute(x)
1500 | } |
1501 | } |
1502 | |
1503 | /// Shuffles 16-bit integers in the high 64 bits of `a` using the control in |
1504 | /// `IMM8`. |
1505 | /// |
/// Puts the results in the high 64 bits of the returned vector, with the low
/// 64 bits copied from `a`.
1508 | /// |
1509 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflehi_epi16) |
1510 | #[inline ] |
1511 | #[target_feature (enable = "sse2" )] |
1512 | #[cfg_attr (test, assert_instr(pshufhw, IMM8 = 9))] |
1513 | #[rustc_legacy_const_generics (1)] |
1514 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1515 | pub fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i { |
1516 | static_assert_uimm_bits!(IMM8, 8); |
1517 | unsafe { |
1518 | let a: i16x8 = a.as_i16x8(); |
1519 | let x: i16x8 = simd_shuffle!( |
1520 | a, |
1521 | a, |
1522 | [ |
1523 | 0, |
1524 | 1, |
1525 | 2, |
1526 | 3, |
1527 | (IMM8 as u32 & 0b11) + 4, |
1528 | ((IMM8 as u32 >> 2) & 0b11) + 4, |
1529 | ((IMM8 as u32 >> 4) & 0b11) + 4, |
1530 | ((IMM8 as u32 >> 6) & 0b11) + 4, |
1531 | ], |
1532 | ); |
transmute(x)
1534 | } |
1535 | } |
1536 | |
1537 | /// Shuffles 16-bit integers in the low 64 bits of `a` using the control in |
1538 | /// `IMM8`. |
1539 | /// |
/// Puts the results in the low 64 bits of the returned vector, with the high
/// 64 bits copied from `a`.
1542 | /// |
1543 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflelo_epi16) |
1544 | #[inline ] |
1545 | #[target_feature (enable = "sse2" )] |
1546 | #[cfg_attr (test, assert_instr(pshuflw, IMM8 = 9))] |
1547 | #[rustc_legacy_const_generics (1)] |
1548 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1549 | pub fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i { |
1550 | static_assert_uimm_bits!(IMM8, 8); |
1551 | unsafe { |
1552 | let a: i16x8 = a.as_i16x8(); |
1553 | let x: i16x8 = simd_shuffle!( |
1554 | a, |
1555 | a, |
1556 | [ |
1557 | IMM8 as u32 & 0b11, |
1558 | (IMM8 as u32 >> 2) & 0b11, |
1559 | (IMM8 as u32 >> 4) & 0b11, |
1560 | (IMM8 as u32 >> 6) & 0b11, |
1561 | 4, |
1562 | 5, |
1563 | 6, |
1564 | 7, |
1565 | ], |
1566 | ); |
transmute(x)
1568 | } |
1569 | } |
1570 | |
/// Unpacks and interleaves 8-bit integers from the high half of `a` and `b`.
1572 | /// |
1573 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi8) |
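///
/// # Examples
///
/// A minimal usage sketch (x86_64 only): the high eight bytes of `a` and `b`
/// are interleaved, starting with the byte from `a`.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let b = _mm_set1_epi8(-1);
/// let hi = _mm_unpackhi_epi8(a, b);
/// let mut out = [0i8; 16];
/// unsafe { _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, hi) };
/// assert_eq!(out, [8, -1, 9, -1, 10, -1, 11, -1, 12, -1, 13, -1, 14, -1, 15, -1]);
/// # }
/// ```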
1574 | #[inline ] |
1575 | #[target_feature (enable = "sse2" )] |
1576 | #[cfg_attr (test, assert_instr(punpckhbw))] |
1577 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1578 | pub fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i { |
1579 | unsafe { |
transmute::<i8x16, _>(simd_shuffle!(
1581 | a.as_i8x16(), |
1582 | b.as_i8x16(), |
1583 | [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31], |
1584 | )) |
1585 | } |
1586 | } |
1587 | |
/// Unpacks and interleaves 16-bit integers from the high half of `a` and `b`.
1589 | /// |
1590 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi16) |
1591 | #[inline ] |
1592 | #[target_feature (enable = "sse2" )] |
1593 | #[cfg_attr (test, assert_instr(punpckhwd))] |
1594 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1595 | pub fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i { |
1596 | unsafe { |
1597 | let x: i16x8 = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]); |
transmute::<i16x8, _>(x)
1599 | } |
1600 | } |
1601 | |
/// Unpacks and interleaves 32-bit integers from the high half of `a` and `b`.
1603 | /// |
1604 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi32) |
1605 | #[inline ] |
1606 | #[target_feature (enable = "sse2" )] |
1607 | #[cfg_attr (test, assert_instr(unpckhps))] |
1608 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1609 | pub fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i { |
unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) }
1611 | } |
1612 | |
/// Unpacks and interleaves 64-bit integers from the high half of `a` and `b`.
1614 | /// |
1615 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi64) |
1616 | #[inline ] |
1617 | #[target_feature (enable = "sse2" )] |
1618 | #[cfg_attr (test, assert_instr(unpckhpd))] |
1619 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1620 | pub fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i { |
unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3])) }
1622 | } |
1623 | |
/// Unpacks and interleaves 8-bit integers from the low half of `a` and `b`.
1625 | /// |
1626 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi8) |
1627 | #[inline ] |
1628 | #[target_feature (enable = "sse2" )] |
1629 | #[cfg_attr (test, assert_instr(punpcklbw))] |
1630 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1631 | pub fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i { |
1632 | unsafe { |
transmute::<i8x16, _>(simd_shuffle!(
1634 | a.as_i8x16(), |
1635 | b.as_i8x16(), |
1636 | [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23], |
1637 | )) |
1638 | } |
1639 | } |
1640 | |
/// Unpacks and interleaves 16-bit integers from the low half of `a` and `b`.
1642 | /// |
1643 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi16) |
1644 | #[inline ] |
1645 | #[target_feature (enable = "sse2" )] |
1646 | #[cfg_attr (test, assert_instr(punpcklwd))] |
1647 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1648 | pub fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i { |
1649 | unsafe { |
1650 | let x: i16x8 = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]); |
transmute::<i16x8, _>(x)
1652 | } |
1653 | } |
1654 | |
/// Unpacks and interleaves 32-bit integers from the low half of `a` and `b`.
1656 | /// |
1657 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi32) |
1658 | #[inline ] |
1659 | #[target_feature (enable = "sse2" )] |
1660 | #[cfg_attr (test, assert_instr(unpcklps))] |
1661 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1662 | pub fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i { |
unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) }
1664 | } |
1665 | |
/// Unpacks and interleaves 64-bit integers from the low half of `a` and `b`.
1667 | /// |
1668 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi64) |
1669 | #[inline ] |
1670 | #[target_feature (enable = "sse2" )] |
1671 | #[cfg_attr (all(test, not(target_env = "msvc" )), assert_instr(movlhps))] |
1672 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1673 | pub fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i { |
unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2])) }
1675 | } |
1676 | |
1677 | /// Returns a new vector with the low element of `a` replaced by the sum of the |
1678 | /// low elements of `a` and `b`. |
1679 | /// |
1680 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_sd) |
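///
/// # Examples
///
/// A minimal usage sketch (x86_64 only): only the low lane is summed; the
/// high lane is copied from `a`.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// let a = _mm_setr_pd(1.0, 10.0);
/// let b = _mm_setr_pd(2.0, 20.0);
/// let r = _mm_add_sd(a, b);
/// let mut out = [0.0f64; 2];
/// unsafe { _mm_storeu_pd(out.as_mut_ptr(), r) };
/// assert_eq!(out, [3.0, 10.0]);
/// # }
/// ```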
1681 | #[inline ] |
1682 | #[target_feature (enable = "sse2" )] |
1683 | #[cfg_attr (test, assert_instr(addsd))] |
1684 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1685 | pub fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d { |
1686 | unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) } |
1687 | } |
1688 | |
1689 | /// Adds packed double-precision (64-bit) floating-point elements in `a` and |
1690 | /// `b`. |
1691 | /// |
1692 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_pd) |
1693 | #[inline ] |
1694 | #[target_feature (enable = "sse2" )] |
1695 | #[cfg_attr (test, assert_instr(addpd))] |
1696 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1697 | pub fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { simd_add(a, b) }
1699 | } |
1700 | |
/// Returns a new vector with the low element of `a` replaced by the result of
/// dividing the lower element of `a` by the lower element of `b`.
1703 | /// |
1704 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_sd) |
1705 | #[inline ] |
1706 | #[target_feature (enable = "sse2" )] |
1707 | #[cfg_attr (test, assert_instr(divsd))] |
1708 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1709 | pub fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d { |
1710 | unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) } |
1711 | } |
1712 | |
/// Divides packed double-precision (64-bit) floating-point elements in `a` by
/// the corresponding packed elements in `b`.
1715 | /// |
1716 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_pd) |
1717 | #[inline ] |
1718 | #[target_feature (enable = "sse2" )] |
1719 | #[cfg_attr (test, assert_instr(divpd))] |
1720 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1721 | pub fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { simd_div(a, b) }
1723 | } |
1724 | |
1725 | /// Returns a new vector with the low element of `a` replaced by the maximum |
1726 | /// of the lower elements of `a` and `b`. |
1727 | /// |
1728 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_sd) |
1729 | #[inline ] |
1730 | #[target_feature (enable = "sse2" )] |
1731 | #[cfg_attr (test, assert_instr(maxsd))] |
1732 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1733 | pub fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d { |
1734 | unsafe { maxsd(a, b) } |
1735 | } |
1736 | |
1737 | /// Returns a new vector with the maximum values from corresponding elements in |
1738 | /// `a` and `b`. |
1739 | /// |
1740 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_pd) |
1741 | #[inline ] |
1742 | #[target_feature (enable = "sse2" )] |
1743 | #[cfg_attr (test, assert_instr(maxpd))] |
1744 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1745 | pub fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d { |
1746 | unsafe { maxpd(a, b) } |
1747 | } |
1748 | |
1749 | /// Returns a new vector with the low element of `a` replaced by the minimum |
1750 | /// of the lower elements of `a` and `b`. |
1751 | /// |
1752 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_sd) |
1753 | #[inline ] |
1754 | #[target_feature (enable = "sse2" )] |
1755 | #[cfg_attr (test, assert_instr(minsd))] |
1756 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1757 | pub fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d { |
1758 | unsafe { minsd(a, b) } |
1759 | } |
1760 | |
1761 | /// Returns a new vector with the minimum values from corresponding elements in |
1762 | /// `a` and `b`. |
1763 | /// |
1764 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_pd) |
1765 | #[inline ] |
1766 | #[target_feature (enable = "sse2" )] |
1767 | #[cfg_attr (test, assert_instr(minpd))] |
1768 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1769 | pub fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d { |
1770 | unsafe { minpd(a, b) } |
1771 | } |
1772 | |
1773 | /// Returns a new vector with the low element of `a` replaced by multiplying the |
1774 | /// low elements of `a` and `b`. |
1775 | /// |
1776 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_sd) |
1777 | #[inline ] |
1778 | #[target_feature (enable = "sse2" )] |
1779 | #[cfg_attr (test, assert_instr(mulsd))] |
1780 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1781 | pub fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d { |
1782 | unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) } |
1783 | } |
1784 | |
1785 | /// Multiplies packed double-precision (64-bit) floating-point elements in `a` |
1786 | /// and `b`. |
1787 | /// |
1788 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_pd) |
1789 | #[inline ] |
1790 | #[target_feature (enable = "sse2" )] |
1791 | #[cfg_attr (test, assert_instr(mulpd))] |
1792 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1793 | pub fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { simd_mul(a, b) }
1795 | } |
1796 | |
/// Returns a new vector with the low element of `a` replaced by the square
/// root of the lower element of `b`.
1799 | /// |
1800 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_sd) |
1801 | #[inline ] |
1802 | #[target_feature (enable = "sse2" )] |
1803 | #[cfg_attr (test, assert_instr(sqrtsd))] |
1804 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1805 | pub fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d { |
1806 | unsafe { simd_insert!(a, 0, sqrtf64(_mm_cvtsd_f64(b))) } |
1807 | } |
1808 | |
1809 | /// Returns a new vector with the square root of each of the values in `a`. |
1810 | /// |
1811 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_pd) |
1812 | #[inline ] |
1813 | #[target_feature (enable = "sse2" )] |
1814 | #[cfg_attr (test, assert_instr(sqrtpd))] |
1815 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1816 | pub fn _mm_sqrt_pd(a: __m128d) -> __m128d { |
1817 | unsafe { simd_fsqrt(a) } |
1818 | } |
1819 | |
/// Returns a new vector with the low element of `a` replaced by subtracting
/// the low element of `b` from the low element of `a`.
1822 | /// |
1823 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_sd) |
1824 | #[inline ] |
1825 | #[target_feature (enable = "sse2" )] |
1826 | #[cfg_attr (test, assert_instr(subsd))] |
1827 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1828 | pub fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d { |
1829 | unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) } |
1830 | } |
1831 | |
/// Subtracts packed double-precision (64-bit) floating-point elements in `b`
/// from `a`.
1834 | /// |
1835 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_pd) |
1836 | #[inline ] |
1837 | #[target_feature (enable = "sse2" )] |
1838 | #[cfg_attr (test, assert_instr(subpd))] |
1839 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1840 | pub fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { simd_sub(a, b) }
1842 | } |
1843 | |
1844 | /// Computes the bitwise AND of packed double-precision (64-bit) floating-point |
1845 | /// elements in `a` and `b`. |
1846 | /// |
1847 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_pd) |
1848 | #[inline ] |
1849 | #[target_feature (enable = "sse2" )] |
1850 | #[cfg_attr (test, assert_instr(andps))] |
1851 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1852 | pub fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d { |
1853 | unsafe { |
let a: __m128i = transmute(a);
let b: __m128i = transmute(b);
transmute(_mm_and_si128(a, b))
1857 | } |
1858 | } |
1859 | |
1860 | /// Computes the bitwise NOT of `a` and then AND with `b`. |
1861 | /// |
1862 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_pd) |
1863 | #[inline ] |
1864 | #[target_feature (enable = "sse2" )] |
1865 | #[cfg_attr (test, assert_instr(andnps))] |
1866 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1867 | pub fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d { |
1868 | unsafe { |
let a: __m128i = transmute(a);
let b: __m128i = transmute(b);
transmute(_mm_andnot_si128(a, b))
1872 | } |
1873 | } |
1874 | |
1875 | /// Computes the bitwise OR of `a` and `b`. |
1876 | /// |
1877 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_pd) |
1878 | #[inline ] |
1879 | #[target_feature (enable = "sse2" )] |
1880 | #[cfg_attr (test, assert_instr(orps))] |
1881 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1882 | pub fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d { |
1883 | unsafe { |
let a: __m128i = transmute(a);
let b: __m128i = transmute(b);
transmute(_mm_or_si128(a, b))
1887 | } |
1888 | } |
1889 | |
1890 | /// Computes the bitwise XOR of `a` and `b`. |
1891 | /// |
1892 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_pd) |
1893 | #[inline ] |
1894 | #[target_feature (enable = "sse2" )] |
1895 | #[cfg_attr (test, assert_instr(xorps))] |
1896 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1897 | pub fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d { |
1898 | unsafe { |
let a: __m128i = transmute(a);
let b: __m128i = transmute(b);
transmute(_mm_xor_si128(a, b))
1902 | } |
1903 | } |
1904 | |
1905 | /// Returns a new vector with the low element of `a` replaced by the equality |
1906 | /// comparison of the lower elements of `a` and `b`. |
1907 | /// |
1908 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_sd) |
1909 | #[inline ] |
1910 | #[target_feature (enable = "sse2" )] |
1911 | #[cfg_attr (test, assert_instr(cmpeqsd))] |
1912 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1913 | pub fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmpsd(a, b, 0) }
1915 | } |
1916 | |
1917 | /// Returns a new vector with the low element of `a` replaced by the less-than |
1918 | /// comparison of the lower elements of `a` and `b`. |
1919 | /// |
1920 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_sd) |
1921 | #[inline ] |
1922 | #[target_feature (enable = "sse2" )] |
1923 | #[cfg_attr (test, assert_instr(cmpltsd))] |
1924 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1925 | pub fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmpsd(a, b, 1) }
1927 | } |
1928 | |
1929 | /// Returns a new vector with the low element of `a` replaced by the |
1930 | /// less-than-or-equal comparison of the lower elements of `a` and `b`. |
1931 | /// |
1932 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_sd) |
1933 | #[inline ] |
1934 | #[target_feature (enable = "sse2" )] |
1935 | #[cfg_attr (test, assert_instr(cmplesd))] |
1936 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1937 | pub fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmpsd(a, b, 2) }
1939 | } |
1940 | |
1941 | /// Returns a new vector with the low element of `a` replaced by the |
1942 | /// greater-than comparison of the lower elements of `a` and `b`. |
1943 | /// |
1944 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_sd) |
1945 | #[inline ] |
1946 | #[target_feature (enable = "sse2" )] |
1947 | #[cfg_attr (test, assert_instr(cmpltsd))] |
1948 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1949 | pub fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d { |
1950 | unsafe { simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64)) } |
1951 | } |
1952 | |
1953 | /// Returns a new vector with the low element of `a` replaced by the |
1954 | /// greater-than-or-equal comparison of the lower elements of `a` and `b`. |
1955 | /// |
1956 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_sd) |
1957 | #[inline ] |
1958 | #[target_feature (enable = "sse2" )] |
1959 | #[cfg_attr (test, assert_instr(cmplesd))] |
1960 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1961 | pub fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d { |
1962 | unsafe { simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64)) } |
1963 | } |
1964 | |
/// Returns a new vector with the low element of `a` replaced by the result
/// of checking the lower elements of `a` and `b` for `NaN`. The result is
/// `0xFFFFFFFFFFFFFFFF` if neither is `NaN`, and `0` otherwise.
1969 | /// |
1970 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_sd) |
1971 | #[inline ] |
1972 | #[target_feature (enable = "sse2" )] |
1973 | #[cfg_attr (test, assert_instr(cmpordsd))] |
1974 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1975 | pub fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmpsd(a, b, 7) }
1977 | } |
1978 | |
/// Returns a new vector with the low element of `a` replaced by the result
/// of checking the lower elements of `a` and `b` for `NaN`. The result is
/// `0xFFFFFFFFFFFFFFFF` if either is `NaN`, and `0` otherwise.
1982 | /// |
1983 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_sd) |
1984 | #[inline ] |
1985 | #[target_feature (enable = "sse2" )] |
1986 | #[cfg_attr (test, assert_instr(cmpunordsd))] |
1987 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1988 | pub fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmpsd(a, b, 3) }
1990 | } |
1991 | |
1992 | /// Returns a new vector with the low element of `a` replaced by the not-equal |
1993 | /// comparison of the lower elements of `a` and `b`. |
1994 | /// |
1995 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_sd) |
1996 | #[inline ] |
1997 | #[target_feature (enable = "sse2" )] |
1998 | #[cfg_attr (test, assert_instr(cmpneqsd))] |
1999 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2000 | pub fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmpsd(a, b, 4) }
2002 | } |
2003 | |
2004 | /// Returns a new vector with the low element of `a` replaced by the |
2005 | /// not-less-than comparison of the lower elements of `a` and `b`. |
2006 | /// |
2007 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_sd) |
2008 | #[inline ] |
2009 | #[target_feature (enable = "sse2" )] |
2010 | #[cfg_attr (test, assert_instr(cmpnltsd))] |
2011 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2012 | pub fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmpsd(a, b, 5) }
2014 | } |
2015 | |
2016 | /// Returns a new vector with the low element of `a` replaced by the |
2017 | /// not-less-than-or-equal comparison of the lower elements of `a` and `b`. |
2018 | /// |
2019 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_sd) |
2020 | #[inline ] |
2021 | #[target_feature (enable = "sse2" )] |
2022 | #[cfg_attr (test, assert_instr(cmpnlesd))] |
2023 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2024 | pub fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmpsd(a, b, 6) }
2026 | } |
2027 | |
2028 | /// Returns a new vector with the low element of `a` replaced by the |
2029 | /// not-greater-than comparison of the lower elements of `a` and `b`. |
2030 | /// |
2031 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_sd) |
2032 | #[inline ] |
2033 | #[target_feature (enable = "sse2" )] |
2034 | #[cfg_attr (test, assert_instr(cmpnltsd))] |
2035 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2036 | pub fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d { |
2037 | unsafe { simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64)) } |
2038 | } |
2039 | |
2040 | /// Returns a new vector with the low element of `a` replaced by the |
2041 | /// not-greater-than-or-equal comparison of the lower elements of `a` and `b`. |
2042 | /// |
2043 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_sd) |
2044 | #[inline ] |
2045 | #[target_feature (enable = "sse2" )] |
2046 | #[cfg_attr (test, assert_instr(cmpnlesd))] |
2047 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2048 | pub fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d { |
2049 | unsafe { simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64)) } |
2050 | } |
2051 | |
2052 | /// Compares corresponding elements in `a` and `b` for equality. |
2053 | /// |
2054 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_pd) |
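///
/// # Examples
///
/// A minimal usage sketch (x86_64 only): each result lane is all ones when
/// the comparison holds and all zeros otherwise, which `_mm_movemask_pd` can
/// summarize as a bitmask.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// let a = _mm_setr_pd(1.0, 2.0);
/// let b = _mm_setr_pd(1.0, 3.0);
/// let eq = _mm_cmpeq_pd(a, b);
/// // Only the low lane compared equal.
/// assert_eq!(_mm_movemask_pd(eq), 0b01);
/// # }
/// ```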
2055 | #[inline ] |
2056 | #[target_feature (enable = "sse2" )] |
2057 | #[cfg_attr (test, assert_instr(cmpeqpd))] |
2058 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2059 | pub fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmppd(a, b, 0) }
2061 | } |
2062 | |
2063 | /// Compares corresponding elements in `a` and `b` for less-than. |
2064 | /// |
2065 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_pd) |
2066 | #[inline ] |
2067 | #[target_feature (enable = "sse2" )] |
2068 | #[cfg_attr (test, assert_instr(cmpltpd))] |
2069 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2070 | pub fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmppd(a, b, 1) }
2072 | } |
2073 | |
/// Compares corresponding elements in `a` and `b` for less-than-or-equal.
2075 | /// |
2076 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_pd) |
2077 | #[inline ] |
2078 | #[target_feature (enable = "sse2" )] |
2079 | #[cfg_attr (test, assert_instr(cmplepd))] |
2080 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2081 | pub fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmppd(a, b, 2) }
2083 | } |
2084 | |
2085 | /// Compares corresponding elements in `a` and `b` for greater-than. |
2086 | /// |
2087 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_pd) |
2088 | #[inline ] |
2089 | #[target_feature (enable = "sse2" )] |
2090 | #[cfg_attr (test, assert_instr(cmpltpd))] |
2091 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2092 | pub fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d { |
_mm_cmplt_pd(b, a)
2094 | } |
2095 | |
2096 | /// Compares corresponding elements in `a` and `b` for greater-than-or-equal. |
2097 | /// |
2098 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_pd) |
2099 | #[inline ] |
2100 | #[target_feature (enable = "sse2" )] |
2101 | #[cfg_attr (test, assert_instr(cmplepd))] |
2102 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2103 | pub fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d { |
_mm_cmple_pd(b, a)
2105 | } |
2106 | |
2107 | /// Compares corresponding elements in `a` and `b` to see if neither is `NaN`. |
2108 | /// |
2109 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_pd) |
2110 | #[inline ] |
2111 | #[target_feature (enable = "sse2" )] |
2112 | #[cfg_attr (test, assert_instr(cmpordpd))] |
2113 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2114 | pub fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmppd(a, b, 7) }
2116 | } |
2117 | |
2118 | /// Compares corresponding elements in `a` and `b` to see if either is `NaN`. |
2119 | /// |
2120 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_pd) |
2121 | #[inline ] |
2122 | #[target_feature (enable = "sse2" )] |
2123 | #[cfg_attr (test, assert_instr(cmpunordpd))] |
2124 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2125 | pub fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmppd(a, b, 3) }
2127 | } |
2128 | |
2129 | /// Compares corresponding elements in `a` and `b` for not-equal. |
2130 | /// |
2131 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_pd) |
2132 | #[inline ] |
2133 | #[target_feature (enable = "sse2" )] |
2134 | #[cfg_attr (test, assert_instr(cmpneqpd))] |
2135 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2136 | pub fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmppd(a, b, 4) }
2138 | } |
2139 | |
2140 | /// Compares corresponding elements in `a` and `b` for not-less-than. |
2141 | /// |
2142 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_pd) |
2143 | #[inline ] |
2144 | #[target_feature (enable = "sse2" )] |
2145 | #[cfg_attr (test, assert_instr(cmpnltpd))] |
2146 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2147 | pub fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmppd(a, b, 5) }
2149 | } |
2150 | |
2151 | /// Compares corresponding elements in `a` and `b` for not-less-than-or-equal. |
2152 | /// |
2153 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_pd) |
2154 | #[inline ] |
2155 | #[target_feature (enable = "sse2" )] |
2156 | #[cfg_attr (test, assert_instr(cmpnlepd))] |
2157 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2158 | pub fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { cmppd(a, b, 6) }
2160 | } |
2161 | |
2162 | /// Compares corresponding elements in `a` and `b` for not-greater-than. |
2163 | /// |
2164 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_pd) |
2165 | #[inline ] |
2166 | #[target_feature (enable = "sse2" )] |
2167 | #[cfg_attr (test, assert_instr(cmpnltpd))] |
2168 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2169 | pub fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d { |
_mm_cmpnlt_pd(b, a)
2171 | } |
2172 | |
2173 | /// Compares corresponding elements in `a` and `b` for |
2174 | /// not-greater-than-or-equal. |
2175 | /// |
2176 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_pd) |
2177 | #[inline ] |
2178 | #[target_feature (enable = "sse2" )] |
2179 | #[cfg_attr (test, assert_instr(cmpnlepd))] |
2180 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2181 | pub fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d { |
_mm_cmpnle_pd(b, a)
2183 | } |
2184 | |
2185 | /// Compares the lower element of `a` and `b` for equality. |
2186 | /// |
2187 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comieq_sd) |
2188 | #[inline ] |
2189 | #[target_feature (enable = "sse2" )] |
2190 | #[cfg_attr (test, assert_instr(comisd))] |
2191 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2192 | pub fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 { |
2193 | unsafe { comieqsd(a, b) } |
2194 | } |
2195 | |
2196 | /// Compares the lower element of `a` and `b` for less-than. |
2197 | /// |
2198 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comilt_sd) |
2199 | #[inline ] |
2200 | #[target_feature (enable = "sse2" )] |
2201 | #[cfg_attr (test, assert_instr(comisd))] |
2202 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2203 | pub fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 { |
2204 | unsafe { comiltsd(a, b) } |
2205 | } |
2206 | |
2207 | /// Compares the lower element of `a` and `b` for less-than-or-equal. |
2208 | /// |
2209 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comile_sd) |
2210 | #[inline ] |
2211 | #[target_feature (enable = "sse2" )] |
2212 | #[cfg_attr (test, assert_instr(comisd))] |
2213 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2214 | pub fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 { |
2215 | unsafe { comilesd(a, b) } |
2216 | } |
2217 | |
2218 | /// Compares the lower element of `a` and `b` for greater-than. |
2219 | /// |
2220 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comigt_sd) |
2221 | #[inline ] |
2222 | #[target_feature (enable = "sse2" )] |
2223 | #[cfg_attr (test, assert_instr(comisd))] |
2224 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2225 | pub fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 { |
2226 | unsafe { comigtsd(a, b) } |
2227 | } |
2228 | |
2229 | /// Compares the lower element of `a` and `b` for greater-than-or-equal. |
2230 | /// |
2231 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comige_sd) |
2232 | #[inline ] |
2233 | #[target_feature (enable = "sse2" )] |
2234 | #[cfg_attr (test, assert_instr(comisd))] |
2235 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2236 | pub fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 { |
2237 | unsafe { comigesd(a, b) } |
2238 | } |
2239 | |
2240 | /// Compares the lower element of `a` and `b` for not-equal. |
2241 | /// |
2242 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comineq_sd) |
2243 | #[inline ] |
2244 | #[target_feature (enable = "sse2" )] |
2245 | #[cfg_attr (test, assert_instr(comisd))] |
2246 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2247 | pub fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 { |
2248 | unsafe { comineqsd(a, b) } |
2249 | } |
2250 | |
2251 | /// Compares the lower element of `a` and `b` for equality. |
2252 | /// |
2253 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomieq_sd) |
2254 | #[inline ] |
2255 | #[target_feature (enable = "sse2" )] |
2256 | #[cfg_attr (test, assert_instr(ucomisd))] |
2257 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2258 | pub fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 { |
2259 | unsafe { ucomieqsd(a, b) } |
2260 | } |
2261 | |
2262 | /// Compares the lower element of `a` and `b` for less-than. |
2263 | /// |
2264 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomilt_sd) |
2265 | #[inline ] |
2266 | #[target_feature (enable = "sse2" )] |
2267 | #[cfg_attr (test, assert_instr(ucomisd))] |
2268 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2269 | pub fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 { |
2270 | unsafe { ucomiltsd(a, b) } |
2271 | } |
2272 | |
2273 | /// Compares the lower element of `a` and `b` for less-than-or-equal. |
2274 | /// |
2275 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomile_sd) |
2276 | #[inline ] |
2277 | #[target_feature (enable = "sse2" )] |
2278 | #[cfg_attr (test, assert_instr(ucomisd))] |
2279 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2280 | pub fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 { |
2281 | unsafe { ucomilesd(a, b) } |
2282 | } |
2283 | |
2284 | /// Compares the lower element of `a` and `b` for greater-than. |
2285 | /// |
2286 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomigt_sd) |
2287 | #[inline ] |
2288 | #[target_feature (enable = "sse2" )] |
2289 | #[cfg_attr (test, assert_instr(ucomisd))] |
2290 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2291 | pub fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 { |
2292 | unsafe { ucomigtsd(a, b) } |
2293 | } |
2294 | |
2295 | /// Compares the lower element of `a` and `b` for greater-than-or-equal. |
2296 | /// |
2297 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomige_sd) |
2298 | #[inline ] |
2299 | #[target_feature (enable = "sse2" )] |
2300 | #[cfg_attr (test, assert_instr(ucomisd))] |
2301 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2302 | pub fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 { |
2303 | unsafe { ucomigesd(a, b) } |
2304 | } |
2305 | |
2306 | /// Compares the lower element of `a` and `b` for not-equal. |
2307 | /// |
2308 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomineq_sd) |
2309 | #[inline ] |
2310 | #[target_feature (enable = "sse2" )] |
2311 | #[cfg_attr (test, assert_instr(ucomisd))] |
2312 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2313 | pub fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 { |
2314 | unsafe { ucomineqsd(a, b) } |
2315 | } |
2316 | |
/// Converts packed double-precision (64-bit) floating-point elements in `a` to
/// packed single-precision (32-bit) floating-point elements. The upper two
/// elements of the result are zeroed.
2319 | /// |
2320 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_ps) |
2321 | #[inline ] |
2322 | #[target_feature (enable = "sse2" )] |
2323 | #[cfg_attr (test, assert_instr(cvtpd2ps))] |
2324 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2325 | pub fn _mm_cvtpd_ps(a: __m128d) -> __m128 { |
2326 | unsafe { |
2327 | let r: f32x2 = simd_cast::<_, f32x2>(a.as_f64x2()); |
2328 | let zero: f32x2 = f32x2::ZERO; |
transmute::<f32x4, _>(simd_shuffle!(r, zero, [0, 1, 2, 3]))
2330 | } |
2331 | } |
2332 | |
/// Converts the lower two packed single-precision (32-bit) floating-point
/// elements in `a` to packed double-precision (64-bit) floating-point
/// elements.
2336 | /// |
2337 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_pd) |
2338 | #[inline ] |
2339 | #[target_feature (enable = "sse2" )] |
2340 | #[cfg_attr (test, assert_instr(cvtps2pd))] |
2341 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2342 | pub fn _mm_cvtps_pd(a: __m128) -> __m128d { |
2343 | unsafe { |
2344 | let a: f32x4 = a.as_f32x4(); |
transmute(simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
2346 | } |
2347 | } |
2348 | |
2349 | /// Converts packed double-precision (64-bit) floating-point elements in `a` to |
2350 | /// packed 32-bit integers. |
2351 | /// |
2352 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epi32) |
2353 | #[inline ] |
2354 | #[target_feature (enable = "sse2" )] |
2355 | #[cfg_attr (test, assert_instr(cvtpd2dq))] |
2356 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2357 | pub fn _mm_cvtpd_epi32(a: __m128d) -> __m128i { |
unsafe { transmute(cvtpd2dq(a)) }
2359 | } |
2360 | |
/// Converts the lower double-precision (64-bit) floating-point element in `a`
/// to a 32-bit integer.
2363 | /// |
2364 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_si32) |
2365 | #[inline ] |
2366 | #[target_feature (enable = "sse2" )] |
2367 | #[cfg_attr (test, assert_instr(cvtsd2si))] |
2368 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2369 | pub fn _mm_cvtsd_si32(a: __m128d) -> i32 { |
2370 | unsafe { cvtsd2si(a) } |
2371 | } |
2372 | |
/// Converts the lower double-precision (64-bit) floating-point element in `b`
/// to a single-precision (32-bit) floating-point element, stores the result in
/// the lower element of the return value, and copies the upper elements from
/// `a` to the upper elements of the return value.
2377 | /// |
2378 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_ss) |
2379 | #[inline ] |
2380 | #[target_feature (enable = "sse2" )] |
2381 | #[cfg_attr (test, assert_instr(cvtsd2ss))] |
2382 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2383 | pub fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 { |
2384 | unsafe { cvtsd2ss(a, b) } |
2385 | } |
2386 | |
2387 | /// Returns the lower double-precision (64-bit) floating-point element of `a`. |
2388 | /// |
2389 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_f64) |
2390 | #[inline ] |
2391 | #[target_feature (enable = "sse2" )] |
2392 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2393 | pub fn _mm_cvtsd_f64(a: __m128d) -> f64 { |
2394 | unsafe { simd_extract!(a, 0) } |
2395 | } |
2396 | |
/// Converts the lower single-precision (32-bit) floating-point element in `b`
/// to a double-precision (64-bit) floating-point element, stores the result in
/// the lower element of the return value, and copies the upper element from
/// `a` to the upper element of the return value.
2401 | /// |
2402 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_sd) |
2403 | #[inline ] |
2404 | #[target_feature (enable = "sse2" )] |
2405 | #[cfg_attr (test, assert_instr(cvtss2sd))] |
2406 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2407 | pub fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d { |
2408 | unsafe { cvtss2sd(a, b) } |
2409 | } |
2410 | |
2411 | /// Converts packed double-precision (64-bit) floating-point elements in `a` to |
2412 | /// packed 32-bit integers with truncation. |
2413 | /// |
2414 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epi32) |
2415 | #[inline ] |
2416 | #[target_feature (enable = "sse2" )] |
2417 | #[cfg_attr (test, assert_instr(cvttpd2dq))] |
2418 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2419 | pub fn _mm_cvttpd_epi32(a: __m128d) -> __m128i { |
unsafe { transmute(cvttpd2dq(a)) }
2421 | } |
2422 | |
2423 | /// Converts the lower double-precision (64-bit) floating-point element in `a` |
2424 | /// to a 32-bit integer with truncation. |
2425 | /// |
2426 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_si32) |
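///
/// # Examples
///
/// A minimal usage sketch (x86_64 only): the conversion truncates toward zero
/// rather than rounding to nearest.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_sd(-1.9);
/// assert_eq!(_mm_cvttsd_si32(a), -1); // truncated, not rounded to -2
/// # }
/// ```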
2427 | #[inline ] |
2428 | #[target_feature (enable = "sse2" )] |
2429 | #[cfg_attr (test, assert_instr(cvttsd2si))] |
2430 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2431 | pub fn _mm_cvttsd_si32(a: __m128d) -> i32 { |
2432 | unsafe { cvttsd2si(a) } |
2433 | } |
2434 | |
2435 | /// Converts packed single-precision (32-bit) floating-point elements in `a` to |
2436 | /// packed 32-bit integers with truncation. |
2437 | /// |
2438 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi32) |
2439 | #[inline ] |
2440 | #[target_feature (enable = "sse2" )] |
2441 | #[cfg_attr (test, assert_instr(cvttps2dq))] |
2442 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2443 | pub fn _mm_cvttps_epi32(a: __m128) -> __m128i { |
unsafe { transmute(cvttps2dq(a)) }
2445 | } |
2446 | |
/// Copies the double-precision (64-bit) floating-point value `a` to the lower
/// element of the return value, and zeroes the upper element.
2449 | /// |
2450 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_sd) |
2451 | #[inline ] |
2452 | #[target_feature (enable = "sse2" )] |
2453 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2454 | pub fn _mm_set_sd(a: f64) -> __m128d { |
_mm_set_pd(0.0, a)
2456 | } |
2457 | |
/// Broadcasts the double-precision (64-bit) floating-point value `a` to all
/// elements of the return value.
2460 | /// |
2461 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_pd) |
2462 | #[inline ] |
2463 | #[target_feature (enable = "sse2" )] |
2464 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2465 | pub fn _mm_set1_pd(a: f64) -> __m128d { |
_mm_set_pd(a, a)
2467 | } |
2468 | |
/// Broadcasts the double-precision (64-bit) floating-point value `a` to all
/// elements of the return value.
2471 | /// |
2472 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd1) |
2473 | #[inline ] |
2474 | #[target_feature (enable = "sse2" )] |
2475 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2476 | pub fn _mm_set_pd1(a: f64) -> __m128d { |
_mm_set_pd(a, a)
2478 | } |
2479 | |
2480 | /// Sets packed double-precision (64-bit) floating-point elements in the return |
2481 | /// value with the supplied values. |
2482 | /// |
2483 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd) |
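///
/// # Examples
///
/// A minimal sketch (added for illustration), assuming SSE2 is available:
/// the first argument ends up in the upper lane, mirroring the
/// `__m128d([b, a])` construction below:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let v = _mm_set_pd(2.0, 1.0);
///             let mut out = [0.0f64; 2];
///             _mm_storeu_pd(out.as_mut_ptr(), v);
///             // In memory order the low element is `b`, the high element is `a`.
///             assert_eq!(out, [1.0, 2.0]);
///         }
///     }
/// }
/// ```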
2484 | #[inline ] |
2485 | #[target_feature (enable = "sse2" )] |
2486 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2487 | pub fn _mm_set_pd(a: f64, b: f64) -> __m128d { |
2488 | __m128d([b, a]) |
2489 | } |
2490 | |
2491 | /// Sets packed double-precision (64-bit) floating-point elements in the return |
2492 | /// value with the supplied values in reverse order. |
2493 | /// |
2494 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_pd) |
2495 | #[inline ] |
2496 | #[target_feature (enable = "sse2" )] |
2497 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2498 | pub fn _mm_setr_pd(a: f64, b: f64) -> __m128d { |
_mm_set_pd(b, a)
2500 | } |
2501 | |
2502 | /// Returns packed double-precision (64-bit) floating-point elements with all |
2503 | /// zeros. |
2504 | /// |
2505 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_pd) |
2506 | #[inline ] |
2507 | #[target_feature (enable = "sse2" )] |
2508 | #[cfg_attr (test, assert_instr(xorp))] |
2509 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2510 | pub fn _mm_setzero_pd() -> __m128d { |
2511 | const { unsafe { mem::zeroed() } } |
2512 | } |
2513 | |
2514 | /// Returns a mask of the most significant bit of each element in `a`. |
2515 | /// |
2516 | /// The mask is stored in the 2 least significant bits of the return value. |
2517 | /// All other bits are set to `0`. |
2518 | /// |
2519 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_pd) |
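///
/// # Examples
///
/// A minimal sketch, assuming SSE2: the mask collects the sign bits of the
/// two lanes:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let v = _mm_setr_pd(-1.0, 1.0);
///             // Bit 0 is the sign of the low lane, bit 1 the sign of the high lane.
///             assert_eq!(_mm_movemask_pd(v), 0b01);
///         }
///     }
/// }
/// ```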
2520 | #[inline ] |
2521 | #[target_feature (enable = "sse2" )] |
2522 | #[cfg_attr (test, assert_instr(movmskpd))] |
2523 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2524 | pub fn _mm_movemask_pd(a: __m128d) -> i32 { |
2525 | // Propagate the highest bit to the rest, because simd_bitmask |
2526 | // requires all-1 or all-0. |
2527 | unsafe { |
let mask: i64x2 = simd_lt(transmute(a), i64x2::ZERO);
2529 | simd_bitmask::<i64x2, u8>(mask).into() |
2530 | } |
2531 | } |
2532 | |
/// Loads 128 bits (composed of 2 packed double-precision (64-bit)
2534 | /// floating-point elements) from memory into the returned vector. |
2535 | /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection |
2536 | /// exception may be generated. |
2537 | /// |
2538 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd) |
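///
/// # Examples
///
/// A minimal sketch, assuming SSE2; the 16-byte alignment requirement is met
/// here with an illustrative `repr(align(16))` wrapper (any suitably aligned
/// storage works):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         // Hypothetical aligned buffer used only for this example.
///         #[repr(align(16))]
///         struct Aligned([f64; 2]);
///         let data = Aligned([1.0, 2.0]);
///         unsafe {
///             let v = _mm_load_pd(data.0.as_ptr());
///             let mut out = [0.0f64; 2];
///             _mm_storeu_pd(out.as_mut_ptr(), v);
///             assert_eq!(out, [1.0, 2.0]);
///         }
///     }
/// }
/// ```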
2539 | #[inline ] |
2540 | #[target_feature (enable = "sse2" )] |
2541 | #[cfg_attr (test, assert_instr(movaps))] |
2542 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2543 | #[allow (clippy::cast_ptr_alignment)] |
2544 | pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d { |
2545 | *(mem_addr as *const __m128d) |
2546 | } |
2547 | |
/// Loads a 64-bit double-precision value into the low element of a
/// 128-bit vector of `[2 x double]` and clears the upper element.
2550 | /// |
2551 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_sd) |
2552 | #[inline ] |
2553 | #[target_feature (enable = "sse2" )] |
2554 | #[cfg_attr (test, assert_instr(movsd))] |
2555 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2556 | pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d { |
_mm_setr_pd(*mem_addr, 0.)
2558 | } |
2559 | |
2560 | /// Loads a double-precision value into the high-order bits of a 128-bit |
2561 | /// vector of `[2 x double]`. The low-order bits are copied from the low-order |
2562 | /// bits of the first operand. |
2563 | /// |
2564 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadh_pd) |
2565 | #[inline ] |
2566 | #[target_feature (enable = "sse2" )] |
2567 | #[cfg_attr (test, assert_instr(movhps))] |
2568 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2569 | pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d { |
_mm_setr_pd(simd_extract!(a, 0), *mem_addr)
2571 | } |
2572 | |
2573 | /// Loads a double-precision value into the low-order bits of a 128-bit |
2574 | /// vector of `[2 x double]`. The high-order bits are copied from the |
2575 | /// high-order bits of the first operand. |
2576 | /// |
2577 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_pd) |
2578 | #[inline ] |
2579 | #[target_feature (enable = "sse2" )] |
2580 | #[cfg_attr (test, assert_instr(movlps))] |
2581 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2582 | pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d { |
_mm_setr_pd(*mem_addr, simd_extract!(a, 1))
2584 | } |
2585 | |
/// Stores a 128-bit floating-point vector of `[2 x double]` to a 128-bit
2587 | /// aligned memory location. |
2588 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be |
2589 | /// used again soon). |
2590 | /// |
2591 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_pd) |
2592 | /// |
2593 | /// # Safety of non-temporal stores |
2594 | /// |
2595 | /// After using this intrinsic, but before any other access to the memory that this intrinsic |
2596 | /// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In |
2597 | /// particular, functions that call this intrinsic should generally call `_mm_sfence` before they |
2598 | /// return. |
2599 | /// |
2600 | /// See [`_mm_sfence`] for details. |
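///
/// # Examples
///
/// A minimal sketch of the required store-then-fence pattern, assuming SSE2
/// and a 16-byte-aligned destination (the `Aligned` wrapper is illustrative):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         #[repr(align(16))]
///         struct Aligned([f64; 2]);
///         let mut dst = Aligned([0.0; 2]);
///         unsafe {
///             _mm_stream_pd(dst.0.as_mut_ptr(), _mm_set1_pd(3.5));
///             // Fence before the streamed memory is read or written again.
///             _mm_sfence();
///         }
///         assert_eq!(dst.0, [3.5, 3.5]);
///     }
/// }
/// ```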
2601 | #[inline ] |
2602 | #[target_feature (enable = "sse2" )] |
2603 | #[cfg_attr (test, assert_instr(movntpd))] |
2604 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2605 | #[allow (clippy::cast_ptr_alignment)] |
2606 | pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) { |
2607 | crate::arch::asm!( |
2608 | vps!("movntpd" , ",{a}" ), |
2609 | p = in(reg) mem_addr, |
2610 | a = in(xmm_reg) a, |
2611 | options(nostack, preserves_flags), |
2612 | ); |
2613 | } |
2614 | |
2615 | /// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a |
2616 | /// memory location. |
2617 | /// |
2618 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_sd) |
2619 | #[inline ] |
2620 | #[target_feature (enable = "sse2" )] |
2621 | #[cfg_attr (all(test, not(target_env = "msvc" )), assert_instr(movlps))] |
2622 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2623 | pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) { |
2624 | *mem_addr = simd_extract!(a, 0) |
2625 | } |
2626 | |
/// Stores 128 bits (composed of 2 packed double-precision (64-bit)
2628 | /// floating-point elements) from `a` into memory. `mem_addr` must be aligned |
2629 | /// on a 16-byte boundary or a general-protection exception may be generated. |
2630 | /// |
2631 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd) |
2632 | #[inline ] |
2633 | #[target_feature (enable = "sse2" )] |
2634 | #[cfg_attr (test, assert_instr(movaps))] |
2635 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2636 | #[allow (clippy::cast_ptr_alignment)] |
2637 | pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) { |
2638 | *(mem_addr as *mut __m128d) = a; |
2639 | } |
2640 | |
/// Stores 128 bits (composed of 2 packed double-precision (64-bit)
2642 | /// floating-point elements) from `a` into memory. |
2643 | /// `mem_addr` does not need to be aligned on any particular boundary. |
2644 | /// |
2645 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_pd) |
2646 | #[inline ] |
2647 | #[target_feature (enable = "sse2" )] |
2648 | #[cfg_attr (test, assert_instr(movups))] // FIXME movupd expected |
2649 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2650 | pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) { |
mem_addr.cast::<__m128d>().write_unaligned(a);
2652 | } |
2653 | |
/// Stores a 16-bit integer from the first element of `a` into memory.
2655 | /// |
2656 | /// `mem_addr` does not need to be aligned on any particular boundary. |
2657 | /// |
2658 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si16) |
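///
/// # Examples
///
/// A minimal sketch, assuming SSE2: only the low 16 bits of the vector are
/// written, in the target's little-endian byte order:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let v = _mm_set1_epi16(0x1234);
///             let mut out = [0u8; 2];
///             _mm_storeu_si16(out.as_mut_ptr(), v);
///             assert_eq!(out, [0x34, 0x12]);
///         }
///     }
/// }
/// ```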
2659 | #[inline ] |
2660 | #[target_feature (enable = "sse2" )] |
2661 | #[stable (feature = "simd_x86_updates" , since = "1.82.0" )] |
2662 | pub unsafe fn _mm_storeu_si16(mem_addr: *mut u8, a: __m128i) { |
ptr::write_unaligned(mem_addr as *mut i16, simd_extract(a.as_i16x8(), 0))
2664 | } |
2665 | |
/// Stores a 32-bit integer from the first element of `a` into memory.
2667 | /// |
2668 | /// `mem_addr` does not need to be aligned on any particular boundary. |
2669 | /// |
2670 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si32) |
2671 | #[inline ] |
2672 | #[target_feature (enable = "sse2" )] |
2673 | #[stable (feature = "simd_x86_updates" , since = "1.82.0" )] |
2674 | pub unsafe fn _mm_storeu_si32(mem_addr: *mut u8, a: __m128i) { |
ptr::write_unaligned(mem_addr as *mut i32, simd_extract(a.as_i32x4(), 0))
2676 | } |
2677 | |
/// Stores a 64-bit integer from the first element of `a` into memory.
2679 | /// |
2680 | /// `mem_addr` does not need to be aligned on any particular boundary. |
2681 | /// |
2682 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si64) |
2683 | #[inline ] |
2684 | #[target_feature (enable = "sse2" )] |
2685 | #[stable (feature = "simd_x86_updates" , since = "1.82.0" )] |
2686 | pub unsafe fn _mm_storeu_si64(mem_addr: *mut u8, a: __m128i) { |
ptr::write_unaligned(mem_addr as *mut i64, simd_extract(a.as_i64x2(), 0))
2688 | } |
2689 | |
2690 | /// Stores the lower double-precision (64-bit) floating-point element from `a` |
2691 | /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a |
2692 | /// 16-byte boundary or a general-protection exception may be generated. |
2693 | /// |
2694 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store1_pd) |
2695 | #[inline ] |
2696 | #[target_feature (enable = "sse2" )] |
2697 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2698 | #[allow (clippy::cast_ptr_alignment)] |
2699 | pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) { |
2700 | let b: __m128d = simd_shuffle!(a, a, [0, 0]); |
2701 | *(mem_addr as *mut __m128d) = b; |
2702 | } |
2703 | |
2704 | /// Stores the lower double-precision (64-bit) floating-point element from `a` |
2705 | /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a |
2706 | /// 16-byte boundary or a general-protection exception may be generated. |
2707 | /// |
2708 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd1) |
2709 | #[inline ] |
2710 | #[target_feature (enable = "sse2" )] |
2711 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2712 | #[allow (clippy::cast_ptr_alignment)] |
2713 | pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) { |
2714 | let b: __m128d = simd_shuffle!(a, a, [0, 0]); |
2715 | *(mem_addr as *mut __m128d) = b; |
2716 | } |
2717 | |
2718 | /// Stores 2 double-precision (64-bit) floating-point elements from `a` into |
2719 | /// memory in reverse order. |
2720 | /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection |
2721 | /// exception may be generated. |
2722 | /// |
2723 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storer_pd) |
2724 | #[inline ] |
2725 | #[target_feature (enable = "sse2" )] |
2726 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2727 | #[allow (clippy::cast_ptr_alignment)] |
2728 | pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) { |
2729 | let b: __m128d = simd_shuffle!(a, a, [1, 0]); |
2730 | *(mem_addr as *mut __m128d) = b; |
2731 | } |
2732 | |
2733 | /// Stores the upper 64 bits of a 128-bit vector of `[2 x double]` to a |
2734 | /// memory location. |
2735 | /// |
2736 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeh_pd) |
2737 | #[inline ] |
2738 | #[target_feature (enable = "sse2" )] |
2739 | #[cfg_attr (all(test, not(target_env = "msvc" )), assert_instr(movhps))] |
2740 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2741 | pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) { |
2742 | *mem_addr = simd_extract!(a, 1); |
2743 | } |
2744 | |
2745 | /// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a |
2746 | /// memory location. |
2747 | /// |
2748 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_pd) |
2749 | #[inline ] |
2750 | #[target_feature (enable = "sse2" )] |
2751 | #[cfg_attr (all(test, not(target_env = "msvc" )), assert_instr(movlps))] |
2752 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2753 | pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) { |
2754 | *mem_addr = simd_extract!(a, 0); |
2755 | } |
2756 | |
2757 | /// Loads a double-precision (64-bit) floating-point element from memory |
/// into both elements of the returned vector.
2759 | /// |
2760 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load1_pd) |
2761 | #[inline ] |
2762 | #[target_feature (enable = "sse2" )] |
2763 | // #[cfg_attr(test, assert_instr(movapd))] // FIXME LLVM uses different codegen |
2764 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2765 | pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d { |
2766 | let d: f64 = *mem_addr; |
_mm_setr_pd(d, d)
2768 | } |
2769 | |
2770 | /// Loads a double-precision (64-bit) floating-point element from memory |
/// into both elements of the returned vector.
2772 | /// |
2773 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd1) |
2774 | #[inline ] |
2775 | #[target_feature (enable = "sse2" )] |
2776 | // #[cfg_attr(test, assert_instr(movapd))] // FIXME same as _mm_load1_pd |
2777 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2778 | pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d { |
2779 | _mm_load1_pd(mem_addr) |
2780 | } |
2781 | |
2782 | /// Loads 2 double-precision (64-bit) floating-point elements from memory into |
2783 | /// the returned vector in reverse order. `mem_addr` must be aligned on a |
2784 | /// 16-byte boundary or a general-protection exception may be generated. |
2785 | /// |
2786 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadr_pd) |
2787 | #[inline ] |
2788 | #[target_feature (enable = "sse2" )] |
2789 | #[cfg_attr (test, assert_instr(movaps))] |
2790 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2791 | pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d { |
2792 | let a: __m128d = _mm_load_pd(mem_addr); |
2793 | simd_shuffle!(a, a, [1, 0]) |
2794 | } |
2795 | |
/// Loads 128 bits (composed of 2 packed double-precision (64-bit)
2797 | /// floating-point elements) from memory into the returned vector. |
2798 | /// `mem_addr` does not need to be aligned on any particular boundary. |
2799 | /// |
2800 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_pd) |
2801 | #[inline ] |
2802 | #[target_feature (enable = "sse2" )] |
2803 | #[cfg_attr (test, assert_instr(movups))] |
2804 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2805 | pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d { |
2806 | let mut dst: __m128d = _mm_undefined_pd(); |
ptr::copy_nonoverlapping(
mem_addr as *const u8,
ptr::addr_of_mut!(dst) as *mut u8,
mem::size_of::<__m128d>(),
);
2812 | dst |
2813 | } |
2814 | |
/// Loads unaligned 16 bits of integer data from memory into a new vector.
2816 | /// |
2817 | /// `mem_addr` does not need to be aligned on any particular boundary. |
2818 | /// |
2819 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si16) |
2820 | #[inline ] |
2821 | #[target_feature (enable = "sse2" )] |
2822 | #[stable (feature = "simd_x86_updates" , since = "1.82.0" )] |
2823 | pub unsafe fn _mm_loadu_si16(mem_addr: *const u8) -> __m128i { |
transmute(i16x8::new(
ptr::read_unaligned(mem_addr as *const i16),
0,
0,
0,
0,
0,
0,
0,
))
2834 | } |
2835 | |
/// Loads unaligned 32 bits of integer data from memory into a new vector.
2837 | /// |
2838 | /// `mem_addr` does not need to be aligned on any particular boundary. |
2839 | /// |
2840 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si32) |
2841 | #[inline ] |
2842 | #[target_feature (enable = "sse2" )] |
2843 | #[stable (feature = "simd_x86_updates" , since = "1.82.0" )] |
2844 | pub unsafe fn _mm_loadu_si32(mem_addr: *const u8) -> __m128i { |
transmute(i32x4::new(
ptr::read_unaligned(mem_addr as *const i32),
0,
0,
0,
))
2851 | } |
2852 | |
/// Loads unaligned 64 bits of integer data from memory into a new vector.
2854 | /// |
2855 | /// `mem_addr` does not need to be aligned on any particular boundary. |
2856 | /// |
2857 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si64) |
2858 | #[inline ] |
2859 | #[target_feature (enable = "sse2" )] |
2860 | #[stable (feature = "simd_x86_mm_loadu_si64" , since = "1.46.0" )] |
2861 | pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i { |
transmute(i64x2::new(ptr::read_unaligned(mem_addr as *const i64), 0))
2863 | } |
2864 | |
2865 | /// Constructs a 128-bit floating-point vector of `[2 x double]` from two |
2866 | /// 128-bit vector parameters of `[2 x double]`, using the immediate-value |
2867 | /// parameter as a specifier. |
2868 | /// |
2869 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_pd) |
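///
/// # Examples
///
/// A minimal sketch of the mask semantics, assuming SSE2: bit 0 of `MASK`
/// selects the low lane from `a`, bit 1 the high lane from `b`:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let a = _mm_setr_pd(1.0, 2.0);
///             let b = _mm_setr_pd(3.0, 4.0);
///             let r = _mm_shuffle_pd::<0b01>(a, b);
///             let mut out = [0.0f64; 2];
///             _mm_storeu_pd(out.as_mut_ptr(), r);
///             // Low lane = a[1], high lane = b[0].
///             assert_eq!(out, [2.0, 3.0]);
///         }
///     }
/// }
/// ```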
2870 | #[inline ] |
2871 | #[target_feature (enable = "sse2" )] |
2872 | #[cfg_attr (test, assert_instr(shufps, MASK = 2))] |
2873 | #[rustc_legacy_const_generics (2)] |
2874 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2875 | pub fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d { |
2876 | static_assert_uimm_bits!(MASK, 8); |
2877 | unsafe { simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) } |
2878 | } |
2879 | |
2880 | /// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower |
2881 | /// 64 bits are set to the lower 64 bits of the second parameter. The upper |
2882 | /// 64 bits are set to the upper 64 bits of the first parameter. |
2883 | /// |
2884 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_sd) |
2885 | #[inline ] |
2886 | #[target_feature (enable = "sse2" )] |
2887 | #[cfg_attr (test, assert_instr(movsd))] |
2888 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2889 | pub fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d { |
unsafe { _mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1)) }
2891 | } |
2892 | |
2893 | /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit |
2894 | /// floating-point vector of `[4 x float]`. |
2895 | /// |
2896 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_ps) |
2897 | #[inline ] |
2898 | #[target_feature (enable = "sse2" )] |
2899 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2900 | pub fn _mm_castpd_ps(a: __m128d) -> __m128 { |
unsafe { transmute(a) }
2902 | } |
2903 | |
2904 | /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit |
2905 | /// integer vector. |
2906 | /// |
2907 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_si128) |
2908 | #[inline ] |
2909 | #[target_feature (enable = "sse2" )] |
2910 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2911 | pub fn _mm_castpd_si128(a: __m128d) -> __m128i { |
unsafe { transmute(a) }
2913 | } |
2914 | |
2915 | /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit |
2916 | /// floating-point vector of `[2 x double]`. |
2917 | /// |
2918 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_pd) |
2919 | #[inline ] |
2920 | #[target_feature (enable = "sse2" )] |
2921 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2922 | pub fn _mm_castps_pd(a: __m128) -> __m128d { |
unsafe { transmute(a) }
2924 | } |
2925 | |
2926 | /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit |
2927 | /// integer vector. |
2928 | /// |
2929 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_si128) |
2930 | #[inline ] |
2931 | #[target_feature (enable = "sse2" )] |
2932 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2933 | pub fn _mm_castps_si128(a: __m128) -> __m128i { |
unsafe { transmute(a) }
2935 | } |
2936 | |
2937 | /// Casts a 128-bit integer vector into a 128-bit floating-point vector |
2938 | /// of `[2 x double]`. |
2939 | /// |
2940 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_pd) |
2941 | #[inline ] |
2942 | #[target_feature (enable = "sse2" )] |
2943 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2944 | pub fn _mm_castsi128_pd(a: __m128i) -> __m128d { |
unsafe { transmute(a) }
2946 | } |
2947 | |
2948 | /// Casts a 128-bit integer vector into a 128-bit floating-point vector |
2949 | /// of `[4 x float]`. |
2950 | /// |
2951 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_ps) |
2952 | #[inline ] |
2953 | #[target_feature (enable = "sse2" )] |
2954 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2955 | pub fn _mm_castsi128_ps(a: __m128i) -> __m128 { |
unsafe { transmute(a) }
2957 | } |
2958 | |
/// Returns a vector of type `__m128d` with indeterminate elements.
2960 | /// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`]. |
2961 | /// In practice, this is equivalent to [`mem::zeroed`]. |
2962 | /// |
2963 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_pd) |
2964 | #[inline ] |
2965 | #[target_feature (enable = "sse2" )] |
2966 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2967 | pub fn _mm_undefined_pd() -> __m128d { |
2968 | const { unsafe { mem::zeroed() } } |
2969 | } |
2970 | |
/// Returns a vector of type `__m128i` with indeterminate elements.
2972 | /// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`]. |
2973 | /// In practice, this is equivalent to [`mem::zeroed`]. |
2974 | /// |
2975 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_si128) |
2976 | #[inline ] |
2977 | #[target_feature (enable = "sse2" )] |
2978 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2979 | pub fn _mm_undefined_si128() -> __m128i { |
2980 | const { unsafe { mem::zeroed() } } |
2981 | } |
2982 | |
/// The resulting `__m128d` is composed of the high-order values of the two
/// `__m128d` inputs, interleaved as follows:
2985 | /// |
2986 | /// * The `[127:64]` bits are copied from the `[127:64]` bits of the second input |
2987 | /// * The `[63:0]` bits are copied from the `[127:64]` bits of the first input |
2988 | /// |
2989 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_pd) |
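///
/// # Examples
///
/// A minimal sketch, assuming SSE2: the high lanes of the two inputs are
/// interleaved:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let a = _mm_setr_pd(1.0, 2.0);
///             let b = _mm_setr_pd(3.0, 4.0);
///             let r = _mm_unpackhi_pd(a, b);
///             let mut out = [0.0f64; 2];
///             _mm_storeu_pd(out.as_mut_ptr(), r);
///             // Result is [a[1], b[1]].
///             assert_eq!(out, [2.0, 4.0]);
///         }
///     }
/// }
/// ```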
2990 | #[inline ] |
2991 | #[target_feature (enable = "sse2" )] |
2992 | #[cfg_attr (test, assert_instr(unpckhpd))] |
2993 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2994 | pub fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d { |
2995 | unsafe { simd_shuffle!(a, b, [1, 3]) } |
2996 | } |
2997 | |
/// The resulting `__m128d` is composed of the low-order values of the two
/// `__m128d` inputs, interleaved as follows:
3000 | /// |
3001 | /// * The `[127:64]` bits are copied from the `[63:0]` bits of the second input |
3002 | /// * The `[63:0]` bits are copied from the `[63:0]` bits of the first input |
3003 | /// |
3004 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_pd) |
3005 | #[inline ] |
3006 | #[target_feature (enable = "sse2" )] |
3007 | #[cfg_attr (all(test, not(target_env = "msvc" )), assert_instr(movlhps))] |
3008 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3009 | pub fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d { |
3010 | unsafe { simd_shuffle!(a, b, [0, 2]) } |
3011 | } |
3012 | |
3013 | #[allow (improper_ctypes)] |
3014 | unsafe extern "C" { |
3015 | #[link_name = "llvm.x86.sse2.pause" ] |
3016 | unsafefn pause(); |
3017 | #[link_name = "llvm.x86.sse2.clflush" ] |
3018 | unsafefn clflush(p: *const u8); |
3019 | #[link_name = "llvm.x86.sse2.lfence" ] |
3020 | unsafefn lfence(); |
3021 | #[link_name = "llvm.x86.sse2.mfence" ] |
3022 | unsafefn mfence(); |
3023 | #[link_name = "llvm.x86.sse2.pmadd.wd" ] |
3024 | unsafefn pmaddwd(a: i16x8, b: i16x8) -> i32x4; |
3025 | #[link_name = "llvm.x86.sse2.psad.bw" ] |
3026 | unsafefn psadbw(a: u8x16, b: u8x16) -> u64x2; |
3027 | #[link_name = "llvm.x86.sse2.psll.w" ] |
3028 | unsafefn psllw(a: i16x8, count: i16x8) -> i16x8; |
3029 | #[link_name = "llvm.x86.sse2.psll.d" ] |
3030 | unsafefn pslld(a: i32x4, count: i32x4) -> i32x4; |
3031 | #[link_name = "llvm.x86.sse2.psll.q" ] |
3032 | unsafefn psllq(a: i64x2, count: i64x2) -> i64x2; |
3033 | #[link_name = "llvm.x86.sse2.psra.w" ] |
3034 | unsafefn psraw(a: i16x8, count: i16x8) -> i16x8; |
3035 | #[link_name = "llvm.x86.sse2.psra.d" ] |
3036 | unsafefn psrad(a: i32x4, count: i32x4) -> i32x4; |
3037 | #[link_name = "llvm.x86.sse2.psrl.w" ] |
3038 | unsafefn psrlw(a: i16x8, count: i16x8) -> i16x8; |
3039 | #[link_name = "llvm.x86.sse2.psrl.d" ] |
3040 | unsafefn psrld(a: i32x4, count: i32x4) -> i32x4; |
3041 | #[link_name = "llvm.x86.sse2.psrl.q" ] |
3042 | unsafefn psrlq(a: i64x2, count: i64x2) -> i64x2; |
3043 | #[link_name = "llvm.x86.sse2.cvtps2dq" ] |
3044 | unsafefn cvtps2dq(a: __m128) -> i32x4; |
3045 | #[link_name = "llvm.x86.sse2.maskmov.dqu" ] |
3046 | unsafefn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8); |
3047 | #[link_name = "llvm.x86.sse2.packsswb.128" ] |
3048 | unsafefn packsswb(a: i16x8, b: i16x8) -> i8x16; |
3049 | #[link_name = "llvm.x86.sse2.packssdw.128" ] |
3050 | unsafefn packssdw(a: i32x4, b: i32x4) -> i16x8; |
3051 | #[link_name = "llvm.x86.sse2.packuswb.128" ] |
3052 | unsafefn packuswb(a: i16x8, b: i16x8) -> u8x16; |
3053 | #[link_name = "llvm.x86.sse2.max.sd" ] |
3054 | unsafefn maxsd(a: __m128d, b: __m128d) -> __m128d; |
3055 | #[link_name = "llvm.x86.sse2.max.pd" ] |
3056 | unsafefn maxpd(a: __m128d, b: __m128d) -> __m128d; |
3057 | #[link_name = "llvm.x86.sse2.min.sd" ] |
3058 | unsafefn minsd(a: __m128d, b: __m128d) -> __m128d; |
3059 | #[link_name = "llvm.x86.sse2.min.pd" ] |
3060 | unsafefn minpd(a: __m128d, b: __m128d) -> __m128d; |
3061 | #[link_name = "llvm.x86.sse2.cmp.sd" ] |
3062 | unsafefn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d; |
3063 | #[link_name = "llvm.x86.sse2.cmp.pd" ] |
3064 | unsafefn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d; |
3065 | #[link_name = "llvm.x86.sse2.comieq.sd" ] |
3066 | unsafefn comieqsd(a: __m128d, b: __m128d) -> i32; |
3067 | #[link_name = "llvm.x86.sse2.comilt.sd" ] |
3068 | unsafefn comiltsd(a: __m128d, b: __m128d) -> i32; |
3069 | #[link_name = "llvm.x86.sse2.comile.sd" ] |
3070 | unsafefn comilesd(a: __m128d, b: __m128d) -> i32; |
3071 | #[link_name = "llvm.x86.sse2.comigt.sd" ] |
3072 | unsafefn comigtsd(a: __m128d, b: __m128d) -> i32; |
3073 | #[link_name = "llvm.x86.sse2.comige.sd" ] |
3074 | unsafefn comigesd(a: __m128d, b: __m128d) -> i32; |
3075 | #[link_name = "llvm.x86.sse2.comineq.sd" ] |
3076 | unsafefn comineqsd(a: __m128d, b: __m128d) -> i32; |
3077 | #[link_name = "llvm.x86.sse2.ucomieq.sd" ] |
3078 | unsafefn ucomieqsd(a: __m128d, b: __m128d) -> i32; |
3079 | #[link_name = "llvm.x86.sse2.ucomilt.sd" ] |
3080 | unsafefn ucomiltsd(a: __m128d, b: __m128d) -> i32; |
3081 | #[link_name = "llvm.x86.sse2.ucomile.sd" ] |
3082 | unsafefn ucomilesd(a: __m128d, b: __m128d) -> i32; |
3083 | #[link_name = "llvm.x86.sse2.ucomigt.sd" ] |
3084 | unsafefn ucomigtsd(a: __m128d, b: __m128d) -> i32; |
3085 | #[link_name = "llvm.x86.sse2.ucomige.sd" ] |
3086 | unsafefn ucomigesd(a: __m128d, b: __m128d) -> i32; |
3087 | #[link_name = "llvm.x86.sse2.ucomineq.sd" ] |
3088 | unsafefn ucomineqsd(a: __m128d, b: __m128d) -> i32; |
3089 | #[link_name = "llvm.x86.sse2.cvtpd2dq" ] |
3090 | unsafefn cvtpd2dq(a: __m128d) -> i32x4; |
3091 | #[link_name = "llvm.x86.sse2.cvtsd2si" ] |
3092 | unsafefn cvtsd2si(a: __m128d) -> i32; |
3093 | #[link_name = "llvm.x86.sse2.cvtsd2ss" ] |
3094 | unsafefn cvtsd2ss(a: __m128, b: __m128d) -> __m128; |
3095 | #[link_name = "llvm.x86.sse2.cvtss2sd" ] |
3096 | unsafefn cvtss2sd(a: __m128d, b: __m128) -> __m128d; |
3097 | #[link_name = "llvm.x86.sse2.cvttpd2dq" ] |
3098 | unsafefn cvttpd2dq(a: __m128d) -> i32x4; |
3099 | #[link_name = "llvm.x86.sse2.cvttsd2si" ] |
3100 | unsafefn cvttsd2si(a: __m128d) -> i32; |
3101 | #[link_name = "llvm.x86.sse2.cvttps2dq" ] |
3102 | unsafefn cvttps2dq(a: __m128) -> i32x4; |
3103 | } |
3104 | |
3105 | #[cfg (test)] |
3106 | mod tests { |
3107 | use crate::{ |
3108 | core_arch::{simd::*, x86::*}, |
3109 | hint::black_box, |
3110 | }; |
3111 | use std::{ |
3112 | boxed, f32, f64, |
3113 | mem::{self, transmute}, |
3114 | ptr, |
3115 | }; |
3116 | use stdarch_test::simd_test; |
3117 | |
3118 | const NAN: f64 = f64::NAN; |
3119 | |
3120 | #[test ] |
3121 | fn test_mm_pause() { |
3122 | unsafe { _mm_pause() } |
3123 | } |
3124 | |
3125 | #[simd_test(enable = "sse2" )] |
3126 | unsafe fn test_mm_clflush() { |
3127 | let x = 0_u8; |
3128 | _mm_clflush(ptr::addr_of!(x)); |
3129 | } |
3130 | |
3131 | #[simd_test(enable = "sse2" )] |
3132 | // Miri cannot support this until it is clear how it fits in the Rust memory model |
3133 | #[cfg_attr (miri, ignore)] |
3134 | unsafe fn test_mm_lfence() { |
3135 | _mm_lfence(); |
3136 | } |
3137 | |
3138 | #[simd_test(enable = "sse2" )] |
3139 | // Miri cannot support this until it is clear how it fits in the Rust memory model |
3140 | #[cfg_attr (miri, ignore)] |
3141 | unsafe fn test_mm_mfence() { |
3142 | _mm_mfence(); |
3143 | } |
3144 | |
3145 | #[simd_test(enable = "sse2" )] |
3146 | unsafe fn test_mm_add_epi8() { |
3147 | let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3148 | #[rustfmt::skip] |
3149 | let b = _mm_setr_epi8( |
3150 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
3151 | ); |
3152 | let r = _mm_add_epi8(a, b); |
3153 | #[rustfmt::skip] |
3154 | let e = _mm_setr_epi8( |
3155 | 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, |
3156 | ); |
3157 | assert_eq_m128i(r, e); |
3158 | } |
3159 | |
3160 | #[simd_test(enable = "sse2" )] |
3161 | unsafe fn test_mm_add_epi8_overflow() { |
3162 | let a = _mm_set1_epi8(0x7F); |
3163 | let b = _mm_set1_epi8(1); |
3164 | let r = _mm_add_epi8(a, b); |
3165 | assert_eq_m128i(r, _mm_set1_epi8(-128)); |
3166 | } |
3167 | |
3168 | #[simd_test(enable = "sse2" )] |
3169 | unsafe fn test_mm_add_epi16() { |
3170 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
3171 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
3172 | let r = _mm_add_epi16(a, b); |
3173 | let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); |
3174 | assert_eq_m128i(r, e); |
3175 | } |
3176 | |
3177 | #[simd_test(enable = "sse2" )] |
3178 | unsafe fn test_mm_add_epi32() { |
3179 | let a = _mm_setr_epi32(0, 1, 2, 3); |
3180 | let b = _mm_setr_epi32(4, 5, 6, 7); |
3181 | let r = _mm_add_epi32(a, b); |
3182 | let e = _mm_setr_epi32(4, 6, 8, 10); |
3183 | assert_eq_m128i(r, e); |
3184 | } |
3185 | |
3186 | #[simd_test(enable = "sse2" )] |
3187 | unsafe fn test_mm_add_epi64() { |
3188 | let a = _mm_setr_epi64x(0, 1); |
3189 | let b = _mm_setr_epi64x(2, 3); |
3190 | let r = _mm_add_epi64(a, b); |
3191 | let e = _mm_setr_epi64x(2, 4); |
3192 | assert_eq_m128i(r, e); |
3193 | } |
3194 | |
3195 | #[simd_test(enable = "sse2" )] |
3196 | unsafe fn test_mm_adds_epi8() { |
3197 | let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3198 | #[rustfmt::skip] |
3199 | let b = _mm_setr_epi8( |
3200 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
3201 | ); |
3202 | let r = _mm_adds_epi8(a, b); |
3203 | #[rustfmt::skip] |
3204 | let e = _mm_setr_epi8( |
3205 | 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, |
3206 | ); |
3207 | assert_eq_m128i(r, e); |
3208 | } |
3209 | |
3210 | #[simd_test(enable = "sse2" )] |
3211 | unsafe fn test_mm_adds_epi8_saturate_positive() { |
3212 | let a = _mm_set1_epi8(0x7F); |
3213 | let b = _mm_set1_epi8(1); |
3214 | let r = _mm_adds_epi8(a, b); |
3215 | assert_eq_m128i(r, a); |
3216 | } |
3217 | |
3218 | #[simd_test(enable = "sse2" )] |
3219 | unsafe fn test_mm_adds_epi8_saturate_negative() { |
3220 | let a = _mm_set1_epi8(-0x80); |
3221 | let b = _mm_set1_epi8(-1); |
3222 | let r = _mm_adds_epi8(a, b); |
3223 | assert_eq_m128i(r, a); |
3224 | } |
3225 | |
3226 | #[simd_test(enable = "sse2" )] |
3227 | unsafe fn test_mm_adds_epi16() { |
3228 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
3229 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
3230 | let r = _mm_adds_epi16(a, b); |
3231 | let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); |
3232 | assert_eq_m128i(r, e); |
3233 | } |
3234 | |
3235 | #[simd_test(enable = "sse2" )] |
3236 | unsafe fn test_mm_adds_epi16_saturate_positive() { |
3237 | let a = _mm_set1_epi16(0x7FFF); |
3238 | let b = _mm_set1_epi16(1); |
3239 | let r = _mm_adds_epi16(a, b); |
3240 | assert_eq_m128i(r, a); |
3241 | } |
3242 | |
3243 | #[simd_test(enable = "sse2" )] |
3244 | unsafe fn test_mm_adds_epi16_saturate_negative() { |
3245 | let a = _mm_set1_epi16(-0x8000); |
3246 | let b = _mm_set1_epi16(-1); |
3247 | let r = _mm_adds_epi16(a, b); |
3248 | assert_eq_m128i(r, a); |
3249 | } |
3250 | |
3251 | #[simd_test(enable = "sse2" )] |
3252 | unsafe fn test_mm_adds_epu8() { |
3253 | let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3254 | #[rustfmt::skip] |
3255 | let b = _mm_setr_epi8( |
3256 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
3257 | ); |
3258 | let r = _mm_adds_epu8(a, b); |
3259 | #[rustfmt::skip] |
3260 | let e = _mm_setr_epi8( |
3261 | 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, |
3262 | ); |
3263 | assert_eq_m128i(r, e); |
3264 | } |
3265 | |
3266 | #[simd_test(enable = "sse2" )] |
3267 | unsafe fn test_mm_adds_epu8_saturate() { |
3268 | let a = _mm_set1_epi8(!0); |
3269 | let b = _mm_set1_epi8(1); |
3270 | let r = _mm_adds_epu8(a, b); |
3271 | assert_eq_m128i(r, a); |
3272 | } |
3273 | |
3274 | #[simd_test(enable = "sse2" )] |
3275 | unsafe fn test_mm_adds_epu16() { |
3276 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
3277 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
3278 | let r = _mm_adds_epu16(a, b); |
3279 | let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); |
3280 | assert_eq_m128i(r, e); |
3281 | } |
3282 | |
3283 | #[simd_test(enable = "sse2" )] |
3284 | unsafe fn test_mm_adds_epu16_saturate() { |
3285 | let a = _mm_set1_epi16(!0); |
3286 | let b = _mm_set1_epi16(1); |
3287 | let r = _mm_adds_epu16(a, b); |
3288 | assert_eq_m128i(r, a); |
3289 | } |
3290 | |
3291 | #[simd_test(enable = "sse2" )] |
3292 | unsafe fn test_mm_avg_epu8() { |
3293 | let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9)); |
3294 | let r = _mm_avg_epu8(a, b); |
3295 | assert_eq_m128i(r, _mm_set1_epi8(6)); |
3296 | } |
3297 | |
3298 | #[simd_test(enable = "sse2" )] |
3299 | unsafe fn test_mm_avg_epu16() { |
3300 | let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9)); |
3301 | let r = _mm_avg_epu16(a, b); |
3302 | assert_eq_m128i(r, _mm_set1_epi16(6)); |
3303 | } |
3304 | |
3305 | #[simd_test(enable = "sse2" )] |
3306 | unsafe fn test_mm_madd_epi16() { |
3307 | let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
3308 | let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16); |
3309 | let r = _mm_madd_epi16(a, b); |
3310 | let e = _mm_setr_epi32(29, 81, 149, 233); |
3311 | assert_eq_m128i(r, e); |
3312 | |
3313 | // Test large values. |
3314 | // MIN*MIN+MIN*MIN will overflow into i32::MIN. |
3315 | let a = _mm_setr_epi16( |
3316 | i16::MAX, |
3317 | i16::MAX, |
3318 | i16::MIN, |
3319 | i16::MIN, |
3320 | i16::MIN, |
3321 | i16::MAX, |
3322 | 0, |
3323 | 0, |
3324 | ); |
3325 | let b = _mm_setr_epi16( |
3326 | i16::MAX, |
3327 | i16::MAX, |
3328 | i16::MIN, |
3329 | i16::MIN, |
3330 | i16::MAX, |
3331 | i16::MIN, |
3332 | 0, |
3333 | 0, |
3334 | ); |
3335 | let r = _mm_madd_epi16(a, b); |
3336 | let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0); |
3337 | assert_eq_m128i(r, e); |
3338 | } |
3339 | |
3340 | #[simd_test(enable = "sse2" )] |
3341 | unsafe fn test_mm_max_epi16() { |
3342 | let a = _mm_set1_epi16(1); |
3343 | let b = _mm_set1_epi16(-1); |
3344 | let r = _mm_max_epi16(a, b); |
3345 | assert_eq_m128i(r, a); |
3346 | } |
3347 | |
3348 | #[simd_test(enable = "sse2" )] |
3349 | unsafe fn test_mm_max_epu8() { |
3350 | let a = _mm_set1_epi8(1); |
3351 | let b = _mm_set1_epi8(!0); |
3352 | let r = _mm_max_epu8(a, b); |
3353 | assert_eq_m128i(r, b); |
3354 | } |
3355 | |
3356 | #[simd_test(enable = "sse2" )] |
3357 | unsafe fn test_mm_min_epi16() { |
3358 | let a = _mm_set1_epi16(1); |
3359 | let b = _mm_set1_epi16(-1); |
3360 | let r = _mm_min_epi16(a, b); |
3361 | assert_eq_m128i(r, b); |
3362 | } |
3363 | |
3364 | #[simd_test(enable = "sse2" )] |
3365 | unsafe fn test_mm_min_epu8() { |
3366 | let a = _mm_set1_epi8(1); |
3367 | let b = _mm_set1_epi8(!0); |
3368 | let r = _mm_min_epu8(a, b); |
3369 | assert_eq_m128i(r, a); |
3370 | } |
3371 | |
3372 | #[simd_test(enable = "sse2" )] |
3373 | unsafe fn test_mm_mulhi_epi16() { |
3374 | let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001)); |
3375 | let r = _mm_mulhi_epi16(a, b); |
3376 | assert_eq_m128i(r, _mm_set1_epi16(-16)); |
3377 | } |
3378 | |
3379 | #[simd_test(enable = "sse2" )] |
3380 | unsafe fn test_mm_mulhi_epu16() { |
3381 | let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001)); |
3382 | let r = _mm_mulhi_epu16(a, b); |
3383 | assert_eq_m128i(r, _mm_set1_epi16(15)); |
3384 | } |
3385 | |
3386 | #[simd_test(enable = "sse2" )] |
3387 | unsafe fn test_mm_mullo_epi16() { |
3388 | let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001)); |
3389 | let r = _mm_mullo_epi16(a, b); |
3390 | assert_eq_m128i(r, _mm_set1_epi16(-17960)); |
3391 | } |
3392 | |
3393 | #[simd_test(enable = "sse2" )] |
3394 | unsafe fn test_mm_mul_epu32() { |
3395 | let a = _mm_setr_epi64x(1_000_000_000, 1 << 34); |
3396 | let b = _mm_setr_epi64x(1_000_000_000, 1 << 35); |
3397 | let r = _mm_mul_epu32(a, b); |
3398 | let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0); |
3399 | assert_eq_m128i(r, e); |
3400 | } |
3401 | |
3402 | #[simd_test(enable = "sse2" )] |
3403 | unsafe fn test_mm_sad_epu8() { |
3404 | #[rustfmt::skip] |
3405 | let a = _mm_setr_epi8( |
3406 | 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8, |
3407 | 1, 2, 3, 4, |
3408 | 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8, |
3409 | 1, 2, 3, 4, |
3410 | ); |
3411 | let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2); |
3412 | let r = _mm_sad_epu8(a, b); |
3413 | let e = _mm_setr_epi64x(1020, 614); |
3414 | assert_eq_m128i(r, e); |
3415 | } |
3416 | |
3417 | #[simd_test(enable = "sse2" )] |
3418 | unsafe fn test_mm_sub_epi8() { |
3419 | let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6)); |
3420 | let r = _mm_sub_epi8(a, b); |
3421 | assert_eq_m128i(r, _mm_set1_epi8(-1)); |
3422 | } |
3423 | |
3424 | #[simd_test(enable = "sse2" )] |
3425 | unsafe fn test_mm_sub_epi16() { |
3426 | let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6)); |
3427 | let r = _mm_sub_epi16(a, b); |
3428 | assert_eq_m128i(r, _mm_set1_epi16(-1)); |
3429 | } |
3430 | |
3431 | #[simd_test(enable = "sse2" )] |
3432 | unsafe fn test_mm_sub_epi32() { |
3433 | let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6)); |
3434 | let r = _mm_sub_epi32(a, b); |
3435 | assert_eq_m128i(r, _mm_set1_epi32(-1)); |
3436 | } |
3437 | |
3438 | #[simd_test(enable = "sse2" )] |
3439 | unsafe fn test_mm_sub_epi64() { |
3440 | let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6)); |
3441 | let r = _mm_sub_epi64(a, b); |
3442 | assert_eq_m128i(r, _mm_set1_epi64x(-1)); |
3443 | } |
3444 | |
3445 | #[simd_test(enable = "sse2" )] |
3446 | unsafe fn test_mm_subs_epi8() { |
3447 | let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2)); |
3448 | let r = _mm_subs_epi8(a, b); |
3449 | assert_eq_m128i(r, _mm_set1_epi8(3)); |
3450 | } |
3451 | |
3452 | #[simd_test(enable = "sse2" )] |
3453 | unsafe fn test_mm_subs_epi8_saturate_positive() { |
3454 | let a = _mm_set1_epi8(0x7F); |
3455 | let b = _mm_set1_epi8(-1); |
3456 | let r = _mm_subs_epi8(a, b); |
3457 | assert_eq_m128i(r, a); |
3458 | } |
3459 | |
3460 | #[simd_test(enable = "sse2" )] |
3461 | unsafe fn test_mm_subs_epi8_saturate_negative() { |
3462 | let a = _mm_set1_epi8(-0x80); |
3463 | let b = _mm_set1_epi8(1); |
3464 | let r = _mm_subs_epi8(a, b); |
3465 | assert_eq_m128i(r, a); |
3466 | } |
3467 | |
3468 | #[simd_test(enable = "sse2" )] |
3469 | unsafe fn test_mm_subs_epi16() { |
3470 | let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2)); |
3471 | let r = _mm_subs_epi16(a, b); |
3472 | assert_eq_m128i(r, _mm_set1_epi16(3)); |
3473 | } |
3474 | |
3475 | #[simd_test(enable = "sse2" )] |
3476 | unsafe fn test_mm_subs_epi16_saturate_positive() { |
3477 | let a = _mm_set1_epi16(0x7FFF); |
3478 | let b = _mm_set1_epi16(-1); |
3479 | let r = _mm_subs_epi16(a, b); |
3480 | assert_eq_m128i(r, a); |
3481 | } |
3482 | |
3483 | #[simd_test(enable = "sse2" )] |
3484 | unsafe fn test_mm_subs_epi16_saturate_negative() { |
3485 | let a = _mm_set1_epi16(-0x8000); |
3486 | let b = _mm_set1_epi16(1); |
3487 | let r = _mm_subs_epi16(a, b); |
3488 | assert_eq_m128i(r, a); |
3489 | } |
3490 | |
3491 | #[simd_test(enable = "sse2" )] |
3492 | unsafe fn test_mm_subs_epu8() { |
3493 | let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2)); |
3494 | let r = _mm_subs_epu8(a, b); |
3495 | assert_eq_m128i(r, _mm_set1_epi8(3)); |
3496 | } |
3497 | |
3498 | #[simd_test(enable = "sse2" )] |
3499 | unsafe fn test_mm_subs_epu8_saturate() { |
3500 | let a = _mm_set1_epi8(0); |
3501 | let b = _mm_set1_epi8(1); |
3502 | let r = _mm_subs_epu8(a, b); |
3503 | assert_eq_m128i(r, a); |
3504 | } |
3505 | |
3506 | #[simd_test(enable = "sse2" )] |
3507 | unsafe fn test_mm_subs_epu16() { |
3508 | let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2)); |
3509 | let r = _mm_subs_epu16(a, b); |
3510 | assert_eq_m128i(r, _mm_set1_epi16(3)); |
3511 | } |
3512 | |
3513 | #[simd_test(enable = "sse2" )] |
3514 | unsafe fn test_mm_subs_epu16_saturate() { |
3515 | let a = _mm_set1_epi16(0); |
3516 | let b = _mm_set1_epi16(1); |
3517 | let r = _mm_subs_epu16(a, b); |
3518 | assert_eq_m128i(r, a); |
3519 | } |
3520 | |
3521 | #[simd_test(enable = "sse2" )] |
3522 | unsafe fn test_mm_slli_si128() { |
3523 | #[rustfmt::skip] |
3524 | let a = _mm_setr_epi8( |
3525 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
3526 | ); |
3527 | let r = _mm_slli_si128::<1>(a); |
3528 | let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3529 | assert_eq_m128i(r, e); |
3530 | |
3531 | #[rustfmt::skip] |
3532 | let a = _mm_setr_epi8( |
3533 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
3534 | ); |
3535 | let r = _mm_slli_si128::<15>(a); |
3536 | let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); |
3537 | assert_eq_m128i(r, e); |
3538 | |
3539 | #[rustfmt::skip] |
3540 | let a = _mm_setr_epi8( |
3541 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
3542 | ); |
3543 | let r = _mm_slli_si128::<16>(a); |
3544 | assert_eq_m128i(r, _mm_set1_epi8(0)); |
3545 | } |
3546 | |
3547 | #[simd_test(enable = "sse2" )] |
3548 | unsafe fn test_mm_slli_epi16() { |
3549 | let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); |
3550 | let r = _mm_slli_epi16::<4>(a); |
3551 | assert_eq_m128i( |
3552 | r, |
3553 | _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0), |
3554 | ); |
3555 | let r = _mm_slli_epi16::<16>(a); |
3556 | assert_eq_m128i(r, _mm_set1_epi16(0)); |
3557 | } |
3558 | |
3559 | #[simd_test(enable = "sse2" )] |
3560 | unsafe fn test_mm_sll_epi16() { |
3561 | let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); |
3562 | let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4)); |
3563 | assert_eq_m128i( |
3564 | r, |
3565 | _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0), |
3566 | ); |
3567 | let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0)); |
3568 | assert_eq_m128i(r, a); |
3569 | let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16)); |
3570 | assert_eq_m128i(r, _mm_set1_epi16(0)); |
3571 | let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX)); |
3572 | assert_eq_m128i(r, _mm_set1_epi16(0)); |
3573 | } |
3574 | |
3575 | #[simd_test(enable = "sse2" )] |
3576 | unsafe fn test_mm_slli_epi32() { |
3577 | let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); |
3578 | let r = _mm_slli_epi32::<4>(a); |
3579 | assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0)); |
3580 | let r = _mm_slli_epi32::<32>(a); |
3581 | assert_eq_m128i(r, _mm_set1_epi32(0)); |
3582 | } |
3583 | |
3584 | #[simd_test(enable = "sse2" )] |
3585 | unsafe fn test_mm_sll_epi32() { |
3586 | let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); |
3587 | let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4)); |
3588 | assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0)); |
3589 | let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0)); |
3590 | assert_eq_m128i(r, a); |
3591 | let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32)); |
3592 | assert_eq_m128i(r, _mm_set1_epi32(0)); |
3593 | let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX)); |
3594 | assert_eq_m128i(r, _mm_set1_epi32(0)); |
3595 | } |
3596 | |
3597 | #[simd_test(enable = "sse2" )] |
3598 | unsafe fn test_mm_slli_epi64() { |
3599 | let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); |
3600 | let r = _mm_slli_epi64::<4>(a); |
3601 | assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0)); |
3602 | let r = _mm_slli_epi64::<64>(a); |
3603 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3604 | } |
3605 | |
3606 | #[simd_test(enable = "sse2" )] |
3607 | unsafe fn test_mm_sll_epi64() { |
3608 | let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); |
3609 | let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4)); |
3610 | assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0)); |
3611 | let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0)); |
3612 | assert_eq_m128i(r, a); |
3613 | let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64)); |
3614 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3615 | let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX)); |
3616 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3617 | } |
3618 | |
3619 | #[simd_test(enable = "sse2" )] |
3620 | unsafe fn test_mm_srai_epi16() { |
3621 | let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); |
3622 | let r = _mm_srai_epi16::<4>(a); |
3623 | assert_eq_m128i( |
3624 | r, |
3625 | _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10), |
3626 | ); |
3627 | let r = _mm_srai_epi16::<16>(a); |
3628 | assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1)); |
3629 | } |
3630 | |
3631 | #[simd_test(enable = "sse2" )] |
3632 | unsafe fn test_mm_sra_epi16() { |
3633 | let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); |
3634 | let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4)); |
3635 | assert_eq_m128i( |
3636 | r, |
3637 | _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10), |
3638 | ); |
3639 | let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0)); |
3640 | assert_eq_m128i(r, a); |
3641 | let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16)); |
3642 | assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1)); |
3643 | let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX)); |
3644 | assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1)); |
3645 | } |
3646 | |
3647 | #[simd_test(enable = "sse2" )] |
3648 | unsafe fn test_mm_srai_epi32() { |
3649 | let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); |
3650 | let r = _mm_srai_epi32::<4>(a); |
3651 | assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000)); |
3652 | let r = _mm_srai_epi32::<32>(a); |
3653 | assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1)); |
3654 | } |
3655 | |
3656 | #[simd_test(enable = "sse2" )] |
3657 | unsafe fn test_mm_sra_epi32() { |
3658 | let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); |
3659 | let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4)); |
3660 | assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000)); |
3661 | let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0)); |
3662 | assert_eq_m128i(r, a); |
3663 | let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32)); |
3664 | assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1)); |
3665 | let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX)); |
3666 | assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1)); |
3667 | } |
3668 | |
3669 | #[simd_test(enable = "sse2" )] |
3670 | unsafe fn test_mm_srli_si128() { |
3671 | #[rustfmt::skip] |
3672 | let a = _mm_setr_epi8( |
3673 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
3674 | ); |
3675 | let r = _mm_srli_si128::<1>(a); |
3676 | #[rustfmt::skip] |
3677 | let e = _mm_setr_epi8( |
3678 | 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, |
3679 | ); |
3680 | assert_eq_m128i(r, e); |
3681 | |
3682 | #[rustfmt::skip] |
3683 | let a = _mm_setr_epi8( |
3684 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
3685 | ); |
3686 | let r = _mm_srli_si128::<15>(a); |
3687 | let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
3688 | assert_eq_m128i(r, e); |
3689 | |
3690 | #[rustfmt::skip] |
3691 | let a = _mm_setr_epi8( |
3692 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
3693 | ); |
3694 | let r = _mm_srli_si128::<16>(a); |
3695 | assert_eq_m128i(r, _mm_set1_epi8(0)); |
3696 | } |
3697 | |
3698 | #[simd_test(enable = "sse2" )] |
3699 | unsafe fn test_mm_srli_epi16() { |
3700 | let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); |
3701 | let r = _mm_srli_epi16::<4>(a); |
3702 | assert_eq_m128i( |
3703 | r, |
3704 | _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0), |
3705 | ); |
3706 | let r = _mm_srli_epi16::<16>(a); |
3707 | assert_eq_m128i(r, _mm_set1_epi16(0)); |
3708 | } |
3709 | |
3710 | #[simd_test(enable = "sse2" )] |
3711 | unsafe fn test_mm_srl_epi16() { |
3712 | let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); |
3713 | let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4)); |
3714 | assert_eq_m128i( |
3715 | r, |
3716 | _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0), |
3717 | ); |
3718 | let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0)); |
3719 | assert_eq_m128i(r, a); |
3720 | let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16)); |
3721 | assert_eq_m128i(r, _mm_set1_epi16(0)); |
3722 | let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX)); |
3723 | assert_eq_m128i(r, _mm_set1_epi16(0)); |
3724 | } |
3725 | |
3726 | #[simd_test(enable = "sse2" )] |
3727 | unsafe fn test_mm_srli_epi32() { |
3728 | let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); |
3729 | let r = _mm_srli_epi32::<4>(a); |
3730 | assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000)); |
3731 | let r = _mm_srli_epi32::<32>(a); |
3732 | assert_eq_m128i(r, _mm_set1_epi32(0)); |
3733 | } |
3734 | |
3735 | #[simd_test(enable = "sse2" )] |
3736 | unsafe fn test_mm_srl_epi32() { |
3737 | let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); |
3738 | let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4)); |
3739 | assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000)); |
3740 | let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0)); |
3741 | assert_eq_m128i(r, a); |
3742 | let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32)); |
3743 | assert_eq_m128i(r, _mm_set1_epi32(0)); |
3744 | let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX)); |
3745 | assert_eq_m128i(r, _mm_set1_epi32(0)); |
3746 | } |
3747 | |
3748 | #[simd_test(enable = "sse2" )] |
3749 | unsafe fn test_mm_srli_epi64() { |
3750 | let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); |
3751 | let r = _mm_srli_epi64::<4>(a); |
3752 | assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000)); |
3753 | let r = _mm_srli_epi64::<64>(a); |
3754 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3755 | } |
3756 | |
3757 | #[simd_test(enable = "sse2" )] |
3758 | unsafe fn test_mm_srl_epi64() { |
3759 | let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); |
3760 | let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4)); |
3761 | assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000)); |
3762 | let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0)); |
3763 | assert_eq_m128i(r, a); |
3764 | let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64)); |
3765 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3766 | let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX)); |
3767 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3768 | } |
3769 | |
3770 | #[simd_test(enable = "sse2" )] |
3771 | unsafe fn test_mm_and_si128() { |
3772 | let a = _mm_set1_epi8(5); |
3773 | let b = _mm_set1_epi8(3); |
3774 | let r = _mm_and_si128(a, b); |
3775 | assert_eq_m128i(r, _mm_set1_epi8(1)); |
3776 | } |
3777 | |
3778 | #[simd_test(enable = "sse2" )] |
3779 | unsafe fn test_mm_andnot_si128() { |
3780 | let a = _mm_set1_epi8(5); |
3781 | let b = _mm_set1_epi8(3); |
3782 | let r = _mm_andnot_si128(a, b); |
3783 | assert_eq_m128i(r, _mm_set1_epi8(2)); |
3784 | } |
3785 | |
3786 | #[simd_test(enable = "sse2" )] |
3787 | unsafe fn test_mm_or_si128() { |
3788 | let a = _mm_set1_epi8(5); |
3789 | let b = _mm_set1_epi8(3); |
3790 | let r = _mm_or_si128(a, b); |
3791 | assert_eq_m128i(r, _mm_set1_epi8(7)); |
3792 | } |
3793 | |
3794 | #[simd_test(enable = "sse2" )] |
3795 | unsafe fn test_mm_xor_si128() { |
3796 | let a = _mm_set1_epi8(5); |
3797 | let b = _mm_set1_epi8(3); |
3798 | let r = _mm_xor_si128(a, b); |
3799 | assert_eq_m128i(r, _mm_set1_epi8(6)); |
3800 | } |
3801 | |
3802 | #[simd_test(enable = "sse2" )] |
3803 | unsafe fn test_mm_cmpeq_epi8() { |
3804 | let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3805 | let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
3806 | let r = _mm_cmpeq_epi8(a, b); |
3807 | #[rustfmt::skip] |
3808 | assert_eq_m128i( |
3809 | r, |
3810 | _mm_setr_epi8( |
3811 | 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
3812 | ) |
3813 | ); |
3814 | } |
3815 | |
3816 | #[simd_test(enable = "sse2" )] |
3817 | unsafe fn test_mm_cmpeq_epi16() { |
3818 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
3819 | let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0); |
3820 | let r = _mm_cmpeq_epi16(a, b); |
3821 | assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0)); |
3822 | } |
3823 | |
3824 | #[simd_test(enable = "sse2" )] |
3825 | unsafe fn test_mm_cmpeq_epi32() { |
3826 | let a = _mm_setr_epi32(0, 1, 2, 3); |
3827 | let b = _mm_setr_epi32(3, 2, 2, 0); |
3828 | let r = _mm_cmpeq_epi32(a, b); |
3829 | assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0)); |
3830 | } |
3831 | |
3832 | #[simd_test(enable = "sse2" )] |
3833 | unsafe fn test_mm_cmpgt_epi8() { |
3834 | let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
3835 | let b = _mm_set1_epi8(0); |
3836 | let r = _mm_cmpgt_epi8(a, b); |
3837 | let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
3838 | assert_eq_m128i(r, e); |
3839 | } |
3840 | |
3841 | #[simd_test(enable = "sse2" )] |
3842 | unsafe fn test_mm_cmpgt_epi16() { |
3843 | let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0); |
3844 | let b = _mm_set1_epi16(0); |
3845 | let r = _mm_cmpgt_epi16(a, b); |
3846 | let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0); |
3847 | assert_eq_m128i(r, e); |
3848 | } |
3849 | |
3850 | #[simd_test(enable = "sse2" )] |
3851 | unsafe fn test_mm_cmpgt_epi32() { |
3852 | let a = _mm_set_epi32(5, 0, 0, 0); |
3853 | let b = _mm_set1_epi32(0); |
3854 | let r = _mm_cmpgt_epi32(a, b); |
3855 | assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0)); |
3856 | } |
3857 | |
3858 | #[simd_test(enable = "sse2" )] |
3859 | unsafe fn test_mm_cmplt_epi8() { |
3860 | let a = _mm_set1_epi8(0); |
3861 | let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
3862 | let r = _mm_cmplt_epi8(a, b); |
3863 | let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
3864 | assert_eq_m128i(r, e); |
3865 | } |
3866 | |
3867 | #[simd_test(enable = "sse2" )] |
3868 | unsafe fn test_mm_cmplt_epi16() { |
3869 | let a = _mm_set1_epi16(0); |
3870 | let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0); |
3871 | let r = _mm_cmplt_epi16(a, b); |
3872 | let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0); |
3873 | assert_eq_m128i(r, e); |
3874 | } |
3875 | |
3876 | #[simd_test(enable = "sse2" )] |
3877 | unsafe fn test_mm_cmplt_epi32() { |
3878 | let a = _mm_set1_epi32(0); |
3879 | let b = _mm_set_epi32(5, 0, 0, 0); |
3880 | let r = _mm_cmplt_epi32(a, b); |
3881 | assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0)); |
3882 | } |
3883 | |
3884 | #[simd_test(enable = "sse2" )] |
3885 | unsafe fn test_mm_cvtepi32_pd() { |
3886 | let a = _mm_set_epi32(35, 25, 15, 5); |
3887 | let r = _mm_cvtepi32_pd(a); |
3888 | assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0)); |
3889 | } |
3890 | |
3891 | #[simd_test(enable = "sse2" )] |
3892 | unsafe fn test_mm_cvtsi32_sd() { |
3893 | let a = _mm_set1_pd(3.5); |
3894 | let r = _mm_cvtsi32_sd(a, 5); |
3895 | assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5)); |
3896 | } |
3897 | |
3898 | #[simd_test(enable = "sse2" )] |
3899 | unsafe fn test_mm_cvtepi32_ps() { |
3900 | let a = _mm_setr_epi32(1, 2, 3, 4); |
3901 | let r = _mm_cvtepi32_ps(a); |
3902 | assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0)); |
3903 | } |
3904 | |
3905 | #[simd_test(enable = "sse2" )] |
3906 | unsafe fn test_mm_cvtps_epi32() { |
3907 | let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
3908 | let r = _mm_cvtps_epi32(a); |
3909 | assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4)); |
3910 | } |
3911 | |
3912 | #[simd_test(enable = "sse2" )] |
3913 | unsafe fn test_mm_cvtsi32_si128() { |
3914 | let r = _mm_cvtsi32_si128(5); |
3915 | assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0)); |
3916 | } |
3917 | |
3918 | #[simd_test(enable = "sse2" )] |
3919 | unsafe fn test_mm_cvtsi128_si32() { |
3920 | let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0)); |
3921 | assert_eq!(r, 5); |
3922 | } |
3923 | |
3924 | #[simd_test(enable = "sse2" )] |
3925 | unsafe fn test_mm_set_epi64x() { |
3926 | let r = _mm_set_epi64x(0, 1); |
3927 | assert_eq_m128i(r, _mm_setr_epi64x(1, 0)); |
3928 | } |
3929 | |
3930 | #[simd_test(enable = "sse2" )] |
3931 | unsafe fn test_mm_set_epi32() { |
3932 | let r = _mm_set_epi32(0, 1, 2, 3); |
3933 | assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0)); |
3934 | } |
3935 | |
3936 | #[simd_test(enable = "sse2" )] |
3937 | unsafe fn test_mm_set_epi16() { |
3938 | let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
3939 | assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0)); |
3940 | } |
3941 | |
3942 | #[simd_test(enable = "sse2" )] |
3943 | unsafe fn test_mm_set_epi8() { |
3944 | #[rustfmt::skip] |
3945 | let r = _mm_set_epi8( |
3946 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
3947 | ); |
3948 | #[rustfmt::skip] |
3949 | let e = _mm_setr_epi8( |
3950 | 15, 14, 13, 12, 11, 10, 9, 8, |
3951 | 7, 6, 5, 4, 3, 2, 1, 0, |
3952 | ); |
3953 | assert_eq_m128i(r, e); |
3954 | } |
3955 | |
3956 | #[simd_test(enable = "sse2" )] |
3957 | unsafe fn test_mm_set1_epi64x() { |
        let r = _mm_set1_epi64x(1);
        assert_eq_m128i(r, _mm_set_epi64x(1, 1));
3960 | } |
3961 | |
3962 | #[simd_test(enable = "sse2" )] |
3963 | unsafe fn test_mm_set1_epi32() { |
        let r = _mm_set1_epi32(1);
        assert_eq_m128i(r, _mm_setr_epi32(1, 1, 1, 1));
3966 | } |
3967 | |
3968 | #[simd_test(enable = "sse2" )] |
3969 | unsafe fn test_mm_set1_epi16() { |
        let r = _mm_set1_epi16(1);
        assert_eq_m128i(r, _mm_setr_epi16(1, 1, 1, 1, 1, 1, 1, 1));
3972 | } |
3973 | |
3974 | #[simd_test(enable = "sse2" )] |
3975 | unsafe fn test_mm_set1_epi8() { |
        let r = _mm_set1_epi8(1);
        assert_eq_m128i(r, _mm_setr_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1));
3978 | } |
3979 | |
3980 | #[simd_test(enable = "sse2" )] |
3981 | unsafe fn test_mm_setr_epi32() { |
        let r = _mm_setr_epi32(0, 1, 2, 3);
        assert_eq_m128i(r, _mm_set_epi32(3, 2, 1, 0));
3984 | } |
3985 | |
3986 | #[simd_test(enable = "sse2" )] |
3987 | unsafe fn test_mm_setr_epi16() { |
        let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0));
3990 | } |
3991 | |
3992 | #[simd_test(enable = "sse2" )] |
3993 | unsafe fn test_mm_setr_epi8() { |
3994 | #[rustfmt::skip] |
3995 | let r = _mm_setr_epi8( |
3996 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
3997 | ); |
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            15, 14, 13, 12, 11, 10, 9, 8,
            7, 6, 5, 4, 3, 2, 1, 0,
        );
4003 | assert_eq_m128i(r, e); |
4004 | } |
4005 | |
4006 | #[simd_test(enable = "sse2" )] |
4007 | unsafe fn test_mm_setzero_si128() { |
4008 | let r = _mm_setzero_si128(); |
4009 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
4010 | } |
4011 | |
4012 | #[simd_test(enable = "sse2" )] |
4013 | unsafe fn test_mm_loadl_epi64() { |
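        // Loads 64 bits into the low half of the result and zeroes the upper half.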
4014 | let a = _mm_setr_epi64x(6, 5); |
4015 | let r = _mm_loadl_epi64(ptr::addr_of!(a)); |
4016 | assert_eq_m128i(r, _mm_setr_epi64x(6, 0)); |
4017 | } |
4018 | |
4019 | #[simd_test(enable = "sse2" )] |
4020 | unsafe fn test_mm_load_si128() { |
4021 | let a = _mm_set_epi64x(5, 6); |
4022 | let r = _mm_load_si128(ptr::addr_of!(a) as *const _); |
4023 | assert_eq_m128i(a, r); |
4024 | } |
4025 | |
4026 | #[simd_test(enable = "sse2" )] |
4027 | unsafe fn test_mm_loadu_si128() { |
4028 | let a = _mm_set_epi64x(5, 6); |
4029 | let r = _mm_loadu_si128(ptr::addr_of!(a) as *const _); |
4030 | assert_eq_m128i(a, r); |
4031 | } |
4032 | |
4033 | #[simd_test(enable = "sse2" )] |
    // Miri cannot support this until it is clear how non-temporal stores fit
    // into the Rust memory model.
    #[cfg_attr(miri, ignore)]
4037 | unsafe fn test_mm_maskmoveu_si128() { |
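        // Only bytes whose mask byte has its most significant bit set are
        // stored; the destination bytes for all other lanes are left untouched.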
4038 | let a = _mm_set1_epi8(9); |
4039 | #[rustfmt::skip] |
4040 | let mask = _mm_set_epi8( |
4041 | 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0, |
4042 | 0, 0, 0, 0, 0, 0, 0, 0, |
4043 | ); |
4044 | let mut r = _mm_set1_epi8(0); |
4045 | _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8); |
4046 | let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
4047 | assert_eq_m128i(r, e); |
4048 | } |
4049 | |
4050 | #[simd_test(enable = "sse2" )] |
4051 | unsafe fn test_mm_store_si128() { |
4052 | let a = _mm_set1_epi8(9); |
4053 | let mut r = _mm_set1_epi8(0); |
4054 | _mm_store_si128(&mut r, a); |
4055 | assert_eq_m128i(r, a); |
4056 | } |
4057 | |
4058 | #[simd_test(enable = "sse2" )] |
4059 | unsafe fn test_mm_storeu_si128() { |
4060 | let a = _mm_set1_epi8(9); |
4061 | let mut r = _mm_set1_epi8(0); |
4062 | _mm_storeu_si128(&mut r, a); |
4063 | assert_eq_m128i(r, a); |
4064 | } |
4065 | |
4066 | #[simd_test(enable = "sse2" )] |
4067 | unsafe fn test_mm_storel_epi64() { |
4068 | let a = _mm_setr_epi64x(2, 9); |
4069 | let mut r = _mm_set1_epi8(0); |
4070 | _mm_storel_epi64(&mut r, a); |
4071 | assert_eq_m128i(r, _mm_setr_epi64x(2, 0)); |
4072 | } |
4073 | |
4074 | #[simd_test(enable = "sse2" )] |
    // Miri cannot support this until it is clear how non-temporal stores fit
    // into the Rust memory model.
    #[cfg_attr(miri, ignore)]
4078 | unsafe fn test_mm_stream_si128() { |
4079 | let a = _mm_setr_epi32(1, 2, 3, 4); |
4080 | let mut r = _mm_undefined_si128(); |
4081 | _mm_stream_si128(ptr::addr_of_mut!(r), a); |
4082 | assert_eq_m128i(r, a); |
4083 | } |
4084 | |
4085 | #[simd_test(enable = "sse2" )] |
    // Miri cannot support this until it is clear how non-temporal stores fit
    // into the Rust memory model.
    #[cfg_attr(miri, ignore)]
4089 | unsafe fn test_mm_stream_si32() { |
4090 | let a: i32 = 7; |
4091 | let mut mem = boxed::Box::<i32>::new(-1); |
4092 | _mm_stream_si32(ptr::addr_of_mut!(*mem), a); |
4093 | assert_eq!(a, *mem); |
4094 | } |
4095 | |
4096 | #[simd_test(enable = "sse2" )] |
4097 | unsafe fn test_mm_move_epi64() { |
4098 | let a = _mm_setr_epi64x(5, 6); |
4099 | let r = _mm_move_epi64(a); |
4100 | assert_eq_m128i(r, _mm_setr_epi64x(5, 0)); |
4101 | } |
4102 | |
4103 | #[simd_test(enable = "sse2" )] |
4104 | unsafe fn test_mm_packs_epi16() { |
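        // Packs with signed saturation: 0x80 and -0x81 are just outside the
        // i8 range, so they clamp to 0x7F and -0x80. `a` fills lanes 0..8,
        // `b` fills lanes 8..16.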
4105 | let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0); |
4106 | let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80); |
4107 | let r = _mm_packs_epi16(a, b); |
4108 | #[rustfmt::skip] |
4109 | assert_eq_m128i( |
4110 | r, |
4111 | _mm_setr_epi8( |
4112 | 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F |
4113 | ) |
4114 | ); |
4115 | } |
4116 | |
4117 | #[simd_test(enable = "sse2" )] |
4118 | unsafe fn test_mm_packs_epi32() { |
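        // Same saturating pack, but i32 -> i16: 0x8000 and -0x8001 clamp to
        // i16::MAX and i16::MIN.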
4119 | let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0); |
4120 | let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000); |
4121 | let r = _mm_packs_epi32(a, b); |
4122 | assert_eq_m128i( |
4123 | r, |
4124 | _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF), |
4125 | ); |
4126 | } |
4127 | |
4128 | #[simd_test(enable = "sse2" )] |
4129 | unsafe fn test_mm_packus_epi16() { |
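        // Packs with *unsigned* saturation: -1 clamps to 0 and 0x100 clamps
        // to 0xFF (`!0` as an i8).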
4130 | let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0); |
4131 | let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100); |
4132 | let r = _mm_packus_epi16(a, b); |
4133 | assert_eq_m128i( |
4134 | r, |
4135 | _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0), |
4136 | ); |
4137 | } |
4138 | |
4139 | #[simd_test(enable = "sse2" )] |
4140 | unsafe fn test_mm_extract_epi16() { |
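        // The extracted lane is zero-extended into the i32 result, so the -1
        // in lane 0 reads back as 0xFFFF rather than -1.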
4141 | let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7); |
4142 | let r1 = _mm_extract_epi16::<0>(a); |
4143 | let r2 = _mm_extract_epi16::<3>(a); |
4144 | assert_eq!(r1, 0xFFFF); |
4145 | assert_eq!(r2, 3); |
4146 | } |
4147 | |
4148 | #[simd_test(enable = "sse2" )] |
4149 | unsafe fn test_mm_insert_epi16() { |
4150 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
4151 | let r = _mm_insert_epi16::<0>(a, 9); |
4152 | let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7); |
4153 | assert_eq_m128i(r, e); |
4154 | } |
4155 | |
4156 | #[simd_test(enable = "sse2" )] |
4157 | unsafe fn test_mm_movemask_epi8() { |
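        // Collects the most significant bit of each byte; byte 0 maps to
        // bit 0 of the result.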
4158 | #[rustfmt::skip] |
4159 | let a = _mm_setr_epi8( |
4160 | 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01, |
4161 | 0b0101, 0b1111_0000u8 as i8, 0, 0, |
4162 | 0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101, |
4163 | 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, |
4164 | ); |
4165 | let r = _mm_movemask_epi8(a); |
4166 | assert_eq!(r, 0b10100110_00100101); |
4167 | } |
4168 | |
4169 | #[simd_test(enable = "sse2" )] |
4170 | unsafe fn test_mm_shuffle_epi32() { |
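        // The immediate encodes two bits per destination lane, lowest bits
        // first: 0b00_01_01_11 selects source lanes 3, 1, 1, 0.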
4171 | let a = _mm_setr_epi32(5, 10, 15, 20); |
4172 | let r = _mm_shuffle_epi32::<0b00_01_01_11>(a); |
4173 | let e = _mm_setr_epi32(20, 10, 10, 5); |
4174 | assert_eq_m128i(r, e); |
4175 | } |
4176 | |
4177 | #[simd_test(enable = "sse2" )] |
4178 | unsafe fn test_mm_shufflehi_epi16() { |
4179 | let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20); |
4180 | let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a); |
4181 | let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5); |
4182 | assert_eq_m128i(r, e); |
4183 | } |
4184 | |
4185 | #[simd_test(enable = "sse2" )] |
4186 | unsafe fn test_mm_shufflelo_epi16() { |
4187 | let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4); |
4188 | let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a); |
4189 | let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4); |
4190 | assert_eq_m128i(r, e); |
4191 | } |
4192 | |
4193 | #[simd_test(enable = "sse2" )] |
4194 | unsafe fn test_mm_unpackhi_epi8() { |
4195 | #[rustfmt::skip] |
4196 | let a = _mm_setr_epi8( |
4197 | 0, 1, 2, 3, 4, 5, 6, 7, |
4198 | 8, 9, 10, 11, 12, 13, 14, 15, |
4199 | ); |
4200 | #[rustfmt::skip] |
4201 | let b = _mm_setr_epi8( |
4202 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
4203 | ); |
4204 | let r = _mm_unpackhi_epi8(a, b); |
4205 | #[rustfmt::skip] |
4206 | let e = _mm_setr_epi8( |
4207 | 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, |
4208 | ); |
4209 | assert_eq_m128i(r, e); |
4210 | } |
4211 | |
4212 | #[simd_test(enable = "sse2" )] |
4213 | unsafe fn test_mm_unpackhi_epi16() { |
4214 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
4215 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
4216 | let r = _mm_unpackhi_epi16(a, b); |
4217 | let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15); |
4218 | assert_eq_m128i(r, e); |
4219 | } |
4220 | |
4221 | #[simd_test(enable = "sse2" )] |
4222 | unsafe fn test_mm_unpackhi_epi32() { |
4223 | let a = _mm_setr_epi32(0, 1, 2, 3); |
4224 | let b = _mm_setr_epi32(4, 5, 6, 7); |
4225 | let r = _mm_unpackhi_epi32(a, b); |
4226 | let e = _mm_setr_epi32(2, 6, 3, 7); |
4227 | assert_eq_m128i(r, e); |
4228 | } |
4229 | |
4230 | #[simd_test(enable = "sse2" )] |
4231 | unsafe fn test_mm_unpackhi_epi64() { |
4232 | let a = _mm_setr_epi64x(0, 1); |
4233 | let b = _mm_setr_epi64x(2, 3); |
4234 | let r = _mm_unpackhi_epi64(a, b); |
4235 | let e = _mm_setr_epi64x(1, 3); |
4236 | assert_eq_m128i(r, e); |
4237 | } |
4238 | |
4239 | #[simd_test(enable = "sse2" )] |
4240 | unsafe fn test_mm_unpacklo_epi8() { |
4241 | #[rustfmt::skip] |
4242 | let a = _mm_setr_epi8( |
4243 | 0, 1, 2, 3, 4, 5, 6, 7, |
4244 | 8, 9, 10, 11, 12, 13, 14, 15, |
4245 | ); |
4246 | #[rustfmt::skip] |
4247 | let b = _mm_setr_epi8( |
4248 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
4249 | ); |
4250 | let r = _mm_unpacklo_epi8(a, b); |
4251 | #[rustfmt::skip] |
4252 | let e = _mm_setr_epi8( |
4253 | 0, 16, 1, 17, 2, 18, 3, 19, |
4254 | 4, 20, 5, 21, 6, 22, 7, 23, |
4255 | ); |
4256 | assert_eq_m128i(r, e); |
4257 | } |
4258 | |
4259 | #[simd_test(enable = "sse2" )] |
4260 | unsafe fn test_mm_unpacklo_epi16() { |
4261 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
4262 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
4263 | let r = _mm_unpacklo_epi16(a, b); |
4264 | let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11); |
4265 | assert_eq_m128i(r, e); |
4266 | } |
4267 | |
4268 | #[simd_test(enable = "sse2" )] |
4269 | unsafe fn test_mm_unpacklo_epi32() { |
4270 | let a = _mm_setr_epi32(0, 1, 2, 3); |
4271 | let b = _mm_setr_epi32(4, 5, 6, 7); |
4272 | let r = _mm_unpacklo_epi32(a, b); |
4273 | let e = _mm_setr_epi32(0, 4, 1, 5); |
4274 | assert_eq_m128i(r, e); |
4275 | } |
4276 | |
4277 | #[simd_test(enable = "sse2" )] |
4278 | unsafe fn test_mm_unpacklo_epi64() { |
4279 | let a = _mm_setr_epi64x(0, 1); |
4280 | let b = _mm_setr_epi64x(2, 3); |
4281 | let r = _mm_unpacklo_epi64(a, b); |
4282 | let e = _mm_setr_epi64x(0, 2); |
4283 | assert_eq_m128i(r, e); |
4284 | } |
4285 | |
4286 | #[simd_test(enable = "sse2" )] |
4287 | unsafe fn test_mm_add_sd() { |
4288 | let a = _mm_setr_pd(1.0, 2.0); |
4289 | let b = _mm_setr_pd(5.0, 10.0); |
4290 | let r = _mm_add_sd(a, b); |
4291 | assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0)); |
4292 | } |
4293 | |
4294 | #[simd_test(enable = "sse2" )] |
4295 | unsafe fn test_mm_add_pd() { |
4296 | let a = _mm_setr_pd(1.0, 2.0); |
4297 | let b = _mm_setr_pd(5.0, 10.0); |
4298 | let r = _mm_add_pd(a, b); |
4299 | assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0)); |
4300 | } |
4301 | |
4302 | #[simd_test(enable = "sse2" )] |
4303 | unsafe fn test_mm_div_sd() { |
4304 | let a = _mm_setr_pd(1.0, 2.0); |
4305 | let b = _mm_setr_pd(5.0, 10.0); |
4306 | let r = _mm_div_sd(a, b); |
4307 | assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0)); |
4308 | } |
4309 | |
4310 | #[simd_test(enable = "sse2" )] |
4311 | unsafe fn test_mm_div_pd() { |
4312 | let a = _mm_setr_pd(1.0, 2.0); |
4313 | let b = _mm_setr_pd(5.0, 10.0); |
4314 | let r = _mm_div_pd(a, b); |
4315 | assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2)); |
4316 | } |
4317 | |
4318 | #[simd_test(enable = "sse2" )] |
4319 | unsafe fn test_mm_max_sd() { |
4320 | let a = _mm_setr_pd(1.0, 2.0); |
4321 | let b = _mm_setr_pd(5.0, 10.0); |
4322 | let r = _mm_max_sd(a, b); |
4323 | assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0)); |
4324 | } |
4325 | |
4326 | #[simd_test(enable = "sse2" )] |
4327 | unsafe fn test_mm_max_pd() { |
4328 | let a = _mm_setr_pd(1.0, 2.0); |
4329 | let b = _mm_setr_pd(5.0, 10.0); |
4330 | let r = _mm_max_pd(a, b); |
4331 | assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0)); |
4332 | |
4333 | // Check SSE(2)-specific semantics for -0.0 handling. |
4334 | let a = _mm_setr_pd(-0.0, 0.0); |
4335 | let b = _mm_setr_pd(0.0, 0.0); |
4336 | let r1: [u8; 16] = transmute(_mm_max_pd(a, b)); |
4337 | let r2: [u8; 16] = transmute(_mm_max_pd(b, a)); |
4338 | let a: [u8; 16] = transmute(a); |
4339 | let b: [u8; 16] = transmute(b); |
4340 | assert_eq!(r1, b); |
4341 | assert_eq!(r2, a); |
4342 | assert_ne!(a, b); // sanity check that -0.0 is actually present |
4343 | } |
4344 | |
4345 | #[simd_test(enable = "sse2" )] |
4346 | unsafe fn test_mm_min_sd() { |
4347 | let a = _mm_setr_pd(1.0, 2.0); |
4348 | let b = _mm_setr_pd(5.0, 10.0); |
4349 | let r = _mm_min_sd(a, b); |
4350 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); |
4351 | } |
4352 | |
4353 | #[simd_test(enable = "sse2" )] |
4354 | unsafe fn test_mm_min_pd() { |
4355 | let a = _mm_setr_pd(1.0, 2.0); |
4356 | let b = _mm_setr_pd(5.0, 10.0); |
4357 | let r = _mm_min_pd(a, b); |
4358 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); |
4359 | |
4360 | // Check SSE(2)-specific semantics for -0.0 handling. |
4361 | let a = _mm_setr_pd(-0.0, 0.0); |
4362 | let b = _mm_setr_pd(0.0, 0.0); |
4363 | let r1: [u8; 16] = transmute(_mm_min_pd(a, b)); |
4364 | let r2: [u8; 16] = transmute(_mm_min_pd(b, a)); |
4365 | let a: [u8; 16] = transmute(a); |
4366 | let b: [u8; 16] = transmute(b); |
4367 | assert_eq!(r1, b); |
4368 | assert_eq!(r2, a); |
4369 | assert_ne!(a, b); // sanity check that -0.0 is actually present |
4370 | } |
4371 | |
4372 | #[simd_test(enable = "sse2" )] |
4373 | unsafe fn test_mm_mul_sd() { |
4374 | let a = _mm_setr_pd(1.0, 2.0); |
4375 | let b = _mm_setr_pd(5.0, 10.0); |
4376 | let r = _mm_mul_sd(a, b); |
4377 | assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0)); |
4378 | } |
4379 | |
4380 | #[simd_test(enable = "sse2" )] |
4381 | unsafe fn test_mm_mul_pd() { |
4382 | let a = _mm_setr_pd(1.0, 2.0); |
4383 | let b = _mm_setr_pd(5.0, 10.0); |
4384 | let r = _mm_mul_pd(a, b); |
4385 | assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0)); |
4386 | } |
4387 | |
4388 | #[simd_test(enable = "sse2" )] |
4389 | unsafe fn test_mm_sqrt_sd() { |
4390 | let a = _mm_setr_pd(1.0, 2.0); |
4391 | let b = _mm_setr_pd(5.0, 10.0); |
4392 | let r = _mm_sqrt_sd(a, b); |
4393 | assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0)); |
4394 | } |
4395 | |
4396 | #[simd_test(enable = "sse2" )] |
4397 | unsafe fn test_mm_sqrt_pd() { |
4398 | let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0)); |
4399 | assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt())); |
4400 | } |
4401 | |
4402 | #[simd_test(enable = "sse2" )] |
4403 | unsafe fn test_mm_sub_sd() { |
4404 | let a = _mm_setr_pd(1.0, 2.0); |
4405 | let b = _mm_setr_pd(5.0, 10.0); |
4406 | let r = _mm_sub_sd(a, b); |
4407 | assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0)); |
4408 | } |
4409 | |
4410 | #[simd_test(enable = "sse2" )] |
4411 | unsafe fn test_mm_sub_pd() { |
4412 | let a = _mm_setr_pd(1.0, 2.0); |
4413 | let b = _mm_setr_pd(5.0, 10.0); |
4414 | let r = _mm_sub_pd(a, b); |
4415 | assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0)); |
4416 | } |
4417 | |
4418 | #[simd_test(enable = "sse2" )] |
4419 | unsafe fn test_mm_and_pd() { |
4420 | let a = transmute(u64x2::splat(5)); |
4421 | let b = transmute(u64x2::splat(3)); |
4422 | let r = _mm_and_pd(a, b); |
4423 | let e = transmute(u64x2::splat(1)); |
4424 | assert_eq_m128d(r, e); |
4425 | } |
4426 | |
4427 | #[simd_test(enable = "sse2" )] |
4428 | unsafe fn test_mm_andnot_pd() { |
4429 | let a = transmute(u64x2::splat(5)); |
4430 | let b = transmute(u64x2::splat(3)); |
4431 | let r = _mm_andnot_pd(a, b); |
4432 | let e = transmute(u64x2::splat(2)); |
4433 | assert_eq_m128d(r, e); |
4434 | } |
4435 | |
4436 | #[simd_test(enable = "sse2" )] |
4437 | unsafe fn test_mm_or_pd() { |
4438 | let a = transmute(u64x2::splat(5)); |
4439 | let b = transmute(u64x2::splat(3)); |
4440 | let r = _mm_or_pd(a, b); |
4441 | let e = transmute(u64x2::splat(7)); |
4442 | assert_eq_m128d(r, e); |
4443 | } |
4444 | |
4445 | #[simd_test(enable = "sse2" )] |
4446 | unsafe fn test_mm_xor_pd() { |
4447 | let a = transmute(u64x2::splat(5)); |
4448 | let b = transmute(u64x2::splat(3)); |
4449 | let r = _mm_xor_pd(a, b); |
4450 | let e = transmute(u64x2::splat(6)); |
4451 | assert_eq_m128d(r, e); |
4452 | } |
4453 | |
4454 | #[simd_test(enable = "sse2" )] |
4455 | unsafe fn test_mm_cmpeq_sd() { |
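        // The `_sd` comparisons test only the low lane and copy the upper lane
        // of `a` through, hence the bits of 2.0 in the expected upper half.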
4456 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4457 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4458 | let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b)); |
4459 | assert_eq_m128i(r, e); |
4460 | } |
4461 | |
4462 | #[simd_test(enable = "sse2" )] |
4463 | unsafe fn test_mm_cmplt_sd() { |
4464 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
4465 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4466 | let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b)); |
4467 | assert_eq_m128i(r, e); |
4468 | } |
4469 | |
4470 | #[simd_test(enable = "sse2" )] |
4471 | unsafe fn test_mm_cmple_sd() { |
4472 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4473 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4474 | let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b)); |
4475 | assert_eq_m128i(r, e); |
4476 | } |
4477 | |
4478 | #[simd_test(enable = "sse2" )] |
4479 | unsafe fn test_mm_cmpgt_sd() { |
4480 | let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4481 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4482 | let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b)); |
4483 | assert_eq_m128i(r, e); |
4484 | } |
4485 | |
4486 | #[simd_test(enable = "sse2" )] |
4487 | unsafe fn test_mm_cmpge_sd() { |
4488 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4489 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4490 | let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b)); |
4491 | assert_eq_m128i(r, e); |
4492 | } |
4493 | |
4494 | #[simd_test(enable = "sse2" )] |
4495 | unsafe fn test_mm_cmpord_sd() { |
4496 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); |
4497 | let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); |
4498 | let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b)); |
4499 | assert_eq_m128i(r, e); |
4500 | } |
4501 | |
4502 | #[simd_test(enable = "sse2" )] |
4503 | unsafe fn test_mm_cmpunord_sd() { |
4504 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); |
4505 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4506 | let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b)); |
4507 | assert_eq_m128i(r, e); |
4508 | } |
4509 | |
4510 | #[simd_test(enable = "sse2" )] |
4511 | unsafe fn test_mm_cmpneq_sd() { |
4512 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
4513 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4514 | let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b)); |
4515 | assert_eq_m128i(r, e); |
4516 | } |
4517 | |
4518 | #[simd_test(enable = "sse2" )] |
4519 | unsafe fn test_mm_cmpnlt_sd() { |
4520 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
4521 | let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); |
4522 | let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b)); |
4523 | assert_eq_m128i(r, e); |
4524 | } |
4525 | |
4526 | #[simd_test(enable = "sse2" )] |
4527 | unsafe fn test_mm_cmpnle_sd() { |
4528 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4529 | let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); |
4530 | let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b)); |
4531 | assert_eq_m128i(r, e); |
4532 | } |
4533 | |
4534 | #[simd_test(enable = "sse2" )] |
4535 | unsafe fn test_mm_cmpngt_sd() { |
4536 | let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4537 | let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); |
4538 | let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b)); |
4539 | assert_eq_m128i(r, e); |
4540 | } |
4541 | |
4542 | #[simd_test(enable = "sse2" )] |
4543 | unsafe fn test_mm_cmpnge_sd() { |
4544 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4545 | let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); |
4546 | let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b)); |
4547 | assert_eq_m128i(r, e); |
4548 | } |
4549 | |
4550 | #[simd_test(enable = "sse2" )] |
4551 | unsafe fn test_mm_cmpeq_pd() { |
4552 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4553 | let e = _mm_setr_epi64x(!0, 0); |
4554 | let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b)); |
4555 | assert_eq_m128i(r, e); |
4556 | } |
4557 | |
4558 | #[simd_test(enable = "sse2" )] |
4559 | unsafe fn test_mm_cmplt_pd() { |
4560 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4561 | let e = _mm_setr_epi64x(0, !0); |
4562 | let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b)); |
4563 | assert_eq_m128i(r, e); |
4564 | } |
4565 | |
4566 | #[simd_test(enable = "sse2" )] |
4567 | unsafe fn test_mm_cmple_pd() { |
4568 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4569 | let e = _mm_setr_epi64x(!0, !0); |
4570 | let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b)); |
4571 | assert_eq_m128i(r, e); |
4572 | } |
4573 | |
4574 | #[simd_test(enable = "sse2" )] |
4575 | unsafe fn test_mm_cmpgt_pd() { |
4576 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4577 | let e = _mm_setr_epi64x(0, 0); |
4578 | let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b)); |
4579 | assert_eq_m128i(r, e); |
4580 | } |
4581 | |
4582 | #[simd_test(enable = "sse2" )] |
4583 | unsafe fn test_mm_cmpge_pd() { |
4584 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4585 | let e = _mm_setr_epi64x(!0, 0); |
4586 | let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b)); |
4587 | assert_eq_m128i(r, e); |
4588 | } |
4589 | |
4590 | #[simd_test(enable = "sse2" )] |
4591 | unsafe fn test_mm_cmpord_pd() { |
4592 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); |
4593 | let e = _mm_setr_epi64x(0, !0); |
4594 | let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b)); |
4595 | assert_eq_m128i(r, e); |
4596 | } |
4597 | |
4598 | #[simd_test(enable = "sse2" )] |
4599 | unsafe fn test_mm_cmpunord_pd() { |
4600 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); |
4601 | let e = _mm_setr_epi64x(!0, 0); |
4602 | let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b)); |
4603 | assert_eq_m128i(r, e); |
4604 | } |
4605 | |
4606 | #[simd_test(enable = "sse2" )] |
4607 | unsafe fn test_mm_cmpneq_pd() { |
4608 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
4609 | let e = _mm_setr_epi64x(!0, !0); |
4610 | let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b)); |
4611 | assert_eq_m128i(r, e); |
4612 | } |
4613 | |
4614 | #[simd_test(enable = "sse2" )] |
4615 | unsafe fn test_mm_cmpnlt_pd() { |
4616 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
4617 | let e = _mm_setr_epi64x(0, 0); |
4618 | let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b)); |
4619 | assert_eq_m128i(r, e); |
4620 | } |
4621 | |
4622 | #[simd_test(enable = "sse2" )] |
4623 | unsafe fn test_mm_cmpnle_pd() { |
4624 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4625 | let e = _mm_setr_epi64x(0, 0); |
4626 | let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b)); |
4627 | assert_eq_m128i(r, e); |
4628 | } |
4629 | |
4630 | #[simd_test(enable = "sse2" )] |
4631 | unsafe fn test_mm_cmpngt_pd() { |
4632 | let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4633 | let e = _mm_setr_epi64x(0, !0); |
4634 | let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b)); |
4635 | assert_eq_m128i(r, e); |
4636 | } |
4637 | |
4638 | #[simd_test(enable = "sse2" )] |
4639 | unsafe fn test_mm_cmpnge_pd() { |
4640 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4641 | let e = _mm_setr_epi64x(0, !0); |
4642 | let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b)); |
4643 | assert_eq_m128i(r, e); |
4644 | } |
4645 | |
4646 | #[simd_test(enable = "sse2" )] |
4647 | unsafe fn test_mm_comieq_sd() { |
4648 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4649 | assert!(_mm_comieq_sd(a, b) != 0); |
4650 | |
4651 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0)); |
4652 | assert!(_mm_comieq_sd(a, b) == 0); |
4653 | } |
4654 | |
4655 | #[simd_test(enable = "sse2" )] |
4656 | unsafe fn test_mm_comilt_sd() { |
4657 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4658 | assert!(_mm_comilt_sd(a, b) == 0); |
4659 | } |
4660 | |
4661 | #[simd_test(enable = "sse2" )] |
4662 | unsafe fn test_mm_comile_sd() { |
4663 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4664 | assert!(_mm_comile_sd(a, b) != 0); |
4665 | } |
4666 | |
4667 | #[simd_test(enable = "sse2" )] |
4668 | unsafe fn test_mm_comigt_sd() { |
4669 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4670 | assert!(_mm_comigt_sd(a, b) == 0); |
4671 | } |
4672 | |
4673 | #[simd_test(enable = "sse2" )] |
4674 | unsafe fn test_mm_comige_sd() { |
4675 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4676 | assert!(_mm_comige_sd(a, b) != 0); |
4677 | } |
4678 | |
4679 | #[simd_test(enable = "sse2" )] |
4680 | unsafe fn test_mm_comineq_sd() { |
4681 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4682 | assert!(_mm_comineq_sd(a, b) == 0); |
4683 | } |
4684 | |
4685 | #[simd_test(enable = "sse2" )] |
4686 | unsafe fn test_mm_ucomieq_sd() { |
4687 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4688 | assert!(_mm_ucomieq_sd(a, b) != 0); |
4689 | |
4690 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0)); |
4691 | assert!(_mm_ucomieq_sd(a, b) == 0); |
4692 | } |
4693 | |
4694 | #[simd_test(enable = "sse2" )] |
4695 | unsafe fn test_mm_ucomilt_sd() { |
4696 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4697 | assert!(_mm_ucomilt_sd(a, b) == 0); |
4698 | } |
4699 | |
4700 | #[simd_test(enable = "sse2" )] |
4701 | unsafe fn test_mm_ucomile_sd() { |
4702 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4703 | assert!(_mm_ucomile_sd(a, b) != 0); |
4704 | } |
4705 | |
4706 | #[simd_test(enable = "sse2" )] |
4707 | unsafe fn test_mm_ucomigt_sd() { |
4708 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4709 | assert!(_mm_ucomigt_sd(a, b) == 0); |
4710 | } |
4711 | |
4712 | #[simd_test(enable = "sse2" )] |
4713 | unsafe fn test_mm_ucomige_sd() { |
4714 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4715 | assert!(_mm_ucomige_sd(a, b) != 0); |
4716 | } |
4717 | |
4718 | #[simd_test(enable = "sse2" )] |
4719 | unsafe fn test_mm_ucomineq_sd() { |
4720 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4721 | assert!(_mm_ucomineq_sd(a, b) == 0); |
4722 | } |
4723 | |
4724 | #[simd_test(enable = "sse2" )] |
4725 | unsafe fn test_mm_movemask_pd() { |
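        // Collects the sign bit of each f64 lane; lane 0 maps to bit 0.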
4726 | let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0)); |
4727 | assert_eq!(r, 0b01); |
4728 | |
4729 | let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0)); |
4730 | assert_eq!(r, 0b11); |
4731 | } |
4732 | |
    #[repr(align(16))]
4734 | struct Memory { |
4735 | data: [f64; 4], |
4736 | } |
4737 | |
4738 | #[simd_test(enable = "sse2" )] |
4739 | unsafe fn test_mm_load_pd() { |
4740 | let mem = Memory { |
4741 | data: [1.0f64, 2.0, 3.0, 4.0], |
4742 | }; |
4743 | let vals = &mem.data; |
4744 | let d = vals.as_ptr(); |
4745 | |
4746 | let r = _mm_load_pd(d); |
4747 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); |
4748 | } |
4749 | |
4750 | #[simd_test(enable = "sse2" )] |
4751 | unsafe fn test_mm_load_sd() { |
4752 | let a = 1.; |
4753 | let expected = _mm_setr_pd(a, 0.); |
4754 | let r = _mm_load_sd(&a); |
4755 | assert_eq_m128d(r, expected); |
4756 | } |
4757 | |
4758 | #[simd_test(enable = "sse2" )] |
4759 | unsafe fn test_mm_loadh_pd() { |
4760 | let a = _mm_setr_pd(1., 2.); |
4761 | let b = 3.; |
4762 | let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.); |
4763 | let r = _mm_loadh_pd(a, &b); |
4764 | assert_eq_m128d(r, expected); |
4765 | } |
4766 | |
4767 | #[simd_test(enable = "sse2" )] |
4768 | unsafe fn test_mm_loadl_pd() { |
4769 | let a = _mm_setr_pd(1., 2.); |
4770 | let b = 3.; |
4771 | let expected = _mm_setr_pd(3., get_m128d(a, 1)); |
4772 | let r = _mm_loadl_pd(a, &b); |
4773 | assert_eq_m128d(r, expected); |
4774 | } |
4775 | |
4776 | #[simd_test(enable = "sse2" )] |
    // Miri cannot support this until it is clear how non-temporal stores fit
    // into the Rust memory model.
    #[cfg_attr(miri, ignore)]
4780 | unsafe fn test_mm_stream_pd() { |
        #[repr(align(128))]
4782 | struct Memory { |
4783 | pub data: [f64; 2], |
4784 | } |
4785 | let a = _mm_set1_pd(7.0); |
4786 | let mut mem = Memory { data: [-1.0; 2] }; |
4787 | |
4788 | _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a); |
4789 | for i in 0..2 { |
4790 | assert_eq!(mem.data[i], get_m128d(a, i)); |
4791 | } |
4792 | } |
4793 | |
4794 | #[simd_test(enable = "sse2" )] |
4795 | unsafe fn test_mm_store_sd() { |
4796 | let mut dest = 0.; |
4797 | let a = _mm_setr_pd(1., 2.); |
4798 | _mm_store_sd(&mut dest, a); |
4799 | assert_eq!(dest, _mm_cvtsd_f64(a)); |
4800 | } |
4801 | |
4802 | #[simd_test(enable = "sse2" )] |
4803 | unsafe fn test_mm_store_pd() { |
4804 | let mut mem = Memory { data: [0.0f64; 4] }; |
4805 | let vals = &mut mem.data; |
4806 | let a = _mm_setr_pd(1.0, 2.0); |
4807 | let d = vals.as_mut_ptr(); |
4808 | |
4809 | _mm_store_pd(d, *black_box(&a)); |
4810 | assert_eq!(vals[0], 1.0); |
4811 | assert_eq!(vals[1], 2.0); |
4812 | } |
4813 | |
4814 | #[simd_test(enable = "sse2" )] |
4815 | unsafe fn test_mm_storeu_pd() { |
4816 | let mut mem = Memory { data: [0.0f64; 4] }; |
4817 | let vals = &mut mem.data; |
4818 | let a = _mm_setr_pd(1.0, 2.0); |
4819 | |
4820 | let mut ofs = 0; |
4821 | let mut p = vals.as_mut_ptr(); |
4822 | |
4823 | // Make sure p is **not** aligned to 16-byte boundary |
4824 | if (p as usize) & 0xf == 0 { |
4825 | ofs = 1; |
4826 | p = p.add(1); |
4827 | } |
4828 | |
4829 | _mm_storeu_pd(p, *black_box(&a)); |
4830 | |
4831 | if ofs > 0 { |
4832 | assert_eq!(vals[ofs - 1], 0.0); |
4833 | } |
4834 | assert_eq!(vals[ofs + 0], 1.0); |
4835 | assert_eq!(vals[ofs + 1], 2.0); |
4836 | } |
4837 | |
4838 | #[simd_test(enable = "sse2" )] |
4839 | unsafe fn test_mm_storeu_si16() { |
4840 | let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
4841 | let mut r = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16); |
4842 | _mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a); |
4843 | let e = _mm_setr_epi16(1, 10, 11, 12, 13, 14, 15, 16); |
4844 | assert_eq_m128i(r, e); |
4845 | } |
4846 | |
4847 | #[simd_test(enable = "sse2" )] |
4848 | unsafe fn test_mm_storeu_si32() { |
4849 | let a = _mm_setr_epi32(1, 2, 3, 4); |
4850 | let mut r = _mm_setr_epi32(5, 6, 7, 8); |
4851 | _mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a); |
4852 | let e = _mm_setr_epi32(1, 6, 7, 8); |
4853 | assert_eq_m128i(r, e); |
4854 | } |
4855 | |
4856 | #[simd_test(enable = "sse2" )] |
4857 | unsafe fn test_mm_storeu_si64() { |
4858 | let a = _mm_setr_epi64x(1, 2); |
4859 | let mut r = _mm_setr_epi64x(3, 4); |
4860 | _mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a); |
4861 | let e = _mm_setr_epi64x(1, 4); |
4862 | assert_eq_m128i(r, e); |
4863 | } |
4864 | |
4865 | #[simd_test(enable = "sse2" )] |
4866 | unsafe fn test_mm_store1_pd() { |
4867 | let mut mem = Memory { data: [0.0f64; 4] }; |
4868 | let vals = &mut mem.data; |
4869 | let a = _mm_setr_pd(1.0, 2.0); |
4870 | let d = vals.as_mut_ptr(); |
4871 | |
4872 | _mm_store1_pd(d, *black_box(&a)); |
4873 | assert_eq!(vals[0], 1.0); |
4874 | assert_eq!(vals[1], 1.0); |
4875 | } |
4876 | |
4877 | #[simd_test(enable = "sse2" )] |
4878 | unsafe fn test_mm_store_pd1() { |
4879 | let mut mem = Memory { data: [0.0f64; 4] }; |
4880 | let vals = &mut mem.data; |
4881 | let a = _mm_setr_pd(1.0, 2.0); |
4882 | let d = vals.as_mut_ptr(); |
4883 | |
4884 | _mm_store_pd1(d, *black_box(&a)); |
4885 | assert_eq!(vals[0], 1.0); |
4886 | assert_eq!(vals[1], 1.0); |
4887 | } |
4888 | |
4889 | #[simd_test(enable = "sse2" )] |
4890 | unsafe fn test_mm_storer_pd() { |
4891 | let mut mem = Memory { data: [0.0f64; 4] }; |
4892 | let vals = &mut mem.data; |
4893 | let a = _mm_setr_pd(1.0, 2.0); |
4894 | let d = vals.as_mut_ptr(); |
4895 | |
4896 | _mm_storer_pd(d, *black_box(&a)); |
4897 | assert_eq!(vals[0], 2.0); |
4898 | assert_eq!(vals[1], 1.0); |
4899 | } |
4900 | |
4901 | #[simd_test(enable = "sse2" )] |
4902 | unsafe fn test_mm_storeh_pd() { |
4903 | let mut dest = 0.; |
4904 | let a = _mm_setr_pd(1., 2.); |
4905 | _mm_storeh_pd(&mut dest, a); |
4906 | assert_eq!(dest, get_m128d(a, 1)); |
4907 | } |
4908 | |
4909 | #[simd_test(enable = "sse2" )] |
4910 | unsafe fn test_mm_storel_pd() { |
4911 | let mut dest = 0.; |
4912 | let a = _mm_setr_pd(1., 2.); |
4913 | _mm_storel_pd(&mut dest, a); |
4914 | assert_eq!(dest, _mm_cvtsd_f64(a)); |
4915 | } |
4916 | |
4917 | #[simd_test(enable = "sse2" )] |
4918 | unsafe fn test_mm_loadr_pd() { |
4919 | let mut mem = Memory { |
4920 | data: [1.0f64, 2.0, 3.0, 4.0], |
4921 | }; |
4922 | let vals = &mut mem.data; |
4923 | let d = vals.as_ptr(); |
4924 | |
4925 | let r = _mm_loadr_pd(d); |
4926 | assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0)); |
4927 | } |
4928 | |
4929 | #[simd_test(enable = "sse2" )] |
4930 | unsafe fn test_mm_loadu_pd() { |
4931 | let mut mem = Memory { |
4932 | data: [1.0f64, 2.0, 3.0, 4.0], |
4933 | }; |
4934 | let vals = &mut mem.data; |
4935 | let mut d = vals.as_ptr(); |
4936 | |
4937 | // make sure d is not aligned to 16-byte boundary |
4938 | let mut offset = 0; |
4939 | if (d as usize) & 0xf == 0 { |
4940 | offset = 1; |
4941 | d = d.add(offset); |
4942 | } |
4943 | |
4944 | let r = _mm_loadu_pd(d); |
4945 | let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64)); |
4946 | assert_eq_m128d(r, e); |
4947 | } |
4948 | |
4949 | #[simd_test(enable = "sse2" )] |
4950 | unsafe fn test_mm_loadu_si16() { |
4951 | let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
4952 | let r = _mm_loadu_si16(ptr::addr_of!(a) as *const _); |
4953 | assert_eq_m128i(r, _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0)); |
4954 | } |
4955 | |
4956 | #[simd_test(enable = "sse2" )] |
4957 | unsafe fn test_mm_loadu_si32() { |
4958 | let a = _mm_setr_epi32(1, 2, 3, 4); |
4959 | let r = _mm_loadu_si32(ptr::addr_of!(a) as *const _); |
4960 | assert_eq_m128i(r, _mm_setr_epi32(1, 0, 0, 0)); |
4961 | } |
4962 | |
4963 | #[simd_test(enable = "sse2" )] |
4964 | unsafe fn test_mm_loadu_si64() { |
4965 | let a = _mm_setr_epi64x(5, 6); |
4966 | let r = _mm_loadu_si64(ptr::addr_of!(a) as *const _); |
4967 | assert_eq_m128i(r, _mm_setr_epi64x(5, 0)); |
4968 | } |
4969 | |
4970 | #[simd_test(enable = "sse2" )] |
4971 | unsafe fn test_mm_cvtpd_ps() { |
4972 | let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0)); |
4973 | assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0)); |
4974 | |
4975 | let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0)); |
4976 | assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0)); |
4977 | |
4978 | let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN)); |
4979 | assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0)); |
4980 | |
4981 | let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64)); |
4982 | assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0)); |
4983 | } |
4984 | |
4985 | #[simd_test(enable = "sse2" )] |
4986 | unsafe fn test_mm_cvtps_pd() { |
4987 | let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0)); |
4988 | assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0)); |
4989 | |
4990 | let r = _mm_cvtps_pd(_mm_setr_ps( |
4991 | f32::MAX, |
4992 | f32::INFINITY, |
4993 | f32::NEG_INFINITY, |
4994 | f32::MIN, |
4995 | )); |
4996 | assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY)); |
4997 | } |
4998 | |
4999 | #[simd_test(enable = "sse2" )] |
5000 | unsafe fn test_mm_cvtpd_epi32() { |
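        // Conversions that overflow i32 or see NaN produce the "integer
        // indefinite" value 0x8000_0000, i.e. i32::MIN.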
5001 | let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0)); |
5002 | assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0)); |
5003 | |
5004 | let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0)); |
5005 | assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0)); |
5006 | |
5007 | let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN)); |
5008 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); |
5009 | |
5010 | let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY)); |
5011 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); |
5012 | |
5013 | let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN)); |
5014 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); |
5015 | } |
5016 | |
5017 | #[simd_test(enable = "sse2" )] |
5018 | unsafe fn test_mm_cvtsd_si32() { |
5019 | let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0)); |
5020 | assert_eq!(r, -2); |
5021 | |
5022 | let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN)); |
5023 | assert_eq!(r, i32::MIN); |
5024 | |
5025 | let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN)); |
5026 | assert_eq!(r, i32::MIN); |
5027 | } |
5028 | |
5029 | #[simd_test(enable = "sse2" )] |
5030 | unsafe fn test_mm_cvtsd_ss() { |
5031 | let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4); |
5032 | let b = _mm_setr_pd(2.0, -5.0); |
5033 | |
5034 | let r = _mm_cvtsd_ss(a, b); |
5035 | |
5036 | assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4)); |
5037 | |
5038 | let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY); |
5039 | let b = _mm_setr_pd(f64::INFINITY, -5.0); |
5040 | |
5041 | let r = _mm_cvtsd_ss(a, b); |
5042 | |
5043 | assert_eq_m128( |
5044 | r, |
5045 | _mm_setr_ps( |
5046 | f32::INFINITY, |
5047 | f32::NEG_INFINITY, |
5048 | f32::MAX, |
5049 | f32::NEG_INFINITY, |
5050 | ), |
5051 | ); |
5052 | } |
5053 | |
5054 | #[simd_test(enable = "sse2" )] |
5055 | unsafe fn test_mm_cvtsd_f64() { |
5056 | let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2)); |
5057 | assert_eq!(r, -1.1); |
5058 | } |
5059 | |
5060 | #[simd_test(enable = "sse2" )] |
5061 | unsafe fn test_mm_cvtss_sd() { |
5062 | let a = _mm_setr_pd(-1.1, 2.2); |
5063 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
5064 | |
5065 | let r = _mm_cvtss_sd(a, b); |
5066 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2)); |
5067 | |
5068 | let a = _mm_setr_pd(-1.1, f64::INFINITY); |
5069 | let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0); |
5070 | |
5071 | let r = _mm_cvtss_sd(a, b); |
5072 | assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY)); |
5073 | } |
5074 | |
5075 | #[simd_test(enable = "sse2" )] |
5076 | unsafe fn test_mm_cvttpd_epi32() { |
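        // The extra `t` stands for "truncate": these conversions round toward
        // zero instead of using the current rounding mode.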
5077 | let a = _mm_setr_pd(-1.1, 2.2); |
5078 | let r = _mm_cvttpd_epi32(a); |
5079 | assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0)); |
5080 | |
5081 | let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); |
5082 | let r = _mm_cvttpd_epi32(a); |
5083 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); |
5084 | } |
5085 | |
5086 | #[simd_test(enable = "sse2" )] |
5087 | unsafe fn test_mm_cvttsd_si32() { |
5088 | let a = _mm_setr_pd(-1.1, 2.2); |
5089 | let r = _mm_cvttsd_si32(a); |
5090 | assert_eq!(r, -1); |
5091 | |
5092 | let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); |
5093 | let r = _mm_cvttsd_si32(a); |
5094 | assert_eq!(r, i32::MIN); |
5095 | } |
5096 | |
5097 | #[simd_test(enable = "sse2" )] |
5098 | unsafe fn test_mm_cvttps_epi32() { |
5099 | let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6); |
5100 | let r = _mm_cvttps_epi32(a); |
5101 | assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6)); |
5102 | |
5103 | let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX); |
5104 | let r = _mm_cvttps_epi32(a); |
5105 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN)); |
5106 | } |
5107 | |
5108 | #[simd_test(enable = "sse2" )] |
5109 | unsafe fn test_mm_set_sd() { |
5110 | let r = _mm_set_sd(-1.0_f64); |
5111 | assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64)); |
5112 | } |
5113 | |
5114 | #[simd_test(enable = "sse2" )] |
5115 | unsafe fn test_mm_set1_pd() { |
5116 | let r = _mm_set1_pd(-1.0_f64); |
5117 | assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64)); |
5118 | } |
5119 | |
5120 | #[simd_test(enable = "sse2" )] |
5121 | unsafe fn test_mm_set_pd1() { |
5122 | let r = _mm_set_pd1(-2.0_f64); |
5123 | assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64)); |
5124 | } |
5125 | |
5126 | #[simd_test(enable = "sse2" )] |
5127 | unsafe fn test_mm_set_pd() { |
5128 | let r = _mm_set_pd(1.0_f64, 5.0_f64); |
5129 | assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64)); |
5130 | } |
5131 | |
5132 | #[simd_test(enable = "sse2" )] |
5133 | unsafe fn test_mm_setr_pd() { |
5134 | let r = _mm_setr_pd(1.0_f64, -5.0_f64); |
5135 | assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64)); |
5136 | } |
5137 | |
5138 | #[simd_test(enable = "sse2" )] |
5139 | unsafe fn test_mm_setzero_pd() { |
5140 | let r = _mm_setzero_pd(); |
5141 | assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64)); |
5142 | } |
5143 | |
5144 | #[simd_test(enable = "sse2" )] |
5145 | unsafe fn test_mm_load1_pd() { |
5146 | let d = -5.0; |
5147 | let r = _mm_load1_pd(&d); |
5148 | assert_eq_m128d(r, _mm_setr_pd(d, d)); |
5149 | } |
5150 | |
5151 | #[simd_test(enable = "sse2" )] |
5152 | unsafe fn test_mm_load_pd1() { |
5153 | let d = -5.0; |
5154 | let r = _mm_load_pd1(&d); |
5155 | assert_eq_m128d(r, _mm_setr_pd(d, d)); |
5156 | } |
5157 | |
5158 | #[simd_test(enable = "sse2" )] |
5159 | unsafe fn test_mm_unpackhi_pd() { |
5160 | let a = _mm_setr_pd(1.0, 2.0); |
5161 | let b = _mm_setr_pd(3.0, 4.0); |
5162 | let r = _mm_unpackhi_pd(a, b); |
5163 | assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0)); |
5164 | } |
5165 | |
5166 | #[simd_test(enable = "sse2" )] |
5167 | unsafe fn test_mm_unpacklo_pd() { |
5168 | let a = _mm_setr_pd(1.0, 2.0); |
5169 | let b = _mm_setr_pd(3.0, 4.0); |
5170 | let r = _mm_unpacklo_pd(a, b); |
5171 | assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0)); |
5172 | } |
5173 | |
5174 | #[simd_test(enable = "sse2" )] |
5175 | unsafe fn test_mm_shuffle_pd() { |
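        // Only the low two immediate bits matter: bit 0 picks the lane of `a`
        // for the low half, bit 1 picks the lane of `b` for the high half.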
5176 | let a = _mm_setr_pd(1., 2.); |
5177 | let b = _mm_setr_pd(3., 4.); |
5178 | let expected = _mm_setr_pd(1., 3.); |
5179 | let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b); |
5180 | assert_eq_m128d(r, expected); |
5181 | } |
5182 | |
5183 | #[simd_test(enable = "sse2" )] |
5184 | unsafe fn test_mm_move_sd() { |
5185 | let a = _mm_setr_pd(1., 2.); |
5186 | let b = _mm_setr_pd(3., 4.); |
5187 | let expected = _mm_setr_pd(3., 2.); |
5188 | let r = _mm_move_sd(a, b); |
5189 | assert_eq_m128d(r, expected); |
5190 | } |
5191 | |
5192 | #[simd_test(enable = "sse2" )] |
5193 | unsafe fn test_mm_castpd_ps() { |
5194 | let a = _mm_set1_pd(0.); |
5195 | let expected = _mm_set1_ps(0.); |
5196 | let r = _mm_castpd_ps(a); |
5197 | assert_eq_m128(r, expected); |
5198 | } |
5199 | |
5200 | #[simd_test(enable = "sse2" )] |
5201 | unsafe fn test_mm_castpd_si128() { |
5202 | let a = _mm_set1_pd(0.); |
5203 | let expected = _mm_set1_epi64x(0); |
5204 | let r = _mm_castpd_si128(a); |
5205 | assert_eq_m128i(r, expected); |
5206 | } |
5207 | |
5208 | #[simd_test(enable = "sse2" )] |
5209 | unsafe fn test_mm_castps_pd() { |
5210 | let a = _mm_set1_ps(0.); |
5211 | let expected = _mm_set1_pd(0.); |
5212 | let r = _mm_castps_pd(a); |
5213 | assert_eq_m128d(r, expected); |
5214 | } |
5215 | |
5216 | #[simd_test(enable = "sse2" )] |
5217 | unsafe fn test_mm_castps_si128() { |
5218 | let a = _mm_set1_ps(0.); |
5219 | let expected = _mm_set1_epi32(0); |
5220 | let r = _mm_castps_si128(a); |
5221 | assert_eq_m128i(r, expected); |
5222 | } |
5223 | |
5224 | #[simd_test(enable = "sse2" )] |
5225 | unsafe fn test_mm_castsi128_pd() { |
5226 | let a = _mm_set1_epi64x(0); |
5227 | let expected = _mm_set1_pd(0.); |
5228 | let r = _mm_castsi128_pd(a); |
5229 | assert_eq_m128d(r, expected); |
5230 | } |
5231 | |
5232 | #[simd_test(enable = "sse2" )] |
5233 | unsafe fn test_mm_castsi128_ps() { |
5234 | let a = _mm_set1_epi32(0); |
5235 | let expected = _mm_set1_ps(0.); |
5236 | let r = _mm_castsi128_ps(a); |
5237 | assert_eq_m128(r, expected); |
5238 | } |
5239 | } |
5240 | |