//! Streaming SIMD Extensions 2 (SSE2)

#[cfg(test)]
use stdarch_test::assert_instr;

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    intrinsics::sqrtf64,
    mem, ptr,
};

/// Provides a hint to the processor that the code sequence is a spin-wait loop.
///
/// This can help improve the performance and power consumption of spin-wait
/// loops.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_pause)
#[inline]
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_pause() {
    // note: `pause` is guaranteed to be interpreted as a `nop` by CPUs without
    // the SSE2 target-feature - therefore it does not require any target features
    pause()
}
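
// A minimal spin-wait sketch (hypothetical helper, not part of this module's
// API): `pause` reduces power use and pipeline flushes while polling a flag.
#[cfg(test)]
#[allow(dead_code)]
fn example_spin_until_set(flag: &core::sync::atomic::AtomicBool) {
    while !flag.load(core::sync::atomic::Ordering::Acquire) {
        // SAFETY: `pause` degrades to a `nop` on CPUs without SSE2.
        unsafe { _mm_pause() };
    }
}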

/// Invalidates and flushes the cache line that contains `p` from all levels of
/// the cache hierarchy.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clflush)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(clflush))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_clflush(p: *const u8) {
    clflush(p)
}

/// Performs a serializing operation on all load-from-memory instructions
/// that were issued prior to this instruction.
///
/// Guarantees that every load instruction that precedes, in program order, the
/// load fence instruction is globally visible before any load instruction
/// which follows the fence in program order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(lfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_lfence() {
    lfence()
}

/// Performs a serializing operation on all load-from-memory and store-to-memory
/// instructions that were issued prior to this instruction.
///
/// Guarantees that every memory access that precedes, in program order, the
/// memory fence instruction is globally visible before any memory instruction
/// which follows the fence in program order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mfence() {
    mfence()
}
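
// An ordering sketch (hypothetical helper; the raw-pointer publish protocol
// is an assumption for illustration): make the payload store globally
// visible before the flag store.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn example_publish_with_mfence(data: *mut u64, flag: *mut u8) {
    data.write(42); // payload store
    _mm_mfence(); // the payload is globally visible past this point
    flag.write(1); // readers that observe the flag also observe the payload
}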

/// Adds packed 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_add(a.as_i8x16(), b.as_i8x16())) }
}

/// Adds packed 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_add(a.as_i16x8(), b.as_i16x8())) }
}

/// Adds packed 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_add(a.as_i32x4(), b.as_i32x4())) }
}

/// Adds packed 64-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_add(a.as_i64x2(), b.as_i64x2())) }
}

/// Adds packed 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16())) }
}

/// Adds packed 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8())) }
}

/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16())) }
}

/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8())) }
}
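
// A saturation sketch (hypothetical helper): 120 + 20 clamps to `i8::MAX`
// instead of wrapping to -116.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_adds_epi8_saturates() {
    let r = _mm_adds_epi8(_mm_set1_epi8(120), _mm_set1_epi8(20));
    assert_eq!(_mm_cvtsi128_si32(r) as i8, i8::MAX); // every lane is 127
}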

/// Averages packed unsigned 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = simd_cast::<_, u16x16>(a.as_u8x16());
        let b = simd_cast::<_, u16x16>(b.as_u8x16());
        let r = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1));
        transmute(simd_cast::<_, u8x16>(r))
    }
}

/// Averages packed unsigned 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = simd_cast::<_, u32x8>(a.as_u16x8());
        let b = simd_cast::<_, u32x8>(b.as_u16x8());
        let r = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1));
        transmute(simd_cast::<_, u16x8>(r))
    }
}
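
// A rounding-average sketch (hypothetical helper): `pavgb` computes
// (a + b + 1) >> 1 in a widened type, so 1 averaged with 2 rounds up to 2.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_avg_rounds_up() {
    let r = _mm_avg_epu8(_mm_set1_epi8(1), _mm_set1_epi8(2));
    assert_eq!(_mm_cvtsi128_si32(r) as u8, 2); // (1 + 2 + 1) >> 1
}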

/// Multiplies and then horizontally adds signed 16-bit integers in `a` and `b`.
///
/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Horizontally adds adjacent pairs of
/// intermediate 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) }
}
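
// A `pmaddwd` sketch (hypothetical helper): each 32-bit result lane is
// a[2i]*b[2i] + a[2i+1]*b[2i+1], the building block of dot products.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_madd_dot_pairs() {
    let a = _mm_set1_epi16(3);
    let b = _mm_set1_epi16(4);
    let r = _mm_madd_epi16(a, b);
    assert_eq!(_mm_cvtsi128_si32(r), 3 * 4 + 3 * 4); // 24 per 32-bit lane
}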

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// maximum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i16x8();
        let b = b.as_i16x8();
        transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
    }
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed maximum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
    }
}

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// minimum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i16x8();
        let b = b.as_i16x8();
        transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
    }
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed minimum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
    }
}
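
// A signedness sketch (hypothetical helper): `pmaxub` compares unsigned,
// so a 0xFF byte is 255 (the maximum), not -1.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_max_epu8_is_unsigned() {
    let r = _mm_max_epu8(_mm_set1_epi8(-1), _mm_set1_epi8(1));
    assert_eq!(_mm_cvtsi128_si32(r) as u8, 0xFF); // 255 > 1 unsigned
}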

/// Multiplies the packed 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = simd_cast::<_, i32x8>(a.as_i16x8());
        let b = simd_cast::<_, i32x8>(b.as_i16x8());
        let r = simd_shr(simd_mul(a, b), i32x8::splat(16));
        transmute(simd_cast::<i32x8, i16x8>(r))
    }
}

/// Multiplies the packed unsigned 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = simd_cast::<_, u32x8>(a.as_u16x8());
        let b = simd_cast::<_, u32x8>(b.as_u16x8());
        let r = simd_shr(simd_mul(a, b), u32x8::splat(16));
        transmute(simd_cast::<u32x8, u16x8>(r))
    }
}
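
// A high-half sketch (hypothetical helper): 1000 * 1000 = 1_000_000 =
// 0x000F_4240, whose high 16 bits are 0x000F.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_mulhi_keeps_high_bits() {
    let r = _mm_mulhi_epi16(_mm_set1_epi16(1000), _mm_set1_epi16(1000));
    assert_eq!(_mm_cvtsi128_si32(r) as i16, 0x000F);
}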

/// Multiplies the packed 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// low 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmullw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) }
}

/// Multiplies the low unsigned 32-bit integers from each packed 64-bit element
/// in `a` and `b`.
///
/// Returns the unsigned 64-bit results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_epu32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmuludq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = a.as_u64x2();
        let b = b.as_u64x2();
        let mask = u64x2::splat(u32::MAX.into());
        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
    }
}
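
// A widening-multiply sketch (hypothetical helper): only the low 32-bit
// lane of each 64-bit element is multiplied, yielding a full 64-bit product.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_mul_epu32_widens() {
    let a = _mm_set_epi32(0, -1, 0, -1); // low lanes hold u32::MAX
    let b = _mm_set_epi32(0, 2, 0, 2);
    let r = _mm_mul_epu32(a, b);
    // u32::MAX * 2 = 0x1_FFFF_FFFE does not fit in 32 bits; check the
    // carry in the upper half of the low 64-bit element.
    assert_eq!(_mm_cvtsi128_si32(_mm_srli_si128::<4>(r)), 1);
}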

/// Sums the absolute differences of packed unsigned 8-bit integers.
///
/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
/// and `b`, then horizontally sums each consecutive 8 differences to produce
/// two unsigned 16-bit integers, and packs these unsigned 16-bit integers in
/// the low 16 bits of the returned 64-bit elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sad_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psadbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(psadbw(a.as_u8x16(), b.as_u8x16())) }
}
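
// A SAD sketch (hypothetical helper): |3 - 1| summed over each group of
// eight bytes gives 16 in the low 16 bits of each 64-bit half.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_sad_epu8_sums_diffs() {
    let r = _mm_sad_epu8(_mm_set1_epi8(3), _mm_set1_epi8(1));
    assert_eq!(_mm_cvtsi128_si32(r), 8 * 2);
}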

/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) }
}

/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) }
}

/// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) }
}

/// Subtracts packed 64-bit integers in `b` from packed 64-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) }
}

/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`
/// using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16())) }
}

/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`
/// using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8())) }
}

/// Subtracts packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
/// integers in `a` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16())) }
}

/// Subtracts packed unsigned 16-bit integers in `b` from packed unsigned 16-bit
/// integers in `a` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8())) }
}
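
// An unsigned-saturation sketch (hypothetical helper): 10 - 20 clamps to 0
// rather than wrapping to 246.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_subs_epu8_clamps_at_zero() {
    let r = _mm_subs_epu8(_mm_set1_epi8(10), _mm_set1_epi8(20));
    assert_eq!(_mm_cvtsi128_si32(r), 0); // every lane saturates to 0
}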

/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { _mm_slli_si128_impl::<IMM8>(a) }
}

/// Implementation detail: converts the immediate argument of the
/// `_mm_slli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        if shift > 15 { i } else { 16 - shift + i }
    }
    transmute::<i8x16, _>(simd_shuffle!(
        i8x16::ZERO,
        a.as_i8x16(),
        [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    ))
}
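
// A byte-shift sketch (hypothetical helper): `pslldq` moves whole bytes
// toward the most-significant end, so 32-bit lane 0 ends up in lane 1.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_slli_si128_moves_bytes() {
    let x = _mm_set_epi32(0, 0, 0, 7);
    let y = _mm_slli_si128::<4>(x);
    assert_eq!(_mm_cvtsi128_si32(y), 0); // lane 0 is now zero
    assert_eq!(_mm_cvtsi128_si32(_mm_srli_si128::<4>(y)), 7); // old lane 0
}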

/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bslli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        _mm_slli_si128_impl::<IMM8>(a)
    }
}

/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bsrli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        _mm_srli_si128_impl::<IMM8>(a)
    }
}

/// Shifts packed 16-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 16 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
        }
    }
}

/// Shifts packed 16-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psllw(a.as_i16x8(), count.as_i16x8())) }
}

/// Shifts packed 32-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 32 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
        }
    }
}

/// Shifts packed 32-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(pslld(a.as_i32x4(), count.as_i32x4())) }
}

/// Shifts packed 64-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 64 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
        }
    }
}

/// Shifts packed 64-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psllq(a.as_i64x2(), count.as_i64x2())) }
}

/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16))) }
}

/// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psraw(a.as_i16x8(), count.as_i16x8())) }
}

/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31)))) }
}

/// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psrad(a.as_i32x4(), count.as_i32x4())) }
}
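
// An arithmetic-shift sketch (hypothetical helper): `psraw` copies the sign
// bit, so -8 >> 2 stays negative.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_srai_epi16_keeps_sign() {
    let r = _mm_srai_epi16::<2>(_mm_set1_epi16(-8));
    assert_eq!(_mm_cvtsi128_si32(r) as i16, -2);
}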

/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { _mm_srli_si128_impl::<IMM8>(a) }
}

/// Implementation detail: converts the immediate argument of the
/// `_mm_srli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    const fn mask(shift: i32, i: u32) -> u32 {
        if (shift as u32) > 15 {
            i + 16
        } else {
            i + (shift as u32)
        }
    }
    let x: i8x16 = simd_shuffle!(
        a.as_i8x16(),
        i8x16::ZERO,
        [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    );
    transmute(x)
}

/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 16 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
        }
    }
}

/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psrlw(a.as_i16x8(), count.as_i16x8())) }
}

/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 32 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
        }
    }
}

/// Shifts packed 32-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psrld(a.as_i32x4(), count.as_i32x4())) }
}

/// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 64 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
        }
    }
}

/// Shifts packed 64-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psrlq(a.as_i64x2(), count.as_i64x2())) }
}
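
// A logical-shift sketch (hypothetical helper): `psrlw` shifts in zeros, so
// the same -8 input from the `psraw` sketch above becomes a large positive
// value (0xFFF8 >> 2 = 0x3FFE).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_srli_epi16_shifts_in_zeros() {
    let r = _mm_srli_epi16::<2>(_mm_set1_epi16(-8));
    assert_eq!(_mm_cvtsi128_si32(r) as u16, 0x3FFE);
}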

/// Computes the bitwise AND of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_and(a, b) }
}

/// Computes the bitwise NOT of 128 bits (representing integer data) in `a` and
/// then AND with `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_and(simd_xor(_mm_set1_epi8(-1), a), b) }
}

/// Computes the bitwise OR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_or(a, b) }
}

/// Computes the bitwise XOR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_xor(a, b) }
}
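
// A bit-clear sketch (hypothetical helper): andnot computes `!a & b`, i.e.
// it clears exactly the bits of `b` that are set in `a`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_andnot_clears_bits() {
    let mask = _mm_set1_epi32(0b1100);
    let x = _mm_set1_epi32(0b1010);
    let r = _mm_andnot_si128(mask, x);
    assert_eq!(_mm_cvtsi128_si32(r), 0b0010);
}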

/// Compares packed 8-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
}

/// Compares packed 16-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
}

/// Compares packed 32-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
}

/// Compares packed 8-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
}

/// Compares packed 16-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
}

/// Compares packed 32-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
}

/// Compares packed 8-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
}

/// Compares packed 16-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
}

/// Compares packed 32-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
}
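
// A compare-mask sketch (hypothetical helper): comparisons return all-ones
// (-1) per matching lane and all-zeros otherwise, ready for masking.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_cmpeq_yields_masks() {
    let r = _mm_cmpeq_epi32(_mm_set1_epi32(5), _mm_set1_epi32(5));
    assert_eq!(_mm_cvtsi128_si32(r), -1); // 0xFFFF_FFFF per equal lane
}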

/// Converts the lower two packed 32-bit integers in `a` to packed
/// double-precision (64-bit) floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
    unsafe {
        let a = a.as_i32x4();
        simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1]))
    }
}

/// Returns `a` with its lower element replaced by `b` after converting it to
/// an `f64`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsi2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
    unsafe { simd_insert!(a, 0, b as f64) }
}

/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit)
/// floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
    unsafe { transmute(simd_cast::<_, f32x4>(a.as_i32x4())) }
}

/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtps_epi32(a: __m128) -> __m128i {
    unsafe { transmute(cvtps2dq(a)) }
}
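
// A rounding sketch (hypothetical helper): `cvtps2dq` uses the current MXCSR
// rounding mode; under the default round-to-nearest-even, 2.5 converts to 2
// rather than being truncated.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_cvtps_epi32_rounds_to_even() {
    let r = _mm_cvtps_epi32(_mm_set1_ps(2.5));
    assert_eq!(_mm_cvtsi128_si32(r), 2); // assumes the default rounding mode
}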

/// Returns a vector whose lowest element is `a` and all higher elements are
/// `0`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsi32_si128(a: i32) -> __m128i {
    unsafe { transmute(i32x4::new(a, 0, 0, 0)) }
}

/// Returns the lowest element of `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi128_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
    unsafe { simd_extract!(a.as_i32x4(), 0) }
}

/// Sets packed 64-bit integers with the supplied values, from highest to
/// lowest.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi64x)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
    unsafe { transmute(i64x2::new(e0, e1)) }
}

/// Sets packed 32-bit integers with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    unsafe { transmute(i32x4::new(e0, e1, e2, e3)) }
}

/// Sets packed 16-bit integers with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    unsafe { transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) }
}

/// Sets packed 8-bit integers with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    unsafe {
        #[rustfmt::skip]
        transmute(i8x16::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
        ))
    }
}

/// Broadcasts 64-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi64x)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi64x(a: i64) -> __m128i {
    _mm_set_epi64x(a, a)
}

/// Broadcasts 32-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi32(a: i32) -> __m128i {
    _mm_set_epi32(a, a, a, a)
}

/// Broadcasts 16-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi16(a: i16) -> __m128i {
    _mm_set_epi16(a, a, a, a, a, a, a, a)
}

/// Broadcasts 8-bit integer `a` to all elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi8(a: i8) -> __m128i {
    _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
}

/// Sets packed 32-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    _mm_set_epi32(e0, e1, e2, e3)
}

/// Sets packed 16-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
}

/// Sets packed 8-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    #[rustfmt::skip]
    _mm_set_epi8(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    )
}
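
// An argument-order sketch (hypothetical helper): `set` takes values from
// the highest lane down, `setr` from the lowest lane up, so these two calls
// build identical vectors.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
fn example_set_vs_setr() {
    let a = _mm_set_epi32(3, 2, 1, 0);
    let b = _mm_setr_epi32(0, 1, 2, 3);
    let eq = _mm_cmpeq_epi32(a, b);
    assert_eq!(_mm_cvtsi128_si32(eq), -1); // lane 0 matches (as do the rest)
}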

/// Returns a vector with all elements set to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setzero_si128() -> __m128i {
    const { unsafe { mem::zeroed() } }
}

/// Loads a 64-bit integer from memory into the first element of the returned
/// vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
    _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
}

/// Loads 128-bits of integer data from memory into a new vector.
///
/// `mem_addr` must be aligned on a 16-byte boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
    *mem_addr
}

/// Loads 128-bits of integer data from memory into a new vector.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
    let mut dst: __m128i = _mm_undefined_si128();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m128i>(),
    );
    dst
}
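
// An unaligned round-trip sketch (hypothetical helper): `movups`/`movdqu`
// tolerate any alignment, so an offset-by-one buffer works.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn example_unaligned_roundtrip() {
    let mut buf = [0u8; 17];
    let v = _mm_set1_epi8(0x5A);
    _mm_storeu_si128(buf.as_mut_ptr().add(1) as *mut __m128i, v);
    let r = _mm_loadu_si128(buf.as_ptr().add(1) as *const __m128i);
    assert_eq!(_mm_cvtsi128_si32(r), 0x5A5A5A5A);
}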
1273 | |
1274 | /// Conditionally store 8-bit integer elements from `a` into memory using |
1275 | /// `mask`. |
1276 | /// |
1277 | /// Elements are not stored when the highest bit is not set in the |
1278 | /// corresponding element. |
1279 | /// |
1280 | /// `mem_addr` should correspond to a 128-bit memory location and does not need |
1281 | /// to be aligned on any particular boundary. |
1282 | /// |
1283 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskmoveu_si128) |
1284 | #[inline] |
1285 | #[target_feature(enable = "sse2")] |
1286 | #[cfg_attr(test, assert_instr(maskmovdqu))] |
1287 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1288 | pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) { |
1289 | maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr) |
1290 | } |
1291 | |
1292 | /// Stores 128-bits of integer data from `a` into memory. |
1293 | /// |
1294 | /// `mem_addr` must be aligned on a 16-byte boundary. |
1295 | /// |
1296 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_si128) |
1297 | #[inline] |
1298 | #[target_feature(enable = "sse2")] |
1299 | #[cfg_attr( |
1300 | all(test, not(all(target_arch = "x86", target_env = "msvc"))), |
1301 | assert_instr(movaps) |
1302 | )] |
1303 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1304 | pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) { |
1305 | *mem_addr = a; |
1306 | } |
1307 | |
1308 | /// Stores 128-bits of integer data from `a` into memory. |
1309 | /// |
1310 | /// `mem_addr` does not need to be aligned on any particular boundary. |
1311 | /// |
1312 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si128) |
1313 | #[inline] |
1314 | #[target_feature(enable = "sse2")] |
1315 | #[cfg_attr(test, assert_instr(movups))] // FIXME movdqu expected |
1316 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1317 | pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) { |
    mem_addr.write_unaligned(a);
1319 | } |
1320 | |
1321 | /// Stores the lower 64-bit integer `a` to a memory location. |
1322 | /// |
1323 | /// `mem_addr` does not need to be aligned on any particular boundary. |
1324 | /// |
1325 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_epi64) |
1326 | #[inline] |
1327 | #[target_feature(enable = "sse2")] |
1328 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1329 | pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) { |
    ptr::copy_nonoverlapping(ptr::addr_of!(a) as *const u8, mem_addr as *mut u8, 8);
1331 | } |
1332 | |
1333 | /// Stores a 128-bit integer vector to a 128-bit aligned memory location. |
1334 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be |
1335 | /// used again soon). |
1336 | /// |
1337 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si128) |
1338 | /// |
1339 | /// # Safety of non-temporal stores |
1340 | /// |
1341 | /// After using this intrinsic, but before any other access to the memory that this intrinsic |
1342 | /// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In |
1343 | /// particular, functions that call this intrinsic should generally call `_mm_sfence` before they |
1344 | /// return. |
1345 | /// |
1346 | /// See [`_mm_sfence`] for details. |
1347 | #[inline] |
1348 | #[target_feature(enable = "sse2")] |
1349 | #[cfg_attr(test, assert_instr(movntdq))] |
1350 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1351 | pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) { |
1352 | crate::arch::asm!( |
1353 | vps!("movntdq", ",{a}"), |
1354 | p = in(reg) mem_addr, |
1355 | a = in(xmm_reg) a, |
1356 | options(nostack, preserves_flags), |
1357 | ); |
1358 | } |
1359 | |
1360 | /// Stores a 32-bit integer value in the specified memory location. |
1361 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be |
1362 | /// used again soon). |
1363 | /// |
1364 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si32) |
1365 | /// |
1366 | /// # Safety of non-temporal stores |
1367 | /// |
1368 | /// After using this intrinsic, but before any other access to the memory that this intrinsic |
1369 | /// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In |
1370 | /// particular, functions that call this intrinsic should generally call `_mm_sfence` before they |
1371 | /// return. |
1372 | /// |
1373 | /// See [`_mm_sfence`] for details. |
1374 | #[inline] |
1375 | #[target_feature(enable = "sse2")] |
1376 | #[cfg_attr(test, assert_instr(movnti))] |
1377 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1378 | pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) { |
1379 | crate::arch::asm!( |
1380 | vps!("movnti", ",{a:e}"), // `:e` for 32bit value |
1381 | p = in(reg) mem_addr, |
1382 | a = in(reg) a, |
1383 | options(nostack, preserves_flags), |
1384 | ); |
1385 | } |
1386 | |
1387 | /// Returns a vector where the low element is extracted from `a` and its upper |
1388 | /// element is zero. |
1389 | /// |
1390 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_epi64) |
1391 | #[inline] |
1392 | #[target_feature(enable = "sse2")] |
1393 | // FIXME movd on msvc, movd on i686 |
1394 | #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movq))] |
1395 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1396 | pub fn _mm_move_epi64(a: __m128i) -> __m128i { |
1397 | unsafe { |
1398 | let r: i64x2 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 2]); |
        transmute(r)
1400 | } |
1401 | } |
1402 | |
1403 | /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers |
1404 | /// using signed saturation. |
1405 | /// |
1406 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi16) |
1407 | #[inline] |
1408 | #[target_feature(enable = "sse2")] |
1409 | #[cfg_attr(test, assert_instr(packsswb))] |
1410 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1411 | pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i { |
    unsafe { transmute(packsswb(a.as_i16x8(), b.as_i16x8())) }
1413 | } |
1414 | |
1415 | /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers |
1416 | /// using signed saturation. |
1417 | /// |
1418 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi32) |
1419 | #[inline] |
1420 | #[target_feature(enable = "sse2")] |
1421 | #[cfg_attr(test, assert_instr(packssdw))] |
1422 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1423 | pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i { |
    unsafe { transmute(packssdw(a.as_i32x4(), b.as_i32x4())) }
1425 | } |
1426 | |
1427 | /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers |
1428 | /// using unsigned saturation. |
1429 | /// |
1430 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi16) |
1431 | #[inline] |
1432 | #[target_feature(enable = "sse2")] |
1433 | #[cfg_attr(test, assert_instr(packuswb))] |
1434 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1435 | pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i { |
    unsafe { transmute(packuswb(a.as_i16x8(), b.as_i16x8())) }
1437 | } |
1438 | |
1439 | /// Returns the `imm8` element of `a`. |
1440 | /// |
1441 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi16) |
1442 | #[inline] |
1443 | #[target_feature(enable = "sse2")] |
1444 | #[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))] |
1445 | #[rustc_legacy_const_generics(1)] |
1446 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1447 | pub fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 { |
1448 | static_assert_uimm_bits!(IMM8, 3); |
1449 | unsafe { simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32 } |
1450 | } |
1451 | |
1452 | /// Returns a new vector where the `imm8` element of `a` is replaced with `i`. |
1453 | /// |
1454 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi16) |
1455 | #[inline] |
1456 | #[target_feature(enable = "sse2")] |
1457 | #[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))] |
1458 | #[rustc_legacy_const_generics(2)] |
1459 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1460 | pub fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i { |
1461 | static_assert_uimm_bits!(IMM8, 3); |
    unsafe { transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16)) }
1463 | } |
1464 | |
1465 | /// Returns a mask of the most significant bit of each element in `a`. |
1466 | /// |
1467 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_epi8) |
1468 | #[inline] |
1469 | #[target_feature(enable = "sse2")] |
1470 | #[cfg_attr(test, assert_instr(pmovmskb))] |
1471 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1472 | pub fn _mm_movemask_epi8(a: __m128i) -> i32 { |
1473 | unsafe { |
1474 | let z: i8x16 = i8x16::ZERO; |
        let m: i8x16 = simd_lt(a.as_i8x16(), z);
1476 | simd_bitmask::<_, u16>(m) as u32 as i32 |
1477 | } |
1478 | } |
1479 | |
1480 | /// Shuffles 32-bit integers in `a` using the control in `IMM8`. |
1481 | /// |
1482 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi32) |
1483 | #[inline] |
1484 | #[target_feature(enable = "sse2")] |
1485 | #[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))] |
1486 | #[rustc_legacy_const_generics(1)] |
1487 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1488 | pub fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i { |
1489 | static_assert_uimm_bits!(IMM8, 8); |
1490 | unsafe { |
1491 | let a: i32x4 = a.as_i32x4(); |
1492 | let x: i32x4 = simd_shuffle!( |
1493 | a, |
1494 | a, |
1495 | [ |
1496 | IMM8 as u32 & 0b11, |
1497 | (IMM8 as u32 >> 2) & 0b11, |
1498 | (IMM8 as u32 >> 4) & 0b11, |
1499 | (IMM8 as u32 >> 6) & 0b11, |
1500 | ], |
1501 | ); |
        transmute(x)
1503 | } |
1504 | } |
1505 | |
1506 | /// Shuffles 16-bit integers in the high 64 bits of `a` using the control in |
1507 | /// `IMM8`. |
1508 | /// |
/// Puts the results in the high 64 bits of the returned vector, with the low 64
1510 | /// bits being copied from `a`. |
1511 | /// |
1512 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflehi_epi16) |
1513 | #[inline] |
1514 | #[target_feature(enable = "sse2")] |
1515 | #[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))] |
1516 | #[rustc_legacy_const_generics(1)] |
1517 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1518 | pub fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i { |
1519 | static_assert_uimm_bits!(IMM8, 8); |
1520 | unsafe { |
1521 | let a: i16x8 = a.as_i16x8(); |
1522 | let x: i16x8 = simd_shuffle!( |
1523 | a, |
1524 | a, |
1525 | [ |
1526 | 0, |
1527 | 1, |
1528 | 2, |
1529 | 3, |
1530 | (IMM8 as u32 & 0b11) + 4, |
1531 | ((IMM8 as u32 >> 2) & 0b11) + 4, |
1532 | ((IMM8 as u32 >> 4) & 0b11) + 4, |
1533 | ((IMM8 as u32 >> 6) & 0b11) + 4, |
1534 | ], |
1535 | ); |
        transmute(x)
1537 | } |
1538 | } |
1539 | |
1540 | /// Shuffles 16-bit integers in the low 64 bits of `a` using the control in |
1541 | /// `IMM8`. |
1542 | /// |
/// Puts the results in the low 64 bits of the returned vector, with the high 64
1544 | /// bits being copied from `a`. |
1545 | /// |
1546 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflelo_epi16) |
1547 | #[inline] |
1548 | #[target_feature(enable = "sse2")] |
1549 | #[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))] |
1550 | #[rustc_legacy_const_generics(1)] |
1551 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1552 | pub fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i { |
1553 | static_assert_uimm_bits!(IMM8, 8); |
1554 | unsafe { |
1555 | let a: i16x8 = a.as_i16x8(); |
1556 | let x: i16x8 = simd_shuffle!( |
1557 | a, |
1558 | a, |
1559 | [ |
1560 | IMM8 as u32 & 0b11, |
1561 | (IMM8 as u32 >> 2) & 0b11, |
1562 | (IMM8 as u32 >> 4) & 0b11, |
1563 | (IMM8 as u32 >> 6) & 0b11, |
1564 | 4, |
1565 | 5, |
1566 | 6, |
1567 | 7, |
1568 | ], |
1569 | ); |
        transmute(x)
1571 | } |
1572 | } |
1573 | |
/// Unpacks and interleaves 8-bit integers from the high half of `a` and `b`.
1575 | /// |
1576 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi8) |
1577 | #[inline] |
1578 | #[target_feature(enable = "sse2")] |
1579 | #[cfg_attr(test, assert_instr(punpckhbw))] |
1580 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1581 | pub fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i { |
1582 | unsafe { |
        transmute::<i8x16, _>(simd_shuffle!(
1584 | a.as_i8x16(), |
1585 | b.as_i8x16(), |
1586 | [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31], |
1587 | )) |
1588 | } |
1589 | } |
1590 | |
/// Unpacks and interleaves 16-bit integers from the high half of `a` and `b`.
1592 | /// |
1593 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi16) |
1594 | #[inline] |
1595 | #[target_feature(enable = "sse2")] |
1596 | #[cfg_attr(test, assert_instr(punpckhwd))] |
1597 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1598 | pub fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i { |
1599 | unsafe { |
1600 | let x: i16x8 = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]); |
        transmute::<i16x8, _>(x)
1602 | } |
1603 | } |
1604 | |
/// Unpacks and interleaves 32-bit integers from the high half of `a` and `b`.
1606 | /// |
1607 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi32) |
1608 | #[inline] |
1609 | #[target_feature(enable = "sse2")] |
1610 | #[cfg_attr(test, assert_instr(unpckhps))] |
1611 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1612 | pub fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i { |
    unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) }
1614 | } |
1615 | |
/// Unpacks and interleaves 64-bit integers from the high half of `a` and `b`.
1617 | /// |
1618 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi64) |
1619 | #[inline] |
1620 | #[target_feature(enable = "sse2")] |
1621 | #[cfg_attr(test, assert_instr(unpckhpd))] |
1622 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1623 | pub fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i { |
    unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3])) }
1625 | } |
1626 | |
/// Unpacks and interleaves 8-bit integers from the low half of `a` and `b`.
1628 | /// |
1629 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi8) |
1630 | #[inline] |
1631 | #[target_feature(enable = "sse2")] |
1632 | #[cfg_attr(test, assert_instr(punpcklbw))] |
1633 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1634 | pub fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i { |
1635 | unsafe { |
        transmute::<i8x16, _>(simd_shuffle!(
1637 | a.as_i8x16(), |
1638 | b.as_i8x16(), |
1639 | [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23], |
1640 | )) |
1641 | } |
1642 | } |
1643 | |
/// Unpacks and interleaves 16-bit integers from the low half of `a` and `b`.
1645 | /// |
1646 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi16) |
1647 | #[inline] |
1648 | #[target_feature(enable = "sse2")] |
1649 | #[cfg_attr(test, assert_instr(punpcklwd))] |
1650 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1651 | pub fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i { |
1652 | unsafe { |
1653 | let x: i16x8 = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]); |
        transmute::<i16x8, _>(x)
1655 | } |
1656 | } |
1657 | |
/// Unpacks and interleaves 32-bit integers from the low half of `a` and `b`.
1659 | /// |
1660 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi32) |
1661 | #[inline] |
1662 | #[target_feature(enable = "sse2")] |
1663 | #[cfg_attr(test, assert_instr(unpcklps))] |
1664 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1665 | pub fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i { |
    unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) }
1667 | } |
1668 | |
/// Unpacks and interleaves 64-bit integers from the low half of `a` and `b`.
1670 | /// |
1671 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi64) |
1672 | #[inline] |
1673 | #[target_feature(enable = "sse2")] |
1674 | #[cfg_attr(test, assert_instr(movlhps))] |
1675 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1676 | pub fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i { |
    unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2])) }
1678 | } |
1679 | |
1680 | /// Returns a new vector with the low element of `a` replaced by the sum of the |
1681 | /// low elements of `a` and `b`. |
1682 | /// |
1683 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_sd) |
1684 | #[inline] |
1685 | #[target_feature(enable = "sse2")] |
1686 | #[cfg_attr(test, assert_instr(addsd))] |
1687 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1688 | pub fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d { |
1689 | unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) } |
1690 | } |
1691 | |
1692 | /// Adds packed double-precision (64-bit) floating-point elements in `a` and |
1693 | /// `b`. |
1694 | /// |
1695 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_pd) |
1696 | #[inline] |
1697 | #[target_feature(enable = "sse2")] |
1698 | #[cfg_attr(test, assert_instr(addpd))] |
1699 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1700 | pub fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { simd_add(a, b) }
1702 | } |
1703 | |
1704 | /// Returns a new vector with the low element of `a` replaced by the result of |
/// dividing the lower element of `a` by the lower element of `b`.
1706 | /// |
1707 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_sd) |
1708 | #[inline] |
1709 | #[target_feature(enable = "sse2")] |
1710 | #[cfg_attr(test, assert_instr(divsd))] |
1711 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1712 | pub fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d { |
1713 | unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) } |
1714 | } |
1715 | |
/// Divides packed double-precision (64-bit) floating-point elements in `a` by
1717 | /// packed elements in `b`. |
1718 | /// |
1719 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_pd) |
1720 | #[inline] |
1721 | #[target_feature(enable = "sse2")] |
1722 | #[cfg_attr(test, assert_instr(divpd))] |
1723 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1724 | pub fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { simd_div(a, b) }
1726 | } |
1727 | |
1728 | /// Returns a new vector with the low element of `a` replaced by the maximum |
1729 | /// of the lower elements of `a` and `b`. |
1730 | /// |
1731 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_sd) |
1732 | #[inline] |
1733 | #[target_feature(enable = "sse2")] |
1734 | #[cfg_attr(test, assert_instr(maxsd))] |
1735 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1736 | pub fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d { |
1737 | unsafe { maxsd(a, b) } |
1738 | } |
1739 | |
1740 | /// Returns a new vector with the maximum values from corresponding elements in |
1741 | /// `a` and `b`. |
1742 | /// |
1743 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_pd) |
1744 | #[inline] |
1745 | #[target_feature(enable = "sse2")] |
1746 | #[cfg_attr(test, assert_instr(maxpd))] |
1747 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1748 | pub fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d { |
1749 | unsafe { maxpd(a, b) } |
1750 | } |
1751 | |
1752 | /// Returns a new vector with the low element of `a` replaced by the minimum |
1753 | /// of the lower elements of `a` and `b`. |
1754 | /// |
1755 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_sd) |
1756 | #[inline] |
1757 | #[target_feature(enable = "sse2")] |
1758 | #[cfg_attr(test, assert_instr(minsd))] |
1759 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1760 | pub fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d { |
1761 | unsafe { minsd(a, b) } |
1762 | } |
1763 | |
1764 | /// Returns a new vector with the minimum values from corresponding elements in |
1765 | /// `a` and `b`. |
1766 | /// |
1767 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_pd) |
1768 | #[inline] |
1769 | #[target_feature(enable = "sse2")] |
1770 | #[cfg_attr(test, assert_instr(minpd))] |
1771 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1772 | pub fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d { |
1773 | unsafe { minpd(a, b) } |
1774 | } |
1775 | |
1776 | /// Returns a new vector with the low element of `a` replaced by multiplying the |
1777 | /// low elements of `a` and `b`. |
1778 | /// |
1779 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_sd) |
1780 | #[inline] |
1781 | #[target_feature(enable = "sse2")] |
1782 | #[cfg_attr(test, assert_instr(mulsd))] |
1783 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1784 | pub fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d { |
1785 | unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) } |
1786 | } |
1787 | |
1788 | /// Multiplies packed double-precision (64-bit) floating-point elements in `a` |
1789 | /// and `b`. |
1790 | /// |
1791 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_pd) |
1792 | #[inline] |
1793 | #[target_feature(enable = "sse2")] |
1794 | #[cfg_attr(test, assert_instr(mulpd))] |
1795 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1796 | pub fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { simd_mul(a, b) }
1798 | } |
1799 | |
1800 | /// Returns a new vector with the low element of `a` replaced by the square |
/// root of the lower element of `b`.
1802 | /// |
1803 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_sd) |
1804 | #[inline] |
1805 | #[target_feature(enable = "sse2")] |
1806 | #[cfg_attr(test, assert_instr(sqrtsd))] |
1807 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1808 | pub fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d { |
1809 | unsafe { simd_insert!(a, 0, sqrtf64(_mm_cvtsd_f64(b))) } |
1810 | } |
1811 | |
1812 | /// Returns a new vector with the square root of each of the values in `a`. |
1813 | /// |
1814 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_pd) |
1815 | #[inline] |
1816 | #[target_feature(enable = "sse2")] |
1817 | #[cfg_attr(test, assert_instr(sqrtpd))] |
1818 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1819 | pub fn _mm_sqrt_pd(a: __m128d) -> __m128d { |
1820 | unsafe { simd_fsqrt(a) } |
1821 | } |
1822 | |
/// Returns a new vector with the low element of `a` replaced by subtracting the
/// low element of `b` from the low element of `a`.
1825 | /// |
1826 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_sd) |
1827 | #[inline] |
1828 | #[target_feature(enable = "sse2")] |
1829 | #[cfg_attr(test, assert_instr(subsd))] |
1830 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1831 | pub fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d { |
1832 | unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) } |
1833 | } |
1834 | |
/// Subtracts packed double-precision (64-bit) floating-point elements in `b`
1836 | /// from `a`. |
1837 | /// |
1838 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_pd) |
1839 | #[inline] |
1840 | #[target_feature(enable = "sse2")] |
1841 | #[cfg_attr(test, assert_instr(subpd))] |
1842 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1843 | pub fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { simd_sub(a, b) }
1845 | } |
1846 | |
1847 | /// Computes the bitwise AND of packed double-precision (64-bit) floating-point |
1848 | /// elements in `a` and `b`. |
1849 | /// |
1850 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_pd) |
1851 | #[inline] |
1852 | #[target_feature(enable = "sse2")] |
1853 | #[cfg_attr(test, assert_instr(andps))] |
1854 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1855 | pub fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d { |
1856 | unsafe { |
        let a: __m128i = transmute(a);
        let b: __m128i = transmute(b);
        transmute(_mm_and_si128(a, b))
1860 | } |
1861 | } |
1862 | |
1863 | /// Computes the bitwise NOT of `a` and then AND with `b`. |
1864 | /// |
1865 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_pd) |
1866 | #[inline] |
1867 | #[target_feature(enable = "sse2")] |
1868 | #[cfg_attr(test, assert_instr(andnps))] |
1869 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1870 | pub fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d { |
1871 | unsafe { |
        let a: __m128i = transmute(a);
        let b: __m128i = transmute(b);
        transmute(_mm_andnot_si128(a, b))
1875 | } |
1876 | } |
1877 | |
1878 | /// Computes the bitwise OR of `a` and `b`. |
1879 | /// |
1880 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_pd) |
1881 | #[inline] |
1882 | #[target_feature(enable = "sse2")] |
1883 | #[cfg_attr(test, assert_instr(orps))] |
1884 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1885 | pub fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d { |
1886 | unsafe { |
        let a: __m128i = transmute(a);
        let b: __m128i = transmute(b);
        transmute(_mm_or_si128(a, b))
1890 | } |
1891 | } |
1892 | |
1893 | /// Computes the bitwise XOR of `a` and `b`. |
1894 | /// |
1895 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_pd) |
1896 | #[inline] |
1897 | #[target_feature(enable = "sse2")] |
1898 | #[cfg_attr(test, assert_instr(xorps))] |
1899 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1900 | pub fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d { |
1901 | unsafe { |
        let a: __m128i = transmute(a);
        let b: __m128i = transmute(b);
        transmute(_mm_xor_si128(a, b))
1905 | } |
1906 | } |
1907 | |
1908 | /// Returns a new vector with the low element of `a` replaced by the equality |
1909 | /// comparison of the lower elements of `a` and `b`. |
1910 | /// |
1911 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_sd) |
1912 | #[inline] |
1913 | #[target_feature(enable = "sse2")] |
1914 | #[cfg_attr(test, assert_instr(cmpeqsd))] |
1915 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1916 | pub fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmpsd(a, b, 0) }
1918 | } |
1919 | |
1920 | /// Returns a new vector with the low element of `a` replaced by the less-than |
1921 | /// comparison of the lower elements of `a` and `b`. |
1922 | /// |
1923 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_sd) |
1924 | #[inline] |
1925 | #[target_feature(enable = "sse2")] |
1926 | #[cfg_attr(test, assert_instr(cmpltsd))] |
1927 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1928 | pub fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmpsd(a, b, 1) }
1930 | } |
1931 | |
1932 | /// Returns a new vector with the low element of `a` replaced by the |
1933 | /// less-than-or-equal comparison of the lower elements of `a` and `b`. |
1934 | /// |
1935 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_sd) |
1936 | #[inline] |
1937 | #[target_feature(enable = "sse2")] |
1938 | #[cfg_attr(test, assert_instr(cmplesd))] |
1939 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1940 | pub fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmpsd(a, b, 2) }
1942 | } |
1943 | |
1944 | /// Returns a new vector with the low element of `a` replaced by the |
1945 | /// greater-than comparison of the lower elements of `a` and `b`. |
1946 | /// |
1947 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_sd) |
1948 | #[inline] |
1949 | #[target_feature(enable = "sse2")] |
1950 | #[cfg_attr(test, assert_instr(cmpltsd))] |
1951 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1952 | pub fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d { |
1953 | unsafe { simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64)) } |
1954 | } |
1955 | |
1956 | /// Returns a new vector with the low element of `a` replaced by the |
1957 | /// greater-than-or-equal comparison of the lower elements of `a` and `b`. |
1958 | /// |
1959 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_sd) |
1960 | #[inline] |
1961 | #[target_feature(enable = "sse2")] |
1962 | #[cfg_attr(test, assert_instr(cmplesd))] |
1963 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1964 | pub fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d { |
1965 | unsafe { simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64)) } |
1966 | } |
1967 | |
1968 | /// Returns a new vector with the low element of `a` replaced by the result |
1969 | /// of comparing both of the lower elements of `a` and `b` to `NaN`. If |
/// neither is equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0`
1971 | /// otherwise. |
1972 | /// |
1973 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_sd) |
1974 | #[inline] |
1975 | #[target_feature(enable = "sse2")] |
1976 | #[cfg_attr(test, assert_instr(cmpordsd))] |
1977 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1978 | pub fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmpsd(a, b, 7) }
1980 | } |
1981 | |
1982 | /// Returns a new vector with the low element of `a` replaced by the result of |
1983 | /// comparing both of the lower elements of `a` and `b` to `NaN`. If either is |
1984 | /// equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise. |
1985 | /// |
1986 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_sd) |
1987 | #[inline] |
1988 | #[target_feature(enable = "sse2")] |
1989 | #[cfg_attr(test, assert_instr(cmpunordsd))] |
1990 | #[stable(feature = "simd_x86", since = "1.27.0")] |
1991 | pub fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmpsd(a, b, 3) }
1993 | } |
1994 | |
1995 | /// Returns a new vector with the low element of `a` replaced by the not-equal |
1996 | /// comparison of the lower elements of `a` and `b`. |
1997 | /// |
1998 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_sd) |
1999 | #[inline] |
2000 | #[target_feature(enable = "sse2")] |
2001 | #[cfg_attr(test, assert_instr(cmpneqsd))] |
2002 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2003 | pub fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmpsd(a, b, 4) }
2005 | } |
2006 | |
2007 | /// Returns a new vector with the low element of `a` replaced by the |
2008 | /// not-less-than comparison of the lower elements of `a` and `b`. |
2009 | /// |
2010 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_sd) |
2011 | #[inline] |
2012 | #[target_feature(enable = "sse2")] |
2013 | #[cfg_attr(test, assert_instr(cmpnltsd))] |
2014 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2015 | pub fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmpsd(a, b, 5) }
2017 | } |
2018 | |
2019 | /// Returns a new vector with the low element of `a` replaced by the |
2020 | /// not-less-than-or-equal comparison of the lower elements of `a` and `b`. |
2021 | /// |
2022 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_sd) |
2023 | #[inline] |
2024 | #[target_feature(enable = "sse2")] |
2025 | #[cfg_attr(test, assert_instr(cmpnlesd))] |
2026 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2027 | pub fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmpsd(a, b, 6) }
2029 | } |
2030 | |
2031 | /// Returns a new vector with the low element of `a` replaced by the |
2032 | /// not-greater-than comparison of the lower elements of `a` and `b`. |
2033 | /// |
2034 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_sd) |
2035 | #[inline] |
2036 | #[target_feature(enable = "sse2")] |
2037 | #[cfg_attr(test, assert_instr(cmpnltsd))] |
2038 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2039 | pub fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d { |
2040 | unsafe { simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64)) } |
2041 | } |
2042 | |
2043 | /// Returns a new vector with the low element of `a` replaced by the |
2044 | /// not-greater-than-or-equal comparison of the lower elements of `a` and `b`. |
2045 | /// |
2046 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_sd) |
2047 | #[inline] |
2048 | #[target_feature(enable = "sse2")] |
2049 | #[cfg_attr(test, assert_instr(cmpnlesd))] |
2050 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2051 | pub fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d { |
2052 | unsafe { simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64)) } |
2053 | } |
2054 | |
2055 | /// Compares corresponding elements in `a` and `b` for equality. |
2056 | /// |
2057 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_pd) |
2058 | #[inline] |
2059 | #[target_feature(enable = "sse2")] |
2060 | #[cfg_attr(test, assert_instr(cmpeqpd))] |
2061 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2062 | pub fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmppd(a, b, 0) }
2064 | } |
2065 | |
2066 | /// Compares corresponding elements in `a` and `b` for less-than. |
2067 | /// |
2068 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_pd) |
2069 | #[inline] |
2070 | #[target_feature(enable = "sse2")] |
2071 | #[cfg_attr(test, assert_instr(cmpltpd))] |
2072 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2073 | pub fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmppd(a, b, 1) }
2075 | } |
2076 | |
/// Compares corresponding elements in `a` and `b` for less-than-or-equal.
2078 | /// |
2079 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_pd) |
2080 | #[inline] |
2081 | #[target_feature(enable = "sse2")] |
2082 | #[cfg_attr(test, assert_instr(cmplepd))] |
2083 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2084 | pub fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmppd(a, b, 2) }
2086 | } |
2087 | |
2088 | /// Compares corresponding elements in `a` and `b` for greater-than. |
2089 | /// |
2090 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_pd) |
2091 | #[inline] |
2092 | #[target_feature(enable = "sse2")] |
2093 | #[cfg_attr(test, assert_instr(cmpltpd))] |
2094 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2095 | pub fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d { |
    _mm_cmplt_pd(b, a)
2097 | } |
2098 | |
2099 | /// Compares corresponding elements in `a` and `b` for greater-than-or-equal. |
2100 | /// |
2101 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_pd) |
2102 | #[inline] |
2103 | #[target_feature(enable = "sse2")] |
2104 | #[cfg_attr(test, assert_instr(cmplepd))] |
2105 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2106 | pub fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d { |
    _mm_cmple_pd(b, a)
2108 | } |
2109 | |
2110 | /// Compares corresponding elements in `a` and `b` to see if neither is `NaN`. |
2111 | /// |
2112 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_pd) |
2113 | #[inline] |
2114 | #[target_feature(enable = "sse2")] |
2115 | #[cfg_attr(test, assert_instr(cmpordpd))] |
2116 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2117 | pub fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmppd(a, b, 7) }
2119 | } |
2120 | |
2121 | /// Compares corresponding elements in `a` and `b` to see if either is `NaN`. |
2122 | /// |
2123 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_pd) |
2124 | #[inline] |
2125 | #[target_feature(enable = "sse2")] |
2126 | #[cfg_attr(test, assert_instr(cmpunordpd))] |
2127 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2128 | pub fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmppd(a, b, 3) }
2130 | } |
2131 | |
2132 | /// Compares corresponding elements in `a` and `b` for not-equal. |
2133 | /// |
2134 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_pd) |
2135 | #[inline] |
2136 | #[target_feature(enable = "sse2")] |
2137 | #[cfg_attr(test, assert_instr(cmpneqpd))] |
2138 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2139 | pub fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmppd(a, b, 4) }
2141 | } |
2142 | |
2143 | /// Compares corresponding elements in `a` and `b` for not-less-than. |
2144 | /// |
2145 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_pd) |
2146 | #[inline] |
2147 | #[target_feature(enable = "sse2")] |
2148 | #[cfg_attr(test, assert_instr(cmpnltpd))] |
2149 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2150 | pub fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmppd(a, b, 5) }
2152 | } |
2153 | |
2154 | /// Compares corresponding elements in `a` and `b` for not-less-than-or-equal. |
2155 | /// |
2156 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_pd) |
2157 | #[inline] |
2158 | #[target_feature(enable = "sse2")] |
2159 | #[cfg_attr(test, assert_instr(cmpnlepd))] |
2160 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2161 | pub fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d { |
    unsafe { cmppd(a, b, 6) }
2163 | } |
2164 | |
2165 | /// Compares corresponding elements in `a` and `b` for not-greater-than. |
2166 | /// |
2167 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_pd) |
2168 | #[inline] |
2169 | #[target_feature(enable = "sse2")] |
2170 | #[cfg_attr(test, assert_instr(cmpnltpd))] |
2171 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2172 | pub fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d { |
    _mm_cmpnlt_pd(b, a)
2174 | } |
2175 | |
2176 | /// Compares corresponding elements in `a` and `b` for |
2177 | /// not-greater-than-or-equal. |
2178 | /// |
2179 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_pd) |
2180 | #[inline] |
2181 | #[target_feature(enable = "sse2")] |
2182 | #[cfg_attr(test, assert_instr(cmpnlepd))] |
2183 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2184 | pub fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d { |
    _mm_cmpnle_pd(b, a)
2186 | } |
2187 | |
2188 | /// Compares the lower element of `a` and `b` for equality. |
2189 | /// |
2190 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comieq_sd) |
2191 | #[inline] |
2192 | #[target_feature(enable = "sse2")] |
2193 | #[cfg_attr(test, assert_instr(comisd))] |
2194 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2195 | pub fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 { |
2196 | unsafe { comieqsd(a, b) } |
2197 | } |
2198 | |
2199 | /// Compares the lower element of `a` and `b` for less-than. |
2200 | /// |
2201 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comilt_sd) |
2202 | #[inline] |
2203 | #[target_feature(enable = "sse2")] |
2204 | #[cfg_attr(test, assert_instr(comisd))] |
2205 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2206 | pub fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 { |
2207 | unsafe { comiltsd(a, b) } |
2208 | } |
2209 | |
2210 | /// Compares the lower element of `a` and `b` for less-than-or-equal. |
2211 | /// |
2212 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comile_sd) |
2213 | #[inline] |
2214 | #[target_feature(enable = "sse2")] |
2215 | #[cfg_attr(test, assert_instr(comisd))] |
2216 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2217 | pub fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 { |
2218 | unsafe { comilesd(a, b) } |
2219 | } |
2220 | |
2221 | /// Compares the lower element of `a` and `b` for greater-than. |
2222 | /// |
2223 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comigt_sd) |
2224 | #[inline] |
2225 | #[target_feature(enable = "sse2")] |
2226 | #[cfg_attr(test, assert_instr(comisd))] |
2227 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2228 | pub fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 { |
2229 | unsafe { comigtsd(a, b) } |
2230 | } |
2231 | |
2232 | /// Compares the lower element of `a` and `b` for greater-than-or-equal. |
2233 | /// |
2234 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comige_sd) |
2235 | #[inline] |
2236 | #[target_feature(enable = "sse2")] |
2237 | #[cfg_attr(test, assert_instr(comisd))] |
2238 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2239 | pub fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 { |
2240 | unsafe { comigesd(a, b) } |
2241 | } |
2242 | |
2243 | /// Compares the lower element of `a` and `b` for not-equal. |
2244 | /// |
2245 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comineq_sd) |
2246 | #[inline] |
2247 | #[target_feature(enable = "sse2")] |
2248 | #[cfg_attr(test, assert_instr(comisd))] |
2249 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2250 | pub fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 { |
2251 | unsafe { comineqsd(a, b) } |
2252 | } |
2253 | |
2254 | /// Compares the lower element of `a` and `b` for equality. |
2255 | /// |
2256 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomieq_sd) |
2257 | #[inline] |
2258 | #[target_feature(enable = "sse2")] |
2259 | #[cfg_attr(test, assert_instr(ucomisd))] |
2260 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2261 | pub fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 { |
2262 | unsafe { ucomieqsd(a, b) } |
2263 | } |
2264 | |
2265 | /// Compares the lower element of `a` and `b` for less-than. |
2266 | /// |
2267 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomilt_sd) |
2268 | #[inline] |
2269 | #[target_feature(enable = "sse2")] |
2270 | #[cfg_attr(test, assert_instr(ucomisd))] |
2271 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2272 | pub fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 { |
2273 | unsafe { ucomiltsd(a, b) } |
2274 | } |
2275 | |
2276 | /// Compares the lower element of `a` and `b` for less-than-or-equal. |
2277 | /// |
2278 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomile_sd) |
2279 | #[inline] |
2280 | #[target_feature(enable = "sse2")] |
2281 | #[cfg_attr(test, assert_instr(ucomisd))] |
2282 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2283 | pub fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 { |
2284 | unsafe { ucomilesd(a, b) } |
2285 | } |
2286 | |
2287 | /// Compares the lower element of `a` and `b` for greater-than. |
2288 | /// |
2289 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomigt_sd) |
2290 | #[inline] |
2291 | #[target_feature(enable = "sse2")] |
2292 | #[cfg_attr(test, assert_instr(ucomisd))] |
2293 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2294 | pub fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 { |
2295 | unsafe { ucomigtsd(a, b) } |
2296 | } |
2297 | |
2298 | /// Compares the lower element of `a` and `b` for greater-than-or-equal. |
2299 | /// |
2300 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomige_sd) |
2301 | #[inline] |
2302 | #[target_feature(enable = "sse2")] |
2303 | #[cfg_attr(test, assert_instr(ucomisd))] |
2304 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2305 | pub fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 { |
2306 | unsafe { ucomigesd(a, b) } |
2307 | } |
2308 | |
2309 | /// Compares the lower element of `a` and `b` for not-equal. |
2310 | /// |
2311 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomineq_sd) |
2312 | #[inline] |
2313 | #[target_feature(enable = "sse2")] |
2314 | #[cfg_attr(test, assert_instr(ucomisd))] |
2315 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2316 | pub fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 { |
2317 | unsafe { ucomineqsd(a, b) } |
2318 | } |
2319 | |
2320 | /// Converts packed double-precision (64-bit) floating-point elements in `a` to |
/// packed single-precision (32-bit) floating-point elements.
2322 | /// |
2323 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_ps) |
2324 | #[inline] |
2325 | #[target_feature(enable = "sse2")] |
2326 | #[cfg_attr(test, assert_instr(cvtpd2ps))] |
2327 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2328 | pub fn _mm_cvtpd_ps(a: __m128d) -> __m128 { |
2329 | unsafe { |
2330 | let r: f32x2 = simd_cast::<_, f32x2>(a.as_f64x2()); |
2331 | let zero: f32x2 = f32x2::ZERO; |
        transmute::<f32x4, _>(simd_shuffle!(r, zero, [0, 1, 2, 3]))
2333 | } |
2334 | } |
2335 | |
2336 | /// Converts packed single-precision (32-bit) floating-point elements in `a` to |
/// packed double-precision (64-bit) floating-point elements.
2339 | /// |
2340 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_pd) |
2341 | #[inline] |
2342 | #[target_feature(enable = "sse2")] |
2343 | #[cfg_attr(test, assert_instr(cvtps2pd))] |
2344 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2345 | pub fn _mm_cvtps_pd(a: __m128) -> __m128d { |
2346 | unsafe { |
2347 | let a: f32x4 = a.as_f32x4(); |
        transmute(simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
2349 | } |
2350 | } |
2351 | |
2352 | /// Converts packed double-precision (64-bit) floating-point elements in `a` to |
2353 | /// packed 32-bit integers. |
2354 | /// |
2355 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epi32) |
2356 | #[inline] |
2357 | #[target_feature(enable = "sse2")] |
2358 | #[cfg_attr(test, assert_instr(cvtpd2dq))] |
2359 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2360 | pub fn _mm_cvtpd_epi32(a: __m128d) -> __m128i { |
    unsafe { transmute(cvtpd2dq(a)) }
2362 | } |
2363 | |
/// Converts the lower double-precision (64-bit) floating-point element in `a` to
2365 | /// a 32-bit integer. |
2366 | /// |
2367 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_si32) |
2368 | #[inline] |
2369 | #[target_feature(enable = "sse2")] |
2370 | #[cfg_attr(test, assert_instr(cvtsd2si))] |
2371 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2372 | pub fn _mm_cvtsd_si32(a: __m128d) -> i32 { |
2373 | unsafe { cvtsd2si(a) } |
2374 | } |
2375 | |
2376 | /// Converts the lower double-precision (64-bit) floating-point element in `b` |
/// to a single-precision (32-bit) floating-point element, stores the result in
/// the lower element of the return value, and copies the upper element from `a`
/// to the upper element of the return value.
2380 | /// |
2381 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_ss) |
2382 | #[inline] |
2383 | #[target_feature(enable = "sse2")] |
2384 | #[cfg_attr(test, assert_instr(cvtsd2ss))] |
2385 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2386 | pub fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 { |
2387 | unsafe { cvtsd2ss(a, b) } |
2388 | } |
2389 | |
2390 | /// Returns the lower double-precision (64-bit) floating-point element of `a`. |
2391 | /// |
2392 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_f64) |
2393 | #[inline] |
2394 | #[target_feature(enable = "sse2")] |
2395 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2396 | pub fn _mm_cvtsd_f64(a: __m128d) -> f64 { |
2397 | unsafe { simd_extract!(a, 0) } |
2398 | } |
2399 | |
2400 | /// Converts the lower single-precision (32-bit) floating-point element in `b` |
/// to a double-precision (64-bit) floating-point element, stores the result in
/// the lower element of the return value, and copies the upper element from `a`
/// to the upper element of the return value.
2404 | /// |
2405 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_sd) |
2406 | #[inline] |
2407 | #[target_feature(enable = "sse2")] |
2408 | #[cfg_attr(test, assert_instr(cvtss2sd))] |
2409 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2410 | pub fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d { |
2411 | unsafe { cvtss2sd(a, b) } |
2412 | } |
2413 | |
2414 | /// Converts packed double-precision (64-bit) floating-point elements in `a` to |
2415 | /// packed 32-bit integers with truncation. |
2416 | /// |
2417 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epi32) |
2418 | #[inline] |
2419 | #[target_feature(enable = "sse2")] |
2420 | #[cfg_attr(test, assert_instr(cvttpd2dq))] |
2421 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2422 | pub fn _mm_cvttpd_epi32(a: __m128d) -> __m128i { |
    unsafe { transmute(cvttpd2dq(a)) }
2424 | } |
2425 | |
2426 | /// Converts the lower double-precision (64-bit) floating-point element in `a` |
2427 | /// to a 32-bit integer with truncation. |
2428 | /// |
2429 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_si32) |
2430 | #[inline] |
2431 | #[target_feature(enable = "sse2")] |
2432 | #[cfg_attr(test, assert_instr(cvttsd2si))] |
2433 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2434 | pub fn _mm_cvttsd_si32(a: __m128d) -> i32 { |
2435 | unsafe { cvttsd2si(a) } |
2436 | } |
2437 | |
2438 | /// Converts packed single-precision (32-bit) floating-point elements in `a` to |
2439 | /// packed 32-bit integers with truncation. |
2440 | /// |
2441 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi32) |
2442 | #[inline] |
2443 | #[target_feature(enable = "sse2")] |
2444 | #[cfg_attr(test, assert_instr(cvttps2dq))] |
2445 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2446 | pub fn _mm_cvttps_epi32(a: __m128) -> __m128i { |
    unsafe { transmute(cvttps2dq(a)) }
2448 | } |
2449 | |
/// Copies double-precision (64-bit) floating-point value `a` to the lower
/// element of the return value, and zeroes the upper element.
2452 | /// |
2453 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_sd) |
2454 | #[inline] |
2455 | #[target_feature(enable = "sse2")] |
2456 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2457 | pub fn _mm_set_sd(a: f64) -> __m128d { |
    _mm_set_pd(0.0, a)
2459 | } |
2460 | |
/// Broadcasts double-precision (64-bit) floating-point value `a` to all elements
2462 | /// of the return value. |
2463 | /// |
2464 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_pd) |
2465 | #[inline] |
2466 | #[target_feature(enable = "sse2")] |
2467 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2468 | pub fn _mm_set1_pd(a: f64) -> __m128d { |
    _mm_set_pd(a, a)
2470 | } |
2471 | |
/// Broadcasts double-precision (64-bit) floating-point value `a` to all elements
2473 | /// of the return value. |
2474 | /// |
2475 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd1) |
2476 | #[inline] |
2477 | #[target_feature(enable = "sse2")] |
2478 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2479 | pub fn _mm_set_pd1(a: f64) -> __m128d { |
    _mm_set_pd(a, a)
2481 | } |
2482 | |
2483 | /// Sets packed double-precision (64-bit) floating-point elements in the return |
2484 | /// value with the supplied values. |
2485 | /// |
2486 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd) |
2487 | #[inline] |
2488 | #[target_feature(enable = "sse2")] |
2489 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2490 | pub fn _mm_set_pd(a: f64, b: f64) -> __m128d { |
2491 | __m128d([b, a]) |
2492 | } |
2493 | |
2494 | /// Sets packed double-precision (64-bit) floating-point elements in the return |
2495 | /// value with the supplied values in reverse order. |
2496 | /// |
2497 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_pd) |
2498 | #[inline] |
2499 | #[target_feature(enable = "sse2")] |
2500 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2501 | pub fn _mm_setr_pd(a: f64, b: f64) -> __m128d { |
    _mm_set_pd(b, a)
2503 | } |
2504 | |
2505 | /// Returns packed double-precision (64-bit) floating-point elements with all |
2506 | /// zeros. |
2507 | /// |
2508 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_pd) |
2509 | #[inline] |
2510 | #[target_feature(enable = "sse2")] |
2511 | #[cfg_attr(test, assert_instr(xorp))] |
2512 | #[stable(feature = "simd_x86", since = "1.27.0")] |
2513 | pub fn _mm_setzero_pd() -> __m128d { |
2514 | const { unsafe { mem::zeroed() } } |
2515 | } |

/// Returns a mask of the most significant bit of each element in `a`.
///
/// The mask is stored in the 2 least significant bits of the return value.
/// All other bits are set to `0`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movmskpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_movemask_pd(a: __m128d) -> i32 {
    // Propagate the highest bit to the rest, because simd_bitmask
    // requires all-1 or all-0.
    unsafe {
        let mask: i64x2 = simd_lt(transmute(a), i64x2::ZERO);
        simd_bitmask::<i64x2, u8>(mask).into()
    }
}
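
// A minimal sketch (hypothetical helper, assumes SSE2): bit 0 of the movemask
// result is the sign bit of the low lane, bit 1 that of the high lane; note
// that `-0.0` also has its sign bit set.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn sketch_movemask_pd_bits() {
    assert_eq!(_mm_movemask_pd(_mm_setr_pd(-1.0, 2.0)), 0b01);
    assert_eq!(_mm_movemask_pd(_mm_setr_pd(-0.0, -2.0)), 0b11);
}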

/// Loads 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from memory into the returned vector.
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
    *(mem_addr as *const __m128d)
}

/// Loads a 64-bit double-precision value to the low element of a
/// 128-bit vector of `[2 x double]` and clears the upper element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
    _mm_setr_pd(*mem_addr, 0.)
}

/// Loads a double-precision value into the high-order bits of a 128-bit
/// vector of `[2 x double]`. The low-order bits are copied from the low-order
/// bits of the first operand.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadh_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
    _mm_setr_pd(simd_extract!(a, 0), *mem_addr)
}

/// Loads a double-precision value into the low-order bits of a 128-bit
/// vector of `[2 x double]`. The high-order bits are copied from the
/// high-order bits of the first operand.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
    _mm_setr_pd(*mem_addr, simd_extract!(a, 1))
}
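
// A minimal sketch (hypothetical helper, assumes SSE2): `loadh`/`loadl`
// replace one lane of `a` from memory and keep the other lane unchanged.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn sketch_loadh_loadl_pd() {
    let a = _mm_setr_pd(1.0, 2.0);
    let x = 9.0f64;
    let mut out = [0.0f64; 2];
    _mm_storeu_pd(out.as_mut_ptr(), _mm_loadh_pd(a, &x));
    assert_eq!(out, [1.0, 9.0]); // high lane replaced
    _mm_storeu_pd(out.as_mut_ptr(), _mm_loadl_pd(a, &x));
    assert_eq!(out, [9.0, 2.0]); // low lane replaced
}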

/// Stores a 128-bit floating point vector of `[2 x double]` to a 128-bit
/// aligned memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_pd)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
    crate::arch::asm!(
        vps!("movntpd", ",{a}"),
        p = in(reg) mem_addr,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
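
// A minimal sketch of the fencing discipline described above (hypothetical
// helper, assumes SSE2 and a 16-byte-aligned `dst`): the thread that issues
// the non-temporal store runs `_mm_sfence` before anything else may touch the
// written memory.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn sketch_stream_pd_then_fence(dst: *mut f64, a: __m128d) {
    _mm_stream_pd(dst, a);
    _mm_sfence(); // publish the non-temporal store before returning
}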

/// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
    *mem_addr = simd_extract!(a, 0)
}

/// Stores 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from `a` into memory. `mem_addr` must be aligned
/// on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
    *(mem_addr as *mut __m128d) = a;
}

/// Stores 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from `a` into memory.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))] // FIXME movupd expected
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
    mem_addr.cast::<__m128d>().write_unaligned(a);
}

/// Stores the 16-bit integer from the first element of `a` into memory.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si16)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub unsafe fn _mm_storeu_si16(mem_addr: *mut u8, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut i16, simd_extract!(a.as_i16x8(), 0))
}

/// Stores the 32-bit integer from the first element of `a` into memory.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub unsafe fn _mm_storeu_si32(mem_addr: *mut u8, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut i32, simd_extract!(a.as_i32x4(), 0))
}

/// Stores the 64-bit integer from the first element of `a` into memory.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si64)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub unsafe fn _mm_storeu_si64(mem_addr: *mut u8, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut i64, simd_extract!(a.as_i64x2(), 0))
}
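
// A minimal sketch (hypothetical helper, assumes SSE2): the
// `_mm_storeu_si{16,32,64}` family writes only the low N bits of the vector;
// neighbouring bytes are left untouched, and no alignment is required.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn sketch_storeu_si32_partial_store() {
    let v = _mm_setr_epi32(0x11223344, 0, 0, 0);
    let mut buf = [0xAAu8; 8];
    _mm_storeu_si32(buf.as_mut_ptr().add(1), v); // deliberately misaligned
    assert_eq!(&buf[1..5], &0x11223344u32.to_le_bytes());
    assert_eq!(buf[0], 0xAA); // bytes outside the store are untouched
    assert_eq!(buf[5], 0xAA);
}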

/// Stores the lower double-precision (64-bit) floating-point element from `a`
/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store1_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
    let b: __m128d = simd_shuffle!(a, a, [0, 0]);
    *(mem_addr as *mut __m128d) = b;
}

/// Stores the lower double-precision (64-bit) floating-point element from `a`
/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd1)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
    let b: __m128d = simd_shuffle!(a, a, [0, 0]);
    *(mem_addr as *mut __m128d) = b;
}

/// Stores 2 double-precision (64-bit) floating-point elements from `a` into
/// memory in reverse order.
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storer_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
    let b: __m128d = simd_shuffle!(a, a, [1, 0]);
    *(mem_addr as *mut __m128d) = b;
}

/// Stores the upper 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeh_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
    *mem_addr = simd_extract!(a, 1);
}

/// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
    *mem_addr = simd_extract!(a, 0);
}

/// Loads a double-precision (64-bit) floating-point element from memory
/// into both elements of returned vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load1_pd)
#[inline]
#[target_feature(enable = "sse2")]
// #[cfg_attr(test, assert_instr(movapd))] // FIXME LLVM uses different codegen
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
    let d = *mem_addr;
    _mm_setr_pd(d, d)
}

/// Loads a double-precision (64-bit) floating-point element from memory
/// into both elements of returned vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd1)
#[inline]
#[target_feature(enable = "sse2")]
// #[cfg_attr(test, assert_instr(movapd))] // FIXME same as _mm_load1_pd
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
    _mm_load1_pd(mem_addr)
}

/// Loads 2 double-precision (64-bit) floating-point elements from memory into
/// the returned vector in reverse order. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadr_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
    let a = _mm_load_pd(mem_addr);
    simd_shuffle!(a, a, [1, 0])
}

/// Loads 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from memory into the returned vector.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
    let mut dst = _mm_undefined_pd();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m128d>(),
    );
    dst
}
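
// A minimal sketch (hypothetical helper, assumes SSE2): because no alignment
// is required, `_mm_loadu_pd` can read two doubles from an offset that
// `_mm_load_pd` would fault on.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn sketch_loadu_pd_unaligned() {
    let data = [1.0f64, 2.0, 3.0];
    // `data[1..]` is only guaranteed 8-byte aligned, not 16-byte aligned.
    let v = _mm_loadu_pd(data.as_ptr().add(1));
    let mut out = [0.0f64; 2];
    _mm_storeu_pd(out.as_mut_ptr(), v);
    assert_eq!(out, [2.0, 3.0]);
}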

/// Loads unaligned 16 bits of integer data from memory into a new vector.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si16)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub unsafe fn _mm_loadu_si16(mem_addr: *const u8) -> __m128i {
    transmute(i16x8::new(
        ptr::read_unaligned(mem_addr as *const i16),
        0,
        0,
        0,
        0,
        0,
        0,
        0,
    ))
}

/// Loads unaligned 32 bits of integer data from memory into a new vector.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub unsafe fn _mm_loadu_si32(mem_addr: *const u8) -> __m128i {
    transmute(i32x4::new(
        ptr::read_unaligned(mem_addr as *const i32),
        0,
        0,
        0,
    ))
}

/// Loads unaligned 64 bits of integer data from memory into a new vector.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si64)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86_mm_loadu_si64", since = "1.46.0")]
pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
    transmute(i64x2::new(ptr::read_unaligned(mem_addr as *const i64), 0))
}

/// Constructs a 128-bit floating-point vector of `[2 x double]` from two
/// 128-bit vector parameters of `[2 x double]`, using the immediate-value
/// parameter as a specifier.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(shufps, MASK = 2))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(MASK, 8);
    unsafe { simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) }
}
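
// A minimal sketch (hypothetical helper, assumes SSE2): bit 0 of `MASK`
// selects which lane of `a` becomes the low lane of the result, and bit 1
// selects which lane of `b` becomes the high lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn sketch_shuffle_pd_mask() {
    let a = _mm_setr_pd(1.0, 2.0);
    let b = _mm_setr_pd(3.0, 4.0);
    let r = _mm_shuffle_pd::<0b01>(a, b);
    let mut out = [0.0f64; 2];
    _mm_storeu_pd(out.as_mut_ptr(), r);
    assert_eq!(out, [2.0, 3.0]); // a's high lane, then b's low lane
}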

/// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower
/// 64 bits are set to the lower 64 bits of the second parameter. The upper
/// 64 bits are set to the upper 64 bits of the first parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { _mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1)) }
}
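
// A minimal sketch (hypothetical helper, assumes SSE2): `_mm_move_sd` merges
// `b`'s low lane into `a`, leaving `a`'s high lane in place.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn sketch_move_sd_merge() {
    let a = _mm_setr_pd(1.0, 2.0);
    let b = _mm_setr_pd(3.0, 4.0);
    let mut out = [0.0f64; 2];
    _mm_storeu_pd(out.as_mut_ptr(), _mm_move_sd(a, b));
    assert_eq!(out, [3.0, 2.0]); // low from `b`, high from `a`
}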

/// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
/// floating-point vector of `[4 x float]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_castpd_ps(a: __m128d) -> __m128 {
    unsafe { transmute(a) }
}

/// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
/// integer vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_castpd_si128(a: __m128d) -> __m128i {
    unsafe { transmute(a) }
}

/// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
/// floating-point vector of `[2 x double]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_castps_pd(a: __m128) -> __m128d {
    unsafe { transmute(a) }
}

/// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
/// integer vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_castps_si128(a: __m128) -> __m128i {
    unsafe { transmute(a) }
}

/// Casts a 128-bit integer vector into a 128-bit floating-point vector
/// of `[2 x double]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_castsi128_pd(a: __m128i) -> __m128d {
    unsafe { transmute(a) }
}

/// Casts a 128-bit integer vector into a 128-bit floating-point vector
/// of `[4 x float]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_castsi128_ps(a: __m128i) -> __m128 {
    unsafe { transmute(a) }
}
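
// A minimal sketch (hypothetical helper, assumes SSE2): the `_mm_cast*`
// family is a zero-cost bit reinterpretation, so a value round-trips
// unchanged through the integer view.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn sketch_cast_roundtrip() {
    let pd = _mm_set1_pd(1.5);
    let back = _mm_castsi128_pd(_mm_castpd_si128(pd));
    let mut out = [0.0f64; 2];
    _mm_storeu_pd(out.as_mut_ptr(), back);
    assert_eq!(out, [1.5, 1.5]); // bit pattern preserved
}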

/// Returns a vector of type `__m128d` with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
/// In practice, this is typically equivalent to [`mem::zeroed`].
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_undefined_pd() -> __m128d {
    const { unsafe { mem::zeroed() } }
}

/// Returns a vector of type `__m128i` with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
/// In practice, this is typically equivalent to [`mem::zeroed`].
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_undefined_si128() -> __m128i {
    const { unsafe { mem::zeroed() } }
}

/// The resulting `__m128d` element is composed of the high-order values of
/// the two `__m128d` interleaved input elements, i.e.:
///
/// * The `[127:64]` bits are copied from the `[127:64]` bits of the second input
/// * The `[63:0]` bits are copied from the `[127:64]` bits of the first input
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_shuffle!(a, b, [1, 3]) }
}

/// The resulting `__m128d` element is composed of the low-order values of
/// the two `__m128d` interleaved input elements, i.e.:
///
/// * The `[127:64]` bits are copied from the `[63:0]` bits of the second input
/// * The `[63:0]` bits are copied from the `[63:0]` bits of the first input
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_shuffle!(a, b, [0, 2]) }
}
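
// A minimal sketch (hypothetical helper, assumes SSE2): `unpacklo` pairs the
// low lanes of its two inputs and `unpackhi` pairs the high lanes.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn sketch_unpack_pd() {
    let a = _mm_setr_pd(1.0, 2.0);
    let b = _mm_setr_pd(3.0, 4.0);
    let mut lo = [0.0f64; 2];
    let mut hi = [0.0f64; 2];
    _mm_storeu_pd(lo.as_mut_ptr(), _mm_unpacklo_pd(a, b));
    _mm_storeu_pd(hi.as_mut_ptr(), _mm_unpackhi_pd(a, b));
    assert_eq!(lo, [1.0, 3.0]); // low lanes of `a` and `b`
    assert_eq!(hi, [2.0, 4.0]); // high lanes of `a` and `b`
}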

#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.sse2.pause"]
    unsafe fn pause();
    #[link_name = "llvm.x86.sse2.clflush"]
    unsafe fn clflush(p: *const u8);
    #[link_name = "llvm.x86.sse2.lfence"]
    unsafe fn lfence();
    #[link_name = "llvm.x86.sse2.mfence"]
    unsafe fn mfence();
    #[link_name = "llvm.x86.sse2.pmadd.wd"]
    unsafe fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
    #[link_name = "llvm.x86.sse2.psad.bw"]
    unsafe fn psadbw(a: u8x16, b: u8x16) -> u64x2;
    #[link_name = "llvm.x86.sse2.psll.w"]
    unsafe fn psllw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psll.d"]
    unsafe fn pslld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psll.q"]
    unsafe fn psllq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.psra.w"]
    unsafe fn psraw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psra.d"]
    unsafe fn psrad(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.w"]
    unsafe fn psrlw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psrl.d"]
    unsafe fn psrld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.q"]
    unsafe fn psrlq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.cvtps2dq"]
    unsafe fn cvtps2dq(a: __m128) -> i32x4;
    #[link_name = "llvm.x86.sse2.maskmov.dqu"]
    unsafe fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
    #[link_name = "llvm.x86.sse2.packsswb.128"]
    unsafe fn packsswb(a: i16x8, b: i16x8) -> i8x16;
    #[link_name = "llvm.x86.sse2.packssdw.128"]
    unsafe fn packssdw(a: i32x4, b: i32x4) -> i16x8;
    #[link_name = "llvm.x86.sse2.packuswb.128"]
    unsafe fn packuswb(a: i16x8, b: i16x8) -> u8x16;
    #[link_name = "llvm.x86.sse2.max.sd"]
    unsafe fn maxsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.max.pd"]
    unsafe fn maxpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.sd"]
    unsafe fn minsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.pd"]
    unsafe fn minpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.cmp.sd"]
    unsafe fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.sse2.cmp.pd"]
    unsafe fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.sse2.comieq.sd"]
    unsafe fn comieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comilt.sd"]
    unsafe fn comiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comile.sd"]
    unsafe fn comilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comigt.sd"]
    unsafe fn comigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comige.sd"]
    unsafe fn comigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comineq.sd"]
    unsafe fn comineqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomieq.sd"]
    unsafe fn ucomieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomilt.sd"]
    unsafe fn ucomiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomile.sd"]
    unsafe fn ucomilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomigt.sd"]
    unsafe fn ucomigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomige.sd"]
    unsafe fn ucomigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomineq.sd"]
    unsafe fn ucomineqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvtpd2dq"]
    unsafe fn cvtpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvtsd2si"]
    unsafe fn cvtsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvtsd2ss"]
    unsafe fn cvtsd2ss(a: __m128, b: __m128d) -> __m128;
    #[link_name = "llvm.x86.sse2.cvtss2sd"]
    unsafe fn cvtss2sd(a: __m128d, b: __m128) -> __m128d;
    #[link_name = "llvm.x86.sse2.cvttpd2dq"]
    unsafe fn cvttpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvttsd2si"]
    unsafe fn cvttsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvttps2dq"]
    unsafe fn cvttps2dq(a: __m128) -> i32x4;
}

#[cfg(test)]
mod tests {
    use crate::{
        core_arch::{simd::*, x86::*},
        hint::black_box,
    };
    use std::{
        boxed, f32, f64,
        mem::{self, transmute},
        ptr,
    };
    use stdarch_test::simd_test;

    const NAN: f64 = f64::NAN;

    #[test]
    fn test_mm_pause() {
        unsafe { _mm_pause() }
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_clflush() {
        let x = 0_u8;
        _mm_clflush(ptr::addr_of!(x));
    }

    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_lfence() {
        _mm_lfence();
    }

    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_mfence() {
        _mm_mfence();
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_add_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi8_overflow() {
        let a = _mm_set1_epi8(0x7F);
        let b = _mm_set1_epi8(1);
        let r = _mm_add_epi8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(-128));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_add_epi16(a, b);
        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(4, 5, 6, 7);
        let r = _mm_add_epi32(a, b);
        let e = _mm_setr_epi32(4, 6, 8, 10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi64() {
        let a = _mm_setr_epi64x(0, 1);
        let b = _mm_setr_epi64x(2, 3);
        let r = _mm_add_epi64(a, b);
        let e = _mm_setr_epi64x(2, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_adds_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi8_saturate_positive() {
        let a = _mm_set1_epi8(0x7F);
        let b = _mm_set1_epi8(1);
        let r = _mm_adds_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi8_saturate_negative() {
        let a = _mm_set1_epi8(-0x80);
        let b = _mm_set1_epi8(-1);
        let r = _mm_adds_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_adds_epi16(a, b);
        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi16_saturate_positive() {
        let a = _mm_set1_epi16(0x7FFF);
        let b = _mm_set1_epi16(1);
        let r = _mm_adds_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi16_saturate_negative() {
        let a = _mm_set1_epi16(-0x8000);
        let b = _mm_set1_epi16(-1);
        let r = _mm_adds_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_adds_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu8_saturate() {
        let a = _mm_set1_epi8(!0);
        let b = _mm_set1_epi8(1);
        let r = _mm_adds_epu8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_adds_epu16(a, b);
        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu16_saturate() {
        let a = _mm_set1_epi16(!0);
        let b = _mm_set1_epi16(1);
        let r = _mm_adds_epu16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_avg_epu8() {
        let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
        let r = _mm_avg_epu8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(6));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_avg_epu16() {
        let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
        let r = _mm_avg_epu16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(6));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_madd_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm_madd_epi16(a, b);
        let e = _mm_setr_epi32(29, 81, 149, 233);
        assert_eq_m128i(r, e);

        // Test large values.
        // MIN*MIN+MIN*MIN will overflow into i32::MIN.
        let a = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MAX,
            0,
            0,
        );
        let b = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MAX,
            i16::MIN,
            0,
            0,
        );
        let r = _mm_madd_epi16(a, b);
        let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_max_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let r = _mm_max_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_max_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(!0);
        let r = _mm_max_epu8(a, b);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_min_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let r = _mm_min_epi16(a, b);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_min_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(!0);
        let r = _mm_min_epu8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mulhi_epi16() {
        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
        let r = _mm_mulhi_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-16));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mulhi_epu16() {
        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
        let r = _mm_mulhi_epu16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(15));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mullo_epi16() {
        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
        let r = _mm_mullo_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-17960));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mul_epu32() {
        let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
        let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
        let r = _mm_mul_epu32(a, b);
        let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sad_epu8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
            1, 2, 3, 4,
            155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
            1, 2, 3, 4,
        );
        let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
        let r = _mm_sad_epu8(a, b);
        let e = _mm_setr_epi64x(1020, 614);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi8() {
        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
        let r = _mm_sub_epi8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi16() {
        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
        let r = _mm_sub_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi32() {
        let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
        let r = _mm_sub_epi32(a, b);
        assert_eq_m128i(r, _mm_set1_epi32(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi64() {
        let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
        let r = _mm_sub_epi64(a, b);
        assert_eq_m128i(r, _mm_set1_epi64x(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi8() {
        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
        let r = _mm_subs_epi8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi8_saturate_positive() {
        let a = _mm_set1_epi8(0x7F);
        let b = _mm_set1_epi8(-1);
        let r = _mm_subs_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi8_saturate_negative() {
        let a = _mm_set1_epi8(-0x80);
        let b = _mm_set1_epi8(1);
        let r = _mm_subs_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi16() {
        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
        let r = _mm_subs_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi16_saturate_positive() {
        let a = _mm_set1_epi16(0x7FFF);
        let b = _mm_set1_epi16(-1);
        let r = _mm_subs_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi16_saturate_negative() {
        let a = _mm_set1_epi16(-0x8000);
        let b = _mm_set1_epi16(1);
        let r = _mm_subs_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu8() {
        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
        let r = _mm_subs_epu8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu8_saturate() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let r = _mm_subs_epu8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu16() {
        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
        let r = _mm_subs_epu16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu16_saturate() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let r = _mm_subs_epu16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_si128() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_slli_si128::<1>(a);
        let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_slli_si128::<15>(a);
        let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_slli_si128::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi8(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_slli_epi16::<4>(a);
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
        );
        let r = _mm_slli_epi16::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi16(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sll_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
        );
        let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16));
        assert_eq_m128i(r, _mm_set1_epi16(0));
        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi16(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_slli_epi32::<4>(a);
        assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
        let r = _mm_slli_epi32::<32>(a);
        assert_eq_m128i(r, _mm_set1_epi32(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sll_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
        let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32));
        assert_eq_m128i(r, _mm_set1_epi32(0));
        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi32(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_epi64() {
        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
        let r = _mm_slli_epi64::<4>(a);
        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
        let r = _mm_slli_epi64::<64>(a);
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sll_epi64() {
        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
        let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64));
        assert_eq_m128i(r, _mm_set1_epi64x(0));
        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srai_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_srai_epi16::<4>(a);
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
        );
        let r = _mm_srai_epi16::<16>(a);
        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sra_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
        );
        let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16));
        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srai_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_srai_epi32::<4>(a);
        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
        let r = _mm_srai_epi32::<32>(a);
        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sra_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
        let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32));
        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_si128() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_srli_si128::<1>(a);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
        );
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_srli_si128::<15>(a);
        let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_srli_si128::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi8(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_srli_epi16::<4>(a);
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
        );
        let r = _mm_srli_epi16::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi16(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srl_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
        );
        let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16));
        assert_eq_m128i(r, _mm_set1_epi16(0));
        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi16(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_srli_epi32::<4>(a);
        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
        let r = _mm_srli_epi32::<32>(a);
        assert_eq_m128i(r, _mm_set1_epi32(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srl_epi32() {
        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
        let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32));
        assert_eq_m128i(r, _mm_set1_epi32(0));
        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi32(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_epi64() {
        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
        let r = _mm_srli_epi64::<4>(a);
        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
        let r = _mm_srli_epi64::<64>(a);
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srl_epi64() {
        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
        let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64));
        assert_eq_m128i(r, _mm_set1_epi64x(0));
        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_and_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_and_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_andnot_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_andnot_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(2));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_or_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_or_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(7));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_xor_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_xor_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(6));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_epi8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_cmpeq_epi8(a, b);
        #[rustfmt::skip]
        assert_eq_m128i(
            r,
            _mm_setr_epi8(
                0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
            )
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
        let r = _mm_cmpeq_epi16(a, b);
        assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(3, 2, 2, 0);
        let r = _mm_cmpeq_epi32(a, b);
        assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_epi8() {
        let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        let b = _mm_set1_epi8(0);
        let r = _mm_cmpgt_epi8(a, b);
        let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_epi16() {
        let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
        let b = _mm_set1_epi16(0);
        let r = _mm_cmpgt_epi16(a, b);
        let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_epi32() {
        let a = _mm_set_epi32(5, 0, 0, 0);
        let b = _mm_set1_epi32(0);
        let r = _mm_cmpgt_epi32(a, b);
        assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_epi8() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm_cmplt_epi8(a, b);
        let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_epi16() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm_cmplt_epi16(a, b);
        let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_epi32() {
        let a = _mm_set1_epi32(0);
        let b = _mm_set_epi32(5, 0, 0, 0);
        let r = _mm_cmplt_epi32(a, b);
        assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
    }
3897 | |
3898 | #[simd_test(enable = "sse2")] |
3899 | unsafe fn test_mm_cvtepi32_pd() { |
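// Only the two low i32 lanes fit in a `__m128d`, so 35 and 25 are discarded.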
3900 | let a = _mm_set_epi32(35, 25, 15, 5); |
3901 | let r = _mm_cvtepi32_pd(a); |
3902 | assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0)); |
3903 | } |
3904 | |
3905 | #[simd_test(enable = "sse2")] |
3906 | unsafe fn test_mm_cvtsi32_sd() { |
3907 | let a = _mm_set1_pd(3.5); |
3908 | let r = _mm_cvtsi32_sd(a, 5); |
3909 | assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5)); |
3910 | } |
3911 | |
3912 | #[simd_test(enable = "sse2")] |
3913 | unsafe fn test_mm_cvtepi32_ps() { |
3914 | let a = _mm_setr_epi32(1, 2, 3, 4); |
3915 | let r = _mm_cvtepi32_ps(a); |
3916 | assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0)); |
3917 | } |
3918 | |
3919 | #[simd_test(enable = "sse2")] |
3920 | unsafe fn test_mm_cvtps_epi32() { |
3921 | let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
3922 | let r = _mm_cvtps_epi32(a); |
3923 | assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4)); |
3924 | } |
3925 | |
3926 | #[simd_test(enable = "sse2")] |
3927 | unsafe fn test_mm_cvtsi32_si128() { |
3928 | let r = _mm_cvtsi32_si128(5); |
3929 | assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0)); |
3930 | } |
3931 | |
3932 | #[simd_test(enable = "sse2")] |
3933 | unsafe fn test_mm_cvtsi128_si32() { |
3934 | let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0)); |
3935 | assert_eq!(r, 5); |
3936 | } |
3937 | |
3938 | #[simd_test(enable = "sse2")] |
3939 | unsafe fn test_mm_set_epi64x() { |
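// `_mm_set_*` takes its arguments highest lane first, while `_mm_setr_*`
// takes them in memory order, so `set(0, 1)` equals `setr(1, 0)`.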
3940 | let r = _mm_set_epi64x(0, 1); |
3941 | assert_eq_m128i(r, _mm_setr_epi64x(1, 0)); |
3942 | } |
3943 | |
3944 | #[simd_test(enable = "sse2")] |
3945 | unsafe fn test_mm_set_epi32() { |
3946 | let r = _mm_set_epi32(0, 1, 2, 3); |
3947 | assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0)); |
3948 | } |
3949 | |
3950 | #[simd_test(enable = "sse2")] |
3951 | unsafe fn test_mm_set_epi16() { |
3952 | let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
3953 | assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0)); |
3954 | } |
3955 | |
3956 | #[simd_test(enable = "sse2")] |
3957 | unsafe fn test_mm_set_epi8() { |
3958 | #[rustfmt::skip] |
3959 | let r = _mm_set_epi8( |
3960 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
3961 | ); |
3962 | #[rustfmt::skip] |
3963 | let e = _mm_setr_epi8( |
3964 | 15, 14, 13, 12, 11, 10, 9, 8, |
3965 | 7, 6, 5, 4, 3, 2, 1, 0, |
3966 | ); |
3967 | assert_eq_m128i(r, e); |
3968 | } |
3969 | |
3970 | #[simd_test(enable = "sse2")] |
3971 | unsafe fn test_mm_set1_epi64x() { |
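// Comparing `_mm_set1_*` against itself only smoke-tests that the
// intrinsic compiles and runs; the splatted values themselves are
// exercised by the tests above that build operands with `_mm_set1_*`.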
3972 | let r = _mm_set1_epi64x(1); |
3973 | assert_eq_m128i(r, _mm_set1_epi64x(1)); |
3974 | } |
3975 | |
3976 | #[simd_test(enable = "sse2")] |
3977 | unsafe fn test_mm_set1_epi32() { |
3978 | let r = _mm_set1_epi32(1); |
3979 | assert_eq_m128i(r, _mm_set1_epi32(1)); |
3980 | } |
3981 | |
3982 | #[simd_test(enable = "sse2")] |
3983 | unsafe fn test_mm_set1_epi16() { |
3984 | let r = _mm_set1_epi16(1); |
3985 | assert_eq_m128i(r, _mm_set1_epi16(1)); |
3986 | } |
3987 | |
3988 | #[simd_test(enable = "sse2")] |
3989 | unsafe fn test_mm_set1_epi8() { |
3990 | let r = _mm_set1_epi8(1); |
3991 | assert_eq_m128i(r, _mm_set1_epi8(1)); |
3992 | } |
3993 | |
3994 | #[simd_test(enable = "sse2")] |
3995 | unsafe fn test_mm_setr_epi32() { |
3996 | let r = _mm_setr_epi32(0, 1, 2, 3); |
3997 | assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3)); |
3998 | } |
3999 | |
4000 | #[simd_test(enable = "sse2")] |
4001 | unsafe fn test_mm_setr_epi16() { |
4002 | let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
4003 | assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7)); |
4004 | } |
4005 | |
4006 | #[simd_test(enable = "sse2")] |
4007 | unsafe fn test_mm_setr_epi8() { |
4008 | #[rustfmt::skip] |
4009 | let r = _mm_setr_epi8( |
4010 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
4011 | ); |
4012 | #[rustfmt::skip] |
4013 | let e = _mm_setr_epi8( |
4014 | 0, 1, 2, 3, 4, 5, 6, 7, |
4015 | 8, 9, 10, 11, 12, 13, 14, 15, |
4016 | ); |
4017 | assert_eq_m128i(r, e); |
4018 | } |
4019 | |
4020 | #[simd_test(enable = "sse2")] |
4021 | unsafe fn test_mm_setzero_si128() { |
4022 | let r = _mm_setzero_si128(); |
4023 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
4024 | } |
4025 | |
4026 | #[simd_test(enable = "sse2")] |
4027 | unsafe fn test_mm_loadl_epi64() { |
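// Loads only the low 64 bits of the source and zeroes the upper half.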
4028 | let a = _mm_setr_epi64x(6, 5); |
4029 | let r = _mm_loadl_epi64(ptr::addr_of!(a)); |
4030 | assert_eq_m128i(r, _mm_setr_epi64x(6, 0)); |
4031 | } |
4032 | |
4033 | #[simd_test(enable = "sse2")] |
4034 | unsafe fn test_mm_load_si128() { |
4035 | let a = _mm_set_epi64x(5, 6); |
4036 | let r = _mm_load_si128(ptr::addr_of!(a) as *const _); |
4037 | assert_eq_m128i(a, r); |
4038 | } |
4039 | |
4040 | #[simd_test(enable = "sse2")] |
4041 | unsafe fn test_mm_loadu_si128() { |
4042 | let a = _mm_set_epi64x(5, 6); |
4043 | let r = _mm_loadu_si128(ptr::addr_of!(a) as *const _); |
4044 | assert_eq_m128i(a, r); |
4045 | } |
4046 | |
4047 | #[simd_test(enable = "sse2")] |
4048 | // Miri cannot support this until it is clear how it fits in the Rust memory model |
4049 | // (non-temporal store) |
4050 | #[cfg_attr(miri, ignore)] |
4051 | unsafe fn test_mm_maskmoveu_si128() { |
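// Each byte of `a` is stored only where the corresponding mask byte has
// its most significant bit set; all other destination bytes are untouched.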
4052 | let a = _mm_set1_epi8(9); |
4053 | #[rustfmt::skip] |
4054 | let mask = _mm_set_epi8( |
4055 | 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0, |
4056 | 0, 0, 0, 0, 0, 0, 0, 0, |
4057 | ); |
4058 | let mut r = _mm_set1_epi8(0); |
4059 | _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8); |
4060 | let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
4061 | assert_eq_m128i(r, e); |
4062 | } |
4063 | |
4064 | #[simd_test(enable = "sse2")] |
4065 | unsafe fn test_mm_store_si128() { |
4066 | let a = _mm_set1_epi8(9); |
4067 | let mut r = _mm_set1_epi8(0); |
4068 | _mm_store_si128(&mut r, a); |
4069 | assert_eq_m128i(r, a); |
4070 | } |
4071 | |
4072 | #[simd_test(enable = "sse2")] |
4073 | unsafe fn test_mm_storeu_si128() { |
4074 | let a = _mm_set1_epi8(9); |
4075 | let mut r = _mm_set1_epi8(0); |
4076 | _mm_storeu_si128(&mut r, a); |
4077 | assert_eq_m128i(r, a); |
4078 | } |
4079 | |
4080 | #[simd_test(enable = "sse2")] |
4081 | unsafe fn test_mm_storel_epi64() { |
4082 | let a = _mm_setr_epi64x(2, 9); |
4083 | let mut r = _mm_set1_epi8(0); |
4084 | _mm_storel_epi64(&mut r, a); |
4085 | assert_eq_m128i(r, _mm_setr_epi64x(2, 0)); |
4086 | } |
4087 | |
4088 | #[simd_test(enable = "sse2")] |
4089 | // Miri cannot support this until it is clear how it fits in the Rust memory model |
4090 | // (non-temporal store) |
4091 | #[cfg_attr(miri, ignore)] |
4092 | unsafe fn test_mm_stream_si128() { |
4093 | let a = _mm_setr_epi32(1, 2, 3, 4); |
4094 | let mut r = _mm_undefined_si128(); |
4095 | _mm_stream_si128(ptr::addr_of_mut!(r), a); |
4096 | assert_eq_m128i(r, a); |
4097 | } |
4098 | |
4099 | #[simd_test(enable = "sse2")] |
4100 | // Miri cannot support this until it is clear how it fits in the Rust memory model |
4101 | // (non-temporal store) |
4102 | #[cfg_attr(miri, ignore)] |
4103 | unsafe fn test_mm_stream_si32() { |
4104 | let a: i32 = 7; |
4105 | let mut mem = boxed::Box::<i32>::new(-1); |
4106 | _mm_stream_si32(ptr::addr_of_mut!(*mem), a); |
4107 | assert_eq!(a, *mem); |
4108 | } |
4109 | |
4110 | #[simd_test(enable = "sse2")] |
4111 | unsafe fn test_mm_move_epi64() { |
4112 | let a = _mm_setr_epi64x(5, 6); |
4113 | let r = _mm_move_epi64(a); |
4114 | assert_eq_m128i(r, _mm_setr_epi64x(5, 0)); |
4115 | } |
4116 | |
4117 | #[simd_test(enable = "sse2")] |
4118 | unsafe fn test_mm_packs_epi16() { |
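// Packing uses signed saturation: 0x80 (128) exceeds i8::MAX and clamps
// to 0x7F, while -0x81 (-129) clamps to -0x80.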
4119 | let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0); |
4120 | let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80); |
4121 | let r = _mm_packs_epi16(a, b); |
4122 | #[rustfmt::skip] |
4123 | assert_eq_m128i( |
4124 | r, |
4125 | _mm_setr_epi8( |
4126 | 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F |
4127 | ) |
4128 | ); |
4129 | } |
4130 | |
4131 | #[simd_test(enable = "sse2")] |
4132 | unsafe fn test_mm_packs_epi32() { |
4133 | let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0); |
4134 | let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000); |
4135 | let r = _mm_packs_epi32(a, b); |
4136 | assert_eq_m128i( |
4137 | r, |
4138 | _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF), |
4139 | ); |
4140 | } |
4141 | |
4142 | #[simd_test(enable = "sse2")] |
4143 | unsafe fn test_mm_packus_epi16() { |
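// Packing uses unsigned saturation: 0x100 clamps to 0xFF (`!0` as i8)
// and -1 clamps to 0.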
4144 | let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0); |
4145 | let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100); |
4146 | let r = _mm_packus_epi16(a, b); |
4147 | assert_eq_m128i( |
4148 | r, |
4149 | _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0), |
4150 | ); |
4151 | } |
4152 | |
4153 | #[simd_test(enable = "sse2")] |
4154 | unsafe fn test_mm_extract_epi16() { |
4155 | let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7); |
4156 | let r1 = _mm_extract_epi16::<0>(a); |
4157 | let r2 = _mm_extract_epi16::<3>(a); |
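// The extracted u16 is zero-extended into the i32 result, so the -1 in
// lane 0 reads back as 0xFFFF rather than -1.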
4158 | assert_eq!(r1, 0xFFFF); |
4159 | assert_eq!(r2, 3); |
4160 | } |
4161 | |
4162 | #[simd_test(enable = "sse2")] |
4163 | unsafe fn test_mm_insert_epi16() { |
4164 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
4165 | let r = _mm_insert_epi16::<0>(a, 9); |
4166 | let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7); |
4167 | assert_eq_m128i(r, e); |
4168 | } |
4169 | |
4170 | #[simd_test(enable = "sse2")] |
4171 | unsafe fn test_mm_movemask_epi8() { |
4172 | #[rustfmt::skip] |
4173 | let a = _mm_setr_epi8( |
4174 | 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01, |
4175 | 0b0101, 0b1111_0000u8 as i8, 0, 0, |
4176 | 0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101, |
4177 | 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, |
4178 | ); |
4179 | let r = _mm_movemask_epi8(a); |
4180 | assert_eq!(r, 0b10100110_00100101); |
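// The mask gathers the most significant bit of each byte, lane 0 into
// bit 0. As an extra check, an all-ones vector sets all 16 mask bits:
assert_eq!(_mm_movemask_epi8(_mm_set1_epi8(-1)), 0xFFFF);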
4181 | } |
4182 | |
4183 | #[simd_test(enable = "sse2")] |
4184 | unsafe fn test_mm_shuffle_epi32() { |
4185 | let a = _mm_setr_epi32(5, 10, 15, 20); |
4186 | let r = _mm_shuffle_epi32::<0b00_01_01_11>(a); |
4187 | let e = _mm_setr_epi32(20, 10, 10, 5); |
4188 | assert_eq_m128i(r, e); |
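// Each two-bit field of the immediate selects a source lane, lowest field
// first, so 0b00_01_01_11 yields (a[3], a[1], a[1], a[0]). The identity
// shuffle leaves the input unchanged:
assert_eq_m128i(_mm_shuffle_epi32::<0b11_10_01_00>(a), a);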
4189 | } |
4190 | |
4191 | #[simd_test(enable = "sse2")] |
4192 | unsafe fn test_mm_shufflehi_epi16() { |
4193 | let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20); |
4194 | let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a); |
4195 | let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5); |
4196 | assert_eq_m128i(r, e); |
4197 | } |
4198 | |
4199 | #[simd_test(enable = "sse2")] |
4200 | unsafe fn test_mm_shufflelo_epi16() { |
4201 | let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4); |
4202 | let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a); |
4203 | let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4); |
4204 | assert_eq_m128i(r, e); |
4205 | } |
4206 | |
4207 | #[simd_test(enable = "sse2")] |
4208 | unsafe fn test_mm_unpackhi_epi8() { |
4209 | #[rustfmt::skip] |
4210 | let a = _mm_setr_epi8( |
4211 | 0, 1, 2, 3, 4, 5, 6, 7, |
4212 | 8, 9, 10, 11, 12, 13, 14, 15, |
4213 | ); |
4214 | #[rustfmt::skip] |
4215 | let b = _mm_setr_epi8( |
4216 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
4217 | ); |
4218 | let r = _mm_unpackhi_epi8(a, b); |
4219 | #[rustfmt::skip] |
4220 | let e = _mm_setr_epi8( |
4221 | 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, |
4222 | ); |
4223 | assert_eq_m128i(r, e); |
4224 | } |
4225 | |
4226 | #[simd_test(enable = "sse2")] |
4227 | unsafe fn test_mm_unpackhi_epi16() { |
4228 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
4229 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
4230 | let r = _mm_unpackhi_epi16(a, b); |
4231 | let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15); |
4232 | assert_eq_m128i(r, e); |
4233 | } |
4234 | |
4235 | #[simd_test(enable = "sse2")] |
4236 | unsafe fn test_mm_unpackhi_epi32() { |
4237 | let a = _mm_setr_epi32(0, 1, 2, 3); |
4238 | let b = _mm_setr_epi32(4, 5, 6, 7); |
4239 | let r = _mm_unpackhi_epi32(a, b); |
4240 | let e = _mm_setr_epi32(2, 6, 3, 7); |
4241 | assert_eq_m128i(r, e); |
4242 | } |
4243 | |
4244 | #[simd_test(enable = "sse2")] |
4245 | unsafe fn test_mm_unpackhi_epi64() { |
4246 | let a = _mm_setr_epi64x(0, 1); |
4247 | let b = _mm_setr_epi64x(2, 3); |
4248 | let r = _mm_unpackhi_epi64(a, b); |
4249 | let e = _mm_setr_epi64x(1, 3); |
4250 | assert_eq_m128i(r, e); |
4251 | } |
4252 | |
4253 | #[simd_test(enable = "sse2")] |
4254 | unsafe fn test_mm_unpacklo_epi8() { |
4255 | #[rustfmt::skip] |
4256 | let a = _mm_setr_epi8( |
4257 | 0, 1, 2, 3, 4, 5, 6, 7, |
4258 | 8, 9, 10, 11, 12, 13, 14, 15, |
4259 | ); |
4260 | #[rustfmt::skip] |
4261 | let b = _mm_setr_epi8( |
4262 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
4263 | ); |
4264 | let r = _mm_unpacklo_epi8(a, b); |
4265 | #[rustfmt::skip] |
4266 | let e = _mm_setr_epi8( |
4267 | 0, 16, 1, 17, 2, 18, 3, 19, |
4268 | 4, 20, 5, 21, 6, 22, 7, 23, |
4269 | ); |
4270 | assert_eq_m128i(r, e); |
4271 | } |
4272 | |
4273 | #[simd_test(enable = "sse2")] |
4274 | unsafe fn test_mm_unpacklo_epi16() { |
4275 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
4276 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
4277 | let r = _mm_unpacklo_epi16(a, b); |
4278 | let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11); |
4279 | assert_eq_m128i(r, e); |
4280 | } |
4281 | |
4282 | #[simd_test(enable = "sse2")] |
4283 | unsafe fn test_mm_unpacklo_epi32() { |
4284 | let a = _mm_setr_epi32(0, 1, 2, 3); |
4285 | let b = _mm_setr_epi32(4, 5, 6, 7); |
4286 | let r = _mm_unpacklo_epi32(a, b); |
4287 | let e = _mm_setr_epi32(0, 4, 1, 5); |
4288 | assert_eq_m128i(r, e); |
4289 | } |
4290 | |
4291 | #[simd_test(enable = "sse2")] |
4292 | unsafe fn test_mm_unpacklo_epi64() { |
4293 | let a = _mm_setr_epi64x(0, 1); |
4294 | let b = _mm_setr_epi64x(2, 3); |
4295 | let r = _mm_unpacklo_epi64(a, b); |
4296 | let e = _mm_setr_epi64x(0, 2); |
4297 | assert_eq_m128i(r, e); |
4298 | } |
4299 | |
4300 | #[simd_test(enable = "sse2")] |
4301 | unsafe fn test_mm_add_sd() { |
4302 | let a = _mm_setr_pd(1.0, 2.0); |
4303 | let b = _mm_setr_pd(5.0, 10.0); |
4304 | let r = _mm_add_sd(a, b); |
4305 | assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0)); |
4306 | } |
4307 | |
4308 | #[simd_test(enable = "sse2")] |
4309 | unsafe fn test_mm_add_pd() { |
4310 | let a = _mm_setr_pd(1.0, 2.0); |
4311 | let b = _mm_setr_pd(5.0, 10.0); |
4312 | let r = _mm_add_pd(a, b); |
4313 | assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0)); |
4314 | } |
4315 | |
4316 | #[simd_test(enable = "sse2")] |
4317 | unsafe fn test_mm_div_sd() { |
4318 | let a = _mm_setr_pd(1.0, 2.0); |
4319 | let b = _mm_setr_pd(5.0, 10.0); |
4320 | let r = _mm_div_sd(a, b); |
4321 | assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0)); |
4322 | } |
4323 | |
4324 | #[simd_test(enable = "sse2")] |
4325 | unsafe fn test_mm_div_pd() { |
4326 | let a = _mm_setr_pd(1.0, 2.0); |
4327 | let b = _mm_setr_pd(5.0, 10.0); |
4328 | let r = _mm_div_pd(a, b); |
4329 | assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2)); |
4330 | } |
4331 | |
4332 | #[simd_test(enable = "sse2")] |
4333 | unsafe fn test_mm_max_sd() { |
4334 | let a = _mm_setr_pd(1.0, 2.0); |
4335 | let b = _mm_setr_pd(5.0, 10.0); |
4336 | let r = _mm_max_sd(a, b); |
4337 | assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0)); |
4338 | } |
4339 | |
4340 | #[simd_test(enable = "sse2")] |
4341 | unsafe fn test_mm_max_pd() { |
4342 | let a = _mm_setr_pd(1.0, 2.0); |
4343 | let b = _mm_setr_pd(5.0, 10.0); |
4344 | let r = _mm_max_pd(a, b); |
4345 | assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0)); |
4346 | |
4347 | // Check SSE(2)-specific semantics for -0.0 handling. |
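// `maxpd` returns its second operand whenever the inputs compare equal
// (and -0.0 == 0.0 under IEEE comparison), which is why each call below
// returns its second operand.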
4348 | let a = _mm_setr_pd(-0.0, 0.0); |
4349 | let b = _mm_setr_pd(0.0, 0.0); |
4350 | let r1: [u8; 16] = transmute(_mm_max_pd(a, b)); |
4351 | let r2: [u8; 16] = transmute(_mm_max_pd(b, a)); |
4352 | let a: [u8; 16] = transmute(a); |
4353 | let b: [u8; 16] = transmute(b); |
4354 | assert_eq!(r1, b); |
4355 | assert_eq!(r2, a); |
4356 | assert_ne!(a, b); // sanity check that -0.0 is actually present |
4357 | } |
4358 | |
4359 | #[simd_test(enable = "sse2")] |
4360 | unsafe fn test_mm_min_sd() { |
4361 | let a = _mm_setr_pd(1.0, 2.0); |
4362 | let b = _mm_setr_pd(5.0, 10.0); |
4363 | let r = _mm_min_sd(a, b); |
4364 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); |
4365 | } |
4366 | |
4367 | #[simd_test(enable = "sse2")] |
4368 | unsafe fn test_mm_min_pd() { |
4369 | let a = _mm_setr_pd(1.0, 2.0); |
4370 | let b = _mm_setr_pd(5.0, 10.0); |
4371 | let r = _mm_min_pd(a, b); |
4372 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); |
4373 | |
4374 | // Check SSE(2)-specific semantics for -0.0 handling. |
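// As with `maxpd`, `minpd` returns its second operand on equal inputs.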
4375 | let a = _mm_setr_pd(-0.0, 0.0); |
4376 | let b = _mm_setr_pd(0.0, 0.0); |
4377 | let r1: [u8; 16] = transmute(_mm_min_pd(a, b)); |
4378 | let r2: [u8; 16] = transmute(_mm_min_pd(b, a)); |
4379 | let a: [u8; 16] = transmute(a); |
4380 | let b: [u8; 16] = transmute(b); |
4381 | assert_eq!(r1, b); |
4382 | assert_eq!(r2, a); |
4383 | assert_ne!(a, b); // sanity check that -0.0 is actually present |
4384 | } |
4385 | |
4386 | #[simd_test(enable = "sse2")] |
4387 | unsafe fn test_mm_mul_sd() { |
4388 | let a = _mm_setr_pd(1.0, 2.0); |
4389 | let b = _mm_setr_pd(5.0, 10.0); |
4390 | let r = _mm_mul_sd(a, b); |
4391 | assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0)); |
4392 | } |
4393 | |
4394 | #[simd_test(enable = "sse2")] |
4395 | unsafe fn test_mm_mul_pd() { |
4396 | let a = _mm_setr_pd(1.0, 2.0); |
4397 | let b = _mm_setr_pd(5.0, 10.0); |
4398 | let r = _mm_mul_pd(a, b); |
4399 | assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0)); |
4400 | } |
4401 | |
4402 | #[simd_test(enable = "sse2")] |
4403 | unsafe fn test_mm_sqrt_sd() { |
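// Computes the square root of `b`'s low lane; the upper lane is copied
// from `a`.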
4404 | let a = _mm_setr_pd(1.0, 2.0); |
4405 | let b = _mm_setr_pd(5.0, 10.0); |
4406 | let r = _mm_sqrt_sd(a, b); |
4407 | assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0)); |
4408 | } |
4409 | |
4410 | #[simd_test(enable = "sse2")] |
4411 | unsafe fn test_mm_sqrt_pd() { |
4412 | let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0)); |
4413 | assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt())); |
4414 | } |
4415 | |
4416 | #[simd_test(enable = "sse2")] |
4417 | unsafe fn test_mm_sub_sd() { |
4418 | let a = _mm_setr_pd(1.0, 2.0); |
4419 | let b = _mm_setr_pd(5.0, 10.0); |
4420 | let r = _mm_sub_sd(a, b); |
4421 | assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0)); |
4422 | } |
4423 | |
4424 | #[simd_test(enable = "sse2")] |
4425 | unsafe fn test_mm_sub_pd() { |
4426 | let a = _mm_setr_pd(1.0, 2.0); |
4427 | let b = _mm_setr_pd(5.0, 10.0); |
4428 | let r = _mm_sub_pd(a, b); |
4429 | assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0)); |
4430 | } |
4431 | |
4432 | #[simd_test(enable = "sse2")] |
4433 | unsafe fn test_mm_and_pd() { |
4434 | let a = transmute(u64x2::splat(5)); |
4435 | let b = transmute(u64x2::splat(3)); |
4436 | let r = _mm_and_pd(a, b); |
4437 | let e = transmute(u64x2::splat(1)); |
4438 | assert_eq_m128d(r, e); |
4439 | } |
4440 | |
4441 | #[simd_test(enable = "sse2")] |
4442 | unsafe fn test_mm_andnot_pd() { |
4443 | let a = transmute(u64x2::splat(5)); |
4444 | let b = transmute(u64x2::splat(3)); |
4445 | let r = _mm_andnot_pd(a, b); |
4446 | let e = transmute(u64x2::splat(2)); |
4447 | assert_eq_m128d(r, e); |
4448 | } |
4449 | |
4450 | #[simd_test(enable = "sse2")] |
4451 | unsafe fn test_mm_or_pd() { |
4452 | let a = transmute(u64x2::splat(5)); |
4453 | let b = transmute(u64x2::splat(3)); |
4454 | let r = _mm_or_pd(a, b); |
4455 | let e = transmute(u64x2::splat(7)); |
4456 | assert_eq_m128d(r, e); |
4457 | } |
4458 | |
4459 | #[simd_test(enable = "sse2")] |
4460 | unsafe fn test_mm_xor_pd() { |
4461 | let a = transmute(u64x2::splat(5)); |
4462 | let b = transmute(u64x2::splat(3)); |
4463 | let r = _mm_xor_pd(a, b); |
4464 | let e = transmute(u64x2::splat(6)); |
4465 | assert_eq_m128d(r, e); |
4466 | } |
4467 | |
4468 | #[simd_test(enable = "sse2")] |
4469 | unsafe fn test_mm_cmpeq_sd() { |
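// Scalar comparisons fill lane 0 with all ones or all zeros and pass the
// upper lane of `a` through unchanged, hence `2.0f64.to_bits()` in the
// expected value here and in the `_sd` comparison tests below.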
4470 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4471 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4472 | let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b)); |
4473 | assert_eq_m128i(r, e); |
4474 | } |
4475 | |
4476 | #[simd_test(enable = "sse2")] |
4477 | unsafe fn test_mm_cmplt_sd() { |
4478 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
4479 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4480 | let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b)); |
4481 | assert_eq_m128i(r, e); |
4482 | } |
4483 | |
4484 | #[simd_test(enable = "sse2")] |
4485 | unsafe fn test_mm_cmple_sd() { |
4486 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4487 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4488 | let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b)); |
4489 | assert_eq_m128i(r, e); |
4490 | } |
4491 | |
4492 | #[simd_test(enable = "sse2")] |
4493 | unsafe fn test_mm_cmpgt_sd() { |
4494 | let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4495 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4496 | let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b)); |
4497 | assert_eq_m128i(r, e); |
4498 | } |
4499 | |
4500 | #[simd_test(enable = "sse2")] |
4501 | unsafe fn test_mm_cmpge_sd() { |
4502 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4503 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4504 | let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b)); |
4505 | assert_eq_m128i(r, e); |
4506 | } |
4507 | |
4508 | #[simd_test(enable = "sse2")] |
4509 | unsafe fn test_mm_cmpord_sd() { |
4510 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); |
4511 | let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); |
4512 | let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b)); |
4513 | assert_eq_m128i(r, e); |
4514 | } |
4515 | |
4516 | #[simd_test(enable = "sse2")] |
4517 | unsafe fn test_mm_cmpunord_sd() { |
4518 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); |
4519 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4520 | let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b)); |
4521 | assert_eq_m128i(r, e); |
4522 | } |
4523 | |
4524 | #[simd_test(enable = "sse2")] |
4525 | unsafe fn test_mm_cmpneq_sd() { |
4526 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
4527 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4528 | let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b)); |
4529 | assert_eq_m128i(r, e); |
4530 | } |
4531 | |
4532 | #[simd_test(enable = "sse2")] |
4533 | unsafe fn test_mm_cmpnlt_sd() { |
4534 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
4535 | let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); |
4536 | let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b)); |
4537 | assert_eq_m128i(r, e); |
4538 | } |
4539 | |
4540 | #[simd_test(enable = "sse2")] |
4541 | unsafe fn test_mm_cmpnle_sd() { |
4542 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4543 | let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); |
4544 | let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b)); |
4545 | assert_eq_m128i(r, e); |
4546 | } |
4547 | |
4548 | #[simd_test(enable = "sse2")] |
4549 | unsafe fn test_mm_cmpngt_sd() { |
4550 | let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4551 | let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); |
4552 | let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b)); |
4553 | assert_eq_m128i(r, e); |
4554 | } |
4555 | |
4556 | #[simd_test(enable = "sse2")] |
4557 | unsafe fn test_mm_cmpnge_sd() { |
4558 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4559 | let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); |
4560 | let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b)); |
4561 | assert_eq_m128i(r, e); |
4562 | } |
4563 | |
4564 | #[simd_test(enable = "sse2")] |
4565 | unsafe fn test_mm_cmpeq_pd() { |
4566 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4567 | let e = _mm_setr_epi64x(!0, 0); |
4568 | let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b)); |
4569 | assert_eq_m128i(r, e); |
4570 | } |
4571 | |
4572 | #[simd_test(enable = "sse2")] |
4573 | unsafe fn test_mm_cmplt_pd() { |
4574 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4575 | let e = _mm_setr_epi64x(0, !0); |
4576 | let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b)); |
4577 | assert_eq_m128i(r, e); |
4578 | } |
4579 | |
4580 | #[simd_test(enable = "sse2")] |
4581 | unsafe fn test_mm_cmple_pd() { |
4582 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4583 | let e = _mm_setr_epi64x(!0, !0); |
4584 | let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b)); |
4585 | assert_eq_m128i(r, e); |
4586 | } |
4587 | |
4588 | #[simd_test(enable = "sse2")] |
4589 | unsafe fn test_mm_cmpgt_pd() { |
4590 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4591 | let e = _mm_setr_epi64x(0, 0); |
4592 | let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b)); |
4593 | assert_eq_m128i(r, e); |
4594 | } |
4595 | |
4596 | #[simd_test(enable = "sse2")] |
4597 | unsafe fn test_mm_cmpge_pd() { |
4598 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4599 | let e = _mm_setr_epi64x(!0, 0); |
4600 | let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b)); |
4601 | assert_eq_m128i(r, e); |
4602 | } |
4603 | |
4604 | #[simd_test(enable = "sse2")] |
4605 | unsafe fn test_mm_cmpord_pd() { |
4606 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); |
4607 | let e = _mm_setr_epi64x(0, !0); |
4608 | let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b)); |
4609 | assert_eq_m128i(r, e); |
4610 | } |
4611 | |
4612 | #[simd_test(enable = "sse2")] |
4613 | unsafe fn test_mm_cmpunord_pd() { |
4614 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); |
4615 | let e = _mm_setr_epi64x(!0, 0); |
4616 | let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b)); |
4617 | assert_eq_m128i(r, e); |
4618 | } |
4619 | |
4620 | #[simd_test(enable = "sse2")] |
4621 | unsafe fn test_mm_cmpneq_pd() { |
4622 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
4623 | let e = _mm_setr_epi64x(!0, !0); |
4624 | let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b)); |
4625 | assert_eq_m128i(r, e); |
4626 | } |
4627 | |
4628 | #[simd_test(enable = "sse2")] |
4629 | unsafe fn test_mm_cmpnlt_pd() { |
4630 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
4631 | let e = _mm_setr_epi64x(0, 0); |
4632 | let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b)); |
4633 | assert_eq_m128i(r, e); |
4634 | } |
4635 | |
4636 | #[simd_test(enable = "sse2")] |
4637 | unsafe fn test_mm_cmpnle_pd() { |
4638 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4639 | let e = _mm_setr_epi64x(0, 0); |
4640 | let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b)); |
4641 | assert_eq_m128i(r, e); |
4642 | } |
4643 | |
4644 | #[simd_test(enable = "sse2")] |
4645 | unsafe fn test_mm_cmpngt_pd() { |
4646 | let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4647 | let e = _mm_setr_epi64x(0, !0); |
4648 | let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b)); |
4649 | assert_eq_m128i(r, e); |
4650 | } |
4651 | |
4652 | #[simd_test(enable = "sse2")] |
4653 | unsafe fn test_mm_cmpnge_pd() { |
4654 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4655 | let e = _mm_setr_epi64x(0, !0); |
4656 | let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b)); |
4657 | assert_eq_m128i(r, e); |
4658 | } |
4659 | |
4660 | #[simd_test(enable = "sse2")] |
4661 | unsafe fn test_mm_comieq_sd() { |
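// `comi*` and `ucomi*` both return 1 when the predicate holds and 0
// otherwise (including for an unordered NaN operand, as checked below);
// they differ only in whether a quiet NaN signals an invalid-operation
// exception.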
4662 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4663 | assert!(_mm_comieq_sd(a, b) != 0); |
4664 | |
4665 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0)); |
4666 | assert!(_mm_comieq_sd(a, b) == 0); |
4667 | } |
4668 | |
4669 | #[simd_test(enable = "sse2")] |
4670 | unsafe fn test_mm_comilt_sd() { |
4671 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4672 | assert!(_mm_comilt_sd(a, b) == 0); |
4673 | } |
4674 | |
4675 | #[simd_test(enable = "sse2")] |
4676 | unsafe fn test_mm_comile_sd() { |
4677 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4678 | assert!(_mm_comile_sd(a, b) != 0); |
4679 | } |
4680 | |
4681 | #[simd_test(enable = "sse2")] |
4682 | unsafe fn test_mm_comigt_sd() { |
4683 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4684 | assert!(_mm_comigt_sd(a, b) == 0); |
4685 | } |
4686 | |
4687 | #[simd_test(enable = "sse2")] |
4688 | unsafe fn test_mm_comige_sd() { |
4689 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4690 | assert!(_mm_comige_sd(a, b) != 0); |
4691 | } |
4692 | |
4693 | #[simd_test(enable = "sse2")] |
4694 | unsafe fn test_mm_comineq_sd() { |
4695 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4696 | assert!(_mm_comineq_sd(a, b) == 0); |
4697 | } |
4698 | |
4699 | #[simd_test(enable = "sse2")] |
4700 | unsafe fn test_mm_ucomieq_sd() { |
4701 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4702 | assert!(_mm_ucomieq_sd(a, b) != 0); |
4703 | |
4704 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0)); |
4705 | assert!(_mm_ucomieq_sd(a, b) == 0); |
4706 | } |
4707 | |
4708 | #[simd_test(enable = "sse2")] |
4709 | unsafe fn test_mm_ucomilt_sd() { |
4710 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4711 | assert!(_mm_ucomilt_sd(a, b) == 0); |
4712 | } |
4713 | |
4714 | #[simd_test(enable = "sse2")] |
4715 | unsafe fn test_mm_ucomile_sd() { |
4716 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4717 | assert!(_mm_ucomile_sd(a, b) != 0); |
4718 | } |
4719 | |
4720 | #[simd_test(enable = "sse2")] |
4721 | unsafe fn test_mm_ucomigt_sd() { |
4722 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4723 | assert!(_mm_ucomigt_sd(a, b) == 0); |
4724 | } |
4725 | |
4726 | #[simd_test(enable = "sse2")] |
4727 | unsafe fn test_mm_ucomige_sd() { |
4728 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4729 | assert!(_mm_ucomige_sd(a, b) != 0); |
4730 | } |
4731 | |
4732 | #[simd_test(enable = "sse2")] |
4733 | unsafe fn test_mm_ucomineq_sd() { |
4734 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4735 | assert!(_mm_ucomineq_sd(a, b) == 0); |
4736 | } |
4737 | |
4738 | #[simd_test(enable = "sse2")] |
4739 | unsafe fn test_mm_movemask_pd() { |
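// Gathers the sign bit of each f64 lane; lane 0 maps to bit 0.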
4740 | let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0)); |
4741 | assert_eq!(r, 0b01); |
4742 | |
4743 | let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0)); |
4744 | assert_eq!(r, 0b11); |
4745 | } |
4746 | |
4747 | #[repr(align(16))] |
4748 | struct Memory { |
4749 | data: [f64; 4], |
4750 | } |
4751 | |
4752 | #[simd_test(enable = "sse2")] |
4753 | unsafe fn test_mm_load_pd() { |
4754 | let mem = Memory { |
4755 | data: [1.0f64, 2.0, 3.0, 4.0], |
4756 | }; |
4757 | let vals = &mem.data; |
4758 | let d = vals.as_ptr(); |
4759 | |
4760 | let r = _mm_load_pd(d); |
4761 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); |
4762 | } |
4763 | |
4764 | #[simd_test(enable = "sse2")] |
4765 | unsafe fn test_mm_load_sd() { |
4766 | let a = 1.; |
4767 | let expected = _mm_setr_pd(a, 0.); |
4768 | let r = _mm_load_sd(&a); |
4769 | assert_eq_m128d(r, expected); |
4770 | } |
4771 | |
4772 | #[simd_test(enable = "sse2")] |
4773 | unsafe fn test_mm_loadh_pd() { |
4774 | let a = _mm_setr_pd(1., 2.); |
4775 | let b = 3.; |
4776 | let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.); |
4777 | let r = _mm_loadh_pd(a, &b); |
4778 | assert_eq_m128d(r, expected); |
4779 | } |
4780 | |
4781 | #[simd_test(enable = "sse2")] |
4782 | unsafe fn test_mm_loadl_pd() { |
4783 | let a = _mm_setr_pd(1., 2.); |
4784 | let b = 3.; |
4785 | let expected = _mm_setr_pd(3., get_m128d(a, 1)); |
4786 | let r = _mm_loadl_pd(a, &b); |
4787 | assert_eq_m128d(r, expected); |
4788 | } |
4789 | |
4790 | #[simd_test(enable = "sse2")] |
4791 | // Miri cannot support this until it is clear how it fits in the Rust memory model |
4792 | // (non-temporal store) |
4793 | #[cfg_attr(miri, ignore)] |
4794 | unsafe fn test_mm_stream_pd() { |
4795 | #[repr(align(128))] |
4796 | struct Memory { |
4797 | pub data: [f64; 2], |
4798 | } |
4799 | let a = _mm_set1_pd(7.0); |
4800 | let mut mem = Memory { data: [-1.0; 2] }; |
4801 | |
4802 | _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a); |
4803 | for i in 0..2 { |
4804 | assert_eq!(mem.data[i], get_m128d(a, i)); |
4805 | } |
4806 | } |
4807 | |
4808 | #[simd_test(enable = "sse2")] |
4809 | unsafe fn test_mm_store_sd() { |
4810 | let mut dest = 0.; |
4811 | let a = _mm_setr_pd(1., 2.); |
4812 | _mm_store_sd(&mut dest, a); |
4813 | assert_eq!(dest, _mm_cvtsd_f64(a)); |
4814 | } |
4815 | |
4816 | #[simd_test(enable = "sse2")] |
4817 | unsafe fn test_mm_store_pd() { |
4818 | let mut mem = Memory { data: [0.0f64; 4] }; |
4819 | let vals = &mut mem.data; |
4820 | let a = _mm_setr_pd(1.0, 2.0); |
4821 | let d = vals.as_mut_ptr(); |
4822 | |
4823 | _mm_store_pd(d, *black_box(&a)); |
4824 | assert_eq!(vals[0], 1.0); |
4825 | assert_eq!(vals[1], 2.0); |
4826 | } |
4827 | |
4828 | #[simd_test(enable = "sse2")] |
4829 | unsafe fn test_mm_storeu_pd() { |
4830 | let mut mem = Memory { data: [0.0f64; 4] }; |
4831 | let vals = &mut mem.data; |
4832 | let a = _mm_setr_pd(1.0, 2.0); |
4833 | |
4834 | let mut ofs = 0; |
4835 | let mut p = vals.as_mut_ptr(); |
4836 | |
4837 | // Make sure p is **not** aligned to 16-byte boundary |
4838 | if (p as usize) & 0xf == 0 { |
4839 | ofs = 1; |
4840 | p = p.add(1); |
4841 | } |
4842 | |
4843 | _mm_storeu_pd(p, *black_box(&a)); |
4844 | |
4845 | if ofs > 0 { |
4846 | assert_eq!(vals[ofs - 1], 0.0); |
4847 | } |
4848 | assert_eq!(vals[ofs + 0], 1.0); |
4849 | assert_eq!(vals[ofs + 1], 2.0); |
4850 | } |
4851 | |
4852 | #[simd_test(enable = "sse2")] |
4853 | unsafe fn test_mm_storeu_si16() { |
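// Stores only the low 16 bits of `a`; the remaining destination bytes
// are untouched, as with the si32/si64 variants below.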
4854 | let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
4855 | let mut r = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16); |
4856 | _mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a); |
4857 | let e = _mm_setr_epi16(1, 10, 11, 12, 13, 14, 15, 16); |
4858 | assert_eq_m128i(r, e); |
4859 | } |
4860 | |
4861 | #[simd_test(enable = "sse2")] |
4862 | unsafe fn test_mm_storeu_si32() { |
4863 | let a = _mm_setr_epi32(1, 2, 3, 4); |
4864 | let mut r = _mm_setr_epi32(5, 6, 7, 8); |
4865 | _mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a); |
4866 | let e = _mm_setr_epi32(1, 6, 7, 8); |
4867 | assert_eq_m128i(r, e); |
4868 | } |
4869 | |
4870 | #[simd_test(enable = "sse2")] |
4871 | unsafe fn test_mm_storeu_si64() { |
4872 | let a = _mm_setr_epi64x(1, 2); |
4873 | let mut r = _mm_setr_epi64x(3, 4); |
4874 | _mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a); |
4875 | let e = _mm_setr_epi64x(1, 4); |
4876 | assert_eq_m128i(r, e); |
4877 | } |
4878 | |
4879 | #[simd_test(enable = "sse2")] |
4880 | unsafe fn test_mm_store1_pd() { |
4881 | let mut mem = Memory { data: [0.0f64; 4] }; |
4882 | let vals = &mut mem.data; |
4883 | let a = _mm_setr_pd(1.0, 2.0); |
4884 | let d = vals.as_mut_ptr(); |
4885 | |
4886 | _mm_store1_pd(d, *black_box(&a)); |
4887 | assert_eq!(vals[0], 1.0); |
4888 | assert_eq!(vals[1], 1.0); |
4889 | } |
4890 | |
4891 | #[simd_test(enable = "sse2")] |
4892 | unsafe fn test_mm_store_pd1() { |
4893 | let mut mem = Memory { data: [0.0f64; 4] }; |
4894 | let vals = &mut mem.data; |
4895 | let a = _mm_setr_pd(1.0, 2.0); |
4896 | let d = vals.as_mut_ptr(); |
4897 | |
4898 | _mm_store_pd1(d, *black_box(&a)); |
4899 | assert_eq!(vals[0], 1.0); |
4900 | assert_eq!(vals[1], 1.0); |
4901 | } |
4902 | |
4903 | #[simd_test(enable = "sse2")] |
4904 | unsafe fn test_mm_storer_pd() { |
4905 | let mut mem = Memory { data: [0.0f64; 4] }; |
4906 | let vals = &mut mem.data; |
4907 | let a = _mm_setr_pd(1.0, 2.0); |
4908 | let d = vals.as_mut_ptr(); |
4909 | |
4910 | _mm_storer_pd(d, *black_box(&a)); |
4911 | assert_eq!(vals[0], 2.0); |
4912 | assert_eq!(vals[1], 1.0); |
4913 | } |
4914 | |
4915 | #[simd_test(enable = "sse2")] |
4916 | unsafe fn test_mm_storeh_pd() { |
4917 | let mut dest = 0.; |
4918 | let a = _mm_setr_pd(1., 2.); |
4919 | _mm_storeh_pd(&mut dest, a); |
4920 | assert_eq!(dest, get_m128d(a, 1)); |
4921 | } |
4922 | |
4923 | #[simd_test(enable = "sse2")] |
4924 | unsafe fn test_mm_storel_pd() { |
4925 | let mut dest = 0.; |
4926 | let a = _mm_setr_pd(1., 2.); |
4927 | _mm_storel_pd(&mut dest, a); |
4928 | assert_eq!(dest, _mm_cvtsd_f64(a)); |
4929 | } |
4930 | |
4931 | #[simd_test(enable = "sse2")] |
4932 | unsafe fn test_mm_loadr_pd() { |
4933 | let mut mem = Memory { |
4934 | data: [1.0f64, 2.0, 3.0, 4.0], |
4935 | }; |
4936 | let vals = &mut mem.data; |
4937 | let d = vals.as_ptr(); |
4938 | |
4939 | let r = _mm_loadr_pd(d); |
4940 | assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0)); |
4941 | } |
4942 | |
4943 | #[simd_test(enable = "sse2")] |
4944 | unsafe fn test_mm_loadu_pd() { |
4945 | let mut mem = Memory { |
4946 | data: [1.0f64, 2.0, 3.0, 4.0], |
4947 | }; |
4948 | let vals = &mut mem.data; |
4949 | let mut d = vals.as_ptr(); |
4950 | |
4951 | // make sure d is not aligned to 16-byte boundary |
4952 | let mut offset = 0; |
4953 | if (d as usize) & 0xf == 0 { |
4954 | offset = 1; |
4955 | d = d.add(offset); |
4956 | } |
4957 | |
4958 | let r = _mm_loadu_pd(d); |
4959 | let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64)); |
4960 | assert_eq_m128d(r, e); |
4961 | } |
4962 | |
4963 | #[simd_test(enable = "sse2")] |
4964 | unsafe fn test_mm_loadu_si16() { |
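// Loads 16 bits from unaligned memory and zeroes the remaining lanes,
// as with the si32/si64 variants below.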
4965 | let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
4966 | let r = _mm_loadu_si16(ptr::addr_of!(a) as *const _); |
4967 | assert_eq_m128i(r, _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0)); |
4968 | } |
4969 | |
4970 | #[simd_test(enable = "sse2")] |
4971 | unsafe fn test_mm_loadu_si32() { |
4972 | let a = _mm_setr_epi32(1, 2, 3, 4); |
4973 | let r = _mm_loadu_si32(ptr::addr_of!(a) as *const _); |
4974 | assert_eq_m128i(r, _mm_setr_epi32(1, 0, 0, 0)); |
4975 | } |
4976 | |
4977 | #[simd_test(enable = "sse2")] |
4978 | unsafe fn test_mm_loadu_si64() { |
4979 | let a = _mm_setr_epi64x(5, 6); |
4980 | let r = _mm_loadu_si64(ptr::addr_of!(a) as *const _); |
4981 | assert_eq_m128i(r, _mm_setr_epi64x(5, 0)); |
4982 | } |
4983 | |
4984 | #[simd_test(enable = "sse2")] |
4985 | unsafe fn test_mm_cvtpd_ps() { |
4986 | let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0)); |
4987 | assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0)); |
4988 | |
4989 | let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0)); |
4990 | assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0)); |
4991 | |
4992 | let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN)); |
4993 | assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0)); |
4994 | |
4995 | let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64)); |
4996 | assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0)); |
4997 | } |
4998 | |
4999 | #[simd_test(enable = "sse2")] |
5000 | unsafe fn test_mm_cvtps_pd() { |
5001 | let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0)); |
5002 | assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0)); |
5003 | |
5004 | let r = _mm_cvtps_pd(_mm_setr_ps( |
5005 | f32::MAX, |
5006 | f32::INFINITY, |
5007 | f32::NEG_INFINITY, |
5008 | f32::MIN, |
5009 | )); |
5010 | assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY)); |
5011 | } |
5012 | |
5013 | #[simd_test(enable = "sse2")] |
5014 | unsafe fn test_mm_cvtpd_epi32() { |
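// Values that do not fit in an i32 (including infinities and NaN)
// convert to the "integer indefinite" value i32::MIN; the upper two
// result lanes are always zeroed.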
5015 | let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0)); |
5016 | assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0)); |
5017 | |
5018 | let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0)); |
5019 | assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0)); |
5020 | |
5021 | let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN)); |
5022 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); |
5023 | |
5024 | let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY)); |
5025 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); |
5026 | |
5027 | let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN)); |
5028 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); |
5029 | } |
5030 | |
5031 | #[simd_test(enable = "sse2")] |
5032 | unsafe fn test_mm_cvtsd_si32() { |
5033 | let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0)); |
5034 | assert_eq!(r, -2); |
5035 | |
5036 | let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN)); |
5037 | assert_eq!(r, i32::MIN); |
5038 | |
5039 | let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN)); |
5040 | assert_eq!(r, i32::MIN); |
5041 | } |
5042 | |
5043 | #[simd_test(enable = "sse2")] |
5044 | unsafe fn test_mm_cvtsd_ss() { |
5045 | let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4); |
5046 | let b = _mm_setr_pd(2.0, -5.0); |
5047 | |
5048 | let r = _mm_cvtsd_ss(a, b); |
5049 | |
5050 | assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4)); |
5051 | |
5052 | let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY); |
5053 | let b = _mm_setr_pd(f64::INFINITY, -5.0); |
5054 | |
5055 | let r = _mm_cvtsd_ss(a, b); |
5056 | |
5057 | assert_eq_m128( |
5058 | r, |
5059 | _mm_setr_ps( |
5060 | f32::INFINITY, |
5061 | f32::NEG_INFINITY, |
5062 | f32::MAX, |
5063 | f32::NEG_INFINITY, |
5064 | ), |
5065 | ); |
5066 | } |
5067 | |
5068 | #[simd_test(enable = "sse2")] |
5069 | unsafe fn test_mm_cvtsd_f64() { |
5070 | let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2)); |
5071 | assert_eq!(r, -1.1); |
5072 | } |
5073 | |
5074 | #[simd_test(enable = "sse2")] |
5075 | unsafe fn test_mm_cvtss_sd() { |
5076 | let a = _mm_setr_pd(-1.1, 2.2); |
5077 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
5078 | |
5079 | let r = _mm_cvtss_sd(a, b); |
5080 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2)); |
5081 | |
5082 | let a = _mm_setr_pd(-1.1, f64::INFINITY); |
5083 | let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0); |
5084 | |
5085 | let r = _mm_cvtss_sd(a, b); |
5086 | assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY)); |
5087 | } |
5088 | |
5089 | #[simd_test(enable = "sse2")] |
5090 | unsafe fn test_mm_cvttpd_epi32() { |
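// The `tt` variants always truncate toward zero instead of using the
// current MXCSR rounding mode; out-of-range and NaN inputs still produce
// i32::MIN.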
5091 | let a = _mm_setr_pd(-1.1, 2.2); |
5092 | let r = _mm_cvttpd_epi32(a); |
5093 | assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0)); |
5094 | |
5095 | let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); |
5096 | let r = _mm_cvttpd_epi32(a); |
5097 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); |
5098 | } |
5099 | |
5100 | #[simd_test(enable = "sse2")] |
5101 | unsafe fn test_mm_cvttsd_si32() { |
5102 | let a = _mm_setr_pd(-1.1, 2.2); |
5103 | let r = _mm_cvttsd_si32(a); |
5104 | assert_eq!(r, -1); |
5105 | |
5106 | let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); |
5107 | let r = _mm_cvttsd_si32(a); |
5108 | assert_eq!(r, i32::MIN); |
5109 | } |
5110 | |
5111 | #[simd_test(enable = "sse2")] |
5112 | unsafe fn test_mm_cvttps_epi32() { |
5113 | let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6); |
5114 | let r = _mm_cvttps_epi32(a); |
5115 | assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6)); |
5116 | |
5117 | let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX); |
5118 | let r = _mm_cvttps_epi32(a); |
5119 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN)); |
5120 | } |
5121 | |
5122 | #[simd_test(enable = "sse2")] |
5123 | unsafe fn test_mm_set_sd() { |
5124 | let r = _mm_set_sd(-1.0_f64); |
5125 | assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64)); |
5126 | } |
5127 | |
5128 | #[simd_test(enable = "sse2")] |
5129 | unsafe fn test_mm_set1_pd() { |
5130 | let r = _mm_set1_pd(-1.0_f64); |
5131 | assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64)); |
5132 | } |
5133 | |
5134 | #[simd_test(enable = "sse2")] |
5135 | unsafe fn test_mm_set_pd1() { |
5136 | let r = _mm_set_pd1(-2.0_f64); |
5137 | assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64)); |
5138 | } |
5139 | |
5140 | #[simd_test(enable = "sse2")] |
5141 | unsafe fn test_mm_set_pd() { |
5142 | let r = _mm_set_pd(1.0_f64, 5.0_f64); |
5143 | assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64)); |
5144 | } |
5145 | |
5146 | #[simd_test(enable = "sse2")] |
5147 | unsafe fn test_mm_setr_pd() { |
5148 | let r = _mm_setr_pd(1.0_f64, -5.0_f64); |
5149 | assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64)); |
5150 | } |
5151 | |
5152 | #[simd_test(enable = "sse2")] |
5153 | unsafe fn test_mm_setzero_pd() { |
5154 | let r = _mm_setzero_pd(); |
5155 | assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64)); |
5156 | } |
5157 | |
5158 | #[simd_test(enable = "sse2")] |
5159 | unsafe fn test_mm_load1_pd() { |
5160 | let d = -5.0; |
5161 | let r = _mm_load1_pd(&d); |
5162 | assert_eq_m128d(r, _mm_setr_pd(d, d)); |
5163 | } |
5164 | |
5165 | #[simd_test(enable = "sse2")] |
5166 | unsafe fn test_mm_load_pd1() { |
5167 | let d = -5.0; |
5168 | let r = _mm_load_pd1(&d); |
5169 | assert_eq_m128d(r, _mm_setr_pd(d, d)); |
5170 | } |
5171 | |
5172 | #[simd_test(enable = "sse2")] |
5173 | unsafe fn test_mm_unpackhi_pd() { |
5174 | let a = _mm_setr_pd(1.0, 2.0); |
5175 | let b = _mm_setr_pd(3.0, 4.0); |
5176 | let r = _mm_unpackhi_pd(a, b); |
5177 | assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0)); |
5178 | } |
5179 | |
5180 | #[simd_test(enable = "sse2")] |
5181 | unsafe fn test_mm_unpacklo_pd() { |
5182 | let a = _mm_setr_pd(1.0, 2.0); |
5183 | let b = _mm_setr_pd(3.0, 4.0); |
5184 | let r = _mm_unpacklo_pd(a, b); |
5185 | assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0)); |
5186 | } |
5187 | |
5188 | #[simd_test(enable = "sse2")] |
5189 | unsafe fn test_mm_shuffle_pd() { |
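// Only the two low immediate bits matter: bit 0 selects the lane taken
// from `a` for result lane 0, bit 1 the lane taken from `b` for result
// lane 1; 0b00 therefore yields (a[0], b[0]) = (1.0, 3.0).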
5190 | let a = _mm_setr_pd(1., 2.); |
5191 | let b = _mm_setr_pd(3., 4.); |
5192 | let expected = _mm_setr_pd(1., 3.); |
5193 | let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b); |
5194 | assert_eq_m128d(r, expected); |
5195 | } |
5196 | |
5197 | #[simd_test(enable = "sse2")] |
5198 | unsafe fn test_mm_move_sd() { |
5199 | let a = _mm_setr_pd(1., 2.); |
5200 | let b = _mm_setr_pd(3., 4.); |
5201 | let expected = _mm_setr_pd(3., 2.); |
5202 | let r = _mm_move_sd(a, b); |
5203 | assert_eq_m128d(r, expected); |
5204 | } |
5205 | |
5206 | #[simd_test(enable = "sse2")] |
5207 | unsafe fn test_mm_castpd_ps() { |
5208 | let a = _mm_set1_pd(0.); |
5209 | let expected = _mm_set1_ps(0.); |
5210 | let r = _mm_castpd_ps(a); |
5211 | assert_eq_m128(r, expected); |
5212 | } |
5213 | |
5214 | #[simd_test(enable = "sse2")] |
5215 | unsafe fn test_mm_castpd_si128() { |
5216 | let a = _mm_set1_pd(0.); |
5217 | let expected = _mm_set1_epi64x(0); |
5218 | let r = _mm_castpd_si128(a); |
5219 | assert_eq_m128i(r, expected); |
5220 | } |
5221 | |
5222 | #[simd_test(enable = "sse2")] |
5223 | unsafe fn test_mm_castps_pd() { |
5224 | let a = _mm_set1_ps(0.); |
5225 | let expected = _mm_set1_pd(0.); |
5226 | let r = _mm_castps_pd(a); |
5227 | assert_eq_m128d(r, expected); |
5228 | } |
5229 | |
5230 | #[simd_test(enable = "sse2")] |
5231 | unsafe fn test_mm_castps_si128() { |
5232 | let a = _mm_set1_ps(0.); |
5233 | let expected = _mm_set1_epi32(0); |
5234 | let r = _mm_castps_si128(a); |
5235 | assert_eq_m128i(r, expected); |
5236 | } |
5237 | |
5238 | #[simd_test(enable = "sse2")] |
5239 | unsafe fn test_mm_castsi128_pd() { |
5240 | let a = _mm_set1_epi64x(0); |
5241 | let expected = _mm_set1_pd(0.); |
5242 | let r = _mm_castsi128_pd(a); |
5243 | assert_eq_m128d(r, expected); |
5244 | } |
5245 | |
5246 | #[simd_test(enable = "sse2")] |
5247 | unsafe fn test_mm_castsi128_ps() { |
5248 | let a = _mm_set1_epi32(0); |
5249 | let expected = _mm_set1_ps(0.); |
5250 | let r = _mm_castsi128_ps(a); |
5251 | assert_eq_m128(r, expected); |
5252 | } |
5253 | } |
5254 |