1 | //! Streaming SIMD Extensions 2 (SSE2) |
2 | |
#[cfg(test)]
4 | use stdarch_test::assert_instr; |
5 | |
6 | use crate::{ |
7 | core_arch::{simd::*, x86::*}, |
8 | intrinsics::simd::*, |
9 | mem, ptr, |
10 | }; |
11 | |
12 | /// Provides a hint to the processor that the code sequence is a spin-wait loop. |
13 | /// |
14 | /// This can help improve the performance and power consumption of spin-wait |
15 | /// loops. |
16 | /// |
17 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_pause) |
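///
/// # Examples
///
/// A sketch of a typical spin-wait loop (the atomic flag and the loop shape
/// are illustrative, not part of the intrinsic's contract):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// fn spin_until_set(flag: &std::sync::atomic::AtomicBool) {
///     use std::sync::atomic::Ordering;
///     while !flag.load(Ordering::Acquire) {
///         // Hint to the CPU that this is a busy-wait, reducing power use
///         // and helping a sibling hyper-thread make progress.
///         unsafe { std::arch::x86_64::_mm_pause() };
///     }
/// }
/// ```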
#[inline]
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
#[stable(feature = "simd_x86", since = "1.27.0")]
21 | pub unsafe fn _mm_pause() { |
22 | // note: `pause` is guaranteed to be interpreted as a `nop` by CPUs without |
23 | // the SSE2 target-feature - therefore it does not require any target features |
24 | pause() |
25 | } |
26 | |
27 | /// Invalidates and flushes the cache line that contains `p` from all levels of |
28 | /// the cache hierarchy. |
29 | /// |
30 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clflush) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(clflush))]
#[stable(feature = "simd_x86", since = "1.27.0")]
35 | pub unsafe fn _mm_clflush(p: *const u8) { |
36 | clflush(p) |
37 | } |
38 | |
39 | /// Performs a serializing operation on all load-from-memory instructions |
40 | /// that were issued prior to this instruction. |
41 | /// |
/// Guarantees that every load instruction that precedes the fence, in program
/// order, is globally visible before any load instruction that follows the
/// fence in program order.
45 | /// |
46 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lfence) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(lfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
51 | pub unsafe fn _mm_lfence() { |
52 | lfence() |
53 | } |
54 | |
55 | /// Performs a serializing operation on all load-from-memory and store-to-memory |
56 | /// instructions that were issued prior to this instruction. |
57 | /// |
58 | /// Guarantees that every memory access that precedes, in program order, the |
59 | /// memory fence instruction is globally visible before any memory instruction |
60 | /// which follows the fence in program order. |
61 | /// |
62 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mfence) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
67 | pub unsafe fn _mm_mfence() { |
68 | mfence() |
69 | } |
70 | |
71 | /// Adds packed 8-bit integers in `a` and `b`. |
72 | /// |
73 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi8) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i8x16(), b.as_i8x16()))
80 | } |
81 | |
82 | /// Adds packed 16-bit integers in `a` and `b`. |
83 | /// |
84 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i16x8(), b.as_i16x8()))
91 | } |
92 | |
93 | /// Adds packed 32-bit integers in `a` and `b`. |
94 | /// |
95 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i32x4(), b.as_i32x4()))
102 | } |
103 | |
104 | /// Adds packed 64-bit integers in `a` and `b`. |
105 | /// |
106 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi64) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i64x2(), b.as_i64x2()))
113 | } |
114 | |
115 | /// Adds packed 8-bit integers in `a` and `b` using saturation. |
116 | /// |
117 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi8) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16()))
124 | } |
125 | |
126 | /// Adds packed 16-bit integers in `a` and `b` using saturation. |
127 | /// |
128 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8()))
135 | } |
136 | |
137 | /// Adds packed unsigned 8-bit integers in `a` and `b` using saturation. |
138 | /// |
139 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu8) |
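///
/// # Examples
///
/// A minimal sketch of the saturating behavior (not part of the upstream
/// docs); assumes an `x86_64` target, where SSE2 is always available:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_set1_epi8(200u8 as i8);
///         let b = _mm_set1_epi8(100u8 as i8);
///         // 200 + 100 = 300 saturates to 255 in every lane instead of wrapping.
///         let r: [u8; 16] = std::mem::transmute(_mm_adds_epu8(a, b));
///         assert_eq!(r, [255u8; 16]);
///     }
/// }
/// ```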
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16()))
146 | } |
147 | |
148 | /// Adds packed unsigned 16-bit integers in `a` and `b` using saturation. |
149 | /// |
150 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8()))
157 | } |
158 | |
159 | /// Averages packed unsigned 8-bit integers in `a` and `b`. |
160 | /// |
161 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu8) |
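///
/// # Examples
///
/// The average rounds up, computing `(a + b + 1) >> 1` per lane. A short
/// sketch (not from the upstream docs), assuming an `x86_64` target:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_set1_epi8(1);
///         let b = _mm_set1_epi8(2);
///         // (1 + 2 + 1) >> 1 == 2 in every lane.
///         let r: [u8; 16] = std::mem::transmute(_mm_avg_epu8(a, b));
///         assert_eq!(r, [2u8; 16]);
///     }
/// }
/// ```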
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
    let a: u16x16 = simd_cast::<_, u16x16>(a.as_u8x16());
    let b: u16x16 = simd_cast::<_, u16x16>(b.as_u8x16());
    let r: u16x16 = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1));
    transmute(simd_cast::<_, u8x16>(r))
171 | } |
172 | |
173 | /// Averages packed unsigned 16-bit integers in `a` and `b`. |
174 | /// |
175 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
    let a: u32x8 = simd_cast::<_, u32x8>(a.as_u16x8());
    let b: u32x8 = simd_cast::<_, u32x8>(b.as_u16x8());
    let r: u32x8 = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1));
    transmute(simd_cast::<_, u16x8>(r))
185 | } |
186 | |
/// Multiplies and then horizontally adds signed 16-bit integers in `a` and `b`.
///
/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Horizontally adds adjacent pairs of
/// intermediate 32-bit integers.
192 | /// |
193 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd_epi16) |
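///
/// # Examples
///
/// A worked sketch (not from the upstream docs): output lane `i` is
/// `a[2*i] * b[2*i] + a[2*i+1] * b[2*i+1]`. Assumes an `x86_64` target:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
///         let b = _mm_set1_epi16(1);
///         // Adjacent products are summed: 1+2, 3+4, 5+6, 7+8.
///         let r: [i32; 4] = std::mem::transmute(_mm_madd_epi16(a, b));
///         assert_eq!(r, [3, 7, 11, 15]);
///     }
/// }
/// ```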
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmaddwd(a.as_i16x8(), b.as_i16x8()))
200 | } |
201 | |
202 | /// Compares packed 16-bit integers in `a` and `b`, and returns the packed |
203 | /// maximum values. |
204 | /// |
205 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
    let a: i16x8 = a.as_i16x8();
    let b: i16x8 = b.as_i16x8();
    transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
214 | } |
215 | |
216 | /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the |
217 | /// packed maximum values. |
218 | /// |
219 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu8) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
    let a: u8x16 = a.as_u8x16();
    let b: u8x16 = b.as_u8x16();
    transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
228 | } |
229 | |
230 | /// Compares packed 16-bit integers in `a` and `b`, and returns the packed |
231 | /// minimum values. |
232 | /// |
233 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
    let a: i16x8 = a.as_i16x8();
    let b: i16x8 = b.as_i16x8();
    transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
242 | } |
243 | |
244 | /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the |
245 | /// packed minimum values. |
246 | /// |
247 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu8) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
    let a: u8x16 = a.as_u8x16();
    let b: u8x16 = b.as_u8x16();
    transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
256 | } |
257 | |
258 | /// Multiplies the packed 16-bit integers in `a` and `b`. |
259 | /// |
260 | /// The multiplication produces intermediate 32-bit integers, and returns the |
261 | /// high 16 bits of the intermediate integers. |
262 | /// |
263 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epi16) |
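///
/// # Examples
///
/// A short sketch (not from the upstream docs), assuming an `x86_64` target:
/// `1000 * 1000 = 1_000_000 = 0x000F_4240`, so the returned high half is
/// `0x000F`:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_set1_epi16(1000);
///         let b = _mm_set1_epi16(1000);
///         let r: [i16; 8] = std::mem::transmute(_mm_mulhi_epi16(a, b));
///         assert_eq!(r, [0x000F; 8]);
///     }
/// }
/// ```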
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
    let a: i32x8 = simd_cast::<_, i32x8>(a.as_i16x8());
    let b: i32x8 = simd_cast::<_, i32x8>(b.as_i16x8());
    let r: i32x8 = simd_shr(simd_mul(a, b), i32x8::splat(16));
    transmute(simd_cast::<i32x8, i16x8>(r))
273 | } |
274 | |
275 | /// Multiplies the packed unsigned 16-bit integers in `a` and `b`. |
276 | /// |
277 | /// The multiplication produces intermediate 32-bit integers, and returns the |
278 | /// high 16 bits of the intermediate integers. |
279 | /// |
280 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epu16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
    let a: u32x8 = simd_cast::<_, u32x8>(a.as_u16x8());
    let b: u32x8 = simd_cast::<_, u32x8>(b.as_u16x8());
    let r: u32x8 = simd_shr(simd_mul(a, b), u32x8::splat(16));
    transmute(simd_cast::<u32x8, u16x8>(r))
290 | } |
291 | |
292 | /// Multiplies the packed 16-bit integers in `a` and `b`. |
293 | /// |
294 | /// The multiplication produces intermediate 32-bit integers, and returns the |
295 | /// low 16 bits of the intermediate integers. |
296 | /// |
297 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmullw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_mul(a.as_i16x8(), b.as_i16x8()))
304 | } |
305 | |
306 | /// Multiplies the low unsigned 32-bit integers from each packed 64-bit element |
307 | /// in `a` and `b`. |
308 | /// |
309 | /// Returns the unsigned 64-bit results. |
310 | /// |
311 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_epu32) |
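///
/// # Examples
///
/// Only the low (even-indexed) 32-bit lanes take part, and each product is a
/// full 64 bits, so no overflow can occur. Illustrative sketch (not from the
/// upstream docs), assuming an `x86_64` target:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_setr_epi32(u32::MAX as i32, 7, 2, 7);
///         let b = _mm_setr_epi32(u32::MAX as i32, 7, 3, 7);
///         // Lanes 1 and 3 are ignored.
///         let r: [u64; 2] = std::mem::transmute(_mm_mul_epu32(a, b));
///         assert_eq!(r, [u32::MAX as u64 * u32::MAX as u64, 6]);
///     }
/// }
/// ```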
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmuludq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
    let a: u64x2 = a.as_u64x2();
    let b: u64x2 = b.as_u64x2();
    let mask: u64x2 = u64x2::splat(u32::MAX.into());
    transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
321 | } |
322 | |
/// Sums the absolute differences of packed unsigned 8-bit integers.
///
/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
/// and `b`, then horizontally sums each consecutive group of 8 differences to
/// produce two unsigned 16-bit integers, and packs these unsigned 16-bit
/// integers in the low 16 bits of the 64-bit elements returned.
329 | /// |
330 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sad_epu8) |
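///
/// # Examples
///
/// A short sketch of the two 8-byte sums (not from the upstream docs),
/// assuming an `x86_64` target:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_set1_epi8(3);
///         let b = _mm_set1_epi8(1);
///         // |3 - 1| = 2 per byte, summed over 8 bytes per half: 16 each.
///         let r: [u64; 2] = std::mem::transmute(_mm_sad_epu8(a, b));
///         assert_eq!(r, [16, 16]);
///     }
/// }
/// ```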
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psadbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(psadbw(a.as_u8x16(), b.as_u8x16()))
337 | } |
338 | |
339 | /// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`. |
340 | /// |
341 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi8) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i8x16(), b.as_i8x16()))
348 | } |
349 | |
350 | /// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`. |
351 | /// |
352 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i16x8(), b.as_i16x8()))
359 | } |
360 | |
/// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in `a`.
362 | /// |
363 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i32x4(), b.as_i32x4()))
370 | } |
371 | |
/// Subtracts packed 64-bit integers in `b` from packed 64-bit integers in `a`.
373 | /// |
374 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi64) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i64x2(), b.as_i64x2()))
381 | } |
382 | |
/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`
384 | /// using saturation. |
385 | /// |
386 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi8) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16()))
393 | } |
394 | |
/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`
396 | /// using saturation. |
397 | /// |
398 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8()))
405 | } |
406 | |
/// Subtracts packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
408 | /// integers in `a` using saturation. |
409 | /// |
410 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu8) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16()))
417 | } |
418 | |
/// Subtracts packed unsigned 16-bit integers in `b` from packed unsigned 16-bit
420 | /// integers in `a` using saturation. |
421 | /// |
422 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8()))
429 | } |
430 | |
431 | /// Shifts `a` left by `IMM8` bytes while shifting in zeros. |
432 | /// |
433 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_si128) |
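///
/// # Examples
///
/// The shift is byte-wise, not bit-wise, and a count of 16 or more clears the
/// vector. Illustrative sketch (not from the upstream docs), assuming an
/// `x86_64` target:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_setr_epi32(1, 0, 0, 0);
///         // A 4-byte shift moves the value up one 32-bit lane.
///         let r: [i32; 4] = std::mem::transmute(_mm_slli_si128(a, 4));
///         assert_eq!(r, [0, 1, 0, 0]);
///     }
/// }
/// ```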
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
439 | pub unsafe fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i { |
440 | static_assert_uimm_bits!(IMM8, 8); |
441 | _mm_slli_si128_impl::<IMM8>(a) |
442 | } |
443 | |
444 | /// Implementation detail: converts the immediate argument of the |
445 | /// `_mm_slli_si128` intrinsic into a compile-time constant. |
#[inline]
#[target_feature(enable = "sse2")]
448 | unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i { |
449 | const fn mask(shift: i32, i: u32) -> u32 { |
450 | let shift = shift as u32 & 0xff; |
451 | if shift > 15 { |
452 | i |
453 | } else { |
454 | 16 - shift + i |
455 | } |
456 | } |
457 | let zero = _mm_set1_epi8(0).as_i8x16(); |
458 | transmute::<i8x16, _>(simd_shuffle!( |
459 | zero, |
460 | a.as_i8x16(), |
461 | [ |
462 | mask(IMM8, 0), |
463 | mask(IMM8, 1), |
464 | mask(IMM8, 2), |
465 | mask(IMM8, 3), |
466 | mask(IMM8, 4), |
467 | mask(IMM8, 5), |
468 | mask(IMM8, 6), |
469 | mask(IMM8, 7), |
470 | mask(IMM8, 8), |
471 | mask(IMM8, 9), |
472 | mask(IMM8, 10), |
473 | mask(IMM8, 11), |
474 | mask(IMM8, 12), |
475 | mask(IMM8, 13), |
476 | mask(IMM8, 14), |
477 | mask(IMM8, 15), |
478 | ], |
479 | )) |
480 | } |
481 | |
482 | /// Shifts `a` left by `IMM8` bytes while shifting in zeros. |
483 | /// |
484 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bslli_si128) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
490 | pub unsafe fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i { |
491 | static_assert_uimm_bits!(IMM8, 8); |
492 | _mm_slli_si128_impl::<IMM8>(a) |
493 | } |
494 | |
495 | /// Shifts `a` right by `IMM8` bytes while shifting in zeros. |
496 | /// |
497 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bsrli_si128) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
503 | pub unsafe fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i { |
504 | static_assert_uimm_bits!(IMM8, 8); |
505 | _mm_srli_si128_impl::<IMM8>(a) |
506 | } |
507 | |
508 | /// Shifts packed 16-bit integers in `a` left by `IMM8` while shifting in zeros. |
509 | /// |
510 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 16 {
        _mm_setzero_si128()
    } else {
        transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
522 | } |
523 | } |
524 | |
525 | /// Shifts packed 16-bit integers in `a` left by `count` while shifting in |
526 | /// zeros. |
527 | /// |
528 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psllw(a.as_i16x8(), count.as_i16x8()))
535 | } |
536 | |
537 | /// Shifts packed 32-bit integers in `a` left by `IMM8` while shifting in zeros. |
538 | /// |
539 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 32 {
        _mm_setzero_si128()
    } else {
        transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
551 | } |
552 | } |
553 | |
554 | /// Shifts packed 32-bit integers in `a` left by `count` while shifting in |
555 | /// zeros. |
556 | /// |
557 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(pslld(a.as_i32x4(), count.as_i32x4()))
564 | } |
565 | |
566 | /// Shifts packed 64-bit integers in `a` left by `IMM8` while shifting in zeros. |
567 | /// |
568 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi64) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 64 {
        _mm_setzero_si128()
    } else {
        transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
580 | } |
581 | } |
582 | |
583 | /// Shifts packed 64-bit integers in `a` left by `count` while shifting in |
584 | /// zeros. |
585 | /// |
586 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi64) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
    transmute(psllq(a.as_i64x2(), count.as_i64x2()))
593 | } |
594 | |
595 | /// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in sign |
596 | /// bits. |
597 | /// |
598 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi16) |
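///
/// # Examples
///
/// Sign bits shift in from the left, and counts of 16 or more behave like a
/// shift by 15. A short sketch (not from the upstream docs), assuming an
/// `x86_64` target:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_set1_epi16(-16);
///         // -16 >> 2 == -4 in every lane (sign-extending).
///         let r: [i16; 8] = std::mem::transmute(_mm_srai_epi16(a, 2));
///         assert_eq!(r, [-4i16; 8]);
///     }
/// }
/// ```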
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16)))
607 | } |
608 | |
609 | /// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign |
610 | /// bits. |
611 | /// |
612 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psraw(a.as_i16x8(), count.as_i16x8()))
619 | } |
620 | |
621 | /// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in sign |
622 | /// bits. |
623 | /// |
624 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31))))
633 | } |
634 | |
635 | /// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign |
636 | /// bits. |
637 | /// |
638 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrad(a.as_i32x4(), count.as_i32x4()))
645 | } |
646 | |
647 | /// Shifts `a` right by `IMM8` bytes while shifting in zeros. |
648 | /// |
649 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_si128) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
655 | pub unsafe fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i { |
656 | static_assert_uimm_bits!(IMM8, 8); |
657 | _mm_srli_si128_impl::<IMM8>(a) |
658 | } |
659 | |
660 | /// Implementation detail: converts the immediate argument of the |
661 | /// `_mm_srli_si128` intrinsic into a compile-time constant. |
#[inline]
#[target_feature(enable = "sse2")]
664 | unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i { |
665 | const fn mask(shift: i32, i: u32) -> u32 { |
666 | if (shift as u32) > 15 { |
667 | i + 16 |
668 | } else { |
669 | i + (shift as u32) |
670 | } |
671 | } |
672 | let zero = _mm_set1_epi8(0).as_i8x16(); |
673 | let x: i8x16 = simd_shuffle!( |
674 | a.as_i8x16(), |
675 | zero, |
676 | [ |
677 | mask(IMM8, 0), |
678 | mask(IMM8, 1), |
679 | mask(IMM8, 2), |
680 | mask(IMM8, 3), |
681 | mask(IMM8, 4), |
682 | mask(IMM8, 5), |
683 | mask(IMM8, 6), |
684 | mask(IMM8, 7), |
685 | mask(IMM8, 8), |
686 | mask(IMM8, 9), |
687 | mask(IMM8, 10), |
688 | mask(IMM8, 11), |
689 | mask(IMM8, 12), |
690 | mask(IMM8, 13), |
691 | mask(IMM8, 14), |
692 | mask(IMM8, 15), |
693 | ], |
694 | ); |
695 | transmute(x) |
696 | } |
697 | |
698 | /// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in |
699 | /// zeros. |
700 | /// |
701 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 16 {
        _mm_setzero_si128()
    } else {
        transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
713 | } |
714 | } |
715 | |
716 | /// Shifts packed 16-bit integers in `a` right by `count` while shifting in |
717 | /// zeros. |
718 | /// |
719 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrlw(a.as_i16x8(), count.as_i16x8()))
726 | } |
727 | |
728 | /// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in |
729 | /// zeros. |
730 | /// |
731 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 32 {
        _mm_setzero_si128()
    } else {
        transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
743 | } |
744 | } |
745 | |
746 | /// Shifts packed 32-bit integers in `a` right by `count` while shifting in |
747 | /// zeros. |
748 | /// |
749 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrld(a.as_i32x4(), count.as_i32x4()))
756 | } |
757 | |
758 | /// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in |
759 | /// zeros. |
760 | /// |
761 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi64) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 64 {
        _mm_setzero_si128()
    } else {
        transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
773 | } |
774 | } |
775 | |
776 | /// Shifts packed 64-bit integers in `a` right by `count` while shifting in |
777 | /// zeros. |
778 | /// |
779 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi64) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrlq(a.as_i64x2(), count.as_i64x2()))
786 | } |
787 | |
788 | /// Computes the bitwise AND of 128 bits (representing integer data) in `a` and |
789 | /// `b`. |
790 | /// |
791 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_si128) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_and(a, b)
798 | } |
799 | |
800 | /// Computes the bitwise NOT of 128 bits (representing integer data) in `a` and |
801 | /// then AND with `b`. |
802 | /// |
803 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_si128) |
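///
/// # Examples
///
/// Note the operand order: the result is `(!a) & b`, which clears exactly the
/// bits of `b` selected by `a`. A short sketch (not from the upstream docs),
/// assuming an `x86_64` target:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_set1_epi8(0b0000_1111);
///         let b = _mm_set1_epi8(0b0101_0101);
///         // (!0b0000_1111) & 0b0101_0101 == 0b0101_0000.
///         let r: [u8; 16] = std::mem::transmute(_mm_andnot_si128(a, b));
///         assert_eq!(r, [0b0101_0000u8; 16]);
///     }
/// }
/// ```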
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_and(simd_xor(_mm_set1_epi8(-1), a), b)
810 | } |
811 | |
812 | /// Computes the bitwise OR of 128 bits (representing integer data) in `a` and |
813 | /// `b`. |
814 | /// |
815 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_si128) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_or(a, b)
822 | } |
823 | |
824 | /// Computes the bitwise XOR of 128 bits (representing integer data) in `a` and |
825 | /// `b`. |
826 | /// |
827 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_si128) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_xor(a, b)
834 | } |
835 | |
836 | /// Compares packed 8-bit integers in `a` and `b` for equality. |
837 | /// |
838 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16()))
845 | } |
846 | |
847 | /// Compares packed 16-bit integers in `a` and `b` for equality. |
848 | /// |
849 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8()))
856 | } |
857 | |
858 | /// Compares packed 32-bit integers in `a` and `b` for equality. |
859 | /// |
860 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32) |
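///
/// # Examples
///
/// Each lane of the result is a mask: all ones (`-1`) where the inputs are
/// equal, all zeros where they differ. A short sketch (not from the upstream
/// docs), assuming an `x86_64` target:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_setr_epi32(1, 2, 3, 4);
///         let b = _mm_setr_epi32(1, 9, 3, 9);
///         let r: [i32; 4] = std::mem::transmute(_mm_cmpeq_epi32(a, b));
///         assert_eq!(r, [-1, 0, -1, 0]);
///     }
/// }
/// ```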
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4()))
867 | } |
868 | |
869 | /// Compares packed 8-bit integers in `a` and `b` for greater-than. |
870 | /// |
871 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16()))
878 | } |
879 | |
880 | /// Compares packed 16-bit integers in `a` and `b` for greater-than. |
881 | /// |
882 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8()))
889 | } |
890 | |
891 | /// Compares packed 32-bit integers in `a` and `b` for greater-than. |
892 | /// |
893 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4()))
900 | } |
901 | |
902 | /// Compares packed 8-bit integers in `a` and `b` for less-than. |
903 | /// |
904 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16()))
911 | } |
912 | |
913 | /// Compares packed 16-bit integers in `a` and `b` for less-than. |
914 | /// |
915 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8()))
922 | } |
923 | |
924 | /// Compares packed 32-bit integers in `a` and `b` for less-than. |
925 | /// |
926 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4()))
933 | } |
934 | |
935 | /// Converts the lower two packed 32-bit integers in `a` to packed |
936 | /// double-precision (64-bit) floating-point elements. |
937 | /// |
938 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_pd) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
943 | pub unsafe fn _mm_cvtepi32_pd(a: __m128i) -> __m128d { |
944 | let a: i32x4 = a.as_i32x4(); |
945 | simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1])) |
946 | } |
947 | |
948 | /// Returns `a` with its lower element replaced by `b` after converting it to |
949 | /// an `f64`. |
950 | /// |
951 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_sd) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsi2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
956 | pub unsafe fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d { |
957 | simd_insert!(a, 0, b as f64) |
958 | } |
959 | |
960 | /// Converts packed 32-bit integers in `a` to packed single-precision (32-bit) |
961 | /// floating-point elements. |
962 | /// |
963 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_ps) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
    transmute(simd_cast::<_, f32x4>(a.as_i32x4()))
970 | } |
971 | |
972 | /// Converts packed single-precision (32-bit) floating-point elements in `a` |
973 | /// to packed 32-bit integers. |
974 | /// |
975 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtps_epi32(a: __m128) -> __m128i {
    transmute(cvtps2dq(a))
982 | } |
983 | |
984 | /// Returns a vector whose lowest element is `a` and all higher elements are |
985 | /// `0`. |
986 | /// |
987 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_si128) |
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi32_si128(a: i32) -> __m128i {
    transmute(i32x4::new(a, 0, 0, 0))
993 | } |
994 | |
995 | /// Returns the lowest element of `a`. |
996 | /// |
997 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi128_si32) |
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
1001 | pub unsafe fn _mm_cvtsi128_si32(a: __m128i) -> i32 { |
1002 | simd_extract!(a.as_i32x4(), 0) |
1003 | } |
1004 | |
1005 | /// Sets packed 64-bit integers with the supplied values, from highest to |
1006 | /// lowest. |
1007 | /// |
1008 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi64x) |
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
    transmute(i64x2::new(e0, e1))
1015 | } |
1016 | |
1017 | /// Sets packed 32-bit integers with the supplied values. |
1018 | /// |
1019 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi32) |
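///
/// # Examples
///
/// Arguments run from the highest lane down to lane 0; `_mm_setr_epi32` takes
/// them in memory order instead. A short sketch (not from the upstream docs),
/// assuming an `x86_64` target:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let v = _mm_set_epi32(3, 2, 1, 0);
///         // Lane 0 holds the last argument.
///         let lanes: [i32; 4] = std::mem::transmute(v);
///         assert_eq!(lanes, [0, 1, 2, 3]);
///     }
/// }
/// ```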
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    transmute(i32x4::new(e0, e1, e2, e3))
1026 | } |
1027 | |
1028 | /// Sets packed 16-bit integers with the supplied values. |
1029 | /// |
1030 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
1035 | pub unsafe fn _mm_set_epi16( |
1036 | e7: i16, |
1037 | e6: i16, |
1038 | e5: i16, |
1039 | e4: i16, |
1040 | e3: i16, |
1041 | e2: i16, |
1042 | e1: i16, |
1043 | e0: i16, |
1044 | ) -> __m128i { |
    transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
1046 | } |
1047 | |
1048 | /// Sets packed 8-bit integers with the supplied values. |
1049 | /// |
1050 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi8) |
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
1055 | pub unsafe fn _mm_set_epi8( |
1056 | e15: i8, |
1057 | e14: i8, |
1058 | e13: i8, |
1059 | e12: i8, |
1060 | e11: i8, |
1061 | e10: i8, |
1062 | e9: i8, |
1063 | e8: i8, |
1064 | e7: i8, |
1065 | e6: i8, |
1066 | e5: i8, |
1067 | e4: i8, |
1068 | e3: i8, |
1069 | e2: i8, |
1070 | e1: i8, |
1071 | e0: i8, |
1072 | ) -> __m128i { |
    #[rustfmt::skip]
    transmute(i8x16::new(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    ))
1077 | } |
1078 | |
1079 | /// Broadcasts 64-bit integer `a` to all elements. |
1080 | /// |
1081 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi64x) |
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi64x(a: i64) -> __m128i {
    _mm_set_epi64x(a, a)
1088 | } |
1089 | |
1090 | /// Broadcasts 32-bit integer `a` to all elements. |
1091 | /// |
1092 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi32(a: i32) -> __m128i {
    _mm_set_epi32(a, a, a, a)
1099 | } |
1100 | |
1101 | /// Broadcasts 16-bit integer `a` to all elements. |
1102 | /// |
1103 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi16(a: i16) -> __m128i {
    _mm_set_epi16(a, a, a, a, a, a, a, a)
1110 | } |
1111 | |
1112 | /// Broadcasts 8-bit integer `a` to all elements. |
1113 | /// |
1114 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi8) |
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi8(a: i8) -> __m128i {
    _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
1121 | } |
1122 | |
1123 | /// Sets packed 32-bit integers with the supplied values in reverse order. |
1124 | /// |
1125 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    _mm_set_epi32(e0, e1, e2, e3)
1132 | } |
1133 | |
1134 | /// Sets packed 16-bit integers with the supplied values in reverse order. |
1135 | /// |
1136 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
1141 | pub unsafe fn _mm_setr_epi16( |
1142 | e7: i16, |
1143 | e6: i16, |
1144 | e5: i16, |
1145 | e4: i16, |
1146 | e3: i16, |
1147 | e2: i16, |
1148 | e1: i16, |
1149 | e0: i16, |
1150 | ) -> __m128i { |
    _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
1152 | } |
1153 | |
1154 | /// Sets packed 8-bit integers with the supplied values in reverse order. |
1155 | /// |
1156 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi8) |
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
1161 | pub unsafe fn _mm_setr_epi8( |
1162 | e15: i8, |
1163 | e14: i8, |
1164 | e13: i8, |
1165 | e12: i8, |
1166 | e11: i8, |
1167 | e10: i8, |
1168 | e9: i8, |
1169 | e8: i8, |
1170 | e7: i8, |
1171 | e6: i8, |
1172 | e5: i8, |
1173 | e4: i8, |
1174 | e3: i8, |
1175 | e2: i8, |
1176 | e1: i8, |
1177 | e0: i8, |
1178 | ) -> __m128i { |
    #[rustfmt::skip]
    _mm_set_epi8(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    )
1183 | } |
1184 | |
1185 | /// Returns a vector with all elements set to zero. |
1186 | /// |
1187 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_si128) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
1192 | pub unsafe fn _mm_setzero_si128() -> __m128i { |
1193 | _mm_set1_epi64x(0) |
1194 | } |
1195 | |
/// Loads a 64-bit integer from memory into the first element of the returned
/// vector.
1197 | /// |
1198 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_epi64) |
#[inline]
#[target_feature(enable = "sse2")]
// FIXME movsd on windows
#[cfg_attr(
    all(
        test,
        not(windows),
        not(all(target_os = "linux", target_arch = "x86_64")),
        target_arch = "x86_64"
    ),
    assert_instr(movq)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
    _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
1214 | } |
1215 | |
1216 | /// Loads 128-bits of integer data from memory into a new vector. |
1217 | /// |
1218 | /// `mem_addr` must be aligned on a 16-byte boundary. |
1219 | /// |
1220 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_si128) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
1225 | pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i { |
1226 | *mem_addr |
1227 | } |
1228 | |
1229 | /// Loads 128-bits of integer data from memory into a new vector. |
1230 | /// |
1231 | /// `mem_addr` does not need to be aligned on any particular boundary. |
1232 | /// |
1233 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si128) |
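///
/// # Examples
///
/// A round-trip sketch through a plain byte buffer, pairing the unaligned
/// load with `_mm_storeu_si128` (not from the upstream docs); assumes an
/// `x86_64` target:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let bytes = [7u8; 16];
///         let v = _mm_loadu_si128(bytes.as_ptr() as *const __m128i);
///         let mut out = [0u8; 16];
///         _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, v);
///         assert_eq!(bytes, out);
///     }
/// }
/// ```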
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
    let mut dst: __m128i = _mm_undefined_si128();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m128i>(),
    );
1245 | dst |
1246 | } |
1247 | |
/// Conditionally stores 8-bit integer elements from `a` into memory using
/// `mask`.
///
/// An element is not stored when the highest bit of the corresponding `mask`
/// element is not set.
1253 | /// |
1254 | /// `mem_addr` should correspond to a 128-bit memory location and does not need |
1255 | /// to be aligned on any particular boundary. |
1256 | /// |
1257 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskmoveu_si128) |
1258 | #[inline ] |
1259 | #[target_feature (enable = "sse2" )] |
1260 | #[cfg_attr (test, assert_instr(maskmovdqu))] |
1261 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1262 | pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) { |
    maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
1264 | } |
1265 | |
1266 | /// Stores 128-bits of integer data from `a` into memory. |
1267 | /// |
1268 | /// `mem_addr` must be aligned on a 16-byte boundary. |
1269 | /// |
1270 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_si128) |
1271 | #[inline ] |
1272 | #[target_feature (enable = "sse2" )] |
1273 | #[cfg_attr (test, assert_instr(movaps))] |
1274 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1275 | pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) { |
1276 | *mem_addr = a; |
1277 | } |
1278 | |
1279 | /// Stores 128-bits of integer data from `a` into memory. |
1280 | /// |
1281 | /// `mem_addr` does not need to be aligned on any particular boundary. |
1282 | /// |
1283 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si128) |
1284 | #[inline ] |
1285 | #[target_feature (enable = "sse2" )] |
1286 | #[cfg_attr (test, assert_instr(movups))] // FIXME movdqu expected |
1287 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1288 | pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) { |
    mem_addr.write_unaligned(a);
1290 | } |
1291 | |
1292 | /// Stores the lower 64-bit integer `a` to a memory location. |
1293 | /// |
1294 | /// `mem_addr` does not need to be aligned on any particular boundary. |
1295 | /// |
1296 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_epi64) |
1297 | #[inline ] |
1298 | #[target_feature (enable = "sse2" )] |
1299 | // FIXME mov on windows, movlps on i686 |
1300 | #[cfg_attr ( |
1301 | all( |
1302 | test, |
1303 | not(windows), |
1304 | not(all(target_os = "linux" , target_arch = "x86_64" )), |
1305 | target_arch = "x86_64" |
1306 | ), |
1307 | assert_instr(movq) |
1308 | )] |
1309 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1310 | pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) { |
    ptr::copy_nonoverlapping(ptr::addr_of!(a) as *const u8, mem_addr as *mut u8, 8);
1312 | } |
1313 | |
1314 | /// Stores a 128-bit integer vector to a 128-bit aligned memory location. |
1315 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be |
1316 | /// used again soon). |
1317 | /// |
1318 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si128) |
1319 | /// |
1320 | /// # Safety of non-temporal stores |
1321 | /// |
1322 | /// After using this intrinsic, but before any other access to the memory that this intrinsic |
1323 | /// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In |
1324 | /// particular, functions that call this intrinsic should generally call `_mm_sfence` before they |
1325 | /// return. |
1326 | /// |
1327 | /// See [`_mm_sfence`] for details. |
1328 | #[inline ] |
1329 | #[target_feature (enable = "sse2" )] |
1330 | #[cfg_attr (test, assert_instr(movntps))] // FIXME movntdq |
1331 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1332 | pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) { |
    intrinsics::nontemporal_store(mem_addr, a);
1334 | } |
1335 | |
1336 | /// Stores a 32-bit integer value in the specified memory location. |
1337 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be |
1338 | /// used again soon). |
1339 | /// |
1340 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si32) |
1341 | /// |
1342 | /// # Safety of non-temporal stores |
1343 | /// |
1344 | /// After using this intrinsic, but before any other access to the memory that this intrinsic |
1345 | /// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In |
1346 | /// particular, functions that call this intrinsic should generally call `_mm_sfence` before they |
1347 | /// return. |
1348 | /// |
1349 | /// See [`_mm_sfence`] for details. |
1350 | #[inline ] |
1351 | #[target_feature (enable = "sse2" )] |
1352 | #[cfg_attr (test, assert_instr(movnti))] |
1353 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1354 | pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) { |
    intrinsics::nontemporal_store(mem_addr, a);
1356 | } |
1357 | |
1358 | /// Returns a vector where the low element is extracted from `a` and its upper |
1359 | /// element is zero. |
1360 | /// |
1361 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_epi64) |
1362 | #[inline ] |
1363 | #[target_feature (enable = "sse2" )] |
1364 | // FIXME movd on windows, movd on i686 |
1365 | #[cfg_attr (all(test, not(windows), target_arch = "x86_64" ), assert_instr(movq))] |
1366 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1367 | pub unsafe fn _mm_move_epi64(a: __m128i) -> __m128i { |
1368 | let zero: __m128i = _mm_setzero_si128(); |
1369 | let r: i64x2 = simd_shuffle!(a.as_i64x2(), zero.as_i64x2(), [0, 2]); |
    transmute(r)
1371 | } |
1372 | |
1373 | /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers |
1374 | /// using signed saturation. |
1375 | /// |
1376 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi16) |
1377 | #[inline ] |
1378 | #[target_feature (enable = "sse2" )] |
1379 | #[cfg_attr (test, assert_instr(packsswb))] |
1380 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1381 | pub unsafe fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i { |
    transmute(packsswb(a.as_i16x8(), b.as_i16x8()))
1383 | } |
1384 | |
1385 | /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers |
1386 | /// using signed saturation. |
1387 | /// |
1388 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi32) |
1389 | #[inline ] |
1390 | #[target_feature (enable = "sse2" )] |
1391 | #[cfg_attr (test, assert_instr(packssdw))] |
1392 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1393 | pub unsafe fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i { |
    transmute(packssdw(a.as_i32x4(), b.as_i32x4()))
1395 | } |
1396 | |
1397 | /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers |
1398 | /// using unsigned saturation. |
1399 | /// |
1400 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi16) |
1401 | #[inline ] |
1402 | #[target_feature (enable = "sse2" )] |
1403 | #[cfg_attr (test, assert_instr(packuswb))] |
1404 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1405 | pub unsafe fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i { |
    transmute(packuswb(a.as_i16x8(), b.as_i16x8()))
1407 | } |
1408 | |
/// Returns the `IMM8` element of `a`, zero-extended to an `i32`.
1410 | /// |
1411 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi16) |
1412 | #[inline ] |
1413 | #[target_feature (enable = "sse2" )] |
1414 | #[cfg_attr (test, assert_instr(pextrw, IMM8 = 7))] |
1415 | #[rustc_legacy_const_generics (1)] |
1416 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1417 | pub unsafe fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 { |
1418 | static_assert_uimm_bits!(IMM8, 3); |
1419 | simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32 |
1420 | } |
1421 | |
/// Returns a new vector where the `IMM8` element of `a` is replaced with `i`.
1423 | /// |
1424 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi16) |
1425 | #[inline ] |
1426 | #[target_feature (enable = "sse2" )] |
1427 | #[cfg_attr (test, assert_instr(pinsrw, IMM8 = 7))] |
1428 | #[rustc_legacy_const_generics (2)] |
1429 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1430 | pub unsafe fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i { |
1431 | static_assert_uimm_bits!(IMM8, 3); |
    transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16))
1433 | } |
1434 | |
1435 | /// Returns a mask of the most significant bit of each element in `a`. |
1436 | /// |
1437 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_epi8) |
1438 | #[inline ] |
1439 | #[target_feature (enable = "sse2" )] |
1440 | #[cfg_attr (test, assert_instr(pmovmskb))] |
1441 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1442 | pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 { |
1443 | let z: i8x16 = i8x16::splat(0); |
    let m: i8x16 = simd_lt(a.as_i8x16(), z);
1445 | simd_bitmask::<_, u16>(m) as u32 as i32 |
1446 | } |
1447 | |
1448 | /// Shuffles 32-bit integers in `a` using the control in `IMM8`. |
1449 | /// |
1450 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi32) |
1451 | #[inline ] |
1452 | #[target_feature (enable = "sse2" )] |
1453 | #[cfg_attr (test, assert_instr(pshufd, IMM8 = 9))] |
1454 | #[rustc_legacy_const_generics (1)] |
1455 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1456 | pub unsafe fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i { |
1457 | static_assert_uimm_bits!(IMM8, 8); |
1458 | let a: i32x4 = a.as_i32x4(); |
1459 | let x: i32x4 = simd_shuffle!( |
1460 | a, |
1461 | a, |
1462 | [ |
1463 | IMM8 as u32 & 0b11, |
1464 | (IMM8 as u32 >> 2) & 0b11, |
1465 | (IMM8 as u32 >> 4) & 0b11, |
1466 | (IMM8 as u32 >> 6) & 0b11, |
1467 | ], |
1468 | ); |
    transmute(x)
1470 | } |
1471 | |
1472 | /// Shuffles 16-bit integers in the high 64 bits of `a` using the control in |
1473 | /// `IMM8`. |
1474 | /// |
1475 | /// Put the results in the high 64 bits of the returned vector, with the low 64 |
1476 | /// bits being copied from `a`. |
1477 | /// |
1478 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflehi_epi16) |
1479 | #[inline ] |
1480 | #[target_feature (enable = "sse2" )] |
1481 | #[cfg_attr (test, assert_instr(pshufhw, IMM8 = 9))] |
1482 | #[rustc_legacy_const_generics (1)] |
1483 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1484 | pub unsafe fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i { |
1485 | static_assert_uimm_bits!(IMM8, 8); |
1486 | let a: i16x8 = a.as_i16x8(); |
1487 | let x: i16x8 = simd_shuffle!( |
1488 | a, |
1489 | a, |
1490 | [ |
1491 | 0, |
1492 | 1, |
1493 | 2, |
1494 | 3, |
1495 | (IMM8 as u32 & 0b11) + 4, |
1496 | ((IMM8 as u32 >> 2) & 0b11) + 4, |
1497 | ((IMM8 as u32 >> 4) & 0b11) + 4, |
1498 | ((IMM8 as u32 >> 6) & 0b11) + 4, |
1499 | ], |
1500 | ); |
    transmute(x)
1502 | } |
1503 | |
1504 | /// Shuffles 16-bit integers in the low 64 bits of `a` using the control in |
1505 | /// `IMM8`. |
1506 | /// |
1507 | /// Put the results in the low 64 bits of the returned vector, with the high 64 |
1508 | /// bits being copied from `a`. |
1509 | /// |
1510 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflelo_epi16) |
1511 | #[inline ] |
1512 | #[target_feature (enable = "sse2" )] |
1513 | #[cfg_attr (test, assert_instr(pshuflw, IMM8 = 9))] |
1514 | #[rustc_legacy_const_generics (1)] |
1515 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1516 | pub unsafe fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i { |
1517 | static_assert_uimm_bits!(IMM8, 8); |
1518 | let a: i16x8 = a.as_i16x8(); |
1519 | let x: i16x8 = simd_shuffle!( |
1520 | a, |
1521 | a, |
1522 | [ |
1523 | IMM8 as u32 & 0b11, |
1524 | (IMM8 as u32 >> 2) & 0b11, |
1525 | (IMM8 as u32 >> 4) & 0b11, |
1526 | (IMM8 as u32 >> 6) & 0b11, |
1527 | 4, |
1528 | 5, |
1529 | 6, |
1530 | 7, |
1531 | ], |
1532 | ); |
    transmute(x)
1534 | } |
1535 | |
/// Unpacks and interleaves 8-bit integers from the high half of `a` and `b`.
1537 | /// |
1538 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi8) |
1539 | #[inline ] |
1540 | #[target_feature (enable = "sse2" )] |
1541 | #[cfg_attr (test, assert_instr(punpckhbw))] |
1542 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1543 | pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i { |
    transmute::<i8x16, _>(simd_shuffle!(
1545 | a.as_i8x16(), |
1546 | b.as_i8x16(), |
1547 | [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31], |
1548 | )) |
1549 | } |
1550 | |
/// Unpacks and interleaves 16-bit integers from the high half of `a` and `b`.
1552 | /// |
1553 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi16) |
1554 | #[inline ] |
1555 | #[target_feature (enable = "sse2" )] |
1556 | #[cfg_attr (test, assert_instr(punpckhwd))] |
1557 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1558 | pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i { |
1559 | let x: i16x8 = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]); |
    transmute::<i16x8, _>(x)
1561 | } |
1562 | |
/// Unpacks and interleaves 32-bit integers from the high half of `a` and `b`.
1564 | /// |
1565 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi32) |
1566 | #[inline ] |
1567 | #[target_feature (enable = "sse2" )] |
1568 | #[cfg_attr (test, assert_instr(unpckhps))] |
1569 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1570 | pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i { |
    transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7]))
1572 | } |
1573 | |
/// Unpacks and interleaves 64-bit integers from the high half of `a` and `b`.
1575 | /// |
1576 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi64) |
1577 | #[inline ] |
1578 | #[target_feature (enable = "sse2" )] |
1579 | #[cfg_attr (test, assert_instr(unpckhpd))] |
1580 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1581 | pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i { |
    transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3]))
1583 | } |
1584 | |
/// Unpacks and interleaves 8-bit integers from the low half of `a` and `b`.
1586 | /// |
1587 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi8) |
1588 | #[inline ] |
1589 | #[target_feature (enable = "sse2" )] |
1590 | #[cfg_attr (test, assert_instr(punpcklbw))] |
1591 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1592 | pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i { |
    transmute::<i8x16, _>(simd_shuffle!(
1594 | a.as_i8x16(), |
1595 | b.as_i8x16(), |
1596 | [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23], |
1597 | )) |
1598 | } |
1599 | |
/// Unpacks and interleaves 16-bit integers from the low half of `a` and `b`.
1601 | /// |
1602 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi16) |
1603 | #[inline ] |
1604 | #[target_feature (enable = "sse2" )] |
1605 | #[cfg_attr (test, assert_instr(punpcklwd))] |
1606 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1607 | pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i { |
1608 | let x: i16x8 = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]); |
    transmute::<i16x8, _>(x)
1610 | } |
1611 | |
/// Unpacks and interleaves 32-bit integers from the low half of `a` and `b`.
1613 | /// |
1614 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi32) |
1615 | #[inline ] |
1616 | #[target_feature (enable = "sse2" )] |
1617 | #[cfg_attr (test, assert_instr(unpcklps))] |
1618 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1619 | pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i { |
    transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5]))
1621 | } |
1622 | |
/// Unpacks and interleaves 64-bit integers from the low half of `a` and `b`.
1624 | /// |
1625 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi64) |
1626 | #[inline ] |
1627 | #[target_feature (enable = "sse2" )] |
1628 | #[cfg_attr (all(test, not(target_os = "windows" )), assert_instr(movlhps))] |
1629 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1630 | pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i { |
    transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2]))
1632 | } |
1633 | |
1634 | /// Returns a new vector with the low element of `a` replaced by the sum of the |
1635 | /// low elements of `a` and `b`. |
1636 | /// |
1637 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_sd) |
1638 | #[inline ] |
1639 | #[target_feature (enable = "sse2" )] |
1640 | #[cfg_attr (test, assert_instr(addsd))] |
1641 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1642 | pub unsafe fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d { |
1643 | simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) |
1644 | } |
1645 | |
1646 | /// Adds packed double-precision (64-bit) floating-point elements in `a` and |
1647 | /// `b`. |
1648 | /// |
1649 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_pd) |
1650 | #[inline ] |
1651 | #[target_feature (enable = "sse2" )] |
1652 | #[cfg_attr (test, assert_instr(addpd))] |
1653 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1654 | pub unsafe fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d { |
    simd_add(a, b)
1656 | } |
1657 | |
/// Returns a new vector with the low element of `a` replaced by the result of
/// dividing the lower element of `a` by the lower element of `b`.
1660 | /// |
1661 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_sd) |
1662 | #[inline ] |
1663 | #[target_feature (enable = "sse2" )] |
1664 | #[cfg_attr (test, assert_instr(divsd))] |
1665 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1666 | pub unsafe fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d { |
1667 | simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) |
1668 | } |
1669 | |
/// Divides packed double-precision (64-bit) floating-point elements in `a` by
1671 | /// packed elements in `b`. |
1672 | /// |
1673 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_pd) |
1674 | #[inline ] |
1675 | #[target_feature (enable = "sse2" )] |
1676 | #[cfg_attr (test, assert_instr(divpd))] |
1677 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1678 | pub unsafe fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d { |
    simd_div(a, b)
1680 | } |
1681 | |
1682 | /// Returns a new vector with the low element of `a` replaced by the maximum |
1683 | /// of the lower elements of `a` and `b`. |
1684 | /// |
1685 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_sd) |
1686 | #[inline ] |
1687 | #[target_feature (enable = "sse2" )] |
1688 | #[cfg_attr (test, assert_instr(maxsd))] |
1689 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1690 | pub unsafe fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d { |
1691 | maxsd(a, b) |
1692 | } |
1693 | |
1694 | /// Returns a new vector with the maximum values from corresponding elements in |
1695 | /// `a` and `b`. |
1696 | /// |
1697 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_pd) |
1698 | #[inline ] |
1699 | #[target_feature (enable = "sse2" )] |
1700 | #[cfg_attr (test, assert_instr(maxpd))] |
1701 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1702 | pub unsafe fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d { |
1703 | maxpd(a, b) |
1704 | } |
1705 | |
1706 | /// Returns a new vector with the low element of `a` replaced by the minimum |
1707 | /// of the lower elements of `a` and `b`. |
1708 | /// |
1709 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_sd) |
1710 | #[inline ] |
1711 | #[target_feature (enable = "sse2" )] |
1712 | #[cfg_attr (test, assert_instr(minsd))] |
1713 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1714 | pub unsafe fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d { |
1715 | minsd(a, b) |
1716 | } |
1717 | |
1718 | /// Returns a new vector with the minimum values from corresponding elements in |
1719 | /// `a` and `b`. |
1720 | /// |
1721 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_pd) |
1722 | #[inline ] |
1723 | #[target_feature (enable = "sse2" )] |
1724 | #[cfg_attr (test, assert_instr(minpd))] |
1725 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1726 | pub unsafe fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d { |
1727 | minpd(a, b) |
1728 | } |
1729 | |
1730 | /// Returns a new vector with the low element of `a` replaced by multiplying the |
1731 | /// low elements of `a` and `b`. |
1732 | /// |
1733 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_sd) |
1734 | #[inline ] |
1735 | #[target_feature (enable = "sse2" )] |
1736 | #[cfg_attr (test, assert_instr(mulsd))] |
1737 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1738 | pub unsafe fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d { |
1739 | simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) |
1740 | } |
1741 | |
1742 | /// Multiplies packed double-precision (64-bit) floating-point elements in `a` |
1743 | /// and `b`. |
1744 | /// |
1745 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_pd) |
1746 | #[inline ] |
1747 | #[target_feature (enable = "sse2" )] |
1748 | #[cfg_attr (test, assert_instr(mulpd))] |
1749 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1750 | pub unsafe fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d { |
    simd_mul(a, b)
1752 | } |
1753 | |
/// Returns a new vector with the low element of `a` replaced by the square
/// root of the lower element of `b`.
1756 | /// |
1757 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_sd) |
1758 | #[inline ] |
1759 | #[target_feature (enable = "sse2" )] |
1760 | #[cfg_attr (test, assert_instr(sqrtsd))] |
1761 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1762 | pub unsafe fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d { |
1763 | simd_insert!(a, 0, _mm_cvtsd_f64(sqrtsd(b))) |
1764 | } |
1765 | |
1766 | /// Returns a new vector with the square root of each of the values in `a`. |
1767 | /// |
1768 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_pd) |
1769 | #[inline ] |
1770 | #[target_feature (enable = "sse2" )] |
1771 | #[cfg_attr (test, assert_instr(sqrtpd))] |
1772 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1773 | pub unsafe fn _mm_sqrt_pd(a: __m128d) -> __m128d { |
1774 | simd_fsqrt(a) |
1775 | } |
1776 | |
/// Returns a new vector with the low element of `a` replaced by subtracting
/// the low element of `b` from the low element of `a`.
1779 | /// |
1780 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_sd) |
1781 | #[inline ] |
1782 | #[target_feature (enable = "sse2" )] |
1783 | #[cfg_attr (test, assert_instr(subsd))] |
1784 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1785 | pub unsafe fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d { |
1786 | simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) |
1787 | } |
1788 | |
/// Subtracts packed double-precision (64-bit) floating-point elements in `b`
1790 | /// from `a`. |
1791 | /// |
1792 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_pd) |
1793 | #[inline ] |
1794 | #[target_feature (enable = "sse2" )] |
1795 | #[cfg_attr (test, assert_instr(subpd))] |
1796 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1797 | pub unsafe fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d { |
    simd_sub(a, b)
1799 | } |
1800 | |
1801 | /// Computes the bitwise AND of packed double-precision (64-bit) floating-point |
1802 | /// elements in `a` and `b`. |
1803 | /// |
1804 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_pd) |
1805 | #[inline ] |
1806 | #[target_feature (enable = "sse2" )] |
1807 | #[cfg_attr (test, assert_instr(andps))] |
1808 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1809 | pub unsafe fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d { |
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_and_si128(a, b))
1813 | } |
1814 | |
1815 | /// Computes the bitwise NOT of `a` and then AND with `b`. |
1816 | /// |
1817 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_pd) |
1818 | #[inline ] |
1819 | #[target_feature (enable = "sse2" )] |
1820 | #[cfg_attr (test, assert_instr(andnps))] |
1821 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1822 | pub unsafe fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d { |
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_andnot_si128(a, b))
1826 | } |
1827 | |
1828 | /// Computes the bitwise OR of `a` and `b`. |
1829 | /// |
1830 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_pd) |
1831 | #[inline ] |
1832 | #[target_feature (enable = "sse2" )] |
1833 | #[cfg_attr (test, assert_instr(orps))] |
1834 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1835 | pub unsafe fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d { |
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_or_si128(a, b))
1839 | } |
1840 | |
1841 | /// Computes the bitwise XOR of `a` and `b`. |
1842 | /// |
1843 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_pd) |
1844 | #[inline ] |
1845 | #[target_feature (enable = "sse2" )] |
1846 | #[cfg_attr (test, assert_instr(xorps))] |
1847 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1848 | pub unsafe fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d { |
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_xor_si128(a, b))
1852 | } |
1853 | |
1854 | /// Returns a new vector with the low element of `a` replaced by the equality |
1855 | /// comparison of the lower elements of `a` and `b`. |
1856 | /// |
1857 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_sd) |
1858 | #[inline ] |
1859 | #[target_feature (enable = "sse2" )] |
1860 | #[cfg_attr (test, assert_instr(cmpeqsd))] |
1861 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1862 | pub unsafe fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d { |
    cmpsd(a, b, 0)
1864 | } |
1865 | |
1866 | /// Returns a new vector with the low element of `a` replaced by the less-than |
1867 | /// comparison of the lower elements of `a` and `b`. |
1868 | /// |
1869 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_sd) |
1870 | #[inline ] |
1871 | #[target_feature (enable = "sse2" )] |
1872 | #[cfg_attr (test, assert_instr(cmpltsd))] |
1873 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1874 | pub unsafe fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d { |
    cmpsd(a, b, 1)
1876 | } |
1877 | |
1878 | /// Returns a new vector with the low element of `a` replaced by the |
1879 | /// less-than-or-equal comparison of the lower elements of `a` and `b`. |
1880 | /// |
1881 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_sd) |
1882 | #[inline ] |
1883 | #[target_feature (enable = "sse2" )] |
1884 | #[cfg_attr (test, assert_instr(cmplesd))] |
1885 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1886 | pub unsafe fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d { |
    cmpsd(a, b, 2)
1888 | } |
1889 | |
1890 | /// Returns a new vector with the low element of `a` replaced by the |
1891 | /// greater-than comparison of the lower elements of `a` and `b`. |
1892 | /// |
1893 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_sd) |
1894 | #[inline ] |
1895 | #[target_feature (enable = "sse2" )] |
1896 | #[cfg_attr (test, assert_instr(cmpltsd))] |
1897 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1898 | pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d { |
1899 | simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64)) |
1900 | } |
1901 | |
1902 | /// Returns a new vector with the low element of `a` replaced by the |
1903 | /// greater-than-or-equal comparison of the lower elements of `a` and `b`. |
1904 | /// |
1905 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_sd) |
1906 | #[inline ] |
1907 | #[target_feature (enable = "sse2" )] |
1908 | #[cfg_attr (test, assert_instr(cmplesd))] |
1909 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1910 | pub unsafe fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d { |
1911 | simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64)) |
1912 | } |
1913 | |
1914 | /// Returns a new vector with the low element of `a` replaced by the result |
1915 | /// of comparing both of the lower elements of `a` and `b` to `NaN`. If |
/// neither is equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0`
1917 | /// otherwise. |
1918 | /// |
1919 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_sd) |
1920 | #[inline ] |
1921 | #[target_feature (enable = "sse2" )] |
1922 | #[cfg_attr (test, assert_instr(cmpordsd))] |
1923 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1924 | pub unsafe fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d { |
    cmpsd(a, b, 7)
1926 | } |
1927 | |
1928 | /// Returns a new vector with the low element of `a` replaced by the result of |
1929 | /// comparing both of the lower elements of `a` and `b` to `NaN`. If either is |
1930 | /// equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise. |
1931 | /// |
1932 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_sd) |
1933 | #[inline ] |
1934 | #[target_feature (enable = "sse2" )] |
1935 | #[cfg_attr (test, assert_instr(cmpunordsd))] |
1936 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1937 | pub unsafe fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d { |
    cmpsd(a, b, 3)
1939 | } |
1940 | |
1941 | /// Returns a new vector with the low element of `a` replaced by the not-equal |
1942 | /// comparison of the lower elements of `a` and `b`. |
1943 | /// |
1944 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_sd) |
1945 | #[inline ] |
1946 | #[target_feature (enable = "sse2" )] |
1947 | #[cfg_attr (test, assert_instr(cmpneqsd))] |
1948 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1949 | pub unsafe fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d { |
    cmpsd(a, b, 4)
1951 | } |
1952 | |
1953 | /// Returns a new vector with the low element of `a` replaced by the |
1954 | /// not-less-than comparison of the lower elements of `a` and `b`. |
1955 | /// |
1956 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_sd) |
1957 | #[inline ] |
1958 | #[target_feature (enable = "sse2" )] |
1959 | #[cfg_attr (test, assert_instr(cmpnltsd))] |
1960 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1961 | pub unsafe fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d { |
    cmpsd(a, b, 5)
1963 | } |
1964 | |
1965 | /// Returns a new vector with the low element of `a` replaced by the |
1966 | /// not-less-than-or-equal comparison of the lower elements of `a` and `b`. |
1967 | /// |
1968 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_sd) |
1969 | #[inline ] |
1970 | #[target_feature (enable = "sse2" )] |
1971 | #[cfg_attr (test, assert_instr(cmpnlesd))] |
1972 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1973 | pub unsafe fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d { |
    cmpsd(a, b, 6)
1975 | } |
1976 | |
1977 | /// Returns a new vector with the low element of `a` replaced by the |
1978 | /// not-greater-than comparison of the lower elements of `a` and `b`. |
1979 | /// |
1980 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_sd) |
1981 | #[inline ] |
1982 | #[target_feature (enable = "sse2" )] |
1983 | #[cfg_attr (test, assert_instr(cmpnltsd))] |
1984 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1985 | pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d { |
1986 | simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64)) |
1987 | } |
1988 | |
1989 | /// Returns a new vector with the low element of `a` replaced by the |
1990 | /// not-greater-than-or-equal comparison of the lower elements of `a` and `b`. |
1991 | /// |
1992 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_sd) |
1993 | #[inline ] |
1994 | #[target_feature (enable = "sse2" )] |
1995 | #[cfg_attr (test, assert_instr(cmpnlesd))] |
1996 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1997 | pub unsafe fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d { |
1998 | simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64)) |
1999 | } |
2000 | |
2001 | /// Compares corresponding elements in `a` and `b` for equality. |
2002 | /// |
2003 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_pd) |
2004 | #[inline ] |
2005 | #[target_feature (enable = "sse2" )] |
2006 | #[cfg_attr (test, assert_instr(cmpeqpd))] |
2007 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2008 | pub unsafe fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d { |
    cmppd(a, b, 0)
2010 | } |
2011 | |
2012 | /// Compares corresponding elements in `a` and `b` for less-than. |
2013 | /// |
2014 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_pd) |
2015 | #[inline ] |
2016 | #[target_feature (enable = "sse2" )] |
2017 | #[cfg_attr (test, assert_instr(cmpltpd))] |
2018 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2019 | pub unsafe fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d { |
    cmppd(a, b, 1)
2021 | } |
2022 | |
/// Compares corresponding elements in `a` and `b` for less-than-or-equal.
2024 | /// |
2025 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_pd) |
2026 | #[inline ] |
2027 | #[target_feature (enable = "sse2" )] |
2028 | #[cfg_attr (test, assert_instr(cmplepd))] |
2029 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2030 | pub unsafe fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d { |
    cmppd(a, b, 2)
2032 | } |
2033 | |
2034 | /// Compares corresponding elements in `a` and `b` for greater-than. |
2035 | /// |
2036 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_pd) |
2037 | #[inline ] |
2038 | #[target_feature (enable = "sse2" )] |
2039 | #[cfg_attr (test, assert_instr(cmpltpd))] |
2040 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2041 | pub unsafe fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d { |
    _mm_cmplt_pd(b, a)
2043 | } |
2044 | |
2045 | /// Compares corresponding elements in `a` and `b` for greater-than-or-equal. |
2046 | /// |
2047 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_pd) |
2048 | #[inline ] |
2049 | #[target_feature (enable = "sse2" )] |
2050 | #[cfg_attr (test, assert_instr(cmplepd))] |
2051 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2052 | pub unsafe fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d { |
    _mm_cmple_pd(b, a)
2054 | } |
2055 | |
2056 | /// Compares corresponding elements in `a` and `b` to see if neither is `NaN`. |
2057 | /// |
2058 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_pd) |
2059 | #[inline ] |
2060 | #[target_feature (enable = "sse2" )] |
2061 | #[cfg_attr (test, assert_instr(cmpordpd))] |
2062 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2063 | pub unsafe fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d { |
    cmppd(a, b, 7)
2065 | } |
2066 | |
2067 | /// Compares corresponding elements in `a` and `b` to see if either is `NaN`. |
2068 | /// |
2069 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_pd) |
2070 | #[inline ] |
2071 | #[target_feature (enable = "sse2" )] |
2072 | #[cfg_attr (test, assert_instr(cmpunordpd))] |
2073 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2074 | pub unsafe fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d { |
    cmppd(a, b, 3)
2076 | } |
2077 | |
2078 | /// Compares corresponding elements in `a` and `b` for not-equal. |
2079 | /// |
2080 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_pd) |
2081 | #[inline ] |
2082 | #[target_feature (enable = "sse2" )] |
2083 | #[cfg_attr (test, assert_instr(cmpneqpd))] |
2084 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2085 | pub unsafe fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d { |
    cmppd(a, b, 4)
2087 | } |
2088 | |
2089 | /// Compares corresponding elements in `a` and `b` for not-less-than. |
2090 | /// |
2091 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_pd) |
2092 | #[inline ] |
2093 | #[target_feature (enable = "sse2" )] |
2094 | #[cfg_attr (test, assert_instr(cmpnltpd))] |
2095 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2096 | pub unsafe fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d { |
    cmppd(a, b, 5)
2098 | } |
2099 | |
2100 | /// Compares corresponding elements in `a` and `b` for not-less-than-or-equal. |
2101 | /// |
2102 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_pd) |
2103 | #[inline ] |
2104 | #[target_feature (enable = "sse2" )] |
2105 | #[cfg_attr (test, assert_instr(cmpnlepd))] |
2106 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2107 | pub unsafe fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d { |
    cmppd(a, b, 6)
2109 | } |
2110 | |
2111 | /// Compares corresponding elements in `a` and `b` for not-greater-than. |
2112 | /// |
2113 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_pd) |
2114 | #[inline ] |
2115 | #[target_feature (enable = "sse2" )] |
2116 | #[cfg_attr (test, assert_instr(cmpnltpd))] |
2117 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2118 | pub unsafe fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d { |
    _mm_cmpnlt_pd(b, a)
2120 | } |
2121 | |
2122 | /// Compares corresponding elements in `a` and `b` for |
2123 | /// not-greater-than-or-equal. |
2124 | /// |
2125 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_pd) |
2126 | #[inline ] |
2127 | #[target_feature (enable = "sse2" )] |
2128 | #[cfg_attr (test, assert_instr(cmpnlepd))] |
2129 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2130 | pub unsafe fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d { |
    _mm_cmpnle_pd(b, a)
2132 | } |
2133 | |
2134 | /// Compares the lower element of `a` and `b` for equality. |
2135 | /// |
2136 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comieq_sd) |
2137 | #[inline ] |
2138 | #[target_feature (enable = "sse2" )] |
2139 | #[cfg_attr (test, assert_instr(comisd))] |
2140 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2141 | pub unsafe fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 { |
2142 | comieqsd(a, b) |
2143 | } |
2144 | |
2145 | /// Compares the lower element of `a` and `b` for less-than. |
2146 | /// |
2147 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comilt_sd) |
2148 | #[inline ] |
2149 | #[target_feature (enable = "sse2" )] |
2150 | #[cfg_attr (test, assert_instr(comisd))] |
2151 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2152 | pub unsafe fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 { |
2153 | comiltsd(a, b) |
2154 | } |
2155 | |
2156 | /// Compares the lower element of `a` and `b` for less-than-or-equal. |
2157 | /// |
2158 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comile_sd) |
2159 | #[inline ] |
2160 | #[target_feature (enable = "sse2" )] |
2161 | #[cfg_attr (test, assert_instr(comisd))] |
2162 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2163 | pub unsafe fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 { |
2164 | comilesd(a, b) |
2165 | } |
2166 | |
2167 | /// Compares the lower element of `a` and `b` for greater-than. |
2168 | /// |
2169 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comigt_sd) |
2170 | #[inline ] |
2171 | #[target_feature (enable = "sse2" )] |
2172 | #[cfg_attr (test, assert_instr(comisd))] |
2173 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2174 | pub unsafe fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 { |
2175 | comigtsd(a, b) |
2176 | } |
2177 | |
2178 | /// Compares the lower element of `a` and `b` for greater-than-or-equal. |
2179 | /// |
2180 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comige_sd) |
2181 | #[inline ] |
2182 | #[target_feature (enable = "sse2" )] |
2183 | #[cfg_attr (test, assert_instr(comisd))] |
2184 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2185 | pub unsafe fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 { |
2186 | comigesd(a, b) |
2187 | } |
2188 | |
2189 | /// Compares the lower element of `a` and `b` for not-equal. |
2190 | /// |
2191 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comineq_sd) |
2192 | #[inline ] |
2193 | #[target_feature (enable = "sse2" )] |
2194 | #[cfg_attr (test, assert_instr(comisd))] |
2195 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2196 | pub unsafe fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 { |
2197 | comineqsd(a, b) |
2198 | } |
2199 | |
2200 | /// Compares the lower element of `a` and `b` for equality. |
2201 | /// |
2202 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomieq_sd) |
2203 | #[inline ] |
2204 | #[target_feature (enable = "sse2" )] |
2205 | #[cfg_attr (test, assert_instr(ucomisd))] |
2206 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2207 | pub unsafe fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 { |
2208 | ucomieqsd(a, b) |
2209 | } |
2210 | |
2211 | /// Compares the lower element of `a` and `b` for less-than. |
2212 | /// |
2213 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomilt_sd) |
2214 | #[inline ] |
2215 | #[target_feature (enable = "sse2" )] |
2216 | #[cfg_attr (test, assert_instr(ucomisd))] |
2217 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2218 | pub unsafe fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 { |
2219 | ucomiltsd(a, b) |
2220 | } |
2221 | |
2222 | /// Compares the lower element of `a` and `b` for less-than-or-equal. |
2223 | /// |
2224 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomile_sd) |
2225 | #[inline ] |
2226 | #[target_feature (enable = "sse2" )] |
2227 | #[cfg_attr (test, assert_instr(ucomisd))] |
2228 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2229 | pub unsafe fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 { |
2230 | ucomilesd(a, b) |
2231 | } |
2232 | |
2233 | /// Compares the lower element of `a` and `b` for greater-than. |
2234 | /// |
2235 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomigt_sd) |
2236 | #[inline ] |
2237 | #[target_feature (enable = "sse2" )] |
2238 | #[cfg_attr (test, assert_instr(ucomisd))] |
2239 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2240 | pub unsafe fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 { |
2241 | ucomigtsd(a, b) |
2242 | } |
2243 | |
2244 | /// Compares the lower element of `a` and `b` for greater-than-or-equal. |
2245 | /// |
2246 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomige_sd) |
2247 | #[inline ] |
2248 | #[target_feature (enable = "sse2" )] |
2249 | #[cfg_attr (test, assert_instr(ucomisd))] |
2250 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2251 | pub unsafe fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 { |
2252 | ucomigesd(a, b) |
2253 | } |
2254 | |
2255 | /// Compares the lower element of `a` and `b` for not-equal. |
2256 | /// |
2257 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomineq_sd) |
2258 | #[inline ] |
2259 | #[target_feature (enable = "sse2" )] |
2260 | #[cfg_attr (test, assert_instr(ucomisd))] |
2261 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2262 | pub unsafe fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 { |
2263 | ucomineqsd(a, b) |
2264 | } |
2265 | |
2266 | /// Converts packed double-precision (64-bit) floating-point elements in `a` to |
/// packed single-precision (32-bit) floating-point elements.
2268 | /// |
2269 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_ps) |
2270 | #[inline ] |
2271 | #[target_feature (enable = "sse2" )] |
2272 | #[cfg_attr (test, assert_instr(cvtpd2ps))] |
2273 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2274 | pub unsafe fn _mm_cvtpd_ps(a: __m128d) -> __m128 { |
2275 | let r: f32x2 = simd_cast::<_, f32x2>(a.as_f64x2()); |
    let zero: f32x2 = f32x2::new(0.0, 0.0);
    transmute::<f32x4, _>(simd_shuffle!(r, zero, [0, 1, 2, 3]))
2278 | } |
2279 | |
/// Converts packed single-precision (32-bit) floating-point elements in `a` to
/// packed double-precision (64-bit) floating-point elements.
2283 | /// |
2284 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_pd) |
2285 | #[inline ] |
2286 | #[target_feature (enable = "sse2" )] |
2287 | #[cfg_attr (test, assert_instr(cvtps2pd))] |
2288 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2289 | pub unsafe fn _mm_cvtps_pd(a: __m128) -> __m128d { |
2290 | let a: f32x4 = a.as_f32x4(); |
    transmute(simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
2292 | } |
2293 | |
2294 | /// Converts packed double-precision (64-bit) floating-point elements in `a` to |
2295 | /// packed 32-bit integers. |
2296 | /// |
2297 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epi32) |
2298 | #[inline ] |
2299 | #[target_feature (enable = "sse2" )] |
2300 | #[cfg_attr (test, assert_instr(cvtpd2dq))] |
2301 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2302 | pub unsafe fn _mm_cvtpd_epi32(a: __m128d) -> __m128i { |
    transmute(cvtpd2dq(a))
2304 | } |
2305 | |
/// Converts the lower double-precision (64-bit) floating-point element in `a`
2307 | /// a 32-bit integer. |
2308 | /// |
2309 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_si32) |
2310 | #[inline ] |
2311 | #[target_feature (enable = "sse2" )] |
2312 | #[cfg_attr (test, assert_instr(cvtsd2si))] |
2313 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2314 | pub unsafe fn _mm_cvtsd_si32(a: __m128d) -> i32 { |
2315 | cvtsd2si(a) |
2316 | } |
2317 | |
/// Converts the lower double-precision (64-bit) floating-point element in `b`
/// to a single-precision (32-bit) floating-point element, stores the result in
/// the lower element of the return value, and copies the upper elements from
/// `a` to the upper elements of the return value.
2322 | /// |
2323 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_ss) |
2324 | #[inline ] |
2325 | #[target_feature (enable = "sse2" )] |
2326 | #[cfg_attr (test, assert_instr(cvtsd2ss))] |
2327 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2328 | pub unsafe fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 { |
2329 | cvtsd2ss(a, b) |
2330 | } |
2331 | |
2332 | /// Returns the lower double-precision (64-bit) floating-point element of `a`. |
2333 | /// |
2334 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_f64) |
2335 | #[inline ] |
2336 | #[target_feature (enable = "sse2" )] |
2337 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2338 | pub unsafe fn _mm_cvtsd_f64(a: __m128d) -> f64 { |
2339 | simd_extract!(a, 0) |
2340 | } |
2341 | |
/// Converts the lower single-precision (32-bit) floating-point element in `b`
/// to a double-precision (64-bit) floating-point element, stores the result in
/// the lower element of the return value, and copies the upper element from
/// `a` to the upper element of the return value.
2346 | /// |
2347 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_sd) |
2348 | #[inline ] |
2349 | #[target_feature (enable = "sse2" )] |
2350 | #[cfg_attr (test, assert_instr(cvtss2sd))] |
2351 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2352 | pub unsafe fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d { |
2353 | cvtss2sd(a, b) |
2354 | } |
2355 | |
2356 | /// Converts packed double-precision (64-bit) floating-point elements in `a` to |
2357 | /// packed 32-bit integers with truncation. |
2358 | /// |
2359 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epi32) |
2360 | #[inline ] |
2361 | #[target_feature (enable = "sse2" )] |
2362 | #[cfg_attr (test, assert_instr(cvttpd2dq))] |
2363 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2364 | pub unsafe fn _mm_cvttpd_epi32(a: __m128d) -> __m128i { |
    transmute(cvttpd2dq(a))
2366 | } |
2367 | |
2368 | /// Converts the lower double-precision (64-bit) floating-point element in `a` |
2369 | /// to a 32-bit integer with truncation. |
2370 | /// |
2371 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_si32) |
2372 | #[inline ] |
2373 | #[target_feature (enable = "sse2" )] |
2374 | #[cfg_attr (test, assert_instr(cvttsd2si))] |
2375 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2376 | pub unsafe fn _mm_cvttsd_si32(a: __m128d) -> i32 { |
2377 | cvttsd2si(a) |
2378 | } |
2379 | |
2380 | /// Converts packed single-precision (32-bit) floating-point elements in `a` to |
2381 | /// packed 32-bit integers with truncation. |
2382 | /// |
2383 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi32) |
2384 | #[inline ] |
2385 | #[target_feature (enable = "sse2" )] |
2386 | #[cfg_attr (test, assert_instr(cvttps2dq))] |
2387 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2388 | pub unsafe fn _mm_cvttps_epi32(a: __m128) -> __m128i { |
    transmute(cvttps2dq(a))
2390 | } |
2391 | |
/// Copies double-precision (64-bit) floating-point element `a` to the lower
/// element of the return value, and zeroes the upper element.
2394 | /// |
2395 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_sd) |
2396 | #[inline ] |
2397 | #[target_feature (enable = "sse2" )] |
2398 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2399 | pub unsafe fn _mm_set_sd(a: f64) -> __m128d { |
    _mm_set_pd(0.0, a)
2401 | } |
2402 | |
/// Broadcasts double-precision (64-bit) floating-point value `a` to all
2404 | /// of the return value. |
2405 | /// |
2406 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_pd) |
2407 | #[inline ] |
2408 | #[target_feature (enable = "sse2" )] |
2409 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2410 | pub unsafe fn _mm_set1_pd(a: f64) -> __m128d { |
    _mm_set_pd(a, a)
2412 | } |
2413 | |
/// Broadcasts double-precision (64-bit) floating-point value `a` to all
2415 | /// of the return value. |
2416 | /// |
2417 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd1) |
2418 | #[inline ] |
2419 | #[target_feature (enable = "sse2" )] |
2420 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2421 | pub unsafe fn _mm_set_pd1(a: f64) -> __m128d { |
    _mm_set_pd(a, a)
2423 | } |
2424 | |
2425 | /// Sets packed double-precision (64-bit) floating-point elements in the return |
2426 | /// value with the supplied values. |
2427 | /// |
2428 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd) |
2429 | #[inline ] |
2430 | #[target_feature (enable = "sse2" )] |
2431 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2432 | pub unsafe fn _mm_set_pd(a: f64, b: f64) -> __m128d { |
2433 | __m128d(b, a) |
2434 | } |
2435 | |
2436 | /// Sets packed double-precision (64-bit) floating-point elements in the return |
2437 | /// value with the supplied values in reverse order. |
2438 | /// |
2439 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_pd) |
2440 | #[inline ] |
2441 | #[target_feature (enable = "sse2" )] |
2442 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2443 | pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> __m128d { |
_mm_set_pd(b, a)
2445 | } |
2446 | |
2447 | /// Returns packed double-precision (64-bit) floating-point elements with all |
2448 | /// zeros. |
2449 | /// |
2450 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_pd) |
2451 | #[inline ] |
2452 | #[target_feature (enable = "sse2" )] |
2453 | #[cfg_attr (test, assert_instr(xorps))] // FIXME xorpd expected |
2454 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2455 | pub unsafe fn _mm_setzero_pd() -> __m128d { |
_mm_set_pd(0.0, 0.0)
2457 | } |
2458 | |
2459 | /// Returns a mask of the most significant bit of each element in `a`. |
2460 | /// |
2461 | /// The mask is stored in the 2 least significant bits of the return value. |
2462 | /// All other bits are set to `0`. |
2463 | /// |
2464 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_pd) |
2465 | #[inline ] |
2466 | #[target_feature (enable = "sse2" )] |
2467 | #[cfg_attr (test, assert_instr(movmskpd))] |
2468 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2469 | pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 { |
2470 | // Propagate the highest bit to the rest, because simd_bitmask |
2471 | // requires all-1 or all-0. |
let mask: i64x2 = simd_lt(transmute(a), i64x2::splat(0));
2473 | simd_bitmask::<i64x2, u8>(mask).into() |
2474 | } |
2475 | |
2476 | /// Loads 128-bits (composed of 2 packed double-precision (64-bit) |
2477 | /// floating-point elements) from memory into the returned vector. |
2478 | /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection |
2479 | /// exception may be generated. |
2480 | /// |
2481 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd) |
2482 | #[inline ] |
2483 | #[target_feature (enable = "sse2" )] |
2484 | #[cfg_attr (test, assert_instr(movaps))] |
2485 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2486 | #[allow (clippy::cast_ptr_alignment)] |
2487 | pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d { |
2488 | *(mem_addr as *const __m128d) |
2489 | } |
2490 | |
/// Loads a 64-bit double-precision value to the low element of a
/// 128-bit vector of `[2 x double]` and clears the upper element.
2493 | /// |
2494 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_sd) |
2495 | #[inline ] |
2496 | #[target_feature (enable = "sse2" )] |
2497 | #[cfg_attr (test, assert_instr(movsd))] |
2498 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2499 | pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d { |
_mm_setr_pd(*mem_addr, 0.)
2501 | } |
2502 | |
2503 | /// Loads a double-precision value into the high-order bits of a 128-bit |
2504 | /// vector of `[2 x double]`. The low-order bits are copied from the low-order |
2505 | /// bits of the first operand. |
2506 | /// |
2507 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadh_pd) |
2508 | #[inline ] |
2509 | #[target_feature (enable = "sse2" )] |
2510 | #[cfg_attr (test, assert_instr(movhps))] |
2511 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2512 | pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d { |
_mm_setr_pd(simd_extract!(a, 0), *mem_addr)
2514 | } |
2515 | |
2516 | /// Loads a double-precision value into the low-order bits of a 128-bit |
2517 | /// vector of `[2 x double]`. The high-order bits are copied from the |
2518 | /// high-order bits of the first operand. |
2519 | /// |
2520 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_pd) |
2521 | #[inline ] |
2522 | #[target_feature (enable = "sse2" )] |
2523 | #[cfg_attr (test, assert_instr(movlps))] |
2524 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2525 | pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d { |
_mm_setr_pd(*mem_addr, simd_extract!(a, 1))
2527 | } |
2528 | |
/// Stores a 128-bit floating-point vector of `[2 x double]` to a 128-bit
2530 | /// aligned memory location. |
2531 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be |
2532 | /// used again soon). |
2533 | /// |
2534 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_pd) |
2535 | /// |
2536 | /// # Safety of non-temporal stores |
2537 | /// |
2538 | /// After using this intrinsic, but before any other access to the memory that this intrinsic |
2539 | /// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In |
2540 | /// particular, functions that call this intrinsic should generally call `_mm_sfence` before they |
2541 | /// return. |
2542 | /// |
2543 | /// See [`_mm_sfence`] for details. |
2544 | #[inline ] |
2545 | #[target_feature (enable = "sse2" )] |
2546 | #[cfg_attr (test, assert_instr(movntps))] // FIXME movntpd |
2547 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2548 | #[allow (clippy::cast_ptr_alignment)] |
2549 | pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) { |
intrinsics::nontemporal_store(mem_addr as *mut __m128d, a);
2551 | } |
2552 | |
2553 | /// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a |
2554 | /// memory location. |
2555 | /// |
2556 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_sd) |
2557 | #[inline ] |
2558 | #[target_feature (enable = "sse2" )] |
2559 | #[cfg_attr (all(test, not(target_os = "windows" )), assert_instr(movlps))] |
2560 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2561 | pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) { |
2562 | *mem_addr = simd_extract!(a, 0) |
2563 | } |
2564 | |
2565 | /// Stores 128-bits (composed of 2 packed double-precision (64-bit) |
2566 | /// floating-point elements) from `a` into memory. `mem_addr` must be aligned |
2567 | /// on a 16-byte boundary or a general-protection exception may be generated. |
2568 | /// |
2569 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd) |
2570 | #[inline ] |
2571 | #[target_feature (enable = "sse2" )] |
2572 | #[cfg_attr (test, assert_instr(movaps))] |
2573 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2574 | #[allow (clippy::cast_ptr_alignment)] |
2575 | pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) { |
2576 | *(mem_addr as *mut __m128d) = a; |
2577 | } |
2578 | |
2579 | /// Stores 128-bits (composed of 2 packed double-precision (64-bit) |
2580 | /// floating-point elements) from `a` into memory. |
2581 | /// `mem_addr` does not need to be aligned on any particular boundary. |
2582 | /// |
2583 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_pd) |
2584 | #[inline ] |
2585 | #[target_feature (enable = "sse2" )] |
2586 | #[cfg_attr (test, assert_instr(movups))] // FIXME movupd expected |
2587 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2588 | pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) { |
mem_addr.cast::<__m128d>().write_unaligned(a);
2590 | } |
2591 | |
2592 | /// Stores the lower double-precision (64-bit) floating-point element from `a` |
2593 | /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a |
2594 | /// 16-byte boundary or a general-protection exception may be generated. |
2595 | /// |
2596 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store1_pd) |
2597 | #[inline ] |
2598 | #[target_feature (enable = "sse2" )] |
2599 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2600 | #[allow (clippy::cast_ptr_alignment)] |
2601 | pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) { |
2602 | let b: __m128d = simd_shuffle!(a, a, [0, 0]); |
2603 | *(mem_addr as *mut __m128d) = b; |
2604 | } |
2605 | |
2606 | /// Stores the lower double-precision (64-bit) floating-point element from `a` |
2607 | /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a |
2608 | /// 16-byte boundary or a general-protection exception may be generated. |
2609 | /// |
2610 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd1) |
2611 | #[inline ] |
2612 | #[target_feature (enable = "sse2" )] |
2613 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2614 | #[allow (clippy::cast_ptr_alignment)] |
2615 | pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) { |
2616 | let b: __m128d = simd_shuffle!(a, a, [0, 0]); |
2617 | *(mem_addr as *mut __m128d) = b; |
2618 | } |
2619 | |
2620 | /// Stores 2 double-precision (64-bit) floating-point elements from `a` into |
2621 | /// memory in reverse order. |
2622 | /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection |
2623 | /// exception may be generated. |
2624 | /// |
2625 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storer_pd) |
2626 | #[inline ] |
2627 | #[target_feature (enable = "sse2" )] |
2628 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2629 | #[allow (clippy::cast_ptr_alignment)] |
2630 | pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) { |
2631 | let b: __m128d = simd_shuffle!(a, a, [1, 0]); |
2632 | *(mem_addr as *mut __m128d) = b; |
2633 | } |
2634 | |
2635 | /// Stores the upper 64 bits of a 128-bit vector of `[2 x double]` to a |
2636 | /// memory location. |
2637 | /// |
2638 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeh_pd) |
2639 | #[inline ] |
2640 | #[target_feature (enable = "sse2" )] |
2641 | #[cfg_attr (all(test, not(target_os = "windows" )), assert_instr(movhps))] |
2642 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2643 | pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) { |
2644 | *mem_addr = simd_extract!(a, 1); |
2645 | } |
2646 | |
2647 | /// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a |
2648 | /// memory location. |
2649 | /// |
2650 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_pd) |
2651 | #[inline ] |
2652 | #[target_feature (enable = "sse2" )] |
2653 | #[cfg_attr (all(test, not(target_os = "windows" )), assert_instr(movlps))] |
2654 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2655 | pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) { |
2656 | *mem_addr = simd_extract!(a, 0); |
2657 | } |
2658 | |
2659 | /// Loads a double-precision (64-bit) floating-point element from memory |
2660 | /// into both elements of returned vector. |
2661 | /// |
2662 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load1_pd) |
2663 | #[inline ] |
2664 | #[target_feature (enable = "sse2" )] |
2665 | // #[cfg_attr(test, assert_instr(movapd))] // FIXME LLVM uses different codegen |
2666 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2667 | pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d { |
2668 | let d: f64 = *mem_addr; |
_mm_setr_pd(d, d)
2670 | } |
2671 | |
2672 | /// Loads a double-precision (64-bit) floating-point element from memory |
2673 | /// into both elements of returned vector. |
2674 | /// |
2675 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd1) |
2676 | #[inline ] |
2677 | #[target_feature (enable = "sse2" )] |
2678 | // #[cfg_attr(test, assert_instr(movapd))] // FIXME same as _mm_load1_pd |
2679 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2680 | pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d { |
2681 | _mm_load1_pd(mem_addr) |
2682 | } |
2683 | |
2684 | /// Loads 2 double-precision (64-bit) floating-point elements from memory into |
2685 | /// the returned vector in reverse order. `mem_addr` must be aligned on a |
2686 | /// 16-byte boundary or a general-protection exception may be generated. |
2687 | /// |
2688 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadr_pd) |
2689 | #[inline ] |
2690 | #[target_feature (enable = "sse2" )] |
2691 | #[cfg_attr (test, assert_instr(movaps))] |
2692 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2693 | pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d { |
2694 | let a: __m128d = _mm_load_pd(mem_addr); |
2695 | simd_shuffle!(a, a, [1, 0]) |
2696 | } |
2697 | |
2698 | /// Loads 128-bits (composed of 2 packed double-precision (64-bit) |
2699 | /// floating-point elements) from memory into the returned vector. |
2700 | /// `mem_addr` does not need to be aligned on any particular boundary. |
2701 | /// |
2702 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_pd) |
2703 | #[inline ] |
2704 | #[target_feature (enable = "sse2" )] |
2705 | #[cfg_attr (test, assert_instr(movups))] |
2706 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2707 | pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d { |
2708 | let mut dst: __m128d = _mm_undefined_pd(); |
ptr::copy_nonoverlapping(
mem_addr as *const u8,
ptr::addr_of_mut!(dst) as *mut u8,
mem::size_of::<__m128d>(),
);
2714 | dst |
2715 | } |
2716 | |
2717 | /// Constructs a 128-bit floating-point vector of `[2 x double]` from two |
2718 | /// 128-bit vector parameters of `[2 x double]`, using the immediate-value |
2719 | /// parameter as a specifier. |
2720 | /// |
2721 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_pd) |
2722 | #[inline ] |
2723 | #[target_feature (enable = "sse2" )] |
2724 | #[cfg_attr (test, assert_instr(shufps, MASK = 2))] |
2725 | #[rustc_legacy_const_generics (2)] |
2726 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2727 | pub unsafe fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d { |
2728 | static_assert_uimm_bits!(MASK, 8); |
2729 | simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) |
2730 | } |
2731 | |
2732 | /// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower |
2733 | /// 64 bits are set to the lower 64 bits of the second parameter. The upper |
2734 | /// 64 bits are set to the upper 64 bits of the first parameter. |
2735 | /// |
2736 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_sd) |
2737 | #[inline ] |
2738 | #[target_feature (enable = "sse2" )] |
2739 | #[cfg_attr (test, assert_instr(movsd))] |
2740 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2741 | pub unsafe fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d { |
_mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1))
2743 | } |
2744 | |
2745 | /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit |
2746 | /// floating-point vector of `[4 x float]`. |
2747 | /// |
2748 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_ps) |
2749 | #[inline ] |
2750 | #[target_feature (enable = "sse2" )] |
2751 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2752 | pub unsafe fn _mm_castpd_ps(a: __m128d) -> __m128 { |
transmute(a)
2754 | } |
2755 | |
2756 | /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit |
2757 | /// integer vector. |
2758 | /// |
2759 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_si128) |
2760 | #[inline ] |
2761 | #[target_feature (enable = "sse2" )] |
2762 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2763 | pub unsafe fn _mm_castpd_si128(a: __m128d) -> __m128i { |
transmute(a)
2765 | } |
2766 | |
2767 | /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit |
2768 | /// floating-point vector of `[2 x double]`. |
2769 | /// |
2770 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_pd) |
2771 | #[inline ] |
2772 | #[target_feature (enable = "sse2" )] |
2773 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2774 | pub unsafe fn _mm_castps_pd(a: __m128) -> __m128d { |
transmute(a)
2776 | } |
2777 | |
2778 | /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit |
2779 | /// integer vector. |
2780 | /// |
2781 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_si128) |
2782 | #[inline ] |
2783 | #[target_feature (enable = "sse2" )] |
2784 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2785 | pub unsafe fn _mm_castps_si128(a: __m128) -> __m128i { |
transmute(a)
2787 | } |
2788 | |
2789 | /// Casts a 128-bit integer vector into a 128-bit floating-point vector |
2790 | /// of `[2 x double]`. |
2791 | /// |
2792 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_pd) |
2793 | #[inline ] |
2794 | #[target_feature (enable = "sse2" )] |
2795 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2796 | pub unsafe fn _mm_castsi128_pd(a: __m128i) -> __m128d { |
transmute(a)
2798 | } |
2799 | |
2800 | /// Casts a 128-bit integer vector into a 128-bit floating-point vector |
2801 | /// of `[4 x float]`. |
2802 | /// |
2803 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_ps) |
2804 | #[inline ] |
2805 | #[target_feature (enable = "sse2" )] |
2806 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2807 | pub unsafe fn _mm_castsi128_ps(a: __m128i) -> __m128 { |
transmute(a)
2809 | } |
2810 | |
/// Returns a vector of type `__m128d` with indeterminate elements.
2812 | /// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`]. |
2813 | /// In practice, this is equivalent to [`mem::zeroed`]. |
2814 | /// |
2815 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_pd) |
2816 | #[inline ] |
2817 | #[target_feature (enable = "sse2" )] |
2818 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2819 | pub unsafe fn _mm_undefined_pd() -> __m128d { |
2820 | __m128d(0.0, 0.0) |
2821 | } |
2822 | |
/// Returns a vector of type `__m128i` with indeterminate elements.
2824 | /// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`]. |
2825 | /// In practice, this is equivalent to [`mem::zeroed`]. |
2826 | /// |
2827 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_si128) |
2828 | #[inline ] |
2829 | #[target_feature (enable = "sse2" )] |
2830 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2831 | pub unsafe fn _mm_undefined_si128() -> __m128i { |
2832 | __m128i(0, 0) |
2833 | } |
2834 | |
/// The resulting `__m128d` element is composed of the high-order values of
/// the two `__m128d` interleaved input elements, i.e.:
///
/// * The `[127:64]` bits are copied from the `[127:64]` bits of the second
///   input
/// * The `[63:0]` bits are copied from the `[127:64]` bits of the first
///   input
2841 | /// |
2842 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_pd) |
2843 | #[inline ] |
2844 | #[target_feature (enable = "sse2" )] |
2845 | #[cfg_attr (test, assert_instr(unpckhpd))] |
2846 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2847 | pub unsafe fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d { |
2848 | simd_shuffle!(a, b, [1, 3]) |
2849 | } |
2850 | |
/// The resulting `__m128d` element is composed of the low-order values of
/// the two `__m128d` interleaved input elements, i.e.:
2853 | /// |
2854 | /// * The `[127:64]` bits are copied from the `[63:0]` bits of the second input |
2855 | /// * The `[63:0]` bits are copied from the `[63:0]` bits of the first input |
2856 | /// |
2857 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_pd) |
2858 | #[inline ] |
2859 | #[target_feature (enable = "sse2" )] |
2860 | #[cfg_attr (all(test, not(target_os = "windows" )), assert_instr(movlhps))] |
2861 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2862 | pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d { |
2863 | simd_shuffle!(a, b, [0, 2]) |
2864 | } |
2865 | |
2866 | #[allow (improper_ctypes)] |
2867 | extern "C" { |
2868 | #[link_name = "llvm.x86.sse2.pause" ] |
2869 | fn pause(); |
2870 | #[link_name = "llvm.x86.sse2.clflush" ] |
2871 | fn clflush(p: *const u8); |
2872 | #[link_name = "llvm.x86.sse2.lfence" ] |
2873 | fn lfence(); |
2874 | #[link_name = "llvm.x86.sse2.mfence" ] |
2875 | fn mfence(); |
2876 | #[link_name = "llvm.x86.sse2.pmadd.wd" ] |
2877 | fn pmaddwd(a: i16x8, b: i16x8) -> i32x4; |
2878 | #[link_name = "llvm.x86.sse2.psad.bw" ] |
2879 | fn psadbw(a: u8x16, b: u8x16) -> u64x2; |
2880 | #[link_name = "llvm.x86.sse2.psll.w" ] |
2881 | fn psllw(a: i16x8, count: i16x8) -> i16x8; |
2882 | #[link_name = "llvm.x86.sse2.psll.d" ] |
2883 | fn pslld(a: i32x4, count: i32x4) -> i32x4; |
2884 | #[link_name = "llvm.x86.sse2.psll.q" ] |
2885 | fn psllq(a: i64x2, count: i64x2) -> i64x2; |
2886 | #[link_name = "llvm.x86.sse2.psra.w" ] |
2887 | fn psraw(a: i16x8, count: i16x8) -> i16x8; |
2888 | #[link_name = "llvm.x86.sse2.psra.d" ] |
2889 | fn psrad(a: i32x4, count: i32x4) -> i32x4; |
2890 | #[link_name = "llvm.x86.sse2.psrl.w" ] |
2891 | fn psrlw(a: i16x8, count: i16x8) -> i16x8; |
2892 | #[link_name = "llvm.x86.sse2.psrl.d" ] |
2893 | fn psrld(a: i32x4, count: i32x4) -> i32x4; |
2894 | #[link_name = "llvm.x86.sse2.psrl.q" ] |
2895 | fn psrlq(a: i64x2, count: i64x2) -> i64x2; |
2896 | #[link_name = "llvm.x86.sse2.cvtps2dq" ] |
2897 | fn cvtps2dq(a: __m128) -> i32x4; |
2898 | #[link_name = "llvm.x86.sse2.maskmov.dqu" ] |
2899 | fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8); |
2900 | #[link_name = "llvm.x86.sse2.packsswb.128" ] |
2901 | fn packsswb(a: i16x8, b: i16x8) -> i8x16; |
2902 | #[link_name = "llvm.x86.sse2.packssdw.128" ] |
2903 | fn packssdw(a: i32x4, b: i32x4) -> i16x8; |
2904 | #[link_name = "llvm.x86.sse2.packuswb.128" ] |
2905 | fn packuswb(a: i16x8, b: i16x8) -> u8x16; |
2906 | #[link_name = "llvm.x86.sse2.max.sd" ] |
2907 | fn maxsd(a: __m128d, b: __m128d) -> __m128d; |
2908 | #[link_name = "llvm.x86.sse2.max.pd" ] |
2909 | fn maxpd(a: __m128d, b: __m128d) -> __m128d; |
2910 | #[link_name = "llvm.x86.sse2.min.sd" ] |
2911 | fn minsd(a: __m128d, b: __m128d) -> __m128d; |
2912 | #[link_name = "llvm.x86.sse2.min.pd" ] |
2913 | fn minpd(a: __m128d, b: __m128d) -> __m128d; |
2914 | #[link_name = "llvm.x86.sse2.sqrt.sd" ] |
2915 | fn sqrtsd(a: __m128d) -> __m128d; |
2916 | #[link_name = "llvm.x86.sse2.sqrt.pd" ] |
2917 | fn sqrtpd(a: __m128d) -> __m128d; |
2918 | #[link_name = "llvm.x86.sse2.cmp.sd" ] |
2919 | fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d; |
2920 | #[link_name = "llvm.x86.sse2.cmp.pd" ] |
2921 | fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d; |
2922 | #[link_name = "llvm.x86.sse2.comieq.sd" ] |
2923 | fn comieqsd(a: __m128d, b: __m128d) -> i32; |
2924 | #[link_name = "llvm.x86.sse2.comilt.sd" ] |
2925 | fn comiltsd(a: __m128d, b: __m128d) -> i32; |
2926 | #[link_name = "llvm.x86.sse2.comile.sd" ] |
2927 | fn comilesd(a: __m128d, b: __m128d) -> i32; |
2928 | #[link_name = "llvm.x86.sse2.comigt.sd" ] |
2929 | fn comigtsd(a: __m128d, b: __m128d) -> i32; |
2930 | #[link_name = "llvm.x86.sse2.comige.sd" ] |
2931 | fn comigesd(a: __m128d, b: __m128d) -> i32; |
2932 | #[link_name = "llvm.x86.sse2.comineq.sd" ] |
2933 | fn comineqsd(a: __m128d, b: __m128d) -> i32; |
2934 | #[link_name = "llvm.x86.sse2.ucomieq.sd" ] |
2935 | fn ucomieqsd(a: __m128d, b: __m128d) -> i32; |
2936 | #[link_name = "llvm.x86.sse2.ucomilt.sd" ] |
2937 | fn ucomiltsd(a: __m128d, b: __m128d) -> i32; |
2938 | #[link_name = "llvm.x86.sse2.ucomile.sd" ] |
2939 | fn ucomilesd(a: __m128d, b: __m128d) -> i32; |
2940 | #[link_name = "llvm.x86.sse2.ucomigt.sd" ] |
2941 | fn ucomigtsd(a: __m128d, b: __m128d) -> i32; |
2942 | #[link_name = "llvm.x86.sse2.ucomige.sd" ] |
2943 | fn ucomigesd(a: __m128d, b: __m128d) -> i32; |
2944 | #[link_name = "llvm.x86.sse2.ucomineq.sd" ] |
2945 | fn ucomineqsd(a: __m128d, b: __m128d) -> i32; |
2946 | #[link_name = "llvm.x86.sse2.cvtpd2dq" ] |
2947 | fn cvtpd2dq(a: __m128d) -> i32x4; |
2948 | #[link_name = "llvm.x86.sse2.cvtsd2si" ] |
2949 | fn cvtsd2si(a: __m128d) -> i32; |
2950 | #[link_name = "llvm.x86.sse2.cvtsd2ss" ] |
2951 | fn cvtsd2ss(a: __m128, b: __m128d) -> __m128; |
2952 | #[link_name = "llvm.x86.sse2.cvtss2sd" ] |
2953 | fn cvtss2sd(a: __m128d, b: __m128) -> __m128d; |
2954 | #[link_name = "llvm.x86.sse2.cvttpd2dq" ] |
2955 | fn cvttpd2dq(a: __m128d) -> i32x4; |
2956 | #[link_name = "llvm.x86.sse2.cvttsd2si" ] |
2957 | fn cvttsd2si(a: __m128d) -> i32; |
2958 | #[link_name = "llvm.x86.sse2.cvttps2dq" ] |
2959 | fn cvttps2dq(a: __m128) -> i32x4; |
2960 | } |
2961 | |
2962 | #[cfg (test)] |
2963 | mod tests { |
2964 | use crate::{ |
2965 | core_arch::{simd::*, x86::*}, |
2966 | hint::black_box, |
2967 | }; |
2968 | use std::{ |
2969 | boxed, f32, |
2970 | f64::{self, NAN}, |
2971 | i32, |
2972 | mem::{self, transmute}, |
2973 | ptr, |
2974 | }; |
2975 | use stdarch_test::simd_test; |
2976 | |
2977 | #[test ] |
2978 | fn test_mm_pause() { |
2979 | unsafe { _mm_pause() } |
2980 | } |
2981 | |
2982 | #[simd_test(enable = "sse2" )] |
2983 | unsafe fn test_mm_clflush() { |
2984 | let x = 0_u8; |
2985 | _mm_clflush(ptr::addr_of!(x)); |
2986 | } |
2987 | |
2988 | #[simd_test(enable = "sse2" )] |
2989 | // Miri cannot support this until it is clear how it fits in the Rust memory model |
2990 | #[cfg_attr (miri, ignore)] |
2991 | unsafe fn test_mm_lfence() { |
2992 | _mm_lfence(); |
2993 | } |
2994 | |
2995 | #[simd_test(enable = "sse2" )] |
2996 | // Miri cannot support this until it is clear how it fits in the Rust memory model |
2997 | #[cfg_attr (miri, ignore)] |
2998 | unsafe fn test_mm_mfence() { |
2999 | _mm_mfence(); |
3000 | } |
3001 | |
3002 | #[simd_test(enable = "sse2" )] |
3003 | unsafe fn test_mm_add_epi8() { |
3004 | let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3005 | #[rustfmt::skip] |
3006 | let b = _mm_setr_epi8( |
3007 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
3008 | ); |
3009 | let r = _mm_add_epi8(a, b); |
3010 | #[rustfmt::skip] |
3011 | let e = _mm_setr_epi8( |
3012 | 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, |
3013 | ); |
3014 | assert_eq_m128i(r, e); |
3015 | } |
3016 | |
3017 | #[simd_test(enable = "sse2" )] |
3018 | unsafe fn test_mm_add_epi8_overflow() { |
3019 | let a = _mm_set1_epi8(0x7F); |
3020 | let b = _mm_set1_epi8(1); |
3021 | let r = _mm_add_epi8(a, b); |
3022 | assert_eq_m128i(r, _mm_set1_epi8(-128)); |
3023 | } |
3024 | |
3025 | #[simd_test(enable = "sse2" )] |
3026 | unsafe fn test_mm_add_epi16() { |
3027 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
3028 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
3029 | let r = _mm_add_epi16(a, b); |
3030 | let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); |
3031 | assert_eq_m128i(r, e); |
3032 | } |
3033 | |
3034 | #[simd_test(enable = "sse2" )] |
3035 | unsafe fn test_mm_add_epi32() { |
3036 | let a = _mm_setr_epi32(0, 1, 2, 3); |
3037 | let b = _mm_setr_epi32(4, 5, 6, 7); |
3038 | let r = _mm_add_epi32(a, b); |
3039 | let e = _mm_setr_epi32(4, 6, 8, 10); |
3040 | assert_eq_m128i(r, e); |
3041 | } |
3042 | |
3043 | #[simd_test(enable = "sse2" )] |
3044 | unsafe fn test_mm_add_epi64() { |
3045 | let a = _mm_setr_epi64x(0, 1); |
3046 | let b = _mm_setr_epi64x(2, 3); |
3047 | let r = _mm_add_epi64(a, b); |
3048 | let e = _mm_setr_epi64x(2, 4); |
3049 | assert_eq_m128i(r, e); |
3050 | } |
3051 | |
3052 | #[simd_test(enable = "sse2" )] |
3053 | unsafe fn test_mm_adds_epi8() { |
3054 | let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3055 | #[rustfmt::skip] |
3056 | let b = _mm_setr_epi8( |
3057 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
3058 | ); |
3059 | let r = _mm_adds_epi8(a, b); |
3060 | #[rustfmt::skip] |
3061 | let e = _mm_setr_epi8( |
3062 | 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, |
3063 | ); |
3064 | assert_eq_m128i(r, e); |
3065 | } |
3066 | |
3067 | #[simd_test(enable = "sse2" )] |
3068 | unsafe fn test_mm_adds_epi8_saturate_positive() { |
3069 | let a = _mm_set1_epi8(0x7F); |
3070 | let b = _mm_set1_epi8(1); |
3071 | let r = _mm_adds_epi8(a, b); |
3072 | assert_eq_m128i(r, a); |
3073 | } |
3074 | |
3075 | #[simd_test(enable = "sse2" )] |
3076 | unsafe fn test_mm_adds_epi8_saturate_negative() { |
3077 | let a = _mm_set1_epi8(-0x80); |
3078 | let b = _mm_set1_epi8(-1); |
3079 | let r = _mm_adds_epi8(a, b); |
3080 | assert_eq_m128i(r, a); |
3081 | } |
3082 | |
3083 | #[simd_test(enable = "sse2" )] |
3084 | unsafe fn test_mm_adds_epi16() { |
3085 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
3086 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
3087 | let r = _mm_adds_epi16(a, b); |
3088 | let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); |
3089 | assert_eq_m128i(r, e); |
3090 | } |
3091 | |
3092 | #[simd_test(enable = "sse2" )] |
3093 | unsafe fn test_mm_adds_epi16_saturate_positive() { |
3094 | let a = _mm_set1_epi16(0x7FFF); |
3095 | let b = _mm_set1_epi16(1); |
3096 | let r = _mm_adds_epi16(a, b); |
3097 | assert_eq_m128i(r, a); |
3098 | } |
3099 | |
3100 | #[simd_test(enable = "sse2" )] |
3101 | unsafe fn test_mm_adds_epi16_saturate_negative() { |
3102 | let a = _mm_set1_epi16(-0x8000); |
3103 | let b = _mm_set1_epi16(-1); |
3104 | let r = _mm_adds_epi16(a, b); |
3105 | assert_eq_m128i(r, a); |
3106 | } |
3107 | |
3108 | #[simd_test(enable = "sse2" )] |
3109 | unsafe fn test_mm_adds_epu8() { |
3110 | let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3111 | #[rustfmt::skip] |
3112 | let b = _mm_setr_epi8( |
3113 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
3114 | ); |
3115 | let r = _mm_adds_epu8(a, b); |
3116 | #[rustfmt::skip] |
3117 | let e = _mm_setr_epi8( |
3118 | 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, |
3119 | ); |
3120 | assert_eq_m128i(r, e); |
3121 | } |
3122 | |
3123 | #[simd_test(enable = "sse2" )] |
3124 | unsafe fn test_mm_adds_epu8_saturate() { |
3125 | let a = _mm_set1_epi8(!0); |
3126 | let b = _mm_set1_epi8(1); |
3127 | let r = _mm_adds_epu8(a, b); |
3128 | assert_eq_m128i(r, a); |
3129 | } |
3130 | |
3131 | #[simd_test(enable = "sse2" )] |
3132 | unsafe fn test_mm_adds_epu16() { |
3133 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
3134 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
3135 | let r = _mm_adds_epu16(a, b); |
3136 | let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); |
3137 | assert_eq_m128i(r, e); |
3138 | } |
3139 | |
3140 | #[simd_test(enable = "sse2" )] |
3141 | unsafe fn test_mm_adds_epu16_saturate() { |
3142 | let a = _mm_set1_epi16(!0); |
3143 | let b = _mm_set1_epi16(1); |
3144 | let r = _mm_adds_epu16(a, b); |
3145 | assert_eq_m128i(r, a); |
3146 | } |
3147 | |
3148 | #[simd_test(enable = "sse2" )] |
3149 | unsafe fn test_mm_avg_epu8() { |
3150 | let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9)); |
3151 | let r = _mm_avg_epu8(a, b); |
3152 | assert_eq_m128i(r, _mm_set1_epi8(6)); |
3153 | } |
3154 | |
3155 | #[simd_test(enable = "sse2" )] |
3156 | unsafe fn test_mm_avg_epu16() { |
3157 | let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9)); |
3158 | let r = _mm_avg_epu16(a, b); |
3159 | assert_eq_m128i(r, _mm_set1_epi16(6)); |
3160 | } |
3161 | |
3162 | #[simd_test(enable = "sse2" )] |
3163 | unsafe fn test_mm_madd_epi16() { |
3164 | let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
3165 | let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16); |
3166 | let r = _mm_madd_epi16(a, b); |
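// Each 32-bit lane is a[2*i]*b[2*i] + a[2*i+1]*b[2*i+1];
// e.g. the first lane here is 1 * 9 + 2 * 10 = 29.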
3167 | let e = _mm_setr_epi32(29, 81, 149, 233); |
3168 | assert_eq_m128i(r, e); |
3169 | |
3170 | // Test large values. |
3171 | // MIN*MIN+MIN*MIN will overflow into i32::MIN. |
3172 | let a = _mm_setr_epi16( |
3173 | i16::MAX, |
3174 | i16::MAX, |
3175 | i16::MIN, |
3176 | i16::MIN, |
3177 | i16::MIN, |
3178 | i16::MAX, |
3179 | 0, |
3180 | 0, |
3181 | ); |
3182 | let b = _mm_setr_epi16( |
3183 | i16::MAX, |
3184 | i16::MAX, |
3185 | i16::MIN, |
3186 | i16::MIN, |
3187 | i16::MAX, |
3188 | i16::MIN, |
3189 | 0, |
3190 | 0, |
3191 | ); |
3192 | let r = _mm_madd_epi16(a, b); |
3193 | let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0); |
3194 | assert_eq_m128i(r, e); |
3195 | } |
3196 | |
3197 | #[simd_test(enable = "sse2" )] |
3198 | unsafe fn test_mm_max_epi16() { |
3199 | let a = _mm_set1_epi16(1); |
3200 | let b = _mm_set1_epi16(-1); |
3201 | let r = _mm_max_epi16(a, b); |
3202 | assert_eq_m128i(r, a); |
3203 | } |
3204 | |
3205 | #[simd_test(enable = "sse2" )] |
3206 | unsafe fn test_mm_max_epu8() { |
3207 | let a = _mm_set1_epi8(1); |
3208 | let b = _mm_set1_epi8(!0); |
3209 | let r = _mm_max_epu8(a, b); |
3210 | assert_eq_m128i(r, b); |
3211 | } |
3212 | |
3213 | #[simd_test(enable = "sse2" )] |
3214 | unsafe fn test_mm_min_epi16() { |
3215 | let a = _mm_set1_epi16(1); |
3216 | let b = _mm_set1_epi16(-1); |
3217 | let r = _mm_min_epi16(a, b); |
3218 | assert_eq_m128i(r, b); |
3219 | } |
3220 | |
3221 | #[simd_test(enable = "sse2" )] |
3222 | unsafe fn test_mm_min_epu8() { |
3223 | let a = _mm_set1_epi8(1); |
3224 | let b = _mm_set1_epi8(!0); |
3225 | let r = _mm_min_epu8(a, b); |
3226 | assert_eq_m128i(r, a); |
3227 | } |
3228 | |
3229 | #[simd_test(enable = "sse2" )] |
3230 | unsafe fn test_mm_mulhi_epi16() { |
3231 | let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001)); |
3232 | let r = _mm_mulhi_epi16(a, b); |
3233 | assert_eq_m128i(r, _mm_set1_epi16(-16)); |
3234 | } |
3235 | |
3236 | #[simd_test(enable = "sse2" )] |
3237 | unsafe fn test_mm_mulhi_epu16() { |
3238 | let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001)); |
3239 | let r = _mm_mulhi_epu16(a, b); |
3240 | assert_eq_m128i(r, _mm_set1_epi16(15)); |
3241 | } |
3242 | |
3243 | #[simd_test(enable = "sse2" )] |
3244 | unsafe fn test_mm_mullo_epi16() { |
3245 | let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001)); |
3246 | let r = _mm_mullo_epi16(a, b); |
3247 | assert_eq_m128i(r, _mm_set1_epi16(-17960)); |
3248 | } |
3249 | |
3250 | #[simd_test(enable = "sse2" )] |
3251 | unsafe fn test_mm_mul_epu32() { |
3252 | let a = _mm_setr_epi64x(1_000_000_000, 1 << 34); |
3253 | let b = _mm_setr_epi64x(1_000_000_000, 1 << 35); |
3254 | let r = _mm_mul_epu32(a, b); |
3255 | let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0); |
3256 | assert_eq_m128i(r, e); |
3257 | } |
3258 | |
3259 | #[simd_test(enable = "sse2" )] |
3260 | unsafe fn test_mm_sad_epu8() { |
3261 | #[rustfmt::skip] |
3262 | let a = _mm_setr_epi8( |
3263 | 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8, |
3264 | 1, 2, 3, 4, |
3265 | 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8, |
3266 | 1, 2, 3, 4, |
3267 | ); |
3268 | let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2); |
3269 | let r = _mm_sad_epu8(a, b); |
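// Each 64-bit lane is the sum of absolute differences over its eight
// byte pairs, e.g. (255 + 254 + 253 + 252) + (1 + 1 + 1 + 3) = 1020
// for the low lane.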
3270 | let e = _mm_setr_epi64x(1020, 614); |
3271 | assert_eq_m128i(r, e); |
3272 | } |
3273 | |
3274 | #[simd_test(enable = "sse2" )] |
3275 | unsafe fn test_mm_sub_epi8() { |
3276 | let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6)); |
3277 | let r = _mm_sub_epi8(a, b); |
3278 | assert_eq_m128i(r, _mm_set1_epi8(-1)); |
3279 | } |
3280 | |
3281 | #[simd_test(enable = "sse2" )] |
3282 | unsafe fn test_mm_sub_epi16() { |
3283 | let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6)); |
3284 | let r = _mm_sub_epi16(a, b); |
3285 | assert_eq_m128i(r, _mm_set1_epi16(-1)); |
3286 | } |
3287 | |
3288 | #[simd_test(enable = "sse2" )] |
3289 | unsafe fn test_mm_sub_epi32() { |
3290 | let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6)); |
3291 | let r = _mm_sub_epi32(a, b); |
3292 | assert_eq_m128i(r, _mm_set1_epi32(-1)); |
3293 | } |
3294 | |
3295 | #[simd_test(enable = "sse2" )] |
3296 | unsafe fn test_mm_sub_epi64() { |
3297 | let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6)); |
3298 | let r = _mm_sub_epi64(a, b); |
3299 | assert_eq_m128i(r, _mm_set1_epi64x(-1)); |
3300 | } |
3301 | |
3302 | #[simd_test(enable = "sse2" )] |
3303 | unsafe fn test_mm_subs_epi8() { |
3304 | let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2)); |
3305 | let r = _mm_subs_epi8(a, b); |
3306 | assert_eq_m128i(r, _mm_set1_epi8(3)); |
3307 | } |
3308 | |
3309 | #[simd_test(enable = "sse2" )] |
3310 | unsafe fn test_mm_subs_epi8_saturate_positive() { |
3311 | let a = _mm_set1_epi8(0x7F); |
3312 | let b = _mm_set1_epi8(-1); |
3313 | let r = _mm_subs_epi8(a, b); |
3314 | assert_eq_m128i(r, a); |
3315 | } |
3316 | |
3317 | #[simd_test(enable = "sse2" )] |
3318 | unsafe fn test_mm_subs_epi8_saturate_negative() { |
3319 | let a = _mm_set1_epi8(-0x80); |
3320 | let b = _mm_set1_epi8(1); |
3321 | let r = _mm_subs_epi8(a, b); |
3322 | assert_eq_m128i(r, a); |
3323 | } |
3324 | |
3325 | #[simd_test(enable = "sse2" )] |
3326 | unsafe fn test_mm_subs_epi16() { |
3327 | let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2)); |
3328 | let r = _mm_subs_epi16(a, b); |
3329 | assert_eq_m128i(r, _mm_set1_epi16(3)); |
3330 | } |
3331 | |
3332 | #[simd_test(enable = "sse2" )] |
3333 | unsafe fn test_mm_subs_epi16_saturate_positive() { |
3334 | let a = _mm_set1_epi16(0x7FFF); |
3335 | let b = _mm_set1_epi16(-1); |
3336 | let r = _mm_subs_epi16(a, b); |
3337 | assert_eq_m128i(r, a); |
3338 | } |
3339 | |
3340 | #[simd_test(enable = "sse2" )] |
3341 | unsafe fn test_mm_subs_epi16_saturate_negative() { |
3342 | let a = _mm_set1_epi16(-0x8000); |
3343 | let b = _mm_set1_epi16(1); |
3344 | let r = _mm_subs_epi16(a, b); |
3345 | assert_eq_m128i(r, a); |
3346 | } |
3347 | |
3348 | #[simd_test(enable = "sse2" )] |
3349 | unsafe fn test_mm_subs_epu8() { |
3350 | let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2)); |
3351 | let r = _mm_subs_epu8(a, b); |
3352 | assert_eq_m128i(r, _mm_set1_epi8(3)); |
3353 | } |
3354 | |
3355 | #[simd_test(enable = "sse2" )] |
3356 | unsafe fn test_mm_subs_epu8_saturate() { |
3357 | let a = _mm_set1_epi8(0); |
3358 | let b = _mm_set1_epi8(1); |
3359 | let r = _mm_subs_epu8(a, b); |
3360 | assert_eq_m128i(r, a); |
3361 | } |
3362 | |
3363 | #[simd_test(enable = "sse2" )] |
3364 | unsafe fn test_mm_subs_epu16() { |
3365 | let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2)); |
3366 | let r = _mm_subs_epu16(a, b); |
3367 | assert_eq_m128i(r, _mm_set1_epi16(3)); |
3368 | } |
3369 | |
3370 | #[simd_test(enable = "sse2" )] |
3371 | unsafe fn test_mm_subs_epu16_saturate() { |
3372 | let a = _mm_set1_epi16(0); |
3373 | let b = _mm_set1_epi16(1); |
3374 | let r = _mm_subs_epu16(a, b); |
3375 | assert_eq_m128i(r, a); |
3376 | } |
3377 | |
3378 | #[simd_test(enable = "sse2" )] |
3379 | unsafe fn test_mm_slli_si128() { |
3380 | #[rustfmt::skip] |
3381 | let a = _mm_setr_epi8( |
3382 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
3383 | ); |
3384 | let r = _mm_slli_si128::<1>(a); |
3385 | let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3386 | assert_eq_m128i(r, e); |
3387 | |
3388 | #[rustfmt::skip] |
3389 | let a = _mm_setr_epi8( |
3390 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
3391 | ); |
3392 | let r = _mm_slli_si128::<15>(a); |
3393 | let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); |
3394 | assert_eq_m128i(r, e); |
3395 | |
3396 | #[rustfmt::skip] |
3397 | let a = _mm_setr_epi8( |
3398 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
3399 | ); |
3400 | let r = _mm_slli_si128::<16>(a); |
3401 | assert_eq_m128i(r, _mm_set1_epi8(0)); |
3402 | } |
3403 | |
3404 | #[simd_test(enable = "sse2" )] |
3405 | unsafe fn test_mm_slli_epi16() { |
3406 | let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); |
3407 | let r = _mm_slli_epi16::<4>(a); |
3408 | assert_eq_m128i( |
3409 | r, |
3410 | _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0), |
3411 | ); |
3412 | let r = _mm_slli_epi16::<16>(a); |
3413 | assert_eq_m128i(r, _mm_set1_epi16(0)); |
3414 | } |
3415 | |
3416 | #[simd_test(enable = "sse2" )] |
3417 | unsafe fn test_mm_sll_epi16() { |
3418 | let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); |
3419 | let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4)); |
3420 | assert_eq_m128i( |
3421 | r, |
3422 | _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0), |
3423 | ); |
3424 | let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0)); |
3425 | assert_eq_m128i(r, a); |
3426 | let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16)); |
3427 | assert_eq_m128i(r, _mm_set1_epi16(0)); |
3428 | let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX)); |
3429 | assert_eq_m128i(r, _mm_set1_epi16(0)); |
3430 | } |
3431 | |
3432 | #[simd_test(enable = "sse2" )] |
3433 | unsafe fn test_mm_slli_epi32() { |
3434 | let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); |
3435 | let r = _mm_slli_epi32::<4>(a); |
3436 | assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0)); |
3437 | let r = _mm_slli_epi32::<32>(a); |
3438 | assert_eq_m128i(r, _mm_set1_epi32(0)); |
3439 | } |
3440 | |
3441 | #[simd_test(enable = "sse2" )] |
3442 | unsafe fn test_mm_sll_epi32() { |
3443 | let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); |
3444 | let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4)); |
3445 | assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0)); |
3446 | let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0)); |
3447 | assert_eq_m128i(r, a); |
3448 | let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32)); |
3449 | assert_eq_m128i(r, _mm_set1_epi32(0)); |
3450 | let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX)); |
3451 | assert_eq_m128i(r, _mm_set1_epi32(0)); |
3452 | } |
3453 | |
3454 | #[simd_test(enable = "sse2" )] |
3455 | unsafe fn test_mm_slli_epi64() { |
3456 | let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); |
3457 | let r = _mm_slli_epi64::<4>(a); |
3458 | assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0)); |
3459 | let r = _mm_slli_epi64::<64>(a); |
3460 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3461 | } |
3462 | |
3463 | #[simd_test(enable = "sse2" )] |
3464 | unsafe fn test_mm_sll_epi64() { |
3465 | let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); |
3466 | let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4)); |
3467 | assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0)); |
3468 | let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0)); |
3469 | assert_eq_m128i(r, a); |
3470 | let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64)); |
3471 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3472 | let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX)); |
3473 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3474 | } |
3475 | |
3476 | #[simd_test(enable = "sse2" )] |
3477 | unsafe fn test_mm_srai_epi16() { |
3478 | let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); |
3479 | let r = _mm_srai_epi16::<4>(a); |
3480 | assert_eq_m128i( |
3481 | r, |
3482 | _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10), |
3483 | ); |
3484 | let r = _mm_srai_epi16::<16>(a); |
3485 | assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1)); |
3486 | } |
3487 | |
3488 | #[simd_test(enable = "sse2" )] |
3489 | unsafe fn test_mm_sra_epi16() { |
3490 | let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); |
3491 | let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4)); |
3492 | assert_eq_m128i( |
3493 | r, |
3494 | _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10), |
3495 | ); |
3496 | let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0)); |
3497 | assert_eq_m128i(r, a); |
3498 | let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16)); |
3499 | assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1)); |
3500 | let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX)); |
3501 | assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1)); |
3502 | } |
3503 | |
3504 | #[simd_test(enable = "sse2" )] |
3505 | unsafe fn test_mm_srai_epi32() { |
3506 | let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); |
3507 | let r = _mm_srai_epi32::<4>(a); |
3508 | assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000)); |
3509 | let r = _mm_srai_epi32::<32>(a); |
3510 | assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1)); |
3511 | } |
3512 | |
3513 | #[simd_test(enable = "sse2" )] |
3514 | unsafe fn test_mm_sra_epi32() { |
3515 | let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); |
3516 | let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4)); |
3517 | assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000)); |
3518 | let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0)); |
3519 | assert_eq_m128i(r, a); |
3520 | let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32)); |
3521 | assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1)); |
3522 | let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX)); |
3523 | assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1)); |
3524 | } |
3525 | |
3526 | #[simd_test(enable = "sse2" )] |
3527 | unsafe fn test_mm_srli_si128() { |
3528 | #[rustfmt::skip] |
3529 | let a = _mm_setr_epi8( |
3530 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
3531 | ); |
3532 | let r = _mm_srli_si128::<1>(a); |
3533 | #[rustfmt::skip] |
3534 | let e = _mm_setr_epi8( |
3535 | 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, |
3536 | ); |
3537 | assert_eq_m128i(r, e); |
3538 | |
3539 | #[rustfmt::skip] |
3540 | let a = _mm_setr_epi8( |
3541 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
3542 | ); |
3543 | let r = _mm_srli_si128::<15>(a); |
3544 | let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
3545 | assert_eq_m128i(r, e); |
3546 | |
3547 | #[rustfmt::skip] |
3548 | let a = _mm_setr_epi8( |
3549 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
3550 | ); |
3551 | let r = _mm_srli_si128::<16>(a); |
3552 | assert_eq_m128i(r, _mm_set1_epi8(0)); |
3553 | } |
3554 | |
3555 | #[simd_test(enable = "sse2" )] |
3556 | unsafe fn test_mm_srli_epi16() { |
3557 | let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); |
3558 | let r = _mm_srli_epi16::<4>(a); |
3559 | assert_eq_m128i( |
3560 | r, |
3561 | _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0), |
3562 | ); |
3563 | let r = _mm_srli_epi16::<16>(a); |
3564 | assert_eq_m128i(r, _mm_set1_epi16(0)); |
3565 | } |
3566 | |
3567 | #[simd_test(enable = "sse2" )] |
3568 | unsafe fn test_mm_srl_epi16() { |
3569 | let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); |
3570 | let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4)); |
3571 | assert_eq_m128i( |
3572 | r, |
3573 | _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0), |
3574 | ); |
3575 | let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0)); |
3576 | assert_eq_m128i(r, a); |
3577 | let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16)); |
3578 | assert_eq_m128i(r, _mm_set1_epi16(0)); |
3579 | let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX)); |
3580 | assert_eq_m128i(r, _mm_set1_epi16(0)); |
3581 | } |
3582 | |
3583 | #[simd_test(enable = "sse2" )] |
3584 | unsafe fn test_mm_srli_epi32() { |
3585 | let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); |
3586 | let r = _mm_srli_epi32::<4>(a); |
3587 | assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000)); |
3588 | let r = _mm_srli_epi32::<32>(a); |
3589 | assert_eq_m128i(r, _mm_set1_epi32(0)); |
3590 | } |
3591 | |
3592 | #[simd_test(enable = "sse2" )] |
3593 | unsafe fn test_mm_srl_epi32() { |
3594 | let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); |
3595 | let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4)); |
3596 | assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000)); |
3597 | let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0)); |
3598 | assert_eq_m128i(r, a); |
3599 | let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32)); |
3600 | assert_eq_m128i(r, _mm_set1_epi32(0)); |
3601 | let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX)); |
3602 | assert_eq_m128i(r, _mm_set1_epi32(0)); |
3603 | } |
3604 | |
3605 | #[simd_test(enable = "sse2" )] |
3606 | unsafe fn test_mm_srli_epi64() { |
3607 | let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); |
3608 | let r = _mm_srli_epi64::<4>(a); |
3609 | assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000)); |
3610 | let r = _mm_srli_epi64::<64>(a); |
3611 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3612 | } |
3613 | |
3614 | #[simd_test(enable = "sse2" )] |
3615 | unsafe fn test_mm_srl_epi64() { |
3616 | let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); |
3617 | let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4)); |
3618 | assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000)); |
3619 | let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0)); |
3620 | assert_eq_m128i(r, a); |
3621 | let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64)); |
3622 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3623 | let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX)); |
3624 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3625 | } |
3626 | |
3627 | #[simd_test(enable = "sse2" )] |
3628 | unsafe fn test_mm_and_si128() { |
3629 | let a = _mm_set1_epi8(5); |
3630 | let b = _mm_set1_epi8(3); |
3631 | let r = _mm_and_si128(a, b); |
3632 | assert_eq_m128i(r, _mm_set1_epi8(1)); |
3633 | } |
3634 | |
3635 | #[simd_test(enable = "sse2" )] |
3636 | unsafe fn test_mm_andnot_si128() { |
3637 | let a = _mm_set1_epi8(5); |
3638 | let b = _mm_set1_epi8(3); |
3639 | let r = _mm_andnot_si128(a, b); |
3640 | assert_eq_m128i(r, _mm_set1_epi8(2)); |
3641 | } |
3642 | |
3643 | #[simd_test(enable = "sse2" )] |
3644 | unsafe fn test_mm_or_si128() { |
3645 | let a = _mm_set1_epi8(5); |
3646 | let b = _mm_set1_epi8(3); |
3647 | let r = _mm_or_si128(a, b); |
3648 | assert_eq_m128i(r, _mm_set1_epi8(7)); |
3649 | } |
3650 | |
3651 | #[simd_test(enable = "sse2" )] |
3652 | unsafe fn test_mm_xor_si128() { |
3653 | let a = _mm_set1_epi8(5); |
3654 | let b = _mm_set1_epi8(3); |
3655 | let r = _mm_xor_si128(a, b); |
3656 | assert_eq_m128i(r, _mm_set1_epi8(6)); |
3657 | } |
3658 | |
3659 | #[simd_test(enable = "sse2" )] |
3660 | unsafe fn test_mm_cmpeq_epi8() { |
3661 | let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3662 | let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
3663 | let r = _mm_cmpeq_epi8(a, b); |
3664 | #[rustfmt::skip] |
3665 | assert_eq_m128i( |
3666 | r, |
3667 | _mm_setr_epi8( |
3668 | 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
3669 | ) |
3670 | ); |
3671 | } |
3672 | |
3673 | #[simd_test(enable = "sse2" )] |
3674 | unsafe fn test_mm_cmpeq_epi16() { |
3675 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
3676 | let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0); |
3677 | let r = _mm_cmpeq_epi16(a, b); |
3678 | assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0)); |
3679 | } |
3680 | |
3681 | #[simd_test(enable = "sse2" )] |
3682 | unsafe fn test_mm_cmpeq_epi32() { |
3683 | let a = _mm_setr_epi32(0, 1, 2, 3); |
3684 | let b = _mm_setr_epi32(3, 2, 2, 0); |
3685 | let r = _mm_cmpeq_epi32(a, b); |
3686 | assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0)); |
3687 | } |
3688 | |
3689 | #[simd_test(enable = "sse2" )] |
3690 | unsafe fn test_mm_cmpgt_epi8() { |
3691 | let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
3692 | let b = _mm_set1_epi8(0); |
3693 | let r = _mm_cmpgt_epi8(a, b); |
3694 | let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
3695 | assert_eq_m128i(r, e); |
3696 | } |
3697 | |
3698 | #[simd_test(enable = "sse2" )] |
3699 | unsafe fn test_mm_cmpgt_epi16() { |
3700 | let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0); |
3701 | let b = _mm_set1_epi16(0); |
3702 | let r = _mm_cmpgt_epi16(a, b); |
3703 | let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0); |
3704 | assert_eq_m128i(r, e); |
3705 | } |
3706 | |
3707 | #[simd_test(enable = "sse2" )] |
3708 | unsafe fn test_mm_cmpgt_epi32() { |
3709 | let a = _mm_set_epi32(5, 0, 0, 0); |
3710 | let b = _mm_set1_epi32(0); |
3711 | let r = _mm_cmpgt_epi32(a, b); |
3712 | assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0)); |
3713 | } |
3714 | |
3715 | #[simd_test(enable = "sse2" )] |
3716 | unsafe fn test_mm_cmplt_epi8() { |
3717 | let a = _mm_set1_epi8(0); |
3718 | let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
3719 | let r = _mm_cmplt_epi8(a, b); |
3720 | let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
3721 | assert_eq_m128i(r, e); |
3722 | } |
3723 | |
3724 | #[simd_test(enable = "sse2" )] |
3725 | unsafe fn test_mm_cmplt_epi16() { |
3726 | let a = _mm_set1_epi16(0); |
3727 | let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0); |
3728 | let r = _mm_cmplt_epi16(a, b); |
3729 | let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0); |
3730 | assert_eq_m128i(r, e); |
3731 | } |
3732 | |
3733 | #[simd_test(enable = "sse2" )] |
3734 | unsafe fn test_mm_cmplt_epi32() { |
3735 | let a = _mm_set1_epi32(0); |
3736 | let b = _mm_set_epi32(5, 0, 0, 0); |
3737 | let r = _mm_cmplt_epi32(a, b); |
3738 | assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0)); |
3739 | } |
3740 | |
3741 | #[simd_test(enable = "sse2" )] |
3742 | unsafe fn test_mm_cvtepi32_pd() { |
3743 | let a = _mm_set_epi32(35, 25, 15, 5); |
3744 | let r = _mm_cvtepi32_pd(a); |
3745 | assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0)); |
3746 | } |
3747 | |
3748 | #[simd_test(enable = "sse2" )] |
3749 | unsafe fn test_mm_cvtsi32_sd() { |
3750 | let a = _mm_set1_pd(3.5); |
3751 | let r = _mm_cvtsi32_sd(a, 5); |
3752 | assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5)); |
3753 | } |
3754 | |
3755 | #[simd_test(enable = "sse2" )] |
3756 | unsafe fn test_mm_cvtepi32_ps() { |
3757 | let a = _mm_setr_epi32(1, 2, 3, 4); |
3758 | let r = _mm_cvtepi32_ps(a); |
3759 | assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0)); |
3760 | } |
3761 | |
3762 | #[simd_test(enable = "sse2" )] |
3763 | unsafe fn test_mm_cvtps_epi32() { |
3764 | let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
3765 | let r = _mm_cvtps_epi32(a); |
3766 | assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4)); |
3767 | } |
3768 | |
3769 | #[simd_test(enable = "sse2" )] |
3770 | unsafe fn test_mm_cvtsi32_si128() { |
3771 | let r = _mm_cvtsi32_si128(5); |
3772 | assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0)); |
3773 | } |
3774 | |
3775 | #[simd_test(enable = "sse2" )] |
3776 | unsafe fn test_mm_cvtsi128_si32() { |
3777 | let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0)); |
3778 | assert_eq!(r, 5); |
3779 | } |
3780 | |
3781 | #[simd_test(enable = "sse2" )] |
3782 | unsafe fn test_mm_set_epi64x() { |
3783 | let r = _mm_set_epi64x(0, 1); |
3784 | assert_eq_m128i(r, _mm_setr_epi64x(1, 0)); |
3785 | } |
3786 | |
3787 | #[simd_test(enable = "sse2" )] |
3788 | unsafe fn test_mm_set_epi32() { |
3789 | let r = _mm_set_epi32(0, 1, 2, 3); |
3790 | assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0)); |
3791 | } |
3792 | |
3793 | #[simd_test(enable = "sse2" )] |
3794 | unsafe fn test_mm_set_epi16() { |
3795 | let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
3796 | assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0)); |
3797 | } |
3798 | |
3799 | #[simd_test(enable = "sse2" )] |
3800 | unsafe fn test_mm_set_epi8() { |
3801 | #[rustfmt::skip] |
3802 | let r = _mm_set_epi8( |
3803 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
3804 | ); |
3805 | #[rustfmt::skip] |
3806 | let e = _mm_setr_epi8( |
3807 | 15, 14, 13, 12, 11, 10, 9, 8, |
3808 | 7, 6, 5, 4, 3, 2, 1, 0, |
3809 | ); |
3810 | assert_eq_m128i(r, e); |
3811 | } |
3812 | |
3813 | #[simd_test(enable = "sse2" )] |
3814 | unsafe fn test_mm_set1_epi64x() { |
3815 | let r = _mm_set1_epi64x(1); |
// Cross-check against a different constructor so the assertion is not
// tautological.
assert_eq_m128i(r, _mm_set_epi64x(1, 1));
3817 | } |
3818 | |
3819 | #[simd_test(enable = "sse2" )] |
3820 | unsafe fn test_mm_set1_epi32() { |
3821 | let r = _mm_set1_epi32(1); |
assert_eq_m128i(r, _mm_set_epi32(1, 1, 1, 1));
3823 | } |
3824 | |
3825 | #[simd_test(enable = "sse2" )] |
3826 | unsafe fn test_mm_set1_epi16() { |
3827 | let r = _mm_set1_epi16(1); |
assert_eq_m128i(r, _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1));
3829 | } |
3830 | |
3831 | #[simd_test(enable = "sse2" )] |
3832 | unsafe fn test_mm_set1_epi8() { |
3833 | let r = _mm_set1_epi8(1); |
assert_eq_m128i(r, _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1));
3835 | } |
3836 | |
3837 | #[simd_test(enable = "sse2" )] |
3838 | unsafe fn test_mm_setr_epi32() { |
3839 | let r = _mm_setr_epi32(0, 1, 2, 3); |
assert_eq_m128i(r, _mm_set_epi32(3, 2, 1, 0));
3841 | } |
3842 | |
3843 | #[simd_test(enable = "sse2" )] |
3844 | unsafe fn test_mm_setr_epi16() { |
3845 | let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
assert_eq_m128i(r, _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0));
3847 | } |
3848 | |
3849 | #[simd_test(enable = "sse2" )] |
3850 | unsafe fn test_mm_setr_epi8() { |
3851 | #[rustfmt::skip] |
3852 | let r = _mm_setr_epi8( |
3853 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
3854 | ); |
3855 | #[rustfmt::skip] |
let e = _mm_set_epi8(
15, 14, 13, 12, 11, 10, 9, 8,
7, 6, 5, 4, 3, 2, 1, 0,
);
3860 | assert_eq_m128i(r, e); |
3861 | } |
3862 | |
3863 | #[simd_test(enable = "sse2" )] |
3864 | unsafe fn test_mm_setzero_si128() { |
3865 | let r = _mm_setzero_si128(); |
3866 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3867 | } |
3868 | |
3869 | #[simd_test(enable = "sse2" )] |
3870 | unsafe fn test_mm_loadl_epi64() { |
3871 | let a = _mm_setr_epi64x(6, 5); |
3872 | let r = _mm_loadl_epi64(ptr::addr_of!(a)); |
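// Reads only 64 bits from memory and zeroes the upper half of the result.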
3873 | assert_eq_m128i(r, _mm_setr_epi64x(6, 0)); |
3874 | } |
3875 | |
3876 | #[simd_test(enable = "sse2" )] |
3877 | unsafe fn test_mm_load_si128() { |
3878 | let a = _mm_set_epi64x(5, 6); |
3879 | let r = _mm_load_si128(ptr::addr_of!(a) as *const _); |
3880 | assert_eq_m128i(a, r); |
3881 | } |
3882 | |
3883 | #[simd_test(enable = "sse2" )] |
3884 | unsafe fn test_mm_loadu_si128() { |
3885 | let a = _mm_set_epi64x(5, 6); |
3886 | let r = _mm_loadu_si128(ptr::addr_of!(a) as *const _); |
3887 | assert_eq_m128i(a, r); |
3888 | } |
3889 | |
3890 | #[simd_test(enable = "sse2" )] |
3891 | // Miri cannot support this until it is clear how it fits in the Rust memory model |
3892 | // (non-temporal store) |
3893 | #[cfg_attr (miri, ignore)] |
3894 | unsafe fn test_mm_maskmoveu_si128() { |
3895 | let a = _mm_set1_epi8(9); |
3896 | #[rustfmt::skip] |
3897 | let mask = _mm_set_epi8( |
3898 | 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0, |
3899 | 0, 0, 0, 0, 0, 0, 0, 0, |
3900 | ); |
3901 | let mut r = _mm_set1_epi8(0); |
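// Only bytes whose mask byte has its most significant bit set are written;
// all other destination bytes are left untouched.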
3902 | _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8); |
3903 | let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
3904 | assert_eq_m128i(r, e); |
3905 | } |
3906 | |
3907 | #[simd_test(enable = "sse2" )] |
3908 | unsafe fn test_mm_store_si128() { |
3909 | let a = _mm_set1_epi8(9); |
3910 | let mut r = _mm_set1_epi8(0); |
3911 | _mm_store_si128(&mut r, a); |
3912 | assert_eq_m128i(r, a); |
3913 | } |
3914 | |
3915 | #[simd_test(enable = "sse2" )] |
3916 | unsafe fn test_mm_storeu_si128() { |
3917 | let a = _mm_set1_epi8(9); |
3918 | let mut r = _mm_set1_epi8(0); |
3919 | _mm_storeu_si128(&mut r, a); |
3920 | assert_eq_m128i(r, a); |
3921 | } |
3922 | |
3923 | #[simd_test(enable = "sse2" )] |
3924 | unsafe fn test_mm_storel_epi64() { |
3925 | let a = _mm_setr_epi64x(2, 9); |
3926 | let mut r = _mm_set1_epi8(0); |
3927 | _mm_storel_epi64(&mut r, a); |
3928 | assert_eq_m128i(r, _mm_setr_epi64x(2, 0)); |
3929 | } |
3930 | |
3931 | #[simd_test(enable = "sse2" )] |
3932 | // Miri cannot support this until it is clear how it fits in the Rust memory model |
3933 | // (non-temporal store) |
3934 | #[cfg_attr (miri, ignore)] |
3935 | unsafe fn test_mm_stream_si128() { |
3936 | let a = _mm_setr_epi32(1, 2, 3, 4); |
3937 | let mut r = _mm_undefined_si128(); |
3938 | _mm_stream_si128(ptr::addr_of_mut!(r), a); |
3939 | assert_eq_m128i(r, a); |
3940 | } |
3941 | |
3942 | #[simd_test(enable = "sse2" )] |
3943 | // Miri cannot support this until it is clear how it fits in the Rust memory model |
3944 | // (non-temporal store) |
3945 | #[cfg_attr (miri, ignore)] |
3946 | unsafe fn test_mm_stream_si32() { |
3947 | let a: i32 = 7; |
3948 | let mut mem = boxed::Box::<i32>::new(-1); |
3949 | _mm_stream_si32(ptr::addr_of_mut!(*mem), a); |
3950 | assert_eq!(a, *mem); |
3951 | } |
3952 | |
3953 | #[simd_test(enable = "sse2" )] |
3954 | unsafe fn test_mm_move_epi64() { |
3955 | let a = _mm_setr_epi64x(5, 6); |
3956 | let r = _mm_move_epi64(a); |
3957 | assert_eq_m128i(r, _mm_setr_epi64x(5, 0)); |
3958 | } |
3959 | |
3960 | #[simd_test(enable = "sse2" )] |
3961 | unsafe fn test_mm_packs_epi16() { |
3962 | let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0); |
3963 | let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80); |
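// Values outside the i8 range saturate: 0x80 (128) becomes 0x7F and
// -0x81 (-129) becomes -0x80.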
3964 | let r = _mm_packs_epi16(a, b); |
3965 | #[rustfmt::skip] |
3966 | assert_eq_m128i( |
3967 | r, |
3968 | _mm_setr_epi8( |
3969 | 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F |
3970 | ) |
3971 | ); |
3972 | } |
3973 | |
3974 | #[simd_test(enable = "sse2" )] |
3975 | unsafe fn test_mm_packs_epi32() { |
3976 | let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0); |
3977 | let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000); |
3978 | let r = _mm_packs_epi32(a, b); |
3979 | assert_eq_m128i( |
3980 | r, |
3981 | _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF), |
3982 | ); |
3983 | } |
3984 | |
3985 | #[simd_test(enable = "sse2" )] |
3986 | unsafe fn test_mm_packus_epi16() { |
3987 | let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0); |
3988 | let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100); |
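// Unsigned saturation to u8: 0x100 (256) clamps to 0xFF (written `!0`) and
// -1 clamps to 0.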
3989 | let r = _mm_packus_epi16(a, b); |
3990 | assert_eq_m128i( |
3991 | r, |
3992 | _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0), |
3993 | ); |
3994 | } |
3995 | |
3996 | #[simd_test(enable = "sse2" )] |
3997 | unsafe fn test_mm_extract_epi16() { |
3998 | let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7); |
3999 | let r1 = _mm_extract_epi16::<0>(a); |
4000 | let r2 = _mm_extract_epi16::<3>(a); |
4001 | assert_eq!(r1, 0xFFFF); |
4002 | assert_eq!(r2, 3); |
4003 | } |
4004 | |
4005 | #[simd_test(enable = "sse2" )] |
4006 | unsafe fn test_mm_insert_epi16() { |
4007 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
4008 | let r = _mm_insert_epi16::<0>(a, 9); |
4009 | let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7); |
4010 | assert_eq_m128i(r, e); |
4011 | } |
4012 | |
4013 | #[simd_test(enable = "sse2" )] |
4014 | unsafe fn test_mm_movemask_epi8() { |
4015 | #[rustfmt::skip] |
4016 | let a = _mm_setr_epi8( |
4017 | 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01, |
4018 | 0b0101, 0b1111_0000u8 as i8, 0, 0, |
4019 | 0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101, |
4020 | 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, |
4021 | ); |
4022 | let r = _mm_movemask_epi8(a); |
4023 | assert_eq!(r, 0b10100110_00100101); |
4024 | } |
4025 | |
4026 | #[simd_test(enable = "sse2" )] |
4027 | unsafe fn test_mm_shuffle_epi32() { |
4028 | let a = _mm_setr_epi32(5, 10, 15, 20); |
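// Each two-bit field of the immediate selects a source lane for the
// corresponding output lane; the low field is 0b11, so output lane 0
// takes lane 3 of `a` (20).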
4029 | let r = _mm_shuffle_epi32::<0b00_01_01_11>(a); |
4030 | let e = _mm_setr_epi32(20, 10, 10, 5); |
4031 | assert_eq_m128i(r, e); |
4032 | } |
4033 | |
4034 | #[simd_test(enable = "sse2" )] |
4035 | unsafe fn test_mm_shufflehi_epi16() { |
4036 | let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20); |
4037 | let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a); |
4038 | let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5); |
4039 | assert_eq_m128i(r, e); |
4040 | } |
4041 | |
4042 | #[simd_test(enable = "sse2" )] |
4043 | unsafe fn test_mm_shufflelo_epi16() { |
4044 | let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4); |
4045 | let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a); |
4046 | let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4); |
4047 | assert_eq_m128i(r, e); |
4048 | } |
4049 | |
4050 | #[simd_test(enable = "sse2" )] |
4051 | unsafe fn test_mm_unpackhi_epi8() { |
4052 | #[rustfmt::skip] |
4053 | let a = _mm_setr_epi8( |
4054 | 0, 1, 2, 3, 4, 5, 6, 7, |
4055 | 8, 9, 10, 11, 12, 13, 14, 15, |
4056 | ); |
4057 | #[rustfmt::skip] |
4058 | let b = _mm_setr_epi8( |
4059 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
4060 | ); |
4061 | let r = _mm_unpackhi_epi8(a, b); |
4062 | #[rustfmt::skip] |
4063 | let e = _mm_setr_epi8( |
4064 | 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, |
4065 | ); |
4066 | assert_eq_m128i(r, e); |
4067 | } |
4068 | |
4069 | #[simd_test(enable = "sse2" )] |
4070 | unsafe fn test_mm_unpackhi_epi16() { |
4071 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
4072 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
4073 | let r = _mm_unpackhi_epi16(a, b); |
4074 | let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15); |
4075 | assert_eq_m128i(r, e); |
4076 | } |
4077 | |
4078 | #[simd_test(enable = "sse2" )] |
4079 | unsafe fn test_mm_unpackhi_epi32() { |
4080 | let a = _mm_setr_epi32(0, 1, 2, 3); |
4081 | let b = _mm_setr_epi32(4, 5, 6, 7); |
4082 | let r = _mm_unpackhi_epi32(a, b); |
4083 | let e = _mm_setr_epi32(2, 6, 3, 7); |
4084 | assert_eq_m128i(r, e); |
4085 | } |
4086 | |
4087 | #[simd_test(enable = "sse2" )] |
4088 | unsafe fn test_mm_unpackhi_epi64() { |
4089 | let a = _mm_setr_epi64x(0, 1); |
4090 | let b = _mm_setr_epi64x(2, 3); |
4091 | let r = _mm_unpackhi_epi64(a, b); |
4092 | let e = _mm_setr_epi64x(1, 3); |
4093 | assert_eq_m128i(r, e); |
4094 | } |
4095 | |
4096 | #[simd_test(enable = "sse2" )] |
4097 | unsafe fn test_mm_unpacklo_epi8() { |
4098 | #[rustfmt::skip] |
4099 | let a = _mm_setr_epi8( |
4100 | 0, 1, 2, 3, 4, 5, 6, 7, |
4101 | 8, 9, 10, 11, 12, 13, 14, 15, |
4102 | ); |
4103 | #[rustfmt::skip] |
4104 | let b = _mm_setr_epi8( |
4105 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
4106 | ); |
4107 | let r = _mm_unpacklo_epi8(a, b); |
4108 | #[rustfmt::skip] |
4109 | let e = _mm_setr_epi8( |
4110 | 0, 16, 1, 17, 2, 18, 3, 19, |
4111 | 4, 20, 5, 21, 6, 22, 7, 23, |
4112 | ); |
4113 | assert_eq_m128i(r, e); |
4114 | } |
4115 | |
4116 | #[simd_test(enable = "sse2" )] |
4117 | unsafe fn test_mm_unpacklo_epi16() { |
4118 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
4119 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
4120 | let r = _mm_unpacklo_epi16(a, b); |
4121 | let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11); |
4122 | assert_eq_m128i(r, e); |
4123 | } |
4124 | |
4125 | #[simd_test(enable = "sse2" )] |
4126 | unsafe fn test_mm_unpacklo_epi32() { |
4127 | let a = _mm_setr_epi32(0, 1, 2, 3); |
4128 | let b = _mm_setr_epi32(4, 5, 6, 7); |
4129 | let r = _mm_unpacklo_epi32(a, b); |
4130 | let e = _mm_setr_epi32(0, 4, 1, 5); |
4131 | assert_eq_m128i(r, e); |
4132 | } |
4133 | |
4134 | #[simd_test(enable = "sse2" )] |
4135 | unsafe fn test_mm_unpacklo_epi64() { |
4136 | let a = _mm_setr_epi64x(0, 1); |
4137 | let b = _mm_setr_epi64x(2, 3); |
4138 | let r = _mm_unpacklo_epi64(a, b); |
4139 | let e = _mm_setr_epi64x(0, 2); |
4140 | assert_eq_m128i(r, e); |
4141 | } |
4142 | |
4143 | #[simd_test(enable = "sse2" )] |
4144 | unsafe fn test_mm_add_sd() { |
4145 | let a = _mm_setr_pd(1.0, 2.0); |
4146 | let b = _mm_setr_pd(5.0, 10.0); |
4147 | let r = _mm_add_sd(a, b); |
4148 | assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0)); |
4149 | } |
4150 | |
4151 | #[simd_test(enable = "sse2" )] |
4152 | unsafe fn test_mm_add_pd() { |
4153 | let a = _mm_setr_pd(1.0, 2.0); |
4154 | let b = _mm_setr_pd(5.0, 10.0); |
4155 | let r = _mm_add_pd(a, b); |
4156 | assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0)); |
4157 | } |
4158 | |
4159 | #[simd_test(enable = "sse2" )] |
4160 | unsafe fn test_mm_div_sd() { |
4161 | let a = _mm_setr_pd(1.0, 2.0); |
4162 | let b = _mm_setr_pd(5.0, 10.0); |
4163 | let r = _mm_div_sd(a, b); |
4164 | assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0)); |
4165 | } |
4166 | |
4167 | #[simd_test(enable = "sse2" )] |
4168 | unsafe fn test_mm_div_pd() { |
4169 | let a = _mm_setr_pd(1.0, 2.0); |
4170 | let b = _mm_setr_pd(5.0, 10.0); |
4171 | let r = _mm_div_pd(a, b); |
4172 | assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2)); |
4173 | } |
4174 | |
4175 | #[simd_test(enable = "sse2" )] |
4176 | unsafe fn test_mm_max_sd() { |
4177 | let a = _mm_setr_pd(1.0, 2.0); |
4178 | let b = _mm_setr_pd(5.0, 10.0); |
4179 | let r = _mm_max_sd(a, b); |
4180 | assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0)); |
4181 | } |
4182 | |
4183 | #[simd_test(enable = "sse2" )] |
4184 | unsafe fn test_mm_max_pd() { |
4185 | let a = _mm_setr_pd(1.0, 2.0); |
4186 | let b = _mm_setr_pd(5.0, 10.0); |
4187 | let r = _mm_max_pd(a, b); |
4188 | assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0)); |
4189 | |
4190 | // Check SSE(2)-specific semantics for -0.0 handling. |
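// When the operands compare equal (as +0.0 and -0.0 do), `maxpd` returns
// the second operand, so max(a, b) and max(b, a) differ below.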
4191 | let a = _mm_setr_pd(-0.0, 0.0); |
4192 | let b = _mm_setr_pd(0.0, 0.0); |
4193 | let r1: [u8; 16] = transmute(_mm_max_pd(a, b)); |
4194 | let r2: [u8; 16] = transmute(_mm_max_pd(b, a)); |
4195 | let a: [u8; 16] = transmute(a); |
4196 | let b: [u8; 16] = transmute(b); |
4197 | assert_eq!(r1, b); |
4198 | assert_eq!(r2, a); |
4199 | assert_ne!(a, b); // sanity check that -0.0 is actually present |
4200 | } |
4201 | |
4202 | #[simd_test(enable = "sse2" )] |
4203 | unsafe fn test_mm_min_sd() { |
4204 | let a = _mm_setr_pd(1.0, 2.0); |
4205 | let b = _mm_setr_pd(5.0, 10.0); |
4206 | let r = _mm_min_sd(a, b); |
4207 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); |
4208 | } |
4209 | |
4210 | #[simd_test(enable = "sse2" )] |
4211 | unsafe fn test_mm_min_pd() { |
4212 | let a = _mm_setr_pd(1.0, 2.0); |
4213 | let b = _mm_setr_pd(5.0, 10.0); |
4214 | let r = _mm_min_pd(a, b); |
4215 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); |
4216 | |
4217 | // Check SSE(2)-specific semantics for -0.0 handling. |
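// `minpd` likewise returns the second operand when the operands compare
// equal, so +0.0 and -0.0 remain distinguishable below.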
4218 | let a = _mm_setr_pd(-0.0, 0.0); |
4219 | let b = _mm_setr_pd(0.0, 0.0); |
4220 | let r1: [u8; 16] = transmute(_mm_min_pd(a, b)); |
4221 | let r2: [u8; 16] = transmute(_mm_min_pd(b, a)); |
4222 | let a: [u8; 16] = transmute(a); |
4223 | let b: [u8; 16] = transmute(b); |
4224 | assert_eq!(r1, b); |
4225 | assert_eq!(r2, a); |
4226 | assert_ne!(a, b); // sanity check that -0.0 is actually present |
4227 | } |
4228 | |
4229 | #[simd_test(enable = "sse2" )] |
4230 | unsafe fn test_mm_mul_sd() { |
4231 | let a = _mm_setr_pd(1.0, 2.0); |
4232 | let b = _mm_setr_pd(5.0, 10.0); |
4233 | let r = _mm_mul_sd(a, b); |
4234 | assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0)); |
4235 | } |
4236 | |
4237 | #[simd_test(enable = "sse2" )] |
4238 | unsafe fn test_mm_mul_pd() { |
4239 | let a = _mm_setr_pd(1.0, 2.0); |
4240 | let b = _mm_setr_pd(5.0, 10.0); |
4241 | let r = _mm_mul_pd(a, b); |
4242 | assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0)); |
4243 | } |
4244 | |
4245 | #[simd_test(enable = "sse2" )] |
4246 | unsafe fn test_mm_sqrt_sd() { |
4247 | let a = _mm_setr_pd(1.0, 2.0); |
4248 | let b = _mm_setr_pd(5.0, 10.0); |
4249 | let r = _mm_sqrt_sd(a, b); |
4250 | assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0)); |
4251 | } |
4252 | |
4253 | #[simd_test(enable = "sse2" )] |
4254 | unsafe fn test_mm_sqrt_pd() { |
4255 | let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0)); |
4256 | assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt())); |
4257 | } |
4258 | |
4259 | #[simd_test(enable = "sse2" )] |
4260 | unsafe fn test_mm_sub_sd() { |
4261 | let a = _mm_setr_pd(1.0, 2.0); |
4262 | let b = _mm_setr_pd(5.0, 10.0); |
4263 | let r = _mm_sub_sd(a, b); |
4264 | assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0)); |
4265 | } |
4266 | |
4267 | #[simd_test(enable = "sse2" )] |
4268 | unsafe fn test_mm_sub_pd() { |
4269 | let a = _mm_setr_pd(1.0, 2.0); |
4270 | let b = _mm_setr_pd(5.0, 10.0); |
4271 | let r = _mm_sub_pd(a, b); |
4272 | assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0)); |
4273 | } |
4274 | |
4275 | #[simd_test(enable = "sse2" )] |
4276 | unsafe fn test_mm_and_pd() { |
4277 | let a = transmute(u64x2::splat(5)); |
4278 | let b = transmute(u64x2::splat(3)); |
4279 | let r = _mm_and_pd(a, b); |
4280 | let e = transmute(u64x2::splat(1)); |
4281 | assert_eq_m128d(r, e); |
4282 | } |
4283 | |
4284 | #[simd_test(enable = "sse2" )] |
4285 | unsafe fn test_mm_andnot_pd() { |
4286 | let a = transmute(u64x2::splat(5)); |
4287 | let b = transmute(u64x2::splat(3)); |
4288 | let r = _mm_andnot_pd(a, b); |
4289 | let e = transmute(u64x2::splat(2)); |
4290 | assert_eq_m128d(r, e); |
4291 | } |
4292 | |
4293 | #[simd_test(enable = "sse2" )] |
4294 | unsafe fn test_mm_or_pd() { |
4295 | let a = transmute(u64x2::splat(5)); |
4296 | let b = transmute(u64x2::splat(3)); |
4297 | let r = _mm_or_pd(a, b); |
4298 | let e = transmute(u64x2::splat(7)); |
4299 | assert_eq_m128d(r, e); |
4300 | } |
4301 | |
4302 | #[simd_test(enable = "sse2" )] |
4303 | unsafe fn test_mm_xor_pd() { |
4304 | let a = transmute(u64x2::splat(5)); |
4305 | let b = transmute(u64x2::splat(3)); |
4306 | let r = _mm_xor_pd(a, b); |
4307 | let e = transmute(u64x2::splat(6)); |
4308 | assert_eq_m128d(r, e); |
4309 | } |
4310 | |
4311 | #[simd_test(enable = "sse2" )] |
4312 | unsafe fn test_mm_cmpeq_sd() { |
4313 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
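// Scalar compares fill the low lane with all ones (match) or all zeros
// (mismatch) and copy the upper lane of `a` through unchanged.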
4314 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4315 | let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b)); |
4316 | assert_eq_m128i(r, e); |
4317 | } |
4318 | |
4319 | #[simd_test(enable = "sse2" )] |
4320 | unsafe fn test_mm_cmplt_sd() { |
4321 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
4322 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4323 | let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b)); |
4324 | assert_eq_m128i(r, e); |
4325 | } |
4326 | |
4327 | #[simd_test(enable = "sse2" )] |
4328 | unsafe fn test_mm_cmple_sd() { |
4329 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4330 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4331 | let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b)); |
4332 | assert_eq_m128i(r, e); |
4333 | } |
4334 | |
4335 | #[simd_test(enable = "sse2" )] |
4336 | unsafe fn test_mm_cmpgt_sd() { |
4337 | let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4338 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4339 | let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b)); |
4340 | assert_eq_m128i(r, e); |
4341 | } |
4342 | |
4343 | #[simd_test(enable = "sse2" )] |
4344 | unsafe fn test_mm_cmpge_sd() { |
4345 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4346 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4347 | let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b)); |
4348 | assert_eq_m128i(r, e); |
4349 | } |
4350 | |
4351 | #[simd_test(enable = "sse2" )] |
4352 | unsafe fn test_mm_cmpord_sd() { |
4353 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); |
4354 | let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); |
4355 | let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b)); |
4356 | assert_eq_m128i(r, e); |
4357 | } |
4358 | |
4359 | #[simd_test(enable = "sse2" )] |
4360 | unsafe fn test_mm_cmpunord_sd() { |
4361 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); |
4362 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4363 | let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b)); |
4364 | assert_eq_m128i(r, e); |
4365 | } |
4366 | |
4367 | #[simd_test(enable = "sse2" )] |
4368 | unsafe fn test_mm_cmpneq_sd() { |
4369 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
4370 | let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64); |
4371 | let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b)); |
4372 | assert_eq_m128i(r, e); |
4373 | } |
4374 | |
4375 | #[simd_test(enable = "sse2" )] |
4376 | unsafe fn test_mm_cmpnlt_sd() { |
4377 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
4378 | let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); |
4379 | let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b)); |
4380 | assert_eq_m128i(r, e); |
4381 | } |
4382 | |
4383 | #[simd_test(enable = "sse2" )] |
4384 | unsafe fn test_mm_cmpnle_sd() { |
4385 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4386 | let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); |
4387 | let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b)); |
4388 | assert_eq_m128i(r, e); |
4389 | } |
4390 | |
4391 | #[simd_test(enable = "sse2" )] |
4392 | unsafe fn test_mm_cmpngt_sd() { |
4393 | let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4394 | let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); |
4395 | let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b)); |
4396 | assert_eq_m128i(r, e); |
4397 | } |
4398 | |
4399 | #[simd_test(enable = "sse2" )] |
4400 | unsafe fn test_mm_cmpnge_sd() { |
4401 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4402 | let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64); |
4403 | let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b)); |
4404 | assert_eq_m128i(r, e); |
4405 | } |
4406 | |
4407 | #[simd_test(enable = "sse2" )] |
4408 | unsafe fn test_mm_cmpeq_pd() { |
4409 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4410 | let e = _mm_setr_epi64x(!0, 0); |
4411 | let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b)); |
4412 | assert_eq_m128i(r, e); |
4413 | } |
4414 | |
4415 | #[simd_test(enable = "sse2" )] |
4416 | unsafe fn test_mm_cmplt_pd() { |
4417 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4418 | let e = _mm_setr_epi64x(0, !0); |
4419 | let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b)); |
4420 | assert_eq_m128i(r, e); |
4421 | } |
4422 | |
4423 | #[simd_test(enable = "sse2" )] |
4424 | unsafe fn test_mm_cmple_pd() { |
4425 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4426 | let e = _mm_setr_epi64x(!0, !0); |
4427 | let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b)); |
4428 | assert_eq_m128i(r, e); |
4429 | } |
4430 | |
4431 | #[simd_test(enable = "sse2" )] |
4432 | unsafe fn test_mm_cmpgt_pd() { |
4433 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4434 | let e = _mm_setr_epi64x(0, 0); |
4435 | let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b)); |
4436 | assert_eq_m128i(r, e); |
4437 | } |
4438 | |
4439 | #[simd_test(enable = "sse2" )] |
4440 | unsafe fn test_mm_cmpge_pd() { |
4441 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4442 | let e = _mm_setr_epi64x(!0, 0); |
4443 | let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b)); |
4444 | assert_eq_m128i(r, e); |
4445 | } |
4446 | |
4447 | #[simd_test(enable = "sse2" )] |
4448 | unsafe fn test_mm_cmpord_pd() { |
4449 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); |
4450 | let e = _mm_setr_epi64x(0, !0); |
4451 | let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b)); |
4452 | assert_eq_m128i(r, e); |
4453 | } |
4454 | |
4455 | #[simd_test(enable = "sse2" )] |
4456 | unsafe fn test_mm_cmpunord_pd() { |
4457 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); |
4458 | let e = _mm_setr_epi64x(!0, 0); |
4459 | let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b)); |
4460 | assert_eq_m128i(r, e); |
4461 | } |
4462 | |
4463 | #[simd_test(enable = "sse2" )] |
4464 | unsafe fn test_mm_cmpneq_pd() { |
4465 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
4466 | let e = _mm_setr_epi64x(!0, !0); |
4467 | let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b)); |
4468 | assert_eq_m128i(r, e); |
4469 | } |
4470 | |
4471 | #[simd_test(enable = "sse2" )] |
4472 | unsafe fn test_mm_cmpnlt_pd() { |
4473 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
4474 | let e = _mm_setr_epi64x(0, 0); |
4475 | let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b)); |
4476 | assert_eq_m128i(r, e); |
4477 | } |
4478 | |
4479 | #[simd_test(enable = "sse2" )] |
4480 | unsafe fn test_mm_cmpnle_pd() { |
4481 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4482 | let e = _mm_setr_epi64x(0, 0); |
4483 | let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b)); |
4484 | assert_eq_m128i(r, e); |
4485 | } |
4486 | |
4487 | #[simd_test(enable = "sse2" )] |
4488 | unsafe fn test_mm_cmpngt_pd() { |
4489 | let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4490 | let e = _mm_setr_epi64x(0, !0); |
4491 | let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b)); |
4492 | assert_eq_m128i(r, e); |
4493 | } |
4494 | |
4495 | #[simd_test(enable = "sse2" )] |
4496 | unsafe fn test_mm_cmpnge_pd() { |
4497 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4498 | let e = _mm_setr_epi64x(0, !0); |
4499 | let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b)); |
4500 | assert_eq_m128i(r, e); |
4501 | } |
4502 | |
4503 | #[simd_test(enable = "sse2" )] |
4504 | unsafe fn test_mm_comieq_sd() { |
4505 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4506 | assert!(_mm_comieq_sd(a, b) != 0); |
4507 | |
4508 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0)); |
4509 | assert!(_mm_comieq_sd(a, b) == 0); |
4510 | } |
4511 | |
4512 | #[simd_test(enable = "sse2" )] |
4513 | unsafe fn test_mm_comilt_sd() { |
4514 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4515 | assert!(_mm_comilt_sd(a, b) == 0); |
4516 | } |
4517 | |
4518 | #[simd_test(enable = "sse2" )] |
4519 | unsafe fn test_mm_comile_sd() { |
4520 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4521 | assert!(_mm_comile_sd(a, b) != 0); |
4522 | } |
4523 | |
4524 | #[simd_test(enable = "sse2" )] |
4525 | unsafe fn test_mm_comigt_sd() { |
4526 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4527 | assert!(_mm_comigt_sd(a, b) == 0); |
4528 | } |
4529 | |
4530 | #[simd_test(enable = "sse2" )] |
4531 | unsafe fn test_mm_comige_sd() { |
4532 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4533 | assert!(_mm_comige_sd(a, b) != 0); |
4534 | } |
4535 | |
4536 | #[simd_test(enable = "sse2" )] |
4537 | unsafe fn test_mm_comineq_sd() { |
4538 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4539 | assert!(_mm_comineq_sd(a, b) == 0); |
4540 | } |
4541 | |
4542 | #[simd_test(enable = "sse2" )] |
4543 | unsafe fn test_mm_ucomieq_sd() { |
4544 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4545 | assert!(_mm_ucomieq_sd(a, b) != 0); |
4546 | |
4547 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0)); |
4548 | assert!(_mm_ucomieq_sd(a, b) == 0); |
4549 | } |
4550 | |
4551 | #[simd_test(enable = "sse2" )] |
4552 | unsafe fn test_mm_ucomilt_sd() { |
4553 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4554 | assert!(_mm_ucomilt_sd(a, b) == 0); |
4555 | } |
4556 | |
4557 | #[simd_test(enable = "sse2" )] |
4558 | unsafe fn test_mm_ucomile_sd() { |
4559 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4560 | assert!(_mm_ucomile_sd(a, b) != 0); |
4561 | } |
4562 | |
4563 | #[simd_test(enable = "sse2" )] |
4564 | unsafe fn test_mm_ucomigt_sd() { |
4565 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4566 | assert!(_mm_ucomigt_sd(a, b) == 0); |
4567 | } |
4568 | |
4569 | #[simd_test(enable = "sse2" )] |
4570 | unsafe fn test_mm_ucomige_sd() { |
4571 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4572 | assert!(_mm_ucomige_sd(a, b) != 0); |
4573 | } |
4574 | |
4575 | #[simd_test(enable = "sse2" )] |
4576 | unsafe fn test_mm_ucomineq_sd() { |
4577 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
4578 | assert!(_mm_ucomineq_sd(a, b) == 0); |
4579 | } |
4580 | |
4581 | #[simd_test(enable = "sse2" )] |
4582 | unsafe fn test_mm_movemask_pd() { |
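// Collects the sign bit of each f64 lane: bit 0 comes from the low lane,
// bit 1 from the high lane.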
4583 | let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0)); |
4584 | assert_eq!(r, 0b01); |
4585 | |
4586 | let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0)); |
4587 | assert_eq!(r, 0b11); |
4588 | } |
4589 | |
4590 | #[repr (align(16))] |
4591 | struct Memory { |
4592 | data: [f64; 4], |
4593 | } |
4594 | |
4595 | #[simd_test(enable = "sse2" )] |
4596 | unsafe fn test_mm_load_pd() { |
4597 | let mem = Memory { |
4598 | data: [1.0f64, 2.0, 3.0, 4.0], |
4599 | }; |
4600 | let vals = &mem.data; |
4601 | let d = vals.as_ptr(); |
4602 | |
4603 | let r = _mm_load_pd(d); |
4604 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); |
4605 | } |
4606 | |
4607 | #[simd_test(enable = "sse2" )] |
4608 | unsafe fn test_mm_load_sd() { |
4609 | let a = 1.; |
4610 | let expected = _mm_setr_pd(a, 0.); |
4611 | let r = _mm_load_sd(&a); |
4612 | assert_eq_m128d(r, expected); |
4613 | } |
4614 | |
4615 | #[simd_test(enable = "sse2" )] |
4616 | unsafe fn test_mm_loadh_pd() { |
4617 | let a = _mm_setr_pd(1., 2.); |
4618 | let b = 3.; |
4619 | let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.); |
4620 | let r = _mm_loadh_pd(a, &b); |
4621 | assert_eq_m128d(r, expected); |
4622 | } |
4623 | |
4624 | #[simd_test(enable = "sse2" )] |
4625 | unsafe fn test_mm_loadl_pd() { |
4626 | let a = _mm_setr_pd(1., 2.); |
4627 | let b = 3.; |
4628 | let expected = _mm_setr_pd(3., get_m128d(a, 1)); |
4629 | let r = _mm_loadl_pd(a, &b); |
4630 | assert_eq_m128d(r, expected); |
4631 | } |
4632 | |
4633 | #[simd_test(enable = "sse2" )] |
4634 | // Miri cannot support this until it is clear how it fits in the Rust memory model |
4635 | // (non-temporal store) |
4636 | #[cfg_attr (miri, ignore)] |
4637 | unsafe fn test_mm_stream_pd() { |
4638 | #[repr (align(128))] |
4639 | struct Memory { |
4640 | pub data: [f64; 2], |
4641 | } |
4642 | let a = _mm_set1_pd(7.0); |
4643 | let mut mem = Memory { data: [-1.0; 2] }; |
4644 | |
4645 | _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a); |
4646 | for i in 0..2 { |
4647 | assert_eq!(mem.data[i], get_m128d(a, i)); |
4648 | } |
4649 | } |
4650 | |
4651 | #[simd_test(enable = "sse2" )] |
4652 | unsafe fn test_mm_store_sd() { |
4653 | let mut dest = 0.; |
4654 | let a = _mm_setr_pd(1., 2.); |
4655 | _mm_store_sd(&mut dest, a); |
4656 | assert_eq!(dest, _mm_cvtsd_f64(a)); |
4657 | } |
4658 | |
4659 | #[simd_test(enable = "sse2" )] |
4660 | unsafe fn test_mm_store_pd() { |
4661 | let mut mem = Memory { data: [0.0f64; 4] }; |
4662 | let vals = &mut mem.data; |
4663 | let a = _mm_setr_pd(1.0, 2.0); |
4664 | let d = vals.as_mut_ptr(); |
4665 | |
4666 | _mm_store_pd(d, *black_box(&a)); |
4667 | assert_eq!(vals[0], 1.0); |
4668 | assert_eq!(vals[1], 2.0); |
4669 | } |
4670 | |
4671 | #[simd_test(enable = "sse2" )] |
4672 | unsafe fn test_mm_storeu_pd() { |
4673 | let mut mem = Memory { data: [0.0f64; 4] }; |
4674 | let vals = &mut mem.data; |
4675 | let a = _mm_setr_pd(1.0, 2.0); |
4676 | |
4677 | let mut ofs = 0; |
4678 | let mut p = vals.as_mut_ptr(); |
4679 | |
// Make sure p is **not** aligned to a 16-byte boundary
4681 | if (p as usize) & 0xf == 0 { |
4682 | ofs = 1; |
4683 | p = p.add(1); |
4684 | } |
4685 | |
4686 | _mm_storeu_pd(p, *black_box(&a)); |
4687 | |
4688 | if ofs > 0 { |
4689 | assert_eq!(vals[ofs - 1], 0.0); |
4690 | } |
4691 | assert_eq!(vals[ofs + 0], 1.0); |
4692 | assert_eq!(vals[ofs + 1], 2.0); |
4693 | } |
4694 | |
4695 | #[simd_test(enable = "sse2" )] |
4696 | unsafe fn test_mm_store1_pd() { |
4697 | let mut mem = Memory { data: [0.0f64; 4] }; |
4698 | let vals = &mut mem.data; |
4699 | let a = _mm_setr_pd(1.0, 2.0); |
4700 | let d = vals.as_mut_ptr(); |
4701 | |
4702 | _mm_store1_pd(d, *black_box(&a)); |
4703 | assert_eq!(vals[0], 1.0); |
4704 | assert_eq!(vals[1], 1.0); |
4705 | } |
4706 | |
4707 | #[simd_test(enable = "sse2" )] |
4708 | unsafe fn test_mm_store_pd1() { |
4709 | let mut mem = Memory { data: [0.0f64; 4] }; |
4710 | let vals = &mut mem.data; |
4711 | let a = _mm_setr_pd(1.0, 2.0); |
4712 | let d = vals.as_mut_ptr(); |
4713 | |
4714 | _mm_store_pd1(d, *black_box(&a)); |
4715 | assert_eq!(vals[0], 1.0); |
4716 | assert_eq!(vals[1], 1.0); |
4717 | } |
4718 | |
4719 | #[simd_test(enable = "sse2" )] |
4720 | unsafe fn test_mm_storer_pd() { |
4721 | let mut mem = Memory { data: [0.0f64; 4] }; |
4722 | let vals = &mut mem.data; |
4723 | let a = _mm_setr_pd(1.0, 2.0); |
4724 | let d = vals.as_mut_ptr(); |
4725 | |
4726 | _mm_storer_pd(d, *black_box(&a)); |
4727 | assert_eq!(vals[0], 2.0); |
4728 | assert_eq!(vals[1], 1.0); |
4729 | } |
4730 | |
4731 | #[simd_test(enable = "sse2" )] |
4732 | unsafe fn test_mm_storeh_pd() { |
4733 | let mut dest = 0.; |
4734 | let a = _mm_setr_pd(1., 2.); |
4735 | _mm_storeh_pd(&mut dest, a); |
4736 | assert_eq!(dest, get_m128d(a, 1)); |
4737 | } |
4738 | |
4739 | #[simd_test(enable = "sse2" )] |
4740 | unsafe fn test_mm_storel_pd() { |
4741 | let mut dest = 0.; |
4742 | let a = _mm_setr_pd(1., 2.); |
4743 | _mm_storel_pd(&mut dest, a); |
4744 | assert_eq!(dest, _mm_cvtsd_f64(a)); |
4745 | } |
4746 | |
4747 | #[simd_test(enable = "sse2" )] |
4748 | unsafe fn test_mm_loadr_pd() { |
4749 | let mut mem = Memory { |
4750 | data: [1.0f64, 2.0, 3.0, 4.0], |
4751 | }; |
4752 | let vals = &mut mem.data; |
4753 | let d = vals.as_ptr(); |
4754 | |
4755 | let r = _mm_loadr_pd(d); |
4756 | assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0)); |
4757 | } |
4758 | |
4759 | #[simd_test(enable = "sse2" )] |
4760 | unsafe fn test_mm_loadu_pd() { |
4761 | let mut mem = Memory { |
4762 | data: [1.0f64, 2.0, 3.0, 4.0], |
4763 | }; |
4764 | let vals = &mut mem.data; |
4765 | let mut d = vals.as_ptr(); |
4766 | |
// Make sure d is **not** aligned to a 16-byte boundary
4768 | let mut offset = 0; |
4769 | if (d as usize) & 0xf == 0 { |
4770 | offset = 1; |
4771 | d = d.add(offset); |
4772 | } |
4773 | |
4774 | let r = _mm_loadu_pd(d); |
4775 | let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64)); |
4776 | assert_eq_m128d(r, e); |
4777 | } |
4778 | |
4779 | #[simd_test(enable = "sse2" )] |
4780 | unsafe fn test_mm_cvtpd_ps() { |
4781 | let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0)); |
4782 | assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0)); |
4783 | |
4784 | let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0)); |
4785 | assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0)); |
4786 | |
4787 | let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN)); |
4788 | assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0)); |
4789 | |
4790 | let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64)); |
4791 | assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0)); |
4792 | } |
4793 | |
4794 | #[simd_test(enable = "sse2" )] |
4795 | unsafe fn test_mm_cvtps_pd() { |
4796 | let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0)); |
4797 | assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0)); |
4798 | |
4799 | let r = _mm_cvtps_pd(_mm_setr_ps( |
4800 | f32::MAX, |
4801 | f32::INFINITY, |
4802 | f32::NEG_INFINITY, |
4803 | f32::MIN, |
4804 | )); |
4805 | assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY)); |
4806 | } |
4807 | |
4808 | #[simd_test(enable = "sse2" )] |
4809 | unsafe fn test_mm_cvtpd_epi32() { |
4810 | let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0)); |
4811 | assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0)); |
4812 | |
4813 | let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0)); |
4814 | assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0)); |
4815 | |
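// Out-of-range and NaN inputs produce the "integer indefinite" value,
// i32::MIN (0x8000_0000).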
4816 | let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN)); |
4817 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); |
4818 | |
4819 | let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY)); |
4820 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); |
4821 | |
4822 | let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN)); |
4823 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); |
4824 | } |
4825 | |
4826 | #[simd_test(enable = "sse2" )] |
4827 | unsafe fn test_mm_cvtsd_si32() { |
4828 | let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0)); |
4829 | assert_eq!(r, -2); |
4830 | |
4831 | let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN)); |
4832 | assert_eq!(r, i32::MIN); |
4833 | |
4834 | let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN)); |
4835 | assert_eq!(r, i32::MIN); |
4836 | } |
4837 | |
4838 | #[simd_test(enable = "sse2" )] |
4839 | unsafe fn test_mm_cvtsd_ss() { |
4840 | let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4); |
4841 | let b = _mm_setr_pd(2.0, -5.0); |
4842 | |
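// Converts the low f64 of `b` to f32 and inserts it into the low lane of
// `a`; the three upper f32 lanes of `a` are passed through.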
4843 | let r = _mm_cvtsd_ss(a, b); |
4844 | |
4845 | assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4)); |
4846 | |
4847 | let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY); |
4848 | let b = _mm_setr_pd(f64::INFINITY, -5.0); |
4849 | |
4850 | let r = _mm_cvtsd_ss(a, b); |
4851 | |
4852 | assert_eq_m128( |
4853 | r, |
4854 | _mm_setr_ps( |
4855 | f32::INFINITY, |
4856 | f32::NEG_INFINITY, |
4857 | f32::MAX, |
4858 | f32::NEG_INFINITY, |
4859 | ), |
4860 | ); |
4861 | } |
4862 | |
4863 | #[simd_test(enable = "sse2" )] |
4864 | unsafe fn test_mm_cvtsd_f64() { |
4865 | let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2)); |
4866 | assert_eq!(r, -1.1); |
4867 | } |
4868 | |
4869 | #[simd_test(enable = "sse2" )] |
4870 | unsafe fn test_mm_cvtss_sd() { |
4871 | let a = _mm_setr_pd(-1.1, 2.2); |
4872 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
4873 | |
4874 | let r = _mm_cvtss_sd(a, b); |
4875 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2)); |
4876 | |
4877 | let a = _mm_setr_pd(-1.1, f64::INFINITY); |
4878 | let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0); |
4879 | |
4880 | let r = _mm_cvtss_sd(a, b); |
4881 | assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY)); |
4882 | } |
4883 | |
4884 | #[simd_test(enable = "sse2" )] |
4885 | unsafe fn test_mm_cvttpd_epi32() { |
4886 | let a = _mm_setr_pd(-1.1, 2.2); |
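// The "t" (truncating) conversions round toward zero: -1.1 -> -1, 2.2 -> 2.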
4887 | let r = _mm_cvttpd_epi32(a); |
4888 | assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0)); |
4889 | |
4890 | let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); |
4891 | let r = _mm_cvttpd_epi32(a); |
4892 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); |
4893 | } |
4894 | |
4895 | #[simd_test(enable = "sse2" )] |
4896 | unsafe fn test_mm_cvttsd_si32() { |
4897 | let a = _mm_setr_pd(-1.1, 2.2); |
4898 | let r = _mm_cvttsd_si32(a); |
4899 | assert_eq!(r, -1); |
4900 | |
4901 | let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); |
4902 | let r = _mm_cvttsd_si32(a); |
4903 | assert_eq!(r, i32::MIN); |
4904 | } |
4905 | |
4906 | #[simd_test(enable = "sse2" )] |
4907 | unsafe fn test_mm_cvttps_epi32() { |
4908 | let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6); |
4909 | let r = _mm_cvttps_epi32(a); |
4910 | assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6)); |
4911 | |
4912 | let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX); |
4913 | let r = _mm_cvttps_epi32(a); |
4914 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN)); |
4915 | } |
4916 | |
4917 | #[simd_test(enable = "sse2" )] |
4918 | unsafe fn test_mm_set_sd() { |
4919 | let r = _mm_set_sd(-1.0_f64); |
4920 | assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64)); |
4921 | } |
4922 | |
4923 | #[simd_test(enable = "sse2" )] |
4924 | unsafe fn test_mm_set1_pd() { |
4925 | let r = _mm_set1_pd(-1.0_f64); |
4926 | assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64)); |
4927 | } |
4928 | |
4929 | #[simd_test(enable = "sse2" )] |
4930 | unsafe fn test_mm_set_pd1() { |
4931 | let r = _mm_set_pd1(-2.0_f64); |
4932 | assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64)); |
4933 | } |
4934 | |
4935 | #[simd_test(enable = "sse2" )] |
4936 | unsafe fn test_mm_set_pd() { |
4937 | let r = _mm_set_pd(1.0_f64, 5.0_f64); |
4938 | assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64)); |
4939 | } |
4940 | |
4941 | #[simd_test(enable = "sse2" )] |
4942 | unsafe fn test_mm_setr_pd() { |
4943 | let r = _mm_setr_pd(1.0_f64, -5.0_f64); |
4944 | assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64)); |
4945 | } |
4946 | |
4947 | #[simd_test(enable = "sse2" )] |
4948 | unsafe fn test_mm_setzero_pd() { |
4949 | let r = _mm_setzero_pd(); |
4950 | assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64)); |
4951 | } |
4952 | |
4953 | #[simd_test(enable = "sse2" )] |
4954 | unsafe fn test_mm_load1_pd() { |
4955 | let d = -5.0; |
4956 | let r = _mm_load1_pd(&d); |
4957 | assert_eq_m128d(r, _mm_setr_pd(d, d)); |
4958 | } |
4959 | |
4960 | #[simd_test(enable = "sse2" )] |
4961 | unsafe fn test_mm_load_pd1() { |
4962 | let d = -5.0; |
4963 | let r = _mm_load_pd1(&d); |
4964 | assert_eq_m128d(r, _mm_setr_pd(d, d)); |
4965 | } |
4966 | |
4967 | #[simd_test(enable = "sse2" )] |
4968 | unsafe fn test_mm_unpackhi_pd() { |
4969 | let a = _mm_setr_pd(1.0, 2.0); |
4970 | let b = _mm_setr_pd(3.0, 4.0); |
4971 | let r = _mm_unpackhi_pd(a, b); |
4972 | assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0)); |
4973 | } |
4974 | |
4975 | #[simd_test(enable = "sse2" )] |
4976 | unsafe fn test_mm_unpacklo_pd() { |
4977 | let a = _mm_setr_pd(1.0, 2.0); |
4978 | let b = _mm_setr_pd(3.0, 4.0); |
4979 | let r = _mm_unpacklo_pd(a, b); |
4980 | assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0)); |
4981 | } |
4982 | |
4983 | #[simd_test(enable = "sse2" )] |
4984 | unsafe fn test_mm_shuffle_pd() { |
4985 | let a = _mm_setr_pd(1., 2.); |
4986 | let b = _mm_setr_pd(3., 4.); |
4987 | let expected = _mm_setr_pd(1., 3.); |
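// Bit 0 of the immediate selects the lane taken from `a` for output lane 0,
// bit 1 the lane taken from `b` for output lane 1; zero picks both low lanes.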
4988 | let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b); |
4989 | assert_eq_m128d(r, expected); |
4990 | } |
4991 | |
4992 | #[simd_test(enable = "sse2" )] |
4993 | unsafe fn test_mm_move_sd() { |
4994 | let a = _mm_setr_pd(1., 2.); |
4995 | let b = _mm_setr_pd(3., 4.); |
4996 | let expected = _mm_setr_pd(3., 2.); |
4997 | let r = _mm_move_sd(a, b); |
4998 | assert_eq_m128d(r, expected); |
4999 | } |
5000 | |
5001 | #[simd_test(enable = "sse2" )] |
5002 | unsafe fn test_mm_castpd_ps() { |
5003 | let a = _mm_set1_pd(0.); |
5004 | let expected = _mm_set1_ps(0.); |
5005 | let r = _mm_castpd_ps(a); |
5006 | assert_eq_m128(r, expected); |
5007 | } |
5008 | |
5009 | #[simd_test(enable = "sse2" )] |
5010 | unsafe fn test_mm_castpd_si128() { |
5011 | let a = _mm_set1_pd(0.); |
5012 | let expected = _mm_set1_epi64x(0); |
5013 | let r = _mm_castpd_si128(a); |
5014 | assert_eq_m128i(r, expected); |
5015 | } |
5016 | |
5017 | #[simd_test(enable = "sse2" )] |
5018 | unsafe fn test_mm_castps_pd() { |
5019 | let a = _mm_set1_ps(0.); |
5020 | let expected = _mm_set1_pd(0.); |
5021 | let r = _mm_castps_pd(a); |
5022 | assert_eq_m128d(r, expected); |
5023 | } |
5024 | |
5025 | #[simd_test(enable = "sse2" )] |
5026 | unsafe fn test_mm_castps_si128() { |
5027 | let a = _mm_set1_ps(0.); |
5028 | let expected = _mm_set1_epi32(0); |
5029 | let r = _mm_castps_si128(a); |
5030 | assert_eq_m128i(r, expected); |
5031 | } |
5032 | |
5033 | #[simd_test(enable = "sse2" )] |
5034 | unsafe fn test_mm_castsi128_pd() { |
5035 | let a = _mm_set1_epi64x(0); |
5036 | let expected = _mm_set1_pd(0.); |
5037 | let r = _mm_castsi128_pd(a); |
5038 | assert_eq_m128d(r, expected); |
5039 | } |
5040 | |
5041 | #[simd_test(enable = "sse2" )] |
5042 | unsafe fn test_mm_castsi128_ps() { |
5043 | let a = _mm_set1_epi32(0); |
5044 | let expected = _mm_set1_ps(0.); |
5045 | let r = _mm_castsi128_ps(a); |
5046 | assert_eq_m128(r, expected); |
5047 | } |
5048 | } |
5049 | |