1 | //! Streaming SIMD Extensions 2 (SSE2) |
2 | |
#[cfg(test)]
4 | use stdarch_test::assert_instr; |
5 | |
6 | use crate::{ |
7 | core_arch::{simd::*, simd_llvm::*, x86::*}, |
8 | intrinsics, |
9 | mem::{self, transmute}, |
10 | ptr, |
11 | }; |
12 | |
13 | /// Provides a hint to the processor that the code sequence is a spin-wait loop. |
14 | /// |
15 | /// This can help improve the performance and power consumption of spin-wait |
16 | /// loops. |
17 | /// |
18 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_pause) |
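///
/// # Examples
///
/// A sketch of the intended spin-wait pattern (illustrative only, not from the
/// original docs; assumes an `x86_64` target):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::_mm_pause;
///     use std::sync::atomic::{AtomicBool, Ordering};
///     // Pre-set the flag so this illustrative loop exits immediately.
///     let ready = AtomicBool::new(true);
///     while !ready.load(Ordering::Acquire) {
///         // Throttle each spin iteration.
///         unsafe { _mm_pause() };
///     }
/// }
/// ```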
#[inline]
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
#[stable(feature = "simd_x86", since = "1.27.0")]
22 | pub unsafe fn _mm_pause() { |
23 | // note: `pause` is guaranteed to be interpreted as a `nop` by CPUs without |
24 | // the SSE2 target-feature - therefore it does not require any target features |
25 | pause() |
26 | } |
27 | |
28 | /// Invalidates and flushes the cache line that contains `p` from all levels of |
29 | /// the cache hierarchy. |
30 | /// |
31 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clflush) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(clflush))]
#[stable(feature = "simd_x86", since = "1.27.0")]
36 | pub unsafe fn _mm_clflush(p: *const u8) { |
37 | clflush(p) |
38 | } |
39 | |
40 | /// Performs a serializing operation on all load-from-memory instructions |
41 | /// that were issued prior to this instruction. |
42 | /// |
/// Guarantees that every load instruction that precedes, in program order, the
/// load fence instruction is globally visible before any load instruction
/// which follows the fence in program order.
46 | /// |
47 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lfence) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(lfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
52 | pub unsafe fn _mm_lfence() { |
53 | lfence() |
54 | } |
55 | |
56 | /// Performs a serializing operation on all load-from-memory and store-to-memory |
57 | /// instructions that were issued prior to this instruction. |
58 | /// |
59 | /// Guarantees that every memory access that precedes, in program order, the |
60 | /// memory fence instruction is globally visible before any memory instruction |
61 | /// which follows the fence in program order. |
62 | /// |
63 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mfence) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
68 | pub unsafe fn _mm_mfence() { |
69 | mfence() |
70 | } |
71 | |
72 | /// Adds packed 8-bit integers in `a` and `b`. |
73 | /// |
74 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi8) |
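///
/// # Examples
///
/// A minimal usage sketch, not part of the original documentation; it assumes
/// an `x86_64` target, where SSE2 is always available:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         // The addition wraps: 127 + 1 becomes -128 in every 8-bit lane.
///         let r = _mm_add_epi8(_mm_set1_epi8(127), _mm_set1_epi8(1));
///         let lanes: [i8; 16] = std::mem::transmute(r);
///         assert_eq!(lanes, [-128; 16]);
///     }
/// }
/// ```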
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i8x16(), b.as_i8x16()))
}
82 | |
83 | /// Adds packed 16-bit integers in `a` and `b`. |
84 | /// |
85 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i16x8(), b.as_i16x8()))
}
93 | |
94 | /// Adds packed 32-bit integers in `a` and `b`. |
95 | /// |
96 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i32x4(), b.as_i32x4()))
}
104 | |
105 | /// Adds packed 64-bit integers in `a` and `b`. |
106 | /// |
107 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi64) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i64x2(), b.as_i64x2()))
}
115 | |
116 | /// Adds packed 8-bit integers in `a` and `b` using saturation. |
117 | /// |
118 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi8) |
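///
/// # Examples
///
/// An illustrative sketch of the saturating behavior (not from the original
/// docs; assumes an `x86_64` target):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         // Unlike `_mm_add_epi8`, 127 + 1 clamps to i8::MAX instead of wrapping.
///         let r = _mm_adds_epi8(_mm_set1_epi8(127), _mm_set1_epi8(1));
///         let lanes: [i8; 16] = std::mem::transmute(r);
///         assert_eq!(lanes, [127; 16]);
///     }
/// }
/// ```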
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16()))
}
126 | |
127 | /// Adds packed 16-bit integers in `a` and `b` using saturation. |
128 | /// |
129 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8()))
}
137 | |
138 | /// Adds packed unsigned 8-bit integers in `a` and `b` using saturation. |
139 | /// |
140 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu8) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16()))
}
148 | |
149 | /// Adds packed unsigned 16-bit integers in `a` and `b` using saturation. |
150 | /// |
151 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8()))
}
159 | |
160 | /// Averages packed unsigned 8-bit integers in `a` and `b`. |
161 | /// |
162 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu8) |
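///
/// # Examples
///
/// A small sketch of the rounding behavior (illustrative values; assumes an
/// `x86_64` target):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         // The average is (a + b + 1) >> 1, so it rounds up: (1 + 2 + 1) >> 1 = 2.
///         let r = _mm_avg_epu8(_mm_set1_epi8(1), _mm_set1_epi8(2));
///         let lanes: [u8; 16] = std::mem::transmute(r);
///         assert_eq!(lanes, [2; 16]);
///     }
/// }
/// ```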
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
    let a = simd_cast::<_, u16x16>(a.as_u8x16());
    let b = simd_cast::<_, u16x16>(b.as_u8x16());
    let r = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1));
    transmute(simd_cast::<_, u8x16>(r))
}
173 | |
174 | /// Averages packed unsigned 16-bit integers in `a` and `b`. |
175 | /// |
176 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
    let a = simd_cast::<_, u32x8>(a.as_u16x8());
    let b = simd_cast::<_, u32x8>(b.as_u16x8());
    let r = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1));
    transmute(simd_cast::<_, u16x8>(r))
}
187 | |
/// Multiplies and then horizontally adds signed 16-bit integers in `a` and `b`.
///
/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Horizontally adds adjacent pairs of
/// intermediate 32-bit integers.
193 | /// |
194 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd_epi16) |
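///
/// # Examples
///
/// A worked sketch of the pairwise multiply-add (illustrative values; assumes
/// an `x86_64` target):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
///         let b = _mm_setr_epi16(10, 20, 30, 40, 50, 60, 70, 80);
///         // Each 32-bit lane is a[2k]*b[2k] + a[2k+1]*b[2k+1],
///         // e.g. the first lane is 1*10 + 2*20 = 50.
///         let r = _mm_madd_epi16(a, b);
///         let lanes: [i32; 4] = std::mem::transmute(r);
///         assert_eq!(lanes, [50, 250, 610, 1130]);
///     }
/// }
/// ```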
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmaddwd(a.as_i16x8(), b.as_i16x8()))
}
202 | |
203 | /// Compares packed 16-bit integers in `a` and `b`, and returns the packed |
204 | /// maximum values. |
205 | /// |
206 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_i16x8();
    let b = b.as_i16x8();
    transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
}
216 | |
217 | /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the |
218 | /// packed maximum values. |
219 | /// |
220 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu8) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_u8x16();
    let b = b.as_u8x16();
    transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
}
230 | |
231 | /// Compares packed 16-bit integers in `a` and `b`, and returns the packed |
232 | /// minimum values. |
233 | /// |
234 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_i16x8();
    let b = b.as_i16x8();
    transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
}
244 | |
245 | /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the |
246 | /// packed minimum values. |
247 | /// |
248 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu8) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_u8x16();
    let b = b.as_u8x16();
    transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
}
258 | |
259 | /// Multiplies the packed 16-bit integers in `a` and `b`. |
260 | /// |
261 | /// The multiplication produces intermediate 32-bit integers, and returns the |
262 | /// high 16 bits of the intermediate integers. |
263 | /// |
264 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epi16) |
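///
/// # Examples
///
/// An illustrative sketch, not from the original docs (assumes an `x86_64`
/// target):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         // 1000 * 1000 = 1_000_000 = 0x000F_4240; the high 16 bits are 0xF = 15.
///         let r = _mm_mulhi_epi16(_mm_set1_epi16(1000), _mm_set1_epi16(1000));
///         let lanes: [i16; 8] = std::mem::transmute(r);
///         assert_eq!(lanes, [15; 8]);
///     }
/// }
/// ```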
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
    let a = simd_cast::<_, i32x8>(a.as_i16x8());
    let b = simd_cast::<_, i32x8>(b.as_i16x8());
    let r = simd_shr(simd_mul(a, b), i32x8::splat(16));
    transmute(simd_cast::<i32x8, i16x8>(r))
}
275 | |
276 | /// Multiplies the packed unsigned 16-bit integers in `a` and `b`. |
277 | /// |
278 | /// The multiplication produces intermediate 32-bit integers, and returns the |
279 | /// high 16 bits of the intermediate integers. |
280 | /// |
281 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epu16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
    let a = simd_cast::<_, u32x8>(a.as_u16x8());
    let b = simd_cast::<_, u32x8>(b.as_u16x8());
    let r = simd_shr(simd_mul(a, b), u32x8::splat(16));
    transmute(simd_cast::<u32x8, u16x8>(r))
}
292 | |
293 | /// Multiplies the packed 16-bit integers in `a` and `b`. |
294 | /// |
295 | /// The multiplication produces intermediate 32-bit integers, and returns the |
296 | /// low 16 bits of the intermediate integers. |
297 | /// |
298 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmullw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_mul(a.as_i16x8(), b.as_i16x8()))
}
306 | |
307 | /// Multiplies the low unsigned 32-bit integers from each packed 64-bit element |
308 | /// in `a` and `b`. |
309 | /// |
310 | /// Returns the unsigned 64-bit results. |
311 | /// |
312 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_epu32) |
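///
/// # Examples
///
/// A sketch showing that only the low 32 bits of each 64-bit lane participate
/// (illustrative values; assumes an `x86_64` target):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         // 32-bit lanes 1 and 3 (here -1) are ignored by the multiplication.
///         let a = _mm_setr_epi32(1, -1, 2, -1);
///         let b = _mm_setr_epi32(3, -1, 4, -1);
///         let r = _mm_mul_epu32(a, b);
///         let lanes: [u64; 2] = std::mem::transmute(r);
///         assert_eq!(lanes, [3, 8]);
///     }
/// }
/// ```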
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmuludq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_u64x2();
    let b = b.as_u64x2();
    let mask = u64x2::splat(u32::MAX.into());
    transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
}
323 | |
/// Sums the absolute differences of packed unsigned 8-bit integers.
///
/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
/// and `b`, then horizontally sums each consecutive 8 differences to produce
/// two unsigned 16-bit integers, and packs these unsigned 16-bit integers in
/// the low 16 bits of the two 64-bit elements returned.
330 | /// |
331 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sad_epu8) |
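///
/// # Examples
///
/// A small worked sketch (illustrative values; assumes an `x86_64` target):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         // Every byte differs by 3, and each 64-bit half sums 8 bytes: 8 * 3 = 24.
///         let r = _mm_sad_epu8(_mm_set1_epi8(2), _mm_set1_epi8(5));
///         let lanes: [u64; 2] = std::mem::transmute(r);
///         assert_eq!(lanes, [24, 24]);
///     }
/// }
/// ```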
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psadbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(psadbw(a.as_u8x16(), b.as_u8x16()))
}
339 | |
340 | /// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`. |
341 | /// |
342 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi8) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i8x16(), b.as_i8x16()))
}
350 | |
351 | /// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`. |
352 | /// |
353 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i16x8(), b.as_i16x8()))
}
361 | |
/// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in `a`.
363 | /// |
364 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i32x4(), b.as_i32x4()))
}
372 | |
/// Subtracts packed 64-bit integers in `b` from packed 64-bit integers in `a`.
374 | /// |
375 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi64) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i64x2(), b.as_i64x2()))
}
383 | |
/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`
385 | /// using saturation. |
386 | /// |
387 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi8) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16()))
}
395 | |
/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`
397 | /// using saturation. |
398 | /// |
399 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8()))
}
407 | |
/// Subtracts packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
409 | /// integers in `a` using saturation. |
410 | /// |
411 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu8) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16()))
}
419 | |
/// Subtracts packed unsigned 16-bit integers in `b` from packed unsigned 16-bit
421 | /// integers in `a` using saturation. |
422 | /// |
423 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8()))
}
431 | |
432 | /// Shifts `a` left by `IMM8` bytes while shifting in zeros. |
433 | /// |
434 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_si128) |
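///
/// # Examples
///
/// An illustrative sketch of the byte shift, not from the original docs
/// (assumes an `x86_64` target):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///         // Each lane moves up by 4 bytes; the vacated low bytes become zero.
///         let r = _mm_slli_si128::<4>(a);
///         let lanes: [i8; 16] = std::mem::transmute(r);
///         assert_eq!(lanes, [0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]);
///     }
/// }
/// ```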
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
440 | pub unsafe fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i { |
441 | static_assert_uimm_bits!(IMM8, 8); |
442 | _mm_slli_si128_impl::<IMM8>(a) |
443 | } |
444 | |
445 | /// Implementation detail: converts the immediate argument of the |
446 | /// `_mm_slli_si128` intrinsic into a compile-time constant. |
#[inline]
#[target_feature(enable = "sse2")]
449 | unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i { |
450 | const fn mask(shift: i32, i: u32) -> u32 { |
451 | let shift = shift as u32 & 0xff; |
452 | if shift > 15 { |
453 | i |
454 | } else { |
455 | 16 - shift + i |
456 | } |
457 | } |
458 | let zero = _mm_set1_epi8(0).as_i8x16(); |
459 | transmute::<i8x16, _>(simd_shuffle!( |
460 | zero, |
461 | a.as_i8x16(), |
462 | [ |
463 | mask(IMM8, 0), |
464 | mask(IMM8, 1), |
465 | mask(IMM8, 2), |
466 | mask(IMM8, 3), |
467 | mask(IMM8, 4), |
468 | mask(IMM8, 5), |
469 | mask(IMM8, 6), |
470 | mask(IMM8, 7), |
471 | mask(IMM8, 8), |
472 | mask(IMM8, 9), |
473 | mask(IMM8, 10), |
474 | mask(IMM8, 11), |
475 | mask(IMM8, 12), |
476 | mask(IMM8, 13), |
477 | mask(IMM8, 14), |
478 | mask(IMM8, 15), |
479 | ], |
480 | )) |
481 | } |
482 | |
483 | /// Shifts `a` left by `IMM8` bytes while shifting in zeros. |
484 | /// |
485 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bslli_si128) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
491 | pub unsafe fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i { |
492 | static_assert_uimm_bits!(IMM8, 8); |
493 | _mm_slli_si128_impl::<IMM8>(a) |
494 | } |
495 | |
496 | /// Shifts `a` right by `IMM8` bytes while shifting in zeros. |
497 | /// |
498 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bsrli_si128) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
504 | pub unsafe fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i { |
505 | static_assert_uimm_bits!(IMM8, 8); |
506 | _mm_srli_si128_impl::<IMM8>(a) |
507 | } |
508 | |
509 | /// Shifts packed 16-bit integers in `a` left by `IMM8` while shifting in zeros. |
510 | /// |
511 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 16 {
        _mm_setzero_si128()
    } else {
        transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
    }
}
525 | |
526 | /// Shifts packed 16-bit integers in `a` left by `count` while shifting in |
527 | /// zeros. |
528 | /// |
529 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psllw(a.as_i16x8(), count.as_i16x8()))
}
537 | |
538 | /// Shifts packed 32-bit integers in `a` left by `IMM8` while shifting in zeros. |
539 | /// |
540 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 32 {
        _mm_setzero_si128()
    } else {
        transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
    }
}
554 | |
555 | /// Shifts packed 32-bit integers in `a` left by `count` while shifting in |
556 | /// zeros. |
557 | /// |
558 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(pslld(a.as_i32x4(), count.as_i32x4()))
}
566 | |
567 | /// Shifts packed 64-bit integers in `a` left by `IMM8` while shifting in zeros. |
568 | /// |
569 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi64) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 64 {
        _mm_setzero_si128()
    } else {
        transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
    }
}
583 | |
584 | /// Shifts packed 64-bit integers in `a` left by `count` while shifting in |
585 | /// zeros. |
586 | /// |
587 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi64) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
    transmute(psllq(a.as_i64x2(), count.as_i64x2()))
}
595 | |
596 | /// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in sign |
597 | /// bits. |
598 | /// |
599 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi16) |
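///
/// # Examples
///
/// A sketch of the sign-preserving shift (illustrative values; assumes an
/// `x86_64` target):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         // The shift copies the sign bit in from the left: -32 >> 2 = -8.
///         let r = _mm_srai_epi16::<2>(_mm_set1_epi16(-32));
///         let lanes: [i16; 8] = std::mem::transmute(r);
///         assert_eq!(lanes, [-8; 8]);
///     }
/// }
/// ```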
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16)))
}
609 | |
610 | /// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign |
611 | /// bits. |
612 | /// |
613 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psraw(a.as_i16x8(), count.as_i16x8()))
}
621 | |
622 | /// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in sign |
623 | /// bits. |
624 | /// |
625 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31))))
}
635 | |
636 | /// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign |
637 | /// bits. |
638 | /// |
639 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrad(a.as_i32x4(), count.as_i32x4()))
}
647 | |
648 | /// Shifts `a` right by `IMM8` bytes while shifting in zeros. |
649 | /// |
650 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_si128) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
656 | pub unsafe fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i { |
657 | static_assert_uimm_bits!(IMM8, 8); |
658 | _mm_srli_si128_impl::<IMM8>(a) |
659 | } |
660 | |
661 | /// Implementation detail: converts the immediate argument of the |
662 | /// `_mm_srli_si128` intrinsic into a compile-time constant. |
#[inline]
#[target_feature(enable = "sse2")]
665 | unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i { |
666 | const fn mask(shift: i32, i: u32) -> u32 { |
667 | if (shift as u32) > 15 { |
668 | i + 16 |
669 | } else { |
670 | i + (shift as u32) |
671 | } |
672 | } |
673 | let zero = _mm_set1_epi8(0).as_i8x16(); |
674 | let x: i8x16 = simd_shuffle!( |
675 | a.as_i8x16(), |
676 | zero, |
677 | [ |
678 | mask(IMM8, 0), |
679 | mask(IMM8, 1), |
680 | mask(IMM8, 2), |
681 | mask(IMM8, 3), |
682 | mask(IMM8, 4), |
683 | mask(IMM8, 5), |
684 | mask(IMM8, 6), |
685 | mask(IMM8, 7), |
686 | mask(IMM8, 8), |
687 | mask(IMM8, 9), |
688 | mask(IMM8, 10), |
689 | mask(IMM8, 11), |
690 | mask(IMM8, 12), |
691 | mask(IMM8, 13), |
692 | mask(IMM8, 14), |
693 | mask(IMM8, 15), |
694 | ], |
695 | ); |
696 | transmute(x) |
697 | } |
698 | |
699 | /// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in |
700 | /// zeros. |
701 | /// |
702 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 16 {
        _mm_setzero_si128()
    } else {
        transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
    }
}
716 | |
717 | /// Shifts packed 16-bit integers in `a` right by `count` while shifting in |
718 | /// zeros. |
719 | /// |
720 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrlw(a.as_i16x8(), count.as_i16x8()))
}
728 | |
729 | /// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in |
730 | /// zeros. |
731 | /// |
732 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 32 {
        _mm_setzero_si128()
    } else {
        transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
    }
}
746 | |
747 | /// Shifts packed 32-bit integers in `a` right by `count` while shifting in |
748 | /// zeros. |
749 | /// |
750 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrld(a.as_i32x4(), count.as_i32x4()))
}
758 | |
759 | /// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in |
760 | /// zeros. |
761 | /// |
762 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi64) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 64 {
        _mm_setzero_si128()
    } else {
        transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
    }
}
776 | |
777 | /// Shifts packed 64-bit integers in `a` right by `count` while shifting in |
778 | /// zeros. |
779 | /// |
780 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi64) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrlq(a.as_i64x2(), count.as_i64x2()))
}
788 | |
789 | /// Computes the bitwise AND of 128 bits (representing integer data) in `a` and |
790 | /// `b`. |
791 | /// |
792 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_si128) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_and(a, b)
}
800 | |
801 | /// Computes the bitwise NOT of 128 bits (representing integer data) in `a` and |
802 | /// then AND with `b`. |
803 | /// |
804 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_si128) |
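///
/// # Examples
///
/// A small sketch of the NOT-then-AND semantics (illustrative values; assumes
/// an `x86_64` target):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         // (!a) & b keeps exactly the bits of `b` that are clear in `a`.
///         let a = _mm_set1_epi8(0b0000_1111);
///         let b = _mm_set1_epi8(0b0011_1100);
///         let r = _mm_andnot_si128(a, b);
///         let lanes: [u8; 16] = std::mem::transmute(r);
///         assert_eq!(lanes, [0b0011_0000; 16]);
///     }
/// }
/// ```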
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_and(simd_xor(_mm_set1_epi8(-1), a), b)
}
812 | |
813 | /// Computes the bitwise OR of 128 bits (representing integer data) in `a` and |
814 | /// `b`. |
815 | /// |
816 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_si128) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_or(a, b)
}
824 | |
825 | /// Computes the bitwise XOR of 128 bits (representing integer data) in `a` and |
826 | /// `b`. |
827 | /// |
828 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_si128) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_xor(a, b)
}
836 | |
837 | /// Compares packed 8-bit integers in `a` and `b` for equality. |
838 | /// |
839 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16()))
}
847 | |
848 | /// Compares packed 16-bit integers in `a` and `b` for equality. |
849 | /// |
850 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8()))
}
858 | |
859 | /// Compares packed 32-bit integers in `a` and `b` for equality. |
860 | /// |
861 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4()))
}
869 | |
870 | /// Compares packed 8-bit integers in `a` and `b` for greater-than. |
871 | /// |
872 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8) |
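///
/// # Examples
///
/// A sketch of the all-ones/all-zeros mask result (illustrative values;
/// assumes an `x86_64` target):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_setr_epi8(5, 0, 5, 0, 5, 0, 5, 0, 5, 0, 5, 0, 5, 0, 5, 0);
///         let b = _mm_set1_epi8(3);
///         // A lane compares "true" as all ones (-1) and "false" as all zeros.
///         let r = _mm_cmpgt_epi8(a, b);
///         let lanes: [i8; 16] = std::mem::transmute(r);
///         assert_eq!(lanes, [-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0]);
///     }
/// }
/// ```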
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16()))
}
880 | |
881 | /// Compares packed 16-bit integers in `a` and `b` for greater-than. |
882 | /// |
883 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8()))
}
891 | |
892 | /// Compares packed 32-bit integers in `a` and `b` for greater-than. |
893 | /// |
894 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4()))
}
902 | |
903 | /// Compares packed 8-bit integers in `a` and `b` for less-than. |
904 | /// |
905 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16()))
}
913 | |
914 | /// Compares packed 16-bit integers in `a` and `b` for less-than. |
915 | /// |
916 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8()))
}
924 | |
925 | /// Compares packed 32-bit integers in `a` and `b` for less-than. |
926 | /// |
927 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4()))
}
935 | |
936 | /// Converts the lower two packed 32-bit integers in `a` to packed |
937 | /// double-precision (64-bit) floating-point elements. |
938 | /// |
939 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_pd) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
    let a = a.as_i32x4();
    simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1]))
}
948 | |
949 | /// Returns `a` with its lower element replaced by `b` after converting it to |
950 | /// an `f64`. |
951 | /// |
952 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_sd) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsi2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
    simd_insert(a, 0, b as f64)
}
960 | |
961 | /// Converts packed 32-bit integers in `a` to packed single-precision (32-bit) |
962 | /// floating-point elements. |
963 | /// |
964 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_ps) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
    transmute(simd_cast::<_, f32x4>(a.as_i32x4()))
}
972 | |
973 | /// Converts packed single-precision (32-bit) floating-point elements in `a` |
974 | /// to packed 32-bit integers. |
975 | /// |
976 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtps_epi32(a: __m128) -> __m128i {
    transmute(cvtps2dq(a))
}
984 | |
985 | /// Returns a vector whose lowest element is `a` and all higher elements are |
986 | /// `0`. |
987 | /// |
988 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_si128) |
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi32_si128(a: i32) -> __m128i {
    transmute(i32x4::new(a, 0, 0, 0))
}
995 | |
996 | /// Returns the lowest element of `a`. |
997 | /// |
998 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi128_si32) |
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
    simd_extract(a.as_i32x4(), 0)
}
1005 | |
1006 | /// Sets packed 64-bit integers with the supplied values, from highest to |
1007 | /// lowest. |
1008 | /// |
1009 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi64x) |
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
    transmute(i64x2::new(e0, e1))
}
1017 | |
1018 | /// Sets packed 32-bit integers with the supplied values. |
1019 | /// |
1020 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi32) |
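///
/// # Examples
///
/// A sketch of the argument-to-lane ordering (illustrative values; assumes an
/// `x86_64` target):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         // Arguments run from the highest lane (`e3`) down to the lowest (`e0`),
///         // so lane 0 of the vector receives the last argument.
///         let v = _mm_set_epi32(3, 2, 1, 0);
///         let lanes: [i32; 4] = std::mem::transmute(v);
///         assert_eq!(lanes, [0, 1, 2, 3]);
///     }
/// }
/// ```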
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    transmute(i32x4::new(e0, e1, e2, e3))
}
1028 | |
1029 | /// Sets packed 16-bit integers with the supplied values. |
1030 | /// |
1031 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
1036 | pub unsafe fn _mm_set_epi16( |
1037 | e7: i16, |
1038 | e6: i16, |
1039 | e5: i16, |
1040 | e4: i16, |
1041 | e3: i16, |
1042 | e2: i16, |
1043 | e1: i16, |
1044 | e0: i16, |
1045 | ) -> __m128i { |
    transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
1047 | } |
1048 | |
1049 | /// Sets packed 8-bit integers with the supplied values. |
1050 | /// |
1051 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi8) |
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
1056 | pub unsafe fn _mm_set_epi8( |
1057 | e15: i8, |
1058 | e14: i8, |
1059 | e13: i8, |
1060 | e12: i8, |
1061 | e11: i8, |
1062 | e10: i8, |
1063 | e9: i8, |
1064 | e8: i8, |
1065 | e7: i8, |
1066 | e6: i8, |
1067 | e5: i8, |
1068 | e4: i8, |
1069 | e3: i8, |
1070 | e2: i8, |
1071 | e1: i8, |
1072 | e0: i8, |
1073 | ) -> __m128i { |
    #[rustfmt::skip]
    transmute(i8x16::new(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    ))
1078 | } |
1079 | |
1080 | /// Broadcasts 64-bit integer `a` to all elements. |
1081 | /// |
1082 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi64x) |
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi64x(a: i64) -> __m128i {
    _mm_set_epi64x(a, a)
}
1090 | |
1091 | /// Broadcasts 32-bit integer `a` to all elements. |
1092 | /// |
1093 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi32(a: i32) -> __m128i {
    _mm_set_epi32(a, a, a, a)
}
1101 | |
1102 | /// Broadcasts 16-bit integer `a` to all elements. |
1103 | /// |
1104 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi16(a: i16) -> __m128i {
    _mm_set_epi16(a, a, a, a, a, a, a, a)
}
1112 | |
1113 | /// Broadcasts 8-bit integer `a` to all elements. |
1114 | /// |
1115 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi8) |
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi8(a: i8) -> __m128i {
    _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
}
1123 | |
1124 | /// Sets packed 32-bit integers with the supplied values in reverse order. |
1125 | /// |
1126 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi32) |
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    _mm_set_epi32(e0, e1, e2, e3)
}
1134 | |
1135 | /// Sets packed 16-bit integers with the supplied values in reverse order. |
1136 | /// |
1137 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi16) |
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
1142 | pub unsafe fn _mm_setr_epi16( |
1143 | e7: i16, |
1144 | e6: i16, |
1145 | e5: i16, |
1146 | e4: i16, |
1147 | e3: i16, |
1148 | e2: i16, |
1149 | e1: i16, |
1150 | e0: i16, |
1151 | ) -> __m128i { |
    _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
1153 | } |
1154 | |
1155 | /// Sets packed 8-bit integers with the supplied values in reverse order. |
1156 | /// |
1157 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi8) |
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
1162 | pub unsafe fn _mm_setr_epi8( |
1163 | e15: i8, |
1164 | e14: i8, |
1165 | e13: i8, |
1166 | e12: i8, |
1167 | e11: i8, |
1168 | e10: i8, |
1169 | e9: i8, |
1170 | e8: i8, |
1171 | e7: i8, |
1172 | e6: i8, |
1173 | e5: i8, |
1174 | e4: i8, |
1175 | e3: i8, |
1176 | e2: i8, |
1177 | e1: i8, |
1178 | e0: i8, |
1179 | ) -> __m128i { |
    #[rustfmt::skip]
    _mm_set_epi8(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    )
1184 | } |
1185 | |
1186 | /// Returns a vector with all elements set to zero. |
1187 | /// |
1188 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_si128) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
1193 | pub unsafe fn _mm_setzero_si128() -> __m128i { |
1194 | _mm_set1_epi64x(0) |
1195 | } |
1196 | |
/// Loads a 64-bit integer from memory into the first element of the returned
/// vector.
1198 | /// |
1199 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_epi64) |
#[inline]
#[target_feature(enable = "sse2")]
// FIXME movsd on windows
#[cfg_attr(
    all(
        test,
        not(windows),
        not(all(target_os = "linux", target_arch = "x86_64")),
        target_arch = "x86_64"
    ),
    assert_instr(movq)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
    _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
}
1216 | |
/// Loads 128 bits of integer data from memory into a new vector.
1218 | /// |
1219 | /// `mem_addr` must be aligned on a 16-byte boundary. |
1220 | /// |
1221 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_si128) |
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
1226 | pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i { |
1227 | *mem_addr |
1228 | } |
1229 | |
/// Loads 128 bits of integer data from memory into a new vector.
1231 | /// |
1232 | /// `mem_addr` does not need to be aligned on any particular boundary. |
1233 | /// |
1234 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si128) |
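///
/// # Examples
///
/// A sketch loading from a plain byte buffer, which has no 16-byte alignment
/// guarantee (illustrative only; assumes an `x86_64` target):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let bytes: [u8; 16] = *b"0123456789abcdef";
///         // `[u8; 16]` is only 1-byte aligned, so the unaligned load is required.
///         let v = _mm_loadu_si128(bytes.as_ptr() as *const __m128i);
///         let round_trip: [u8; 16] = std::mem::transmute(v);
///         assert_eq!(&round_trip, b"0123456789abcdef");
///     }
/// }
/// ```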
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
    let mut dst: __m128i = _mm_undefined_si128();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        &mut dst as *mut __m128i as *mut u8,
        mem::size_of::<__m128i>(),
    );
    dst
}
1248 | |
/// Conditionally stores 8-bit integer elements from `a` into memory using
/// `mask`.
///
/// Elements are not stored when the highest bit of the corresponding `mask`
/// element is not set.
1254 | /// |
1255 | /// `mem_addr` should correspond to a 128-bit memory location and does not need |
1256 | /// to be aligned on any particular boundary. |
1257 | /// |
1258 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskmoveu_si128) |
1259 | #[inline ] |
1260 | #[target_feature (enable = "sse2" )] |
1261 | #[cfg_attr (test, assert_instr(maskmovdqu))] |
1262 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1263 | pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) { |
maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
1265 | } |
1266 | |
/// Stores 128 bits of integer data from `a` into memory.
1268 | /// |
1269 | /// `mem_addr` must be aligned on a 16-byte boundary. |
1270 | /// |
1271 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_si128) |
1272 | #[inline ] |
1273 | #[target_feature (enable = "sse2" )] |
1274 | #[cfg_attr (test, assert_instr(movaps))] |
1275 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1276 | pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) { |
1277 | *mem_addr = a; |
1278 | } |
1279 | |
/// Stores 128 bits of integer data from `a` into memory.
1281 | /// |
1282 | /// `mem_addr` does not need to be aligned on any particular boundary. |
1283 | /// |
1284 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si128) |
1285 | #[inline ] |
1286 | #[target_feature (enable = "sse2" )] |
1287 | #[cfg_attr (test, assert_instr(movups))] // FIXME movdqu expected |
1288 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1289 | pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) { |
mem_addr.write_unaligned(a);
1291 | } |
1292 | |
1293 | /// Stores the lower 64-bit integer `a` to a memory location. |
1294 | /// |
1295 | /// `mem_addr` does not need to be aligned on any particular boundary. |
1296 | /// |
1297 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_epi64) |
1298 | #[inline ] |
1299 | #[target_feature (enable = "sse2" )] |
1300 | // FIXME mov on windows, movlps on i686 |
1301 | #[cfg_attr ( |
1302 | all( |
1303 | test, |
1304 | not(windows), |
1305 | not(all(target_os = "linux" , target_arch = "x86_64" )), |
1306 | target_arch = "x86_64" |
1307 | ), |
1308 | assert_instr(movq) |
1309 | )] |
1310 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1311 | pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) { |
ptr::copy_nonoverlapping(&a as *const _ as *const u8, mem_addr as *mut u8, 8);
1313 | } |
1314 | |
1315 | /// Stores a 128-bit integer vector to a 128-bit aligned memory location. |
1316 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be |
1317 | /// used again soon). |
1318 | /// |
1319 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si128) |
1320 | #[inline ] |
1321 | #[target_feature (enable = "sse2" )] |
1322 | #[cfg_attr (test, assert_instr(movntps))] // FIXME movntdq |
1323 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1324 | pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) { |
intrinsics::nontemporal_store(mem_addr, a);
1326 | } |
1327 | |
1328 | /// Stores a 32-bit integer value in the specified memory location. |
1329 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be |
1330 | /// used again soon). |
1331 | /// |
1332 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si32) |
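///
/// A minimal usage sketch, assuming an `x86_64` target (where SSE2 is always
/// available). Streaming stores are weakly ordered, so they are commonly
/// followed by a store fence before the data is handed to another thread:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// unsafe {
///     use std::arch::x86_64::*;
///     let mut x = 0i32;
///     _mm_stream_si32(&mut x, 42); // bypasses the cache hierarchy
///     _mm_sfence();                // make the store globally visible
///     assert_eq!(x, 42);
/// }
/// ```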
1333 | #[inline ] |
1334 | #[target_feature (enable = "sse2" )] |
1335 | #[cfg_attr (test, assert_instr(movnti))] |
1336 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1337 | pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) { |
intrinsics::nontemporal_store(mem_addr, a);
1339 | } |
1340 | |
1341 | /// Returns a vector where the low element is extracted from `a` and its upper |
1342 | /// element is zero. |
1343 | /// |
1344 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_epi64) |
1345 | #[inline ] |
1346 | #[target_feature (enable = "sse2" )] |
1347 | // FIXME movd on windows, movd on i686 |
1348 | #[cfg_attr (all(test, not(windows), target_arch = "x86_64" ), assert_instr(movq))] |
1349 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1350 | pub unsafe fn _mm_move_epi64(a: __m128i) -> __m128i { |
1351 | let zero: __m128i = _mm_setzero_si128(); |
1352 | let r: i64x2 = simd_shuffle!(a.as_i64x2(), zero.as_i64x2(), [0, 2]); |
transmute(r)
1354 | } |
1355 | |
1356 | /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers |
1357 | /// using signed saturation. |
1358 | /// |
1359 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi16) |
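///
/// A minimal sketch of the saturating behavior, assuming an `x86_64` target
/// (where SSE2 is always available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// unsafe {
///     use std::arch::x86_64::*;
///     let a = _mm_set1_epi16(1000);  // above i8::MAX
///     let b = _mm_set1_epi16(-1000); // below i8::MIN
///     let r = _mm_packs_epi16(a, b);
///     let mut out = [0i8; 16];
///     _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///     // Lanes from `a` fill the low half, lanes from `b` the high half,
///     // each clamped to the i8 range.
///     assert_eq!(out[..8], [127i8; 8]);
///     assert_eq!(out[8..], [-128i8; 8]);
/// }
/// ```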
1360 | #[inline ] |
1361 | #[target_feature (enable = "sse2" )] |
1362 | #[cfg_attr (test, assert_instr(packsswb))] |
1363 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1364 | pub unsafe fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i { |
transmute(packsswb(a.as_i16x8(), b.as_i16x8()))
1366 | } |
1367 | |
1368 | /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers |
1369 | /// using signed saturation. |
1370 | /// |
1371 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi32) |
1372 | #[inline ] |
1373 | #[target_feature (enable = "sse2" )] |
1374 | #[cfg_attr (test, assert_instr(packssdw))] |
1375 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1376 | pub unsafe fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i { |
transmute(packssdw(a.as_i32x4(), b.as_i32x4()))
1378 | } |
1379 | |
1380 | /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers |
1381 | /// using unsigned saturation. |
1382 | /// |
1383 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi16) |
1384 | #[inline ] |
1385 | #[target_feature (enable = "sse2" )] |
1386 | #[cfg_attr (test, assert_instr(packuswb))] |
1387 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1388 | pub unsafe fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i { |
transmute(packuswb(a.as_i16x8(), b.as_i16x8()))
1390 | } |
1391 | |
/// Returns the `IMM8` element of `a`.
1393 | /// |
1394 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi16) |
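///
/// A minimal usage sketch, assuming an `x86_64` target (where SSE2 is always
/// available); note that the selected lane is zero-extended, not
/// sign-extended:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// unsafe {
///     use std::arch::x86_64::*;
///     let v = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
///     assert_eq!(_mm_extract_epi16::<3>(v), 3);
///     let w = _mm_set1_epi16(-1);
///     assert_eq!(_mm_extract_epi16::<0>(w), 0xFFFF); // zero-extended
/// }
/// ```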
1395 | #[inline ] |
1396 | #[target_feature (enable = "sse2" )] |
1397 | #[cfg_attr (test, assert_instr(pextrw, IMM8 = 7))] |
1398 | #[rustc_legacy_const_generics (1)] |
1399 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1400 | pub unsafe fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 { |
1401 | static_assert_uimm_bits!(IMM8, 3); |
simd_extract::<_, u16>(a.as_u16x8(), IMM8 as u32) as i32
1403 | } |
1404 | |
/// Returns a new vector where the `IMM8` element of `a` is replaced with `i`.
1406 | /// |
1407 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi16) |
1408 | #[inline ] |
1409 | #[target_feature (enable = "sse2" )] |
1410 | #[cfg_attr (test, assert_instr(pinsrw, IMM8 = 7))] |
1411 | #[rustc_legacy_const_generics (2)] |
1412 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1413 | pub unsafe fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i { |
1414 | static_assert_uimm_bits!(IMM8, 3); |
transmute(simd_insert(a.as_i16x8(), IMM8 as u32, i as i16))
1416 | } |
1417 | |
1418 | /// Returns a mask of the most significant bit of each element in `a`. |
1419 | /// |
1420 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_epi8) |
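///
/// A minimal sketch of a common use, assuming an `x86_64` target (where SSE2
/// is always available): locating the first zero byte in a 16-byte block.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// unsafe {
///     use std::arch::x86_64::*;
///     let hay = *b"hello, simd\0pad!";
///     let v = _mm_loadu_si128(hay.as_ptr() as *const __m128i);
///     let eq = _mm_cmpeq_epi8(v, _mm_setzero_si128());
///     let mask = _mm_movemask_epi8(eq); // bit i set <=> byte i was zero
///     assert_eq!(mask.trailing_zeros(), 11);
/// }
/// ```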
1421 | #[inline ] |
1422 | #[target_feature (enable = "sse2" )] |
1423 | #[cfg_attr (test, assert_instr(pmovmskb))] |
1424 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1425 | pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 { |
1426 | let z: i8x16 = i8x16::splat(0); |
let m: i8x16 = simd_lt(a.as_i8x16(), z);
1428 | simd_bitmask::<_, u16>(m) as u32 as i32 |
1429 | } |
1430 | |
1431 | /// Shuffles 32-bit integers in `a` using the control in `IMM8`. |
1432 | /// |
1433 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi32) |
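///
/// A minimal usage sketch, assuming an `x86_64` target (where SSE2 is always
/// available); each 2-bit field of `IMM8` picks the source lane for one
/// output lane:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// unsafe {
///     use std::arch::x86_64::*;
///     let v = _mm_set_epi32(3, 2, 1, 0);
///     // 0b00_01_10_11 selects lanes 3, 2, 1, 0, reversing the vector.
///     let r = _mm_shuffle_epi32::<0b00_01_10_11>(v);
///     let mut out = [0i32; 4];
///     _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///     assert_eq!(out, [3, 2, 1, 0]);
/// }
/// ```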
1434 | #[inline ] |
1435 | #[target_feature (enable = "sse2" )] |
1436 | #[cfg_attr (test, assert_instr(pshufd, IMM8 = 9))] |
1437 | #[rustc_legacy_const_generics (1)] |
1438 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1439 | pub unsafe fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i { |
1440 | static_assert_uimm_bits!(IMM8, 8); |
1441 | let a: i32x4 = a.as_i32x4(); |
1442 | let x: i32x4 = simd_shuffle!( |
1443 | a, |
1444 | a, |
1445 | [ |
1446 | IMM8 as u32 & 0b11, |
1447 | (IMM8 as u32 >> 2) & 0b11, |
1448 | (IMM8 as u32 >> 4) & 0b11, |
1449 | (IMM8 as u32 >> 6) & 0b11, |
1450 | ], |
1451 | ); |
transmute(x)
1453 | } |
1454 | |
1455 | /// Shuffles 16-bit integers in the high 64 bits of `a` using the control in |
1456 | /// `IMM8`. |
1457 | /// |
/// Puts the results in the high 64 bits of the returned vector, with the low 64
1459 | /// bits being copied from `a`. |
1460 | /// |
1461 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflehi_epi16) |
1462 | #[inline ] |
1463 | #[target_feature (enable = "sse2" )] |
1464 | #[cfg_attr (test, assert_instr(pshufhw, IMM8 = 9))] |
1465 | #[rustc_legacy_const_generics (1)] |
1466 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1467 | pub unsafe fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i { |
1468 | static_assert_uimm_bits!(IMM8, 8); |
1469 | let a: i16x8 = a.as_i16x8(); |
1470 | let x: i16x8 = simd_shuffle!( |
1471 | a, |
1472 | a, |
1473 | [ |
1474 | 0, |
1475 | 1, |
1476 | 2, |
1477 | 3, |
1478 | (IMM8 as u32 & 0b11) + 4, |
1479 | ((IMM8 as u32 >> 2) & 0b11) + 4, |
1480 | ((IMM8 as u32 >> 4) & 0b11) + 4, |
1481 | ((IMM8 as u32 >> 6) & 0b11) + 4, |
1482 | ], |
1483 | ); |
transmute(x)
1485 | } |
1486 | |
1487 | /// Shuffles 16-bit integers in the low 64 bits of `a` using the control in |
1488 | /// `IMM8`. |
1489 | /// |
/// Puts the results in the low 64 bits of the returned vector, with the high 64
1491 | /// bits being copied from `a`. |
1492 | /// |
1493 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflelo_epi16) |
1494 | #[inline ] |
1495 | #[target_feature (enable = "sse2" )] |
1496 | #[cfg_attr (test, assert_instr(pshuflw, IMM8 = 9))] |
1497 | #[rustc_legacy_const_generics (1)] |
1498 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1499 | pub unsafe fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i { |
1500 | static_assert_uimm_bits!(IMM8, 8); |
1501 | let a: i16x8 = a.as_i16x8(); |
1502 | let x: i16x8 = simd_shuffle!( |
1503 | a, |
1504 | a, |
1505 | [ |
1506 | IMM8 as u32 & 0b11, |
1507 | (IMM8 as u32 >> 2) & 0b11, |
1508 | (IMM8 as u32 >> 4) & 0b11, |
1509 | (IMM8 as u32 >> 6) & 0b11, |
1510 | 4, |
1511 | 5, |
1512 | 6, |
1513 | 7, |
1514 | ], |
1515 | ); |
transmute(x)
1517 | } |
1518 | |
/// Unpacks and interleaves 8-bit integers from the high half of `a` and `b`.
1520 | /// |
1521 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi8) |
1522 | #[inline ] |
1523 | #[target_feature (enable = "sse2" )] |
1524 | #[cfg_attr (test, assert_instr(punpckhbw))] |
1525 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1526 | pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i { |
transmute::<i8x16, _>(simd_shuffle!(
1528 | a.as_i8x16(), |
1529 | b.as_i8x16(), |
1530 | [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31], |
1531 | )) |
1532 | } |
1533 | |
/// Unpacks and interleaves 16-bit integers from the high half of `a` and `b`.
1535 | /// |
1536 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi16) |
1537 | #[inline ] |
1538 | #[target_feature (enable = "sse2" )] |
1539 | #[cfg_attr (test, assert_instr(punpckhwd))] |
1540 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1541 | pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i { |
1542 | let x: i16x8 = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]); |
transmute::<i16x8, _>(x)
1544 | } |
1545 | |
/// Unpacks and interleaves 32-bit integers from the high half of `a` and `b`.
1547 | /// |
1548 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi32) |
1549 | #[inline ] |
1550 | #[target_feature (enable = "sse2" )] |
1551 | #[cfg_attr (test, assert_instr(unpckhps))] |
1552 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1553 | pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i { |
transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7]))
1555 | } |
1556 | |
/// Unpacks and interleaves 64-bit integers from the high half of `a` and `b`.
1558 | /// |
1559 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi64) |
1560 | #[inline ] |
1561 | #[target_feature (enable = "sse2" )] |
1562 | #[cfg_attr (test, assert_instr(unpckhpd))] |
1563 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1564 | pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i { |
transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3]))
1566 | } |
1567 | |
/// Unpacks and interleaves 8-bit integers from the low half of `a` and `b`.
1569 | /// |
1570 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi8) |
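///
/// A minimal sketch of a common widening idiom, assuming an `x86_64` target
/// (where SSE2 is always available): interleaving with a zero vector
/// zero-extends the low eight bytes to 16-bit lanes.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// unsafe {
///     use std::arch::x86_64::*;
///     let bytes = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 7, 6, 5, 4, 3, 2, 1);
///     let widened = _mm_unpacklo_epi8(bytes, _mm_setzero_si128());
///     let mut out = [0u16; 8];
///     _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, widened);
///     assert_eq!(out, [1, 2, 3, 4, 5, 6, 7, 8]);
/// }
/// ```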
1571 | #[inline ] |
1572 | #[target_feature (enable = "sse2" )] |
1573 | #[cfg_attr (test, assert_instr(punpcklbw))] |
1574 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1575 | pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i { |
transmute::<i8x16, _>(simd_shuffle!(
1577 | a.as_i8x16(), |
1578 | b.as_i8x16(), |
1579 | [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23], |
1580 | )) |
1581 | } |
1582 | |
/// Unpacks and interleaves 16-bit integers from the low half of `a` and `b`.
1584 | /// |
1585 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi16) |
1586 | #[inline ] |
1587 | #[target_feature (enable = "sse2" )] |
1588 | #[cfg_attr (test, assert_instr(punpcklwd))] |
1589 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1590 | pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i { |
1591 | let x: i16x8 = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]); |
transmute::<i16x8, _>(x)
1593 | } |
1594 | |
/// Unpacks and interleaves 32-bit integers from the low half of `a` and `b`.
1596 | /// |
1597 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi32) |
1598 | #[inline ] |
1599 | #[target_feature (enable = "sse2" )] |
1600 | #[cfg_attr (test, assert_instr(unpcklps))] |
1601 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1602 | pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i { |
transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5]))
1604 | } |
1605 | |
/// Unpacks and interleaves 64-bit integers from the low half of `a` and `b`.
1607 | /// |
1608 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi64) |
1609 | #[inline ] |
1610 | #[target_feature (enable = "sse2" )] |
1611 | #[cfg_attr (all(test, not(target_os = "windows" )), assert_instr(movlhps))] |
1612 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1613 | pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i { |
transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2]))
1615 | } |
1616 | |
1617 | /// Returns a new vector with the low element of `a` replaced by the sum of the |
1618 | /// low elements of `a` and `b`. |
1619 | /// |
1620 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_sd) |
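///
/// A minimal usage sketch, assuming an `x86_64` target (where SSE2 is always
/// available); only the low lane is summed, the high lane comes from `a`:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// unsafe {
///     use std::arch::x86_64::*;
///     let a = _mm_set_pd(10.0, 1.0); // high = 10.0, low = 1.0
///     let b = _mm_set_pd(20.0, 2.0);
///     let mut out = [0.0f64; 2];
///     _mm_storeu_pd(out.as_mut_ptr(), _mm_add_sd(a, b));
///     assert_eq!(out, [3.0, 10.0]); // [1.0 + 2.0, a's high lane]
/// }
/// ```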
1621 | #[inline ] |
1622 | #[target_feature (enable = "sse2" )] |
1623 | #[cfg_attr (test, assert_instr(addsd))] |
1624 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1625 | pub unsafe fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d { |
simd_insert(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b))
1627 | } |
1628 | |
1629 | /// Adds packed double-precision (64-bit) floating-point elements in `a` and |
1630 | /// `b`. |
1631 | /// |
1632 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_pd) |
1633 | #[inline ] |
1634 | #[target_feature (enable = "sse2" )] |
1635 | #[cfg_attr (test, assert_instr(addpd))] |
1636 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1637 | pub unsafe fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d { |
simd_add(a, b)
1639 | } |
1640 | |
1641 | /// Returns a new vector with the low element of `a` replaced by the result of |
/// dividing the lower element of `a` by the lower element of `b`.
1643 | /// |
1644 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_sd) |
1645 | #[inline ] |
1646 | #[target_feature (enable = "sse2" )] |
1647 | #[cfg_attr (test, assert_instr(divsd))] |
1648 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1649 | pub unsafe fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d { |
simd_insert(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b))
1651 | } |
1652 | |
/// Divides packed double-precision (64-bit) floating-point elements in `a` by
1654 | /// packed elements in `b`. |
1655 | /// |
1656 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_pd) |
1657 | #[inline ] |
1658 | #[target_feature (enable = "sse2" )] |
1659 | #[cfg_attr (test, assert_instr(divpd))] |
1660 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1661 | pub unsafe fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d { |
simd_div(a, b)
1663 | } |
1664 | |
1665 | /// Returns a new vector with the low element of `a` replaced by the maximum |
1666 | /// of the lower elements of `a` and `b`. |
1667 | /// |
1668 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_sd) |
1669 | #[inline ] |
1670 | #[target_feature (enable = "sse2" )] |
1671 | #[cfg_attr (test, assert_instr(maxsd))] |
1672 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1673 | pub unsafe fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d { |
1674 | maxsd(a, b) |
1675 | } |
1676 | |
1677 | /// Returns a new vector with the maximum values from corresponding elements in |
1678 | /// `a` and `b`. |
1679 | /// |
1680 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_pd) |
1681 | #[inline ] |
1682 | #[target_feature (enable = "sse2" )] |
1683 | #[cfg_attr (test, assert_instr(maxpd))] |
1684 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1685 | pub unsafe fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d { |
1686 | maxpd(a, b) |
1687 | } |
1688 | |
1689 | /// Returns a new vector with the low element of `a` replaced by the minimum |
1690 | /// of the lower elements of `a` and `b`. |
1691 | /// |
1692 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_sd) |
1693 | #[inline ] |
1694 | #[target_feature (enable = "sse2" )] |
1695 | #[cfg_attr (test, assert_instr(minsd))] |
1696 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1697 | pub unsafe fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d { |
1698 | minsd(a, b) |
1699 | } |
1700 | |
1701 | /// Returns a new vector with the minimum values from corresponding elements in |
1702 | /// `a` and `b`. |
1703 | /// |
1704 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_pd) |
1705 | #[inline ] |
1706 | #[target_feature (enable = "sse2" )] |
1707 | #[cfg_attr (test, assert_instr(minpd))] |
1708 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1709 | pub unsafe fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d { |
1710 | minpd(a, b) |
1711 | } |
1712 | |
1713 | /// Returns a new vector with the low element of `a` replaced by multiplying the |
1714 | /// low elements of `a` and `b`. |
1715 | /// |
1716 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_sd) |
1717 | #[inline ] |
1718 | #[target_feature (enable = "sse2" )] |
1719 | #[cfg_attr (test, assert_instr(mulsd))] |
1720 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1721 | pub unsafe fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d { |
simd_insert(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b))
1723 | } |
1724 | |
1725 | /// Multiplies packed double-precision (64-bit) floating-point elements in `a` |
1726 | /// and `b`. |
1727 | /// |
1728 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_pd) |
1729 | #[inline ] |
1730 | #[target_feature (enable = "sse2" )] |
1731 | #[cfg_attr (test, assert_instr(mulpd))] |
1732 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1733 | pub unsafe fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d { |
simd_mul(a, b)
1735 | } |
1736 | |
1737 | /// Returns a new vector with the low element of `a` replaced by the square |
/// root of the lower element of `b`.
1739 | /// |
1740 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_sd) |
1741 | #[inline ] |
1742 | #[target_feature (enable = "sse2" )] |
1743 | #[cfg_attr (test, assert_instr(sqrtsd))] |
1744 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1745 | pub unsafe fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d { |
simd_insert(a, 0, _mm_cvtsd_f64(sqrtsd(b)))
1747 | } |
1748 | |
1749 | /// Returns a new vector with the square root of each of the values in `a`. |
1750 | /// |
1751 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_pd) |
1752 | #[inline ] |
1753 | #[target_feature (enable = "sse2" )] |
1754 | #[cfg_attr (test, assert_instr(sqrtpd))] |
1755 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1756 | pub unsafe fn _mm_sqrt_pd(a: __m128d) -> __m128d { |
1757 | simd_fsqrt(a) |
1758 | } |
1759 | |
/// Returns a new vector with the low element of `a` replaced by the result of
/// subtracting the low element of `b` from the low element of `a`.
1762 | /// |
1763 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_sd) |
1764 | #[inline ] |
1765 | #[target_feature (enable = "sse2" )] |
1766 | #[cfg_attr (test, assert_instr(subsd))] |
1767 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1768 | pub unsafe fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d { |
simd_insert(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b))
1770 | } |
1771 | |
/// Subtracts packed double-precision (64-bit) floating-point elements in `b`
1773 | /// from `a`. |
1774 | /// |
1775 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_pd) |
1776 | #[inline ] |
1777 | #[target_feature (enable = "sse2" )] |
1778 | #[cfg_attr (test, assert_instr(subpd))] |
1779 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1780 | pub unsafe fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d { |
simd_sub(a, b)
1782 | } |
1783 | |
1784 | /// Computes the bitwise AND of packed double-precision (64-bit) floating-point |
1785 | /// elements in `a` and `b`. |
1786 | /// |
1787 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_pd) |
1788 | #[inline ] |
1789 | #[target_feature (enable = "sse2" )] |
1790 | #[cfg_attr (test, assert_instr(andps))] |
1791 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1792 | pub unsafe fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d { |
let a: __m128i = transmute(a);
let b: __m128i = transmute(b);
transmute(_mm_and_si128(a, b))
1796 | } |
1797 | |
1798 | /// Computes the bitwise NOT of `a` and then AND with `b`. |
1799 | /// |
1800 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_pd) |
1801 | #[inline ] |
1802 | #[target_feature (enable = "sse2" )] |
1803 | #[cfg_attr (test, assert_instr(andnps))] |
1804 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1805 | pub unsafe fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d { |
let a: __m128i = transmute(a);
let b: __m128i = transmute(b);
transmute(_mm_andnot_si128(a, b))
1809 | } |
1810 | |
1811 | /// Computes the bitwise OR of `a` and `b`. |
1812 | /// |
1813 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_pd) |
1814 | #[inline ] |
1815 | #[target_feature (enable = "sse2" )] |
1816 | #[cfg_attr (test, assert_instr(orps))] |
1817 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1818 | pub unsafe fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d { |
let a: __m128i = transmute(a);
let b: __m128i = transmute(b);
transmute(_mm_or_si128(a, b))
1822 | } |
1823 | |
1824 | /// Computes the bitwise XOR of `a` and `b`. |
1825 | /// |
1826 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_pd) |
1827 | #[inline ] |
1828 | #[target_feature (enable = "sse2" )] |
1829 | #[cfg_attr (test, assert_instr(xorps))] |
1830 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1831 | pub unsafe fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d { |
let a: __m128i = transmute(a);
let b: __m128i = transmute(b);
transmute(_mm_xor_si128(a, b))
1835 | } |
1836 | |
1837 | /// Returns a new vector with the low element of `a` replaced by the equality |
1838 | /// comparison of the lower elements of `a` and `b`. |
1839 | /// |
1840 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_sd) |
1841 | #[inline ] |
1842 | #[target_feature (enable = "sse2" )] |
1843 | #[cfg_attr (test, assert_instr(cmpeqsd))] |
1844 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1845 | pub unsafe fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d { |
cmpsd(a, b, 0)
1847 | } |
1848 | |
1849 | /// Returns a new vector with the low element of `a` replaced by the less-than |
1850 | /// comparison of the lower elements of `a` and `b`. |
1851 | /// |
1852 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_sd) |
1853 | #[inline ] |
1854 | #[target_feature (enable = "sse2" )] |
1855 | #[cfg_attr (test, assert_instr(cmpltsd))] |
1856 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1857 | pub unsafe fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d { |
cmpsd(a, b, 1)
1859 | } |
1860 | |
1861 | /// Returns a new vector with the low element of `a` replaced by the |
1862 | /// less-than-or-equal comparison of the lower elements of `a` and `b`. |
1863 | /// |
1864 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_sd) |
1865 | #[inline ] |
1866 | #[target_feature (enable = "sse2" )] |
1867 | #[cfg_attr (test, assert_instr(cmplesd))] |
1868 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1869 | pub unsafe fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d { |
cmpsd(a, b, 2)
1871 | } |
1872 | |
1873 | /// Returns a new vector with the low element of `a` replaced by the |
1874 | /// greater-than comparison of the lower elements of `a` and `b`. |
1875 | /// |
1876 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_sd) |
1877 | #[inline ] |
1878 | #[target_feature (enable = "sse2" )] |
1879 | #[cfg_attr (test, assert_instr(cmpltsd))] |
1880 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1881 | pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d { |
simd_insert(_mm_cmplt_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
1883 | } |
1884 | |
1885 | /// Returns a new vector with the low element of `a` replaced by the |
1886 | /// greater-than-or-equal comparison of the lower elements of `a` and `b`. |
1887 | /// |
1888 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_sd) |
1889 | #[inline ] |
1890 | #[target_feature (enable = "sse2" )] |
1891 | #[cfg_attr (test, assert_instr(cmplesd))] |
1892 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1893 | pub unsafe fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d { |
simd_insert(_mm_cmple_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
1895 | } |
1896 | |
1897 | /// Returns a new vector with the low element of `a` replaced by the result |
1898 | /// of comparing both of the lower elements of `a` and `b` to `NaN`. If |
/// neither is equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0`
1900 | /// otherwise. |
1901 | /// |
1902 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_sd) |
1903 | #[inline ] |
1904 | #[target_feature (enable = "sse2" )] |
1905 | #[cfg_attr (test, assert_instr(cmpordsd))] |
1906 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1907 | pub unsafe fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d { |
cmpsd(a, b, 7)
1909 | } |
1910 | |
1911 | /// Returns a new vector with the low element of `a` replaced by the result of |
1912 | /// comparing both of the lower elements of `a` and `b` to `NaN`. If either is |
1913 | /// equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise. |
1914 | /// |
1915 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_sd) |
1916 | #[inline ] |
1917 | #[target_feature (enable = "sse2" )] |
1918 | #[cfg_attr (test, assert_instr(cmpunordsd))] |
1919 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1920 | pub unsafe fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d { |
cmpsd(a, b, 3)
1922 | } |
1923 | |
1924 | /// Returns a new vector with the low element of `a` replaced by the not-equal |
1925 | /// comparison of the lower elements of `a` and `b`. |
1926 | /// |
1927 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_sd) |
1928 | #[inline ] |
1929 | #[target_feature (enable = "sse2" )] |
1930 | #[cfg_attr (test, assert_instr(cmpneqsd))] |
1931 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1932 | pub unsafe fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d { |
cmpsd(a, b, 4)
1934 | } |
1935 | |
1936 | /// Returns a new vector with the low element of `a` replaced by the |
1937 | /// not-less-than comparison of the lower elements of `a` and `b`. |
1938 | /// |
1939 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_sd) |
1940 | #[inline ] |
1941 | #[target_feature (enable = "sse2" )] |
1942 | #[cfg_attr (test, assert_instr(cmpnltsd))] |
1943 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1944 | pub unsafe fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d { |
cmpsd(a, b, 5)
1946 | } |
1947 | |
1948 | /// Returns a new vector with the low element of `a` replaced by the |
1949 | /// not-less-than-or-equal comparison of the lower elements of `a` and `b`. |
1950 | /// |
1951 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_sd) |
1952 | #[inline ] |
1953 | #[target_feature (enable = "sse2" )] |
1954 | #[cfg_attr (test, assert_instr(cmpnlesd))] |
1955 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1956 | pub unsafe fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d { |
cmpsd(a, b, 6)
1958 | } |
1959 | |
1960 | /// Returns a new vector with the low element of `a` replaced by the |
1961 | /// not-greater-than comparison of the lower elements of `a` and `b`. |
1962 | /// |
1963 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_sd) |
1964 | #[inline ] |
1965 | #[target_feature (enable = "sse2" )] |
1966 | #[cfg_attr (test, assert_instr(cmpnltsd))] |
1967 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1968 | pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d { |
simd_insert(_mm_cmpnlt_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
1970 | } |
1971 | |
1972 | /// Returns a new vector with the low element of `a` replaced by the |
1973 | /// not-greater-than-or-equal comparison of the lower elements of `a` and `b`. |
1974 | /// |
1975 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_sd) |
1976 | #[inline ] |
1977 | #[target_feature (enable = "sse2" )] |
1978 | #[cfg_attr (test, assert_instr(cmpnlesd))] |
1979 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1980 | pub unsafe fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d { |
simd_insert(_mm_cmpnle_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
1982 | } |
1983 | |
1984 | /// Compares corresponding elements in `a` and `b` for equality. |
1985 | /// |
1986 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_pd) |
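///
/// A minimal usage sketch, assuming an `x86_64` target (where SSE2 is always
/// available); each result lane is all-ones on a match and all-zeros
/// otherwise, which `_mm_movemask_pd` condenses to one bit per lane:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// unsafe {
///     use std::arch::x86_64::*;
///     let a = _mm_set_pd(1.0, 2.0);
///     let b = _mm_set_pd(1.0, 3.0);
///     let r = _mm_cmpeq_pd(a, b);
///     assert_eq!(_mm_movemask_pd(r), 0b10); // only the high lane matched
/// }
/// ```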
1987 | #[inline ] |
1988 | #[target_feature (enable = "sse2" )] |
1989 | #[cfg_attr (test, assert_instr(cmpeqpd))] |
1990 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1991 | pub unsafe fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d { |
cmppd(a, b, 0)
1993 | } |
1994 | |
1995 | /// Compares corresponding elements in `a` and `b` for less-than. |
1996 | /// |
1997 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_pd) |
1998 | #[inline ] |
1999 | #[target_feature (enable = "sse2" )] |
2000 | #[cfg_attr (test, assert_instr(cmpltpd))] |
2001 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2002 | pub unsafe fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d { |
cmppd(a, b, 1)
2004 | } |
2005 | |
/// Compares corresponding elements in `a` and `b` for less-than-or-equal.
2007 | /// |
2008 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_pd) |
2009 | #[inline ] |
2010 | #[target_feature (enable = "sse2" )] |
2011 | #[cfg_attr (test, assert_instr(cmplepd))] |
2012 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2013 | pub unsafe fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d { |
cmppd(a, b, 2)
2015 | } |
2016 | |
2017 | /// Compares corresponding elements in `a` and `b` for greater-than. |
2018 | /// |
2019 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_pd) |
2020 | #[inline ] |
2021 | #[target_feature (enable = "sse2" )] |
2022 | #[cfg_attr (test, assert_instr(cmpltpd))] |
2023 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2024 | pub unsafe fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d { |
_mm_cmplt_pd(b, a)
2026 | } |
2027 | |
2028 | /// Compares corresponding elements in `a` and `b` for greater-than-or-equal. |
2029 | /// |
2030 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_pd) |
2031 | #[inline ] |
2032 | #[target_feature (enable = "sse2" )] |
2033 | #[cfg_attr (test, assert_instr(cmplepd))] |
2034 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2035 | pub unsafe fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d { |
_mm_cmple_pd(b, a)
2037 | } |
2038 | |
2039 | /// Compares corresponding elements in `a` and `b` to see if neither is `NaN`. |
2040 | /// |
2041 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_pd) |
2042 | #[inline ] |
2043 | #[target_feature (enable = "sse2" )] |
2044 | #[cfg_attr (test, assert_instr(cmpordpd))] |
2045 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2046 | pub unsafe fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d { |
cmppd(a, b, 7)
2048 | } |
2049 | |
2050 | /// Compares corresponding elements in `a` and `b` to see if either is `NaN`. |
2051 | /// |
2052 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_pd) |
2053 | #[inline ] |
2054 | #[target_feature (enable = "sse2" )] |
2055 | #[cfg_attr (test, assert_instr(cmpunordpd))] |
2056 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2057 | pub unsafe fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d { |
cmppd(a, b, 3)
2059 | } |
2060 | |
2061 | /// Compares corresponding elements in `a` and `b` for not-equal. |
2062 | /// |
2063 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_pd) |
2064 | #[inline ] |
2065 | #[target_feature (enable = "sse2" )] |
2066 | #[cfg_attr (test, assert_instr(cmpneqpd))] |
2067 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2068 | pub unsafe fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d { |
cmppd(a, b, 4)
2070 | } |
2071 | |
2072 | /// Compares corresponding elements in `a` and `b` for not-less-than. |
2073 | /// |
2074 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_pd) |
2075 | #[inline ] |
2076 | #[target_feature (enable = "sse2" )] |
2077 | #[cfg_attr (test, assert_instr(cmpnltpd))] |
2078 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2079 | pub unsafe fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d { |
cmppd(a, b, 5)
2081 | } |
2082 | |
2083 | /// Compares corresponding elements in `a` and `b` for not-less-than-or-equal. |
2084 | /// |
2085 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_pd) |
2086 | #[inline ] |
2087 | #[target_feature (enable = "sse2" )] |
2088 | #[cfg_attr (test, assert_instr(cmpnlepd))] |
2089 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2090 | pub unsafe fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d { |
cmppd(a, b, 6)
2092 | } |
2093 | |
2094 | /// Compares corresponding elements in `a` and `b` for not-greater-than. |
2095 | /// |
2096 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_pd) |
2097 | #[inline ] |
2098 | #[target_feature (enable = "sse2" )] |
2099 | #[cfg_attr (test, assert_instr(cmpnltpd))] |
2100 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2101 | pub unsafe fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d { |
_mm_cmpnlt_pd(b, a)
2103 | } |
2104 | |
2105 | /// Compares corresponding elements in `a` and `b` for |
2106 | /// not-greater-than-or-equal. |
2107 | /// |
2108 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_pd) |
2109 | #[inline ] |
2110 | #[target_feature (enable = "sse2" )] |
2111 | #[cfg_attr (test, assert_instr(cmpnlepd))] |
2112 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2113 | pub unsafe fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d { |
_mm_cmpnle_pd(b, a)
2115 | } |
2116 | |
2117 | /// Compares the lower element of `a` and `b` for equality. |
2118 | /// |
2119 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comieq_sd) |
2120 | #[inline ] |
2121 | #[target_feature (enable = "sse2" )] |
2122 | #[cfg_attr (test, assert_instr(comisd))] |
2123 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2124 | pub unsafe fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 { |
2125 | comieqsd(a, b) |
2126 | } |
2127 | |
2128 | /// Compares the lower element of `a` and `b` for less-than. |
2129 | /// |
2130 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comilt_sd) |
2131 | #[inline ] |
2132 | #[target_feature (enable = "sse2" )] |
2133 | #[cfg_attr (test, assert_instr(comisd))] |
2134 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2135 | pub unsafe fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 { |
2136 | comiltsd(a, b) |
2137 | } |
2138 | |
2139 | /// Compares the lower element of `a` and `b` for less-than-or-equal. |
2140 | /// |
2141 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comile_sd) |
2142 | #[inline ] |
2143 | #[target_feature (enable = "sse2" )] |
2144 | #[cfg_attr (test, assert_instr(comisd))] |
2145 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2146 | pub unsafe fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 { |
2147 | comilesd(a, b) |
2148 | } |
2149 | |
2150 | /// Compares the lower element of `a` and `b` for greater-than. |
2151 | /// |
2152 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comigt_sd) |
2153 | #[inline ] |
2154 | #[target_feature (enable = "sse2" )] |
2155 | #[cfg_attr (test, assert_instr(comisd))] |
2156 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2157 | pub unsafe fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 { |
2158 | comigtsd(a, b) |
2159 | } |
2160 | |
2161 | /// Compares the lower element of `a` and `b` for greater-than-or-equal. |
2162 | /// |
2163 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comige_sd) |
2164 | #[inline ] |
2165 | #[target_feature (enable = "sse2" )] |
2166 | #[cfg_attr (test, assert_instr(comisd))] |
2167 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2168 | pub unsafe fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 { |
2169 | comigesd(a, b) |
2170 | } |
2171 | |
2172 | /// Compares the lower element of `a` and `b` for not-equal. |
2173 | /// |
2174 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comineq_sd) |
2175 | #[inline ] |
2176 | #[target_feature (enable = "sse2" )] |
2177 | #[cfg_attr (test, assert_instr(comisd))] |
2178 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2179 | pub unsafe fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 { |
2180 | comineqsd(a, b) |
2181 | } |
2182 | |
2183 | /// Compares the lower element of `a` and `b` for equality. |
2184 | /// |
2185 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomieq_sd) |
2186 | #[inline ] |
2187 | #[target_feature (enable = "sse2" )] |
2188 | #[cfg_attr (test, assert_instr(ucomisd))] |
2189 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2190 | pub unsafe fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 { |
2191 | ucomieqsd(a, b) |
2192 | } |
2193 | |
2194 | /// Compares the lower element of `a` and `b` for less-than. |
2195 | /// |
2196 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomilt_sd) |
2197 | #[inline ] |
2198 | #[target_feature (enable = "sse2" )] |
2199 | #[cfg_attr (test, assert_instr(ucomisd))] |
2200 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2201 | pub unsafe fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 { |
2202 | ucomiltsd(a, b) |
2203 | } |
2204 | |
2205 | /// Compares the lower element of `a` and `b` for less-than-or-equal. |
2206 | /// |
2207 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomile_sd) |
2208 | #[inline ] |
2209 | #[target_feature (enable = "sse2" )] |
2210 | #[cfg_attr (test, assert_instr(ucomisd))] |
2211 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2212 | pub unsafe fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 { |
2213 | ucomilesd(a, b) |
2214 | } |
2215 | |
2216 | /// Compares the lower element of `a` and `b` for greater-than. |
2217 | /// |
2218 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomigt_sd) |
2219 | #[inline ] |
2220 | #[target_feature (enable = "sse2" )] |
2221 | #[cfg_attr (test, assert_instr(ucomisd))] |
2222 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2223 | pub unsafe fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 { |
2224 | ucomigtsd(a, b) |
2225 | } |
2226 | |
2227 | /// Compares the lower element of `a` and `b` for greater-than-or-equal. |
2228 | /// |
2229 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomige_sd) |
2230 | #[inline ] |
2231 | #[target_feature (enable = "sse2" )] |
2232 | #[cfg_attr (test, assert_instr(ucomisd))] |
2233 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2234 | pub unsafe fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 { |
2235 | ucomigesd(a, b) |
2236 | } |
2237 | |
2238 | /// Compares the lower element of `a` and `b` for not-equal. |
2239 | /// |
2240 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomineq_sd) |
2241 | #[inline ] |
2242 | #[target_feature (enable = "sse2" )] |
2243 | #[cfg_attr (test, assert_instr(ucomisd))] |
2244 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2245 | pub unsafe fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 { |
2246 | ucomineqsd(a, b) |
2247 | } |
2248 | |
2249 | /// Converts packed double-precision (64-bit) floating-point elements in `a` to |
/// packed single-precision (32-bit) floating-point elements.
2251 | /// |
2252 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_ps) |
2253 | #[inline ] |
2254 | #[target_feature (enable = "sse2" )] |
2255 | #[cfg_attr (test, assert_instr(cvtpd2ps))] |
2256 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2257 | pub unsafe fn _mm_cvtpd_ps(a: __m128d) -> __m128 { |
2258 | let r: f32x2 = simd_cast::<_, f32x2>(a.as_f64x2()); |
let zero: f32x2 = f32x2::new(0.0, 0.0);
transmute::<f32x4, _>(simd_shuffle!(r, zero, [0, 1, 2, 3]))
2261 | } |
2262 | |
2263 | /// Converts packed single-precision (32-bit) floating-point elements in `a` to |
/// packed double-precision (64-bit) floating-point elements.
2266 | /// |
2267 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_pd) |
2268 | #[inline ] |
2269 | #[target_feature (enable = "sse2" )] |
2270 | #[cfg_attr (test, assert_instr(cvtps2pd))] |
2271 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2272 | pub unsafe fn _mm_cvtps_pd(a: __m128) -> __m128d { |
2273 | let a: f32x4 = a.as_f32x4(); |
transmute(simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
2275 | } |
2276 | |
2277 | /// Converts packed double-precision (64-bit) floating-point elements in `a` to |
2278 | /// packed 32-bit integers. |
2279 | /// |
2280 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epi32) |
2281 | #[inline ] |
2282 | #[target_feature (enable = "sse2" )] |
2283 | #[cfg_attr (test, assert_instr(cvtpd2dq))] |
2284 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2285 | pub unsafe fn _mm_cvtpd_epi32(a: __m128d) -> __m128i { |
transmute(cvtpd2dq(a))
2287 | } |
2288 | |
/// Converts the lower double-precision (64-bit) floating-point element in `a` to
2290 | /// a 32-bit integer. |
2291 | /// |
2292 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_si32) |
2293 | #[inline ] |
2294 | #[target_feature (enable = "sse2" )] |
2295 | #[cfg_attr (test, assert_instr(cvtsd2si))] |
2296 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2297 | pub unsafe fn _mm_cvtsd_si32(a: __m128d) -> i32 { |
2298 | cvtsd2si(a) |
2299 | } |
2300 | |
2301 | /// Converts the lower double-precision (64-bit) floating-point element in `b` |
/// to a single-precision (32-bit) floating-point element, stores the result in
/// the lower element of the return value, and copies the upper elements from `a`
/// to the upper elements of the return value.
2305 | /// |
2306 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_ss) |
2307 | #[inline ] |
2308 | #[target_feature (enable = "sse2" )] |
2309 | #[cfg_attr (test, assert_instr(cvtsd2ss))] |
2310 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2311 | pub unsafe fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 { |
2312 | cvtsd2ss(a, b) |
2313 | } |
2314 | |
2315 | /// Returns the lower double-precision (64-bit) floating-point element of `a`. |
2316 | /// |
2317 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_f64) |
2318 | #[inline ] |
2319 | #[target_feature (enable = "sse2" )] |
2320 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2321 | pub unsafe fn _mm_cvtsd_f64(a: __m128d) -> f64 { |
simd_extract(a, 0)
2323 | } |
2324 | |
2325 | /// Converts the lower single-precision (32-bit) floating-point element in `b` |
/// to a double-precision (64-bit) floating-point element, stores the result in
/// the lower element of the return value, and copies the upper element from `a`
/// to the upper element of the return value.
2329 | /// |
2330 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_sd) |
2331 | #[inline ] |
2332 | #[target_feature (enable = "sse2" )] |
2333 | #[cfg_attr (test, assert_instr(cvtss2sd))] |
2334 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2335 | pub unsafe fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d { |
2336 | cvtss2sd(a, b) |
2337 | } |
2338 | |
2339 | /// Converts packed double-precision (64-bit) floating-point elements in `a` to |
2340 | /// packed 32-bit integers with truncation. |
2341 | /// |
2342 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epi32) |
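///
/// A minimal sketch contrasting truncation with `_mm_cvtpd_epi32`, assuming
/// an `x86_64` target (where SSE2 is always available) and the default
/// round-to-nearest-even rounding mode:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// unsafe {
///     use std::arch::x86_64::*;
///     let a = _mm_set_pd(-2.7, 1.9);
///     let mut out = [0i32; 4];
///     // Truncation rounds toward zero; the two upper lanes are zeroed.
///     _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, _mm_cvttpd_epi32(a));
///     assert_eq!(out, [1, -2, 0, 0]);
///     // The non-truncating variant rounds to nearest instead.
///     _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, _mm_cvtpd_epi32(a));
///     assert_eq!(out, [2, -3, 0, 0]);
/// }
/// ```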
2343 | #[inline ] |
2344 | #[target_feature (enable = "sse2" )] |
2345 | #[cfg_attr (test, assert_instr(cvttpd2dq))] |
2346 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2347 | pub unsafe fn _mm_cvttpd_epi32(a: __m128d) -> __m128i { |
transmute(cvttpd2dq(a))
2349 | } |
2350 | |
2351 | /// Converts the lower double-precision (64-bit) floating-point element in `a` |
2352 | /// to a 32-bit integer with truncation. |
2353 | /// |
2354 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_si32) |
2355 | #[inline ] |
2356 | #[target_feature (enable = "sse2" )] |
2357 | #[cfg_attr (test, assert_instr(cvttsd2si))] |
2358 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2359 | pub unsafe fn _mm_cvttsd_si32(a: __m128d) -> i32 { |
2360 | cvttsd2si(a) |
2361 | } |
2362 | |
2363 | /// Converts packed single-precision (32-bit) floating-point elements in `a` to |
2364 | /// packed 32-bit integers with truncation. |
2365 | /// |
2366 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi32) |
2367 | #[inline ] |
2368 | #[target_feature (enable = "sse2" )] |
2369 | #[cfg_attr (test, assert_instr(cvttps2dq))] |
2370 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2371 | pub unsafe fn _mm_cvttps_epi32(a: __m128) -> __m128i { |
transmute(cvttps2dq(a))
2373 | } |
2374 | |
/// Copies double-precision (64-bit) floating-point element `a` to the lower
/// element of the return value, and zeroes the upper element.
2377 | /// |
2378 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_sd) |
2379 | #[inline ] |
2380 | #[target_feature (enable = "sse2" )] |
2381 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2382 | pub unsafe fn _mm_set_sd(a: f64) -> __m128d { |
_mm_set_pd(0.0, a)
2384 | } |
2385 | |
/// Broadcasts double-precision (64-bit) floating-point value `a` to all elements
2387 | /// of the return value. |
2388 | /// |
2389 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_pd) |
2390 | #[inline ] |
2391 | #[target_feature (enable = "sse2" )] |
2392 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2393 | pub unsafe fn _mm_set1_pd(a: f64) -> __m128d { |
_mm_set_pd(a, a)
2395 | } |
2396 | |
/// Broadcasts double-precision (64-bit) floating-point value `a` to all elements
2398 | /// of the return value. |
2399 | /// |
2400 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd1) |
2401 | #[inline ] |
2402 | #[target_feature (enable = "sse2" )] |
2403 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2404 | pub unsafe fn _mm_set_pd1(a: f64) -> __m128d { |
_mm_set_pd(a, a)
2406 | } |
2407 | |
2408 | /// Sets packed double-precision (64-bit) floating-point elements in the return |
2409 | /// value with the supplied values. |
2410 | /// |
2411 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd) |
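///
/// A minimal sketch of the argument order, assuming an `x86_64` target
/// (where SSE2 is always available): the first argument becomes the *high*
/// lane, so the lanes appear reversed when stored to memory:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// unsafe {
///     use std::arch::x86_64::*;
///     let v = _mm_set_pd(2.0, 1.0);
///     let mut out = [0.0f64; 2];
///     _mm_storeu_pd(out.as_mut_ptr(), v);
///     assert_eq!(out, [1.0, 2.0]); // low lane first in memory
/// }
/// ```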
2412 | #[inline ] |
2413 | #[target_feature (enable = "sse2" )] |
2414 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2415 | pub unsafe fn _mm_set_pd(a: f64, b: f64) -> __m128d { |
2416 | __m128d(b, a) |
2417 | } |
2418 | |
2419 | /// Sets packed double-precision (64-bit) floating-point elements in the return |
2420 | /// value with the supplied values in reverse order. |
2421 | /// |
2422 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_pd) |
2423 | #[inline ] |
2424 | #[target_feature (enable = "sse2" )] |
2425 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2426 | pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> __m128d { |
_mm_set_pd(b, a)
2428 | } |
2429 | |
2430 | /// Returns packed double-precision (64-bit) floating-point elements with all |
2431 | /// zeros. |
2432 | /// |
2433 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_pd) |
2434 | #[inline ] |
2435 | #[target_feature (enable = "sse2" )] |
2436 | #[cfg_attr (test, assert_instr(xorps))] // FIXME xorpd expected |
2437 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2438 | pub unsafe fn _mm_setzero_pd() -> __m128d { |
_mm_set_pd(0.0, 0.0)
2440 | } |
2441 | |
2442 | /// Returns a mask of the most significant bit of each element in `a`. |
2443 | /// |
2444 | /// The mask is stored in the 2 least significant bits of the return value. |
2445 | /// All other bits are set to `0`. |
2446 | /// |
2447 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_pd) |
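///
/// A minimal sketch of the mask layout (assumes an x86_64 target, `std`, and
/// runtime SSE2 detection): bit 0 holds the sign of the low element, bit 1
/// the sign of the high element.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let v = _mm_setr_pd(-2.0, 1.0); // low = -2.0, high = 1.0
///             assert_eq!(_mm_movemask_pd(v), 0b01);
///         }
///     }
/// }
/// ```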
2448 | #[inline ] |
2449 | #[target_feature (enable = "sse2" )] |
2450 | #[cfg_attr (test, assert_instr(movmskpd))] |
2451 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2452 | pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 { |
2453 | // Propagate the highest bit to the rest, because simd_bitmask |
2454 | // requires all-1 or all-0. |
let mask: i64x2 = simd_lt(transmute(a), i64x2::splat(0));
2456 | simd_bitmask::<i64x2, u8>(mask).into() |
2457 | } |
2458 | |
2459 | /// Loads 128-bits (composed of 2 packed double-precision (64-bit) |
2460 | /// floating-point elements) from memory into the returned vector. |
2461 | /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection |
2462 | /// exception may be generated. |
2463 | /// |
2464 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd) |
2465 | #[inline ] |
2466 | #[target_feature (enable = "sse2" )] |
2467 | #[cfg_attr (test, assert_instr(movaps))] |
2468 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2469 | #[allow (clippy::cast_ptr_alignment)] |
2470 | pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d { |
2471 | *(mem_addr as *const __m128d) |
2472 | } |
2473 | |
/// Loads a 64-bit double-precision value to the low element of a
/// 128-bit vector of `[2 x double]` and clears the upper element.
2476 | /// |
2477 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_sd) |
2478 | #[inline ] |
2479 | #[target_feature (enable = "sse2" )] |
2480 | #[cfg_attr (test, assert_instr(movsd))] |
2481 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2482 | pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d { |
_mm_setr_pd(*mem_addr, 0.)
2484 | } |
2485 | |
2486 | /// Loads a double-precision value into the high-order bits of a 128-bit |
2487 | /// vector of `[2 x double]`. The low-order bits are copied from the low-order |
2488 | /// bits of the first operand. |
2489 | /// |
2490 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadh_pd) |
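///
/// A minimal usage sketch (assumes an x86_64 target, `std`, and runtime SSE2
/// detection): only the high element is replaced by the loaded value.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let a = _mm_setr_pd(1.0, 2.0);
///             let hi = 5.0_f64;
///             let r = _mm_loadh_pd(a, &hi);
///             let mut out = [0.0f64; 2];
///             _mm_storeu_pd(out.as_mut_ptr(), r);
///             assert_eq!(out, [1.0, 5.0]); // low kept from `a`, high loaded
///         }
///     }
/// }
/// ```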
2491 | #[inline ] |
2492 | #[target_feature (enable = "sse2" )] |
2493 | #[cfg_attr (test, assert_instr(movhps))] |
2494 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2495 | pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d { |
_mm_setr_pd(simd_extract(a, 0), *mem_addr)
2497 | } |
2498 | |
2499 | /// Loads a double-precision value into the low-order bits of a 128-bit |
2500 | /// vector of `[2 x double]`. The high-order bits are copied from the |
2501 | /// high-order bits of the first operand. |
2502 | /// |
2503 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_pd) |
2504 | #[inline ] |
2505 | #[target_feature (enable = "sse2" )] |
2506 | #[cfg_attr (test, assert_instr(movlps))] |
2507 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2508 | pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d { |
_mm_setr_pd(*mem_addr, simd_extract(a, 1))
2510 | } |
2511 | |
2512 | /// Stores a 128-bit floating point vector of `[2 x double]` to a 128-bit |
2513 | /// aligned memory location. |
2514 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be |
2515 | /// used again soon). |
2516 | /// |
2517 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_pd) |
2518 | #[inline ] |
2519 | #[target_feature (enable = "sse2" )] |
2520 | #[cfg_attr (test, assert_instr(movntps))] // FIXME movntpd |
2521 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2522 | #[allow (clippy::cast_ptr_alignment)] |
2523 | pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) { |
intrinsics::nontemporal_store(mem_addr as *mut __m128d, a);
2525 | } |
2526 | |
2527 | /// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a |
2528 | /// memory location. |
2529 | /// |
2530 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_sd) |
2531 | #[inline ] |
2532 | #[target_feature (enable = "sse2" )] |
2533 | #[cfg_attr (all(test, not(target_os = "windows" )), assert_instr(movlps))] |
2534 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2535 | pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) { |
*mem_addr = simd_extract(a, 0)
2537 | } |
2538 | |
2539 | /// Stores 128-bits (composed of 2 packed double-precision (64-bit) |
2540 | /// floating-point elements) from `a` into memory. `mem_addr` must be aligned |
2541 | /// on a 16-byte boundary or a general-protection exception may be generated. |
2542 | /// |
2543 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd) |
2544 | #[inline ] |
2545 | #[target_feature (enable = "sse2" )] |
2546 | #[cfg_attr (test, assert_instr(movaps))] |
2547 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2548 | #[allow (clippy::cast_ptr_alignment)] |
2549 | pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) { |
2550 | *(mem_addr as *mut __m128d) = a; |
2551 | } |
2552 | |
2553 | /// Stores 128-bits (composed of 2 packed double-precision (64-bit) |
2554 | /// floating-point elements) from `a` into memory. |
2555 | /// `mem_addr` does not need to be aligned on any particular boundary. |
2556 | /// |
2557 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_pd) |
2558 | #[inline ] |
2559 | #[target_feature (enable = "sse2" )] |
2560 | #[cfg_attr (test, assert_instr(movups))] // FIXME movupd expected |
2561 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2562 | pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) { |
mem_addr.cast::<__m128d>().write_unaligned(a);
2564 | } |
2565 | |
2566 | /// Stores the lower double-precision (64-bit) floating-point element from `a` |
2567 | /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a |
2568 | /// 16-byte boundary or a general-protection exception may be generated. |
2569 | /// |
2570 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store1_pd) |
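///
/// A minimal usage sketch (assumes an x86_64 target, `std`, and runtime SSE2
/// detection); the `Aligned` wrapper is only an illustration of how to meet
/// the 16-byte alignment requirement:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         #[repr(align(16))]
///         struct Aligned([f64; 2]);
///         let mut out = Aligned([0.0; 2]);
///         unsafe {
///             let a = _mm_setr_pd(7.0, 8.0);
///             _mm_store1_pd(out.0.as_mut_ptr(), a);
///         }
///         assert_eq!(out.0, [7.0, 7.0]); // the low element is duplicated
///     }
/// }
/// ```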
2571 | #[inline ] |
2572 | #[target_feature (enable = "sse2" )] |
2573 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2574 | #[allow (clippy::cast_ptr_alignment)] |
2575 | pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) { |
2576 | let b: __m128d = simd_shuffle!(a, a, [0, 0]); |
2577 | *(mem_addr as *mut __m128d) = b; |
2578 | } |
2579 | |
2580 | /// Stores the lower double-precision (64-bit) floating-point element from `a` |
2581 | /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a |
2582 | /// 16-byte boundary or a general-protection exception may be generated. |
2583 | /// |
2584 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd1) |
2585 | #[inline ] |
2586 | #[target_feature (enable = "sse2" )] |
2587 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2588 | #[allow (clippy::cast_ptr_alignment)] |
2589 | pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) { |
2590 | let b: __m128d = simd_shuffle!(a, a, [0, 0]); |
2591 | *(mem_addr as *mut __m128d) = b; |
2592 | } |
2593 | |
2594 | /// Stores 2 double-precision (64-bit) floating-point elements from `a` into |
2595 | /// memory in reverse order. |
2596 | /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection |
2597 | /// exception may be generated. |
2598 | /// |
2599 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storer_pd) |
2600 | #[inline ] |
2601 | #[target_feature (enable = "sse2" )] |
2602 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2603 | #[allow (clippy::cast_ptr_alignment)] |
2604 | pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) { |
2605 | let b: __m128d = simd_shuffle!(a, a, [1, 0]); |
2606 | *(mem_addr as *mut __m128d) = b; |
2607 | } |
2608 | |
2609 | /// Stores the upper 64 bits of a 128-bit vector of `[2 x double]` to a |
2610 | /// memory location. |
2611 | /// |
2612 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeh_pd) |
2613 | #[inline ] |
2614 | #[target_feature (enable = "sse2" )] |
2615 | #[cfg_attr (all(test, not(target_os = "windows" )), assert_instr(movhps))] |
2616 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2617 | pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) { |
*mem_addr = simd_extract(a, 1);
2619 | } |
2620 | |
2621 | /// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a |
2622 | /// memory location. |
2623 | /// |
2624 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_pd) |
2625 | #[inline ] |
2626 | #[target_feature (enable = "sse2" )] |
2627 | #[cfg_attr (all(test, not(target_os = "windows" )), assert_instr(movlps))] |
2628 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2629 | pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) { |
*mem_addr = simd_extract(a, 0);
2631 | } |
2632 | |
2633 | /// Loads a double-precision (64-bit) floating-point element from memory |
2634 | /// into both elements of returned vector. |
2635 | /// |
2636 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load1_pd) |
2637 | #[inline ] |
2638 | #[target_feature (enable = "sse2" )] |
2639 | // #[cfg_attr(test, assert_instr(movapd))] // FIXME LLVM uses different codegen |
2640 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2641 | pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d { |
2642 | let d: f64 = *mem_addr; |
_mm_setr_pd(d, d)
2644 | } |
2645 | |
2646 | /// Loads a double-precision (64-bit) floating-point element from memory |
2647 | /// into both elements of returned vector. |
2648 | /// |
2649 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd1) |
2650 | #[inline ] |
2651 | #[target_feature (enable = "sse2" )] |
2652 | // #[cfg_attr(test, assert_instr(movapd))] // FIXME same as _mm_load1_pd |
2653 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2654 | pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d { |
2655 | _mm_load1_pd(mem_addr) |
2656 | } |
2657 | |
2658 | /// Loads 2 double-precision (64-bit) floating-point elements from memory into |
2659 | /// the returned vector in reverse order. `mem_addr` must be aligned on a |
2660 | /// 16-byte boundary or a general-protection exception may be generated. |
2661 | /// |
2662 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadr_pd) |
2663 | #[inline ] |
2664 | #[target_feature (enable = "sse2" )] |
2665 | #[cfg_attr (test, assert_instr(movaps))] |
2666 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2667 | pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d { |
2668 | let a: __m128d = _mm_load_pd(mem_addr); |
2669 | simd_shuffle!(a, a, [1, 0]) |
2670 | } |
2671 | |
2672 | /// Loads 128-bits (composed of 2 packed double-precision (64-bit) |
2673 | /// floating-point elements) from memory into the returned vector. |
2674 | /// `mem_addr` does not need to be aligned on any particular boundary. |
2675 | /// |
2676 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_pd) |
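///
/// A minimal usage sketch (assumes an x86_64 target, `std`, and runtime SSE2
/// detection); no alignment requirement applies here:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let data = [1.0_f64, 2.0];
///             let v = _mm_loadu_pd(data.as_ptr());
///             let mut out = [0.0f64; 2];
///             _mm_storeu_pd(out.as_mut_ptr(), v);
///             assert_eq!(out, data); // low element from data[0], high from data[1]
///         }
///     }
/// }
/// ```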
2677 | #[inline ] |
2678 | #[target_feature (enable = "sse2" )] |
2679 | #[cfg_attr (test, assert_instr(movups))] |
2680 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2681 | pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d { |
2682 | let mut dst: __m128d = _mm_undefined_pd(); |
2683 | ptr::copy_nonoverlapping( |
mem_addr as *const u8,
&mut dst as *mut __m128d as *mut u8,
mem::size_of::<__m128d>(),
2687 | ); |
2688 | dst |
2689 | } |
2690 | |
2691 | /// Constructs a 128-bit floating-point vector of `[2 x double]` from two |
2692 | /// 128-bit vector parameters of `[2 x double]`, using the immediate-value |
2693 | /// parameter as a specifier. |
2694 | /// |
2695 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_pd) |
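///
/// A minimal sketch of the mask semantics (assumes an x86_64 target, `std`,
/// and runtime SSE2 detection): bit 0 of `MASK` selects the low element of
/// the result from `a`, bit 1 selects the high element from `b`.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let a = _mm_setr_pd(1.0, 2.0);
///             let b = _mm_setr_pd(3.0, 4.0);
///             let r = _mm_shuffle_pd::<0b01>(a, b);
///             let mut out = [0.0f64; 2];
///             _mm_storeu_pd(out.as_mut_ptr(), r);
///             assert_eq!(out, [2.0, 3.0]); // low = a[1], high = b[0]
///         }
///     }
/// }
/// ```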
2696 | #[inline ] |
2697 | #[target_feature (enable = "sse2" )] |
2698 | #[cfg_attr (test, assert_instr(shufps, MASK = 2))] |
2699 | #[rustc_legacy_const_generics (2)] |
2700 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2701 | pub unsafe fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d { |
2702 | static_assert_uimm_bits!(MASK, 8); |
2703 | simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) |
2704 | } |
2705 | |
2706 | /// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower |
2707 | /// 64 bits are set to the lower 64 bits of the second parameter. The upper |
2708 | /// 64 bits are set to the upper 64 bits of the first parameter. |
2709 | /// |
2710 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_sd) |
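///
/// A minimal usage sketch (assumes an x86_64 target, `std`, and runtime SSE2
/// detection):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let a = _mm_setr_pd(1.0, 2.0);
///             let b = _mm_setr_pd(3.0, 4.0);
///             let r = _mm_move_sd(a, b);
///             let mut out = [0.0f64; 2];
///             _mm_storeu_pd(out.as_mut_ptr(), r);
///             assert_eq!(out, [3.0, 2.0]); // low from `b`, high from `a`
///         }
///     }
/// }
/// ```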
2711 | #[inline ] |
2712 | #[target_feature (enable = "sse2" )] |
2713 | #[cfg_attr (test, assert_instr(movsd))] |
2714 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2715 | pub unsafe fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d { |
_mm_setr_pd(simd_extract(b, 0), simd_extract(a, 1))
2717 | } |
2718 | |
2719 | /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit |
2720 | /// floating-point vector of `[4 x float]`. |
2721 | /// |
2722 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_ps) |
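///
/// A minimal sketch showing that the cast is a bitwise reinterpretation, not
/// a numeric conversion (assumes an x86_64 target, `std`, and runtime SSE2
/// detection):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let d = _mm_set1_pd(1.0);
///             let f = _mm_castpd_ps(d);  // reinterpret the 128 bits
///             let d2 = _mm_castps_pd(f); // round trip is bit-identical
///             let (mut x, mut y) = ([0.0f64; 2], [0.0f64; 2]);
///             _mm_storeu_pd(x.as_mut_ptr(), d);
///             _mm_storeu_pd(y.as_mut_ptr(), d2);
///             assert_eq!(x, y);
///         }
///     }
/// }
/// ```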
2723 | #[inline ] |
2724 | #[target_feature (enable = "sse2" )] |
2725 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2726 | pub unsafe fn _mm_castpd_ps(a: __m128d) -> __m128 { |
transmute(a)
2728 | } |
2729 | |
2730 | /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit |
2731 | /// integer vector. |
2732 | /// |
2733 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_si128) |
2734 | #[inline ] |
2735 | #[target_feature (enable = "sse2" )] |
2736 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2737 | pub unsafe fn _mm_castpd_si128(a: __m128d) -> __m128i { |
transmute(a)
2739 | } |
2740 | |
2741 | /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit |
2742 | /// floating-point vector of `[2 x double]`. |
2743 | /// |
2744 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_pd) |
2745 | #[inline ] |
2746 | #[target_feature (enable = "sse2" )] |
2747 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2748 | pub unsafe fn _mm_castps_pd(a: __m128) -> __m128d { |
transmute(a)
2750 | } |
2751 | |
2752 | /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit |
2753 | /// integer vector. |
2754 | /// |
2755 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_si128) |
2756 | #[inline ] |
2757 | #[target_feature (enable = "sse2" )] |
2758 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2759 | pub unsafe fn _mm_castps_si128(a: __m128) -> __m128i { |
transmute(a)
2761 | } |
2762 | |
2763 | /// Casts a 128-bit integer vector into a 128-bit floating-point vector |
2764 | /// of `[2 x double]`. |
2765 | /// |
2766 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_pd) |
2767 | #[inline ] |
2768 | #[target_feature (enable = "sse2" )] |
2769 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2770 | pub unsafe fn _mm_castsi128_pd(a: __m128i) -> __m128d { |
transmute(a)
2772 | } |
2773 | |
2774 | /// Casts a 128-bit integer vector into a 128-bit floating-point vector |
2775 | /// of `[4 x float]`. |
2776 | /// |
2777 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_ps) |
2778 | #[inline ] |
2779 | #[target_feature (enable = "sse2" )] |
2780 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2781 | pub unsafe fn _mm_castsi128_ps(a: __m128i) -> __m128 { |
transmute(a)
2783 | } |
2784 | |
/// Returns a vector of type `__m128d` with indeterminate elements.
2786 | /// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`]. |
2787 | /// In practice, this is equivalent to [`mem::zeroed`]. |
2788 | /// |
2789 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_pd) |
2790 | #[inline ] |
2791 | #[target_feature (enable = "sse2" )] |
2792 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2793 | pub unsafe fn _mm_undefined_pd() -> __m128d { |
2794 | __m128d(0.0, 0.0) |
2795 | } |
2796 | |
/// Returns a vector of type `__m128i` with indeterminate elements.
2798 | /// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`]. |
2799 | /// In practice, this is equivalent to [`mem::zeroed`]. |
2800 | /// |
2801 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_si128) |
2802 | #[inline ] |
2803 | #[target_feature (enable = "sse2" )] |
2804 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2805 | pub unsafe fn _mm_undefined_si128() -> __m128i { |
2806 | __m128i(0, 0) |
2807 | } |
2808 | |
/// The resulting `__m128d` element is composed of the high-order values of
/// the two `__m128d` interleaved input elements, i.e.:
///
/// * The `[127:64]` bits are copied from the `[127:64]` bits of the second
///   input
/// * The `[63:0]` bits are copied from the `[127:64]` bits of the first
///   input
2815 | /// |
2816 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_pd) |
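///
/// A minimal usage sketch (assumes an x86_64 target, `std`, and runtime SSE2
/// detection):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let a = _mm_setr_pd(1.0, 2.0);
///             let b = _mm_setr_pd(3.0, 4.0);
///             let r = _mm_unpackhi_pd(a, b);
///             let mut out = [0.0f64; 2];
///             _mm_storeu_pd(out.as_mut_ptr(), r);
///             assert_eq!(out, [2.0, 4.0]); // the two high elements
///         }
///     }
/// }
/// ```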
2817 | #[inline ] |
2818 | #[target_feature (enable = "sse2" )] |
2819 | #[cfg_attr (test, assert_instr(unpckhpd))] |
2820 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2821 | pub unsafe fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d { |
2822 | simd_shuffle!(a, b, [1, 3]) |
2823 | } |
2824 | |
/// The resulting `__m128d` element is composed of the low-order values of
/// the two `__m128d` interleaved input elements, i.e.:
2827 | /// |
2828 | /// * The `[127:64]` bits are copied from the `[63:0]` bits of the second input |
2829 | /// * The `[63:0]` bits are copied from the `[63:0]` bits of the first input |
2830 | /// |
2831 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_pd) |
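///
/// A minimal usage sketch, the counterpart of the `_mm_unpackhi_pd` example
/// above (assumes an x86_64 target, `std`, and runtime SSE2 detection):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("sse2") {
///         unsafe {
///             let a = _mm_setr_pd(1.0, 2.0);
///             let b = _mm_setr_pd(3.0, 4.0);
///             let r = _mm_unpacklo_pd(a, b);
///             let mut out = [0.0f64; 2];
///             _mm_storeu_pd(out.as_mut_ptr(), r);
///             assert_eq!(out, [1.0, 3.0]); // the two low elements
///         }
///     }
/// }
/// ```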
2832 | #[inline ] |
2833 | #[target_feature (enable = "sse2" )] |
2834 | #[cfg_attr (all(test, not(target_os = "windows" )), assert_instr(movlhps))] |
2835 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2836 | pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d { |
2837 | simd_shuffle!(a, b, [0, 2]) |
2838 | } |
2839 | |
2840 | #[allow (improper_ctypes)] |
2841 | extern "C" { |
2842 | #[link_name = "llvm.x86.sse2.pause" ] |
2843 | fn pause(); |
2844 | #[link_name = "llvm.x86.sse2.clflush" ] |
2845 | fn clflush(p: *const u8); |
2846 | #[link_name = "llvm.x86.sse2.lfence" ] |
2847 | fn lfence(); |
2848 | #[link_name = "llvm.x86.sse2.mfence" ] |
2849 | fn mfence(); |
2850 | #[link_name = "llvm.x86.sse2.pmadd.wd" ] |
2851 | fn pmaddwd(a: i16x8, b: i16x8) -> i32x4; |
2852 | #[link_name = "llvm.x86.sse2.psad.bw" ] |
2853 | fn psadbw(a: u8x16, b: u8x16) -> u64x2; |
2854 | #[link_name = "llvm.x86.sse2.psll.w" ] |
2855 | fn psllw(a: i16x8, count: i16x8) -> i16x8; |
2856 | #[link_name = "llvm.x86.sse2.psll.d" ] |
2857 | fn pslld(a: i32x4, count: i32x4) -> i32x4; |
2858 | #[link_name = "llvm.x86.sse2.psll.q" ] |
2859 | fn psllq(a: i64x2, count: i64x2) -> i64x2; |
2860 | #[link_name = "llvm.x86.sse2.psra.w" ] |
2861 | fn psraw(a: i16x8, count: i16x8) -> i16x8; |
2862 | #[link_name = "llvm.x86.sse2.psra.d" ] |
2863 | fn psrad(a: i32x4, count: i32x4) -> i32x4; |
2864 | #[link_name = "llvm.x86.sse2.psrl.w" ] |
2865 | fn psrlw(a: i16x8, count: i16x8) -> i16x8; |
2866 | #[link_name = "llvm.x86.sse2.psrl.d" ] |
2867 | fn psrld(a: i32x4, count: i32x4) -> i32x4; |
2868 | #[link_name = "llvm.x86.sse2.psrl.q" ] |
2869 | fn psrlq(a: i64x2, count: i64x2) -> i64x2; |
2870 | #[link_name = "llvm.x86.sse2.cvtps2dq" ] |
2871 | fn cvtps2dq(a: __m128) -> i32x4; |
2872 | #[link_name = "llvm.x86.sse2.maskmov.dqu" ] |
2873 | fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8); |
2874 | #[link_name = "llvm.x86.sse2.packsswb.128" ] |
2875 | fn packsswb(a: i16x8, b: i16x8) -> i8x16; |
2876 | #[link_name = "llvm.x86.sse2.packssdw.128" ] |
2877 | fn packssdw(a: i32x4, b: i32x4) -> i16x8; |
2878 | #[link_name = "llvm.x86.sse2.packuswb.128" ] |
2879 | fn packuswb(a: i16x8, b: i16x8) -> u8x16; |
2880 | #[link_name = "llvm.x86.sse2.max.sd" ] |
2881 | fn maxsd(a: __m128d, b: __m128d) -> __m128d; |
2882 | #[link_name = "llvm.x86.sse2.max.pd" ] |
2883 | fn maxpd(a: __m128d, b: __m128d) -> __m128d; |
2884 | #[link_name = "llvm.x86.sse2.min.sd" ] |
2885 | fn minsd(a: __m128d, b: __m128d) -> __m128d; |
2886 | #[link_name = "llvm.x86.sse2.min.pd" ] |
2887 | fn minpd(a: __m128d, b: __m128d) -> __m128d; |
2888 | #[link_name = "llvm.x86.sse2.sqrt.sd" ] |
2889 | fn sqrtsd(a: __m128d) -> __m128d; |
2890 | #[link_name = "llvm.x86.sse2.sqrt.pd" ] |
2891 | fn sqrtpd(a: __m128d) -> __m128d; |
2892 | #[link_name = "llvm.x86.sse2.cmp.sd" ] |
2893 | fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d; |
2894 | #[link_name = "llvm.x86.sse2.cmp.pd" ] |
2895 | fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d; |
2896 | #[link_name = "llvm.x86.sse2.comieq.sd" ] |
2897 | fn comieqsd(a: __m128d, b: __m128d) -> i32; |
2898 | #[link_name = "llvm.x86.sse2.comilt.sd" ] |
2899 | fn comiltsd(a: __m128d, b: __m128d) -> i32; |
2900 | #[link_name = "llvm.x86.sse2.comile.sd" ] |
2901 | fn comilesd(a: __m128d, b: __m128d) -> i32; |
2902 | #[link_name = "llvm.x86.sse2.comigt.sd" ] |
2903 | fn comigtsd(a: __m128d, b: __m128d) -> i32; |
2904 | #[link_name = "llvm.x86.sse2.comige.sd" ] |
2905 | fn comigesd(a: __m128d, b: __m128d) -> i32; |
2906 | #[link_name = "llvm.x86.sse2.comineq.sd" ] |
2907 | fn comineqsd(a: __m128d, b: __m128d) -> i32; |
2908 | #[link_name = "llvm.x86.sse2.ucomieq.sd" ] |
2909 | fn ucomieqsd(a: __m128d, b: __m128d) -> i32; |
2910 | #[link_name = "llvm.x86.sse2.ucomilt.sd" ] |
2911 | fn ucomiltsd(a: __m128d, b: __m128d) -> i32; |
2912 | #[link_name = "llvm.x86.sse2.ucomile.sd" ] |
2913 | fn ucomilesd(a: __m128d, b: __m128d) -> i32; |
2914 | #[link_name = "llvm.x86.sse2.ucomigt.sd" ] |
2915 | fn ucomigtsd(a: __m128d, b: __m128d) -> i32; |
2916 | #[link_name = "llvm.x86.sse2.ucomige.sd" ] |
2917 | fn ucomigesd(a: __m128d, b: __m128d) -> i32; |
2918 | #[link_name = "llvm.x86.sse2.ucomineq.sd" ] |
2919 | fn ucomineqsd(a: __m128d, b: __m128d) -> i32; |
2920 | #[link_name = "llvm.x86.sse2.cvtpd2dq" ] |
2921 | fn cvtpd2dq(a: __m128d) -> i32x4; |
2922 | #[link_name = "llvm.x86.sse2.cvtsd2si" ] |
2923 | fn cvtsd2si(a: __m128d) -> i32; |
2924 | #[link_name = "llvm.x86.sse2.cvtsd2ss" ] |
2925 | fn cvtsd2ss(a: __m128, b: __m128d) -> __m128; |
2926 | #[link_name = "llvm.x86.sse2.cvtss2sd" ] |
2927 | fn cvtss2sd(a: __m128d, b: __m128) -> __m128d; |
2928 | #[link_name = "llvm.x86.sse2.cvttpd2dq" ] |
2929 | fn cvttpd2dq(a: __m128d) -> i32x4; |
2930 | #[link_name = "llvm.x86.sse2.cvttsd2si" ] |
2931 | fn cvttsd2si(a: __m128d) -> i32; |
2932 | #[link_name = "llvm.x86.sse2.cvttps2dq" ] |
2933 | fn cvttps2dq(a: __m128) -> i32x4; |
2934 | } |
2935 | |
2936 | #[cfg (test)] |
2937 | mod tests { |
2938 | use crate::{ |
2939 | core_arch::{simd::*, x86::*}, |
2940 | hint::black_box, |
2941 | }; |
2942 | use std::{ |
2943 | boxed, f32, |
2944 | f64::{self, NAN}, |
2945 | i32, |
2946 | mem::{self, transmute}, |
2947 | }; |
2948 | use stdarch_test::simd_test; |
2949 | |
2950 | #[test ] |
2951 | fn test_mm_pause() { |
2952 | unsafe { _mm_pause() } |
2953 | } |
2954 | |
2955 | #[simd_test(enable = "sse2" )] |
2956 | unsafe fn test_mm_clflush() { |
2957 | let x = 0_u8; |
2958 | _mm_clflush(&x as *const _); |
2959 | } |
2960 | |
2961 | #[simd_test(enable = "sse2" )] |
2962 | // Miri cannot support this until it is clear how it fits in the Rust memory model |
2963 | #[cfg_attr (miri, ignore)] |
2964 | unsafe fn test_mm_lfence() { |
2965 | _mm_lfence(); |
2966 | } |
2967 | |
2968 | #[simd_test(enable = "sse2" )] |
2969 | // Miri cannot support this until it is clear how it fits in the Rust memory model |
2970 | #[cfg_attr (miri, ignore)] |
2971 | unsafe fn test_mm_mfence() { |
2972 | _mm_mfence(); |
2973 | } |
2974 | |
2975 | #[simd_test(enable = "sse2" )] |
2976 | unsafe fn test_mm_add_epi8() { |
2977 | let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
2978 | #[rustfmt::skip] |
2979 | let b = _mm_setr_epi8( |
2980 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
2981 | ); |
2982 | let r = _mm_add_epi8(a, b); |
2983 | #[rustfmt::skip] |
2984 | let e = _mm_setr_epi8( |
2985 | 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, |
2986 | ); |
2987 | assert_eq_m128i(r, e); |
2988 | } |
2989 | |
2990 | #[simd_test(enable = "sse2" )] |
2991 | unsafe fn test_mm_add_epi8_overflow() { |
2992 | let a = _mm_set1_epi8(0x7F); |
2993 | let b = _mm_set1_epi8(1); |
2994 | let r = _mm_add_epi8(a, b); |
2995 | assert_eq_m128i(r, _mm_set1_epi8(-128)); |
2996 | } |
2997 | |
2998 | #[simd_test(enable = "sse2" )] |
2999 | unsafe fn test_mm_add_epi16() { |
3000 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
3001 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
3002 | let r = _mm_add_epi16(a, b); |
3003 | let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); |
3004 | assert_eq_m128i(r, e); |
3005 | } |
3006 | |
3007 | #[simd_test(enable = "sse2" )] |
3008 | unsafe fn test_mm_add_epi32() { |
3009 | let a = _mm_setr_epi32(0, 1, 2, 3); |
3010 | let b = _mm_setr_epi32(4, 5, 6, 7); |
3011 | let r = _mm_add_epi32(a, b); |
3012 | let e = _mm_setr_epi32(4, 6, 8, 10); |
3013 | assert_eq_m128i(r, e); |
3014 | } |
3015 | |
3016 | #[simd_test(enable = "sse2" )] |
3017 | unsafe fn test_mm_add_epi64() { |
3018 | let a = _mm_setr_epi64x(0, 1); |
3019 | let b = _mm_setr_epi64x(2, 3); |
3020 | let r = _mm_add_epi64(a, b); |
3021 | let e = _mm_setr_epi64x(2, 4); |
3022 | assert_eq_m128i(r, e); |
3023 | } |
3024 | |
3025 | #[simd_test(enable = "sse2" )] |
3026 | unsafe fn test_mm_adds_epi8() { |
3027 | let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3028 | #[rustfmt::skip] |
3029 | let b = _mm_setr_epi8( |
3030 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
3031 | ); |
3032 | let r = _mm_adds_epi8(a, b); |
3033 | #[rustfmt::skip] |
3034 | let e = _mm_setr_epi8( |
3035 | 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, |
3036 | ); |
3037 | assert_eq_m128i(r, e); |
3038 | } |
3039 | |
3040 | #[simd_test(enable = "sse2" )] |
3041 | unsafe fn test_mm_adds_epi8_saturate_positive() { |
3042 | let a = _mm_set1_epi8(0x7F); |
3043 | let b = _mm_set1_epi8(1); |
3044 | let r = _mm_adds_epi8(a, b); |
3045 | assert_eq_m128i(r, a); |
3046 | } |
3047 | |
3048 | #[simd_test(enable = "sse2" )] |
3049 | unsafe fn test_mm_adds_epi8_saturate_negative() { |
3050 | let a = _mm_set1_epi8(-0x80); |
3051 | let b = _mm_set1_epi8(-1); |
3052 | let r = _mm_adds_epi8(a, b); |
3053 | assert_eq_m128i(r, a); |
3054 | } |
3055 | |
3056 | #[simd_test(enable = "sse2" )] |
3057 | unsafe fn test_mm_adds_epi16() { |
3058 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
3059 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
3060 | let r = _mm_adds_epi16(a, b); |
3061 | let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); |
3062 | assert_eq_m128i(r, e); |
3063 | } |
3064 | |
3065 | #[simd_test(enable = "sse2" )] |
3066 | unsafe fn test_mm_adds_epi16_saturate_positive() { |
3067 | let a = _mm_set1_epi16(0x7FFF); |
3068 | let b = _mm_set1_epi16(1); |
3069 | let r = _mm_adds_epi16(a, b); |
3070 | assert_eq_m128i(r, a); |
3071 | } |
3072 | |
3073 | #[simd_test(enable = "sse2" )] |
3074 | unsafe fn test_mm_adds_epi16_saturate_negative() { |
3075 | let a = _mm_set1_epi16(-0x8000); |
3076 | let b = _mm_set1_epi16(-1); |
3077 | let r = _mm_adds_epi16(a, b); |
3078 | assert_eq_m128i(r, a); |
3079 | } |
3080 | |
3081 | #[simd_test(enable = "sse2" )] |
3082 | unsafe fn test_mm_adds_epu8() { |
3083 | let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3084 | #[rustfmt::skip] |
3085 | let b = _mm_setr_epi8( |
3086 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
3087 | ); |
3088 | let r = _mm_adds_epu8(a, b); |
3089 | #[rustfmt::skip] |
3090 | let e = _mm_setr_epi8( |
3091 | 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, |
3092 | ); |
3093 | assert_eq_m128i(r, e); |
3094 | } |
3095 | |
3096 | #[simd_test(enable = "sse2" )] |
3097 | unsafe fn test_mm_adds_epu8_saturate() { |
3098 | let a = _mm_set1_epi8(!0); |
3099 | let b = _mm_set1_epi8(1); |
3100 | let r = _mm_adds_epu8(a, b); |
3101 | assert_eq_m128i(r, a); |
3102 | } |
3103 | |
3104 | #[simd_test(enable = "sse2" )] |
3105 | unsafe fn test_mm_adds_epu16() { |
3106 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
3107 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
3108 | let r = _mm_adds_epu16(a, b); |
3109 | let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); |
3110 | assert_eq_m128i(r, e); |
3111 | } |
3112 | |
3113 | #[simd_test(enable = "sse2" )] |
3114 | unsafe fn test_mm_adds_epu16_saturate() { |
3115 | let a = _mm_set1_epi16(!0); |
3116 | let b = _mm_set1_epi16(1); |
3117 | let r = _mm_adds_epu16(a, b); |
3118 | assert_eq_m128i(r, a); |
3119 | } |
3120 | |
3121 | #[simd_test(enable = "sse2" )] |
3122 | unsafe fn test_mm_avg_epu8() { |
3123 | let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9)); |
3124 | let r = _mm_avg_epu8(a, b); |
3125 | assert_eq_m128i(r, _mm_set1_epi8(6)); |
3126 | } |
3127 | |
3128 | #[simd_test(enable = "sse2" )] |
3129 | unsafe fn test_mm_avg_epu16() { |
3130 | let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9)); |
3131 | let r = _mm_avg_epu16(a, b); |
3132 | assert_eq_m128i(r, _mm_set1_epi16(6)); |
3133 | } |
3134 | |
3135 | #[simd_test(enable = "sse2" )] |
3136 | unsafe fn test_mm_madd_epi16() { |
3137 | let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
3138 | let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16); |
3139 | let r = _mm_madd_epi16(a, b); |
3140 | let e = _mm_setr_epi32(29, 81, 149, 233); |
3141 | assert_eq_m128i(r, e); |
3142 | } |
3143 | |
3144 | #[simd_test(enable = "sse2" )] |
3145 | unsafe fn test_mm_max_epi16() { |
3146 | let a = _mm_set1_epi16(1); |
3147 | let b = _mm_set1_epi16(-1); |
3148 | let r = _mm_max_epi16(a, b); |
3149 | assert_eq_m128i(r, a); |
3150 | } |
3151 | |
3152 | #[simd_test(enable = "sse2" )] |
3153 | unsafe fn test_mm_max_epu8() { |
3154 | let a = _mm_set1_epi8(1); |
3155 | let b = _mm_set1_epi8(!0); |
3156 | let r = _mm_max_epu8(a, b); |
3157 | assert_eq_m128i(r, b); |
3158 | } |
3159 | |
3160 | #[simd_test(enable = "sse2" )] |
3161 | unsafe fn test_mm_min_epi16() { |
3162 | let a = _mm_set1_epi16(1); |
3163 | let b = _mm_set1_epi16(-1); |
3164 | let r = _mm_min_epi16(a, b); |
3165 | assert_eq_m128i(r, b); |
3166 | } |
3167 | |
3168 | #[simd_test(enable = "sse2" )] |
3169 | unsafe fn test_mm_min_epu8() { |
3170 | let a = _mm_set1_epi8(1); |
3171 | let b = _mm_set1_epi8(!0); |
3172 | let r = _mm_min_epu8(a, b); |
3173 | assert_eq_m128i(r, a); |
3174 | } |
3175 | |
3176 | #[simd_test(enable = "sse2" )] |
3177 | unsafe fn test_mm_mulhi_epi16() { |
3178 | let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001)); |
3179 | let r = _mm_mulhi_epi16(a, b); |
3180 | assert_eq_m128i(r, _mm_set1_epi16(-16)); |
3181 | } |
3182 | |
3183 | #[simd_test(enable = "sse2" )] |
3184 | unsafe fn test_mm_mulhi_epu16() { |
3185 | let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001)); |
3186 | let r = _mm_mulhi_epu16(a, b); |
3187 | assert_eq_m128i(r, _mm_set1_epi16(15)); |
3188 | } |
3189 | |
3190 | #[simd_test(enable = "sse2" )] |
3191 | unsafe fn test_mm_mullo_epi16() { |
3192 | let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001)); |
3193 | let r = _mm_mullo_epi16(a, b); |
3194 | assert_eq_m128i(r, _mm_set1_epi16(-17960)); |
3195 | } |
3196 | |
3197 | #[simd_test(enable = "sse2" )] |
3198 | unsafe fn test_mm_mul_epu32() { |
3199 | let a = _mm_setr_epi64x(1_000_000_000, 1 << 34); |
3200 | let b = _mm_setr_epi64x(1_000_000_000, 1 << 35); |
3201 | let r = _mm_mul_epu32(a, b); |
3202 | let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0); |
3203 | assert_eq_m128i(r, e); |
3204 | } |
3205 | |
3206 | #[simd_test(enable = "sse2" )] |
3207 | unsafe fn test_mm_sad_epu8() { |
3208 | #[rustfmt::skip] |
3209 | let a = _mm_setr_epi8( |
3210 | 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8, |
3211 | 1, 2, 3, 4, |
3212 | 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8, |
3213 | 1, 2, 3, 4, |
3214 | ); |
3215 | let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2); |
3216 | let r = _mm_sad_epu8(a, b); |
3217 | let e = _mm_setr_epi64x(1020, 614); |
3218 | assert_eq_m128i(r, e); |
3219 | } |
3220 | |
3221 | #[simd_test(enable = "sse2" )] |
3222 | unsafe fn test_mm_sub_epi8() { |
3223 | let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6)); |
3224 | let r = _mm_sub_epi8(a, b); |
3225 | assert_eq_m128i(r, _mm_set1_epi8(-1)); |
3226 | } |
3227 | |
3228 | #[simd_test(enable = "sse2" )] |
3229 | unsafe fn test_mm_sub_epi16() { |
3230 | let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6)); |
3231 | let r = _mm_sub_epi16(a, b); |
3232 | assert_eq_m128i(r, _mm_set1_epi16(-1)); |
3233 | } |
3234 | |
3235 | #[simd_test(enable = "sse2" )] |
3236 | unsafe fn test_mm_sub_epi32() { |
3237 | let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6)); |
3238 | let r = _mm_sub_epi32(a, b); |
3239 | assert_eq_m128i(r, _mm_set1_epi32(-1)); |
3240 | } |
3241 | |
3242 | #[simd_test(enable = "sse2" )] |
3243 | unsafe fn test_mm_sub_epi64() { |
3244 | let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6)); |
3245 | let r = _mm_sub_epi64(a, b); |
3246 | assert_eq_m128i(r, _mm_set1_epi64x(-1)); |
3247 | } |
3248 | |
3249 | #[simd_test(enable = "sse2" )] |
3250 | unsafe fn test_mm_subs_epi8() { |
3251 | let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2)); |
3252 | let r = _mm_subs_epi8(a, b); |
3253 | assert_eq_m128i(r, _mm_set1_epi8(3)); |
3254 | } |
3255 | |
3256 | #[simd_test(enable = "sse2" )] |
3257 | unsafe fn test_mm_subs_epi8_saturate_positive() { |
3258 | let a = _mm_set1_epi8(0x7F); |
3259 | let b = _mm_set1_epi8(-1); |
3260 | let r = _mm_subs_epi8(a, b); |
3261 | assert_eq_m128i(r, a); |
3262 | } |
3263 | |
3264 | #[simd_test(enable = "sse2" )] |
3265 | unsafe fn test_mm_subs_epi8_saturate_negative() { |
3266 | let a = _mm_set1_epi8(-0x80); |
3267 | let b = _mm_set1_epi8(1); |
3268 | let r = _mm_subs_epi8(a, b); |
3269 | assert_eq_m128i(r, a); |
3270 | } |
3271 | |
3272 | #[simd_test(enable = "sse2" )] |
3273 | unsafe fn test_mm_subs_epi16() { |
3274 | let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2)); |
3275 | let r = _mm_subs_epi16(a, b); |
3276 | assert_eq_m128i(r, _mm_set1_epi16(3)); |
3277 | } |
3278 | |
3279 | #[simd_test(enable = "sse2" )] |
3280 | unsafe fn test_mm_subs_epi16_saturate_positive() { |
3281 | let a = _mm_set1_epi16(0x7FFF); |
3282 | let b = _mm_set1_epi16(-1); |
3283 | let r = _mm_subs_epi16(a, b); |
3284 | assert_eq_m128i(r, a); |
3285 | } |
3286 | |
3287 | #[simd_test(enable = "sse2" )] |
3288 | unsafe fn test_mm_subs_epi16_saturate_negative() { |
3289 | let a = _mm_set1_epi16(-0x8000); |
3290 | let b = _mm_set1_epi16(1); |
3291 | let r = _mm_subs_epi16(a, b); |
3292 | assert_eq_m128i(r, a); |
3293 | } |
3294 | |
3295 | #[simd_test(enable = "sse2" )] |
3296 | unsafe fn test_mm_subs_epu8() { |
3297 | let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2)); |
3298 | let r = _mm_subs_epu8(a, b); |
3299 | assert_eq_m128i(r, _mm_set1_epi8(3)); |
3300 | } |
3301 | |
3302 | #[simd_test(enable = "sse2" )] |
3303 | unsafe fn test_mm_subs_epu8_saturate() { |
3304 | let a = _mm_set1_epi8(0); |
3305 | let b = _mm_set1_epi8(1); |
3306 | let r = _mm_subs_epu8(a, b); |
3307 | assert_eq_m128i(r, a); |
3308 | } |
3309 | |
3310 | #[simd_test(enable = "sse2" )] |
3311 | unsafe fn test_mm_subs_epu16() { |
3312 | let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2)); |
3313 | let r = _mm_subs_epu16(a, b); |
3314 | assert_eq_m128i(r, _mm_set1_epi16(3)); |
3315 | } |
3316 | |
3317 | #[simd_test(enable = "sse2" )] |
3318 | unsafe fn test_mm_subs_epu16_saturate() { |
3319 | let a = _mm_set1_epi16(0); |
3320 | let b = _mm_set1_epi16(1); |
3321 | let r = _mm_subs_epu16(a, b); |
3322 | assert_eq_m128i(r, a); |
3323 | } |
3324 | |
3325 | #[simd_test(enable = "sse2" )] |
3326 | unsafe fn test_mm_slli_si128() { |
3327 | #[rustfmt::skip] |
3328 | let a = _mm_setr_epi8( |
3329 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
3330 | ); |
3331 | let r = _mm_slli_si128::<1>(a); |
3332 | let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3333 | assert_eq_m128i(r, e); |
3334 | |
3335 | #[rustfmt::skip] |
3336 | let a = _mm_setr_epi8( |
3337 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
3338 | ); |
3339 | let r = _mm_slli_si128::<15>(a); |
3340 | let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); |
3341 | assert_eq_m128i(r, e); |
3342 | |
3343 | #[rustfmt::skip] |
3344 | let a = _mm_setr_epi8( |
3345 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
3346 | ); |
3347 | let r = _mm_slli_si128::<16>(a); |
3348 | assert_eq_m128i(r, _mm_set1_epi8(0)); |
3349 | } |
3350 | |
3351 | #[simd_test(enable = "sse2" )] |
3352 | unsafe fn test_mm_slli_epi16() { |
3353 | let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); |
3354 | let r = _mm_slli_epi16::<4>(a); |
3355 | assert_eq_m128i( |
3356 | r, |
3357 | _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0), |
3358 | ); |
3359 | let r = _mm_slli_epi16::<16>(a); |
3360 | assert_eq_m128i(r, _mm_set1_epi16(0)); |
3361 | } |
3362 | |
3363 | #[simd_test(enable = "sse2" )] |
3364 | unsafe fn test_mm_sll_epi16() { |
3365 | let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); |
3366 | let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4)); |
3367 | assert_eq_m128i( |
3368 | r, |
3369 | _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0), |
3370 | ); |
3371 | let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0)); |
3372 | assert_eq_m128i(r, a); |
3373 | let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16)); |
3374 | assert_eq_m128i(r, _mm_set1_epi16(0)); |
3375 | let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX)); |
3376 | assert_eq_m128i(r, _mm_set1_epi16(0)); |
3377 | } |
3378 | |
3379 | #[simd_test(enable = "sse2" )] |
3380 | unsafe fn test_mm_slli_epi32() { |
3381 | let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); |
3382 | let r = _mm_slli_epi32::<4>(a); |
3383 | assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0)); |
3384 | let r = _mm_slli_epi32::<32>(a); |
3385 | assert_eq_m128i(r, _mm_set1_epi32(0)); |
3386 | } |
3387 | |
3388 | #[simd_test(enable = "sse2" )] |
3389 | unsafe fn test_mm_sll_epi32() { |
3390 | let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); |
3391 | let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4)); |
3392 | assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0)); |
3393 | let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0)); |
3394 | assert_eq_m128i(r, a); |
3395 | let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32)); |
3396 | assert_eq_m128i(r, _mm_set1_epi32(0)); |
3397 | let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX)); |
3398 | assert_eq_m128i(r, _mm_set1_epi32(0)); |
3399 | } |
3400 | |
3401 | #[simd_test(enable = "sse2" )] |
3402 | unsafe fn test_mm_slli_epi64() { |
3403 | let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); |
3404 | let r = _mm_slli_epi64::<4>(a); |
3405 | assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0)); |
3406 | let r = _mm_slli_epi64::<64>(a); |
3407 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3408 | } |
3409 | |
3410 | #[simd_test(enable = "sse2" )] |
3411 | unsafe fn test_mm_sll_epi64() { |
3412 | let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); |
3413 | let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4)); |
3414 | assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0)); |
3415 | let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0)); |
3416 | assert_eq_m128i(r, a); |
3417 | let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64)); |
3418 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3419 | let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX)); |
3420 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3421 | } |
3422 | |
3423 | #[simd_test(enable = "sse2" )] |
3424 | unsafe fn test_mm_srai_epi16() { |
3425 | let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); |
3426 | let r = _mm_srai_epi16::<4>(a); |
3427 | assert_eq_m128i( |
3428 | r, |
3429 | _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10), |
3430 | ); |
3431 | let r = _mm_srai_epi16::<16>(a); |
3432 | assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1)); |
3433 | } |
3434 | |
3435 | #[simd_test(enable = "sse2" )] |
3436 | unsafe fn test_mm_sra_epi16() { |
3437 | let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); |
3438 | let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4)); |
3439 | assert_eq_m128i( |
3440 | r, |
3441 | _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10), |
3442 | ); |
3443 | let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0)); |
3444 | assert_eq_m128i(r, a); |
3445 | let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16)); |
3446 | assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1)); |
3447 | let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX)); |
3448 | assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1)); |
3449 | } |
3450 | |
3451 | #[simd_test(enable = "sse2" )] |
3452 | unsafe fn test_mm_srai_epi32() { |
3453 | let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); |
3454 | let r = _mm_srai_epi32::<4>(a); |
3455 | assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000)); |
3456 | let r = _mm_srai_epi32::<32>(a); |
3457 | assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1)); |
3458 | } |
3459 | |
3460 | #[simd_test(enable = "sse2" )] |
3461 | unsafe fn test_mm_sra_epi32() { |
3462 | let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); |
3463 | let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4)); |
3464 | assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000)); |
3465 | let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0)); |
3466 | assert_eq_m128i(r, a); |
3467 | let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32)); |
3468 | assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1)); |
3469 | let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX)); |
3470 | assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1)); |
3471 | } |
3472 | |
3473 | #[simd_test(enable = "sse2" )] |
3474 | unsafe fn test_mm_srli_si128() { |
3475 | #[rustfmt::skip] |
3476 | let a = _mm_setr_epi8( |
3477 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
3478 | ); |
3479 | let r = _mm_srli_si128::<1>(a); |
3480 | #[rustfmt::skip] |
3481 | let e = _mm_setr_epi8( |
3482 | 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, |
3483 | ); |
3484 | assert_eq_m128i(r, e); |
3485 | |
3486 | #[rustfmt::skip] |
3487 | let a = _mm_setr_epi8( |
3488 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
3489 | ); |
3490 | let r = _mm_srli_si128::<15>(a); |
3491 | let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
3492 | assert_eq_m128i(r, e); |
3493 | |
3494 | #[rustfmt::skip] |
3495 | let a = _mm_setr_epi8( |
3496 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
3497 | ); |
3498 | let r = _mm_srli_si128::<16>(a); |
3499 | assert_eq_m128i(r, _mm_set1_epi8(0)); |
3500 | } |
3501 | |
3502 | #[simd_test(enable = "sse2" )] |
3503 | unsafe fn test_mm_srli_epi16() { |
3504 | let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); |
3505 | let r = _mm_srli_epi16::<4>(a); |
3506 | assert_eq_m128i( |
3507 | r, |
3508 | _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0), |
3509 | ); |
3510 | let r = _mm_srli_epi16::<16>(a); |
3511 | assert_eq_m128i(r, _mm_set1_epi16(0)); |
3512 | } |
3513 | |
3514 | #[simd_test(enable = "sse2" )] |
3515 | unsafe fn test_mm_srl_epi16() { |
3516 | let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); |
3517 | let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4)); |
3518 | assert_eq_m128i( |
3519 | r, |
3520 | _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0), |
3521 | ); |
3522 | let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0)); |
3523 | assert_eq_m128i(r, a); |
3524 | let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16)); |
3525 | assert_eq_m128i(r, _mm_set1_epi16(0)); |
3526 | let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX)); |
3527 | assert_eq_m128i(r, _mm_set1_epi16(0)); |
3528 | } |
3529 | |
3530 | #[simd_test(enable = "sse2" )] |
3531 | unsafe fn test_mm_srli_epi32() { |
3532 | let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); |
3533 | let r = _mm_srli_epi32::<4>(a); |
3534 | assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000)); |
3535 | let r = _mm_srli_epi32::<32>(a); |
3536 | assert_eq_m128i(r, _mm_set1_epi32(0)); |
3537 | } |
3538 | |
3539 | #[simd_test(enable = "sse2" )] |
3540 | unsafe fn test_mm_srl_epi32() { |
3541 | let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); |
3542 | let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4)); |
3543 | assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000)); |
3544 | let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0)); |
3545 | assert_eq_m128i(r, a); |
3546 | let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32)); |
3547 | assert_eq_m128i(r, _mm_set1_epi32(0)); |
3548 | let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX)); |
3549 | assert_eq_m128i(r, _mm_set1_epi32(0)); |
3550 | } |
3551 | |
3552 | #[simd_test(enable = "sse2" )] |
3553 | unsafe fn test_mm_srli_epi64() { |
3554 | let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); |
3555 | let r = _mm_srli_epi64::<4>(a); |
3556 | assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000)); |
3557 | let r = _mm_srli_epi64::<64>(a); |
3558 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3559 | } |
3560 | |
3561 | #[simd_test(enable = "sse2" )] |
3562 | unsafe fn test_mm_srl_epi64() { |
3563 | let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); |
3564 | let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4)); |
3565 | assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000)); |
3566 | let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0)); |
3567 | assert_eq_m128i(r, a); |
3568 | let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64)); |
3569 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3570 | let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX)); |
3571 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3572 | } |
3573 | |
3574 | #[simd_test(enable = "sse2" )] |
3575 | unsafe fn test_mm_and_si128() { |
3576 | let a = _mm_set1_epi8(5); |
3577 | let b = _mm_set1_epi8(3); |
3578 | let r = _mm_and_si128(a, b); |
3579 | assert_eq_m128i(r, _mm_set1_epi8(1)); |
3580 | } |
3581 | |
3582 | #[simd_test(enable = "sse2" )] |
3583 | unsafe fn test_mm_andnot_si128() { |
3584 | let a = _mm_set1_epi8(5); |
3585 | let b = _mm_set1_epi8(3); |
3586 | let r = _mm_andnot_si128(a, b); |
3587 | assert_eq_m128i(r, _mm_set1_epi8(2)); |
3588 | } |
3589 | |
3590 | #[simd_test(enable = "sse2" )] |
3591 | unsafe fn test_mm_or_si128() { |
3592 | let a = _mm_set1_epi8(5); |
3593 | let b = _mm_set1_epi8(3); |
3594 | let r = _mm_or_si128(a, b); |
3595 | assert_eq_m128i(r, _mm_set1_epi8(7)); |
3596 | } |
3597 | |
3598 | #[simd_test(enable = "sse2" )] |
3599 | unsafe fn test_mm_xor_si128() { |
3600 | let a = _mm_set1_epi8(5); |
3601 | let b = _mm_set1_epi8(3); |
3602 | let r = _mm_xor_si128(a, b); |
3603 | assert_eq_m128i(r, _mm_set1_epi8(6)); |
3604 | } |
3605 | |
3606 | #[simd_test(enable = "sse2" )] |
3607 | unsafe fn test_mm_cmpeq_epi8() { |
3608 | let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3609 | let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
3610 | let r = _mm_cmpeq_epi8(a, b); |
3611 | #[rustfmt::skip] |
3612 | assert_eq_m128i( |
3613 | r, |
3614 | _mm_setr_epi8( |
3615 | 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
3616 | ) |
3617 | ); |
3618 | } |
3619 | |
3620 | #[simd_test(enable = "sse2" )] |
3621 | unsafe fn test_mm_cmpeq_epi16() { |
3622 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
3623 | let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0); |
3624 | let r = _mm_cmpeq_epi16(a, b); |
3625 | assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0)); |
3626 | } |
3627 | |
3628 | #[simd_test(enable = "sse2" )] |
3629 | unsafe fn test_mm_cmpeq_epi32() { |
3630 | let a = _mm_setr_epi32(0, 1, 2, 3); |
3631 | let b = _mm_setr_epi32(3, 2, 2, 0); |
3632 | let r = _mm_cmpeq_epi32(a, b); |
3633 | assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0)); |
3634 | } |
3635 | |
3636 | #[simd_test(enable = "sse2" )] |
3637 | unsafe fn test_mm_cmpgt_epi8() { |
3638 | let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
3639 | let b = _mm_set1_epi8(0); |
3640 | let r = _mm_cmpgt_epi8(a, b); |
3641 | let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
3642 | assert_eq_m128i(r, e); |
3643 | } |
3644 | |
3645 | #[simd_test(enable = "sse2" )] |
3646 | unsafe fn test_mm_cmpgt_epi16() { |
3647 | let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0); |
3648 | let b = _mm_set1_epi16(0); |
3649 | let r = _mm_cmpgt_epi16(a, b); |
3650 | let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0); |
3651 | assert_eq_m128i(r, e); |
3652 | } |
3653 | |
3654 | #[simd_test(enable = "sse2" )] |
3655 | unsafe fn test_mm_cmpgt_epi32() { |
3656 | let a = _mm_set_epi32(5, 0, 0, 0); |
3657 | let b = _mm_set1_epi32(0); |
3658 | let r = _mm_cmpgt_epi32(a, b); |
3659 | assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0)); |
3660 | } |
3661 | |
3662 | #[simd_test(enable = "sse2" )] |
3663 | unsafe fn test_mm_cmplt_epi8() { |
3664 | let a = _mm_set1_epi8(0); |
3665 | let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
3666 | let r = _mm_cmplt_epi8(a, b); |
3667 | let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
3668 | assert_eq_m128i(r, e); |
3669 | } |
3670 | |
3671 | #[simd_test(enable = "sse2" )] |
3672 | unsafe fn test_mm_cmplt_epi16() { |
3673 | let a = _mm_set1_epi16(0); |
3674 | let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0); |
3675 | let r = _mm_cmplt_epi16(a, b); |
3676 | let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0); |
3677 | assert_eq_m128i(r, e); |
3678 | } |
3679 | |
3680 | #[simd_test(enable = "sse2" )] |
3681 | unsafe fn test_mm_cmplt_epi32() { |
3682 | let a = _mm_set1_epi32(0); |
3683 | let b = _mm_set_epi32(5, 0, 0, 0); |
3684 | let r = _mm_cmplt_epi32(a, b); |
3685 | assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0)); |
3686 | } |
3687 | |
3688 | #[simd_test(enable = "sse2" )] |
3689 | unsafe fn test_mm_cvtepi32_pd() { |
3690 | let a = _mm_set_epi32(35, 25, 15, 5); |
3691 | let r = _mm_cvtepi32_pd(a); |
3692 | assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0)); |
3693 | } |
3694 | |
3695 | #[simd_test(enable = "sse2" )] |
3696 | unsafe fn test_mm_cvtsi32_sd() { |
3697 | let a = _mm_set1_pd(3.5); |
3698 | let r = _mm_cvtsi32_sd(a, 5); |
3699 | assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5)); |
3700 | } |
3701 | |
3702 | #[simd_test(enable = "sse2" )] |
3703 | unsafe fn test_mm_cvtepi32_ps() { |
3704 | let a = _mm_setr_epi32(1, 2, 3, 4); |
3705 | let r = _mm_cvtepi32_ps(a); |
3706 | assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0)); |
3707 | } |
3708 | |
3709 | #[simd_test(enable = "sse2" )] |
3710 | unsafe fn test_mm_cvtps_epi32() { |
3711 | let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
3712 | let r = _mm_cvtps_epi32(a); |
3713 | assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4)); |
3714 | } |
3715 | |
3716 | #[simd_test(enable = "sse2" )] |
3717 | unsafe fn test_mm_cvtsi32_si128() { |
3718 | let r = _mm_cvtsi32_si128(5); |
3719 | assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0)); |
3720 | } |
3721 | |
3722 | #[simd_test(enable = "sse2" )] |
3723 | unsafe fn test_mm_cvtsi128_si32() { |
3724 | let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0)); |
3725 | assert_eq!(r, 5); |
3726 | } |
3727 | |
3728 | #[simd_test(enable = "sse2" )] |
3729 | unsafe fn test_mm_set_epi64x() { |
3730 | let r = _mm_set_epi64x(0, 1); |
3731 | assert_eq_m128i(r, _mm_setr_epi64x(1, 0)); |
3732 | } |
3733 | |
3734 | #[simd_test(enable = "sse2" )] |
3735 | unsafe fn test_mm_set_epi32() { |
3736 | let r = _mm_set_epi32(0, 1, 2, 3); |
3737 | assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0)); |
3738 | } |
3739 | |
3740 | #[simd_test(enable = "sse2" )] |
3741 | unsafe fn test_mm_set_epi16() { |
3742 | let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
3743 | assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0)); |
3744 | } |
3745 | |
3746 | #[simd_test(enable = "sse2" )] |
3747 | unsafe fn test_mm_set_epi8() { |
3748 | #[rustfmt::skip] |
3749 | let r = _mm_set_epi8( |
3750 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
3751 | ); |
3752 | #[rustfmt::skip] |
3753 | let e = _mm_setr_epi8( |
3754 | 15, 14, 13, 12, 11, 10, 9, 8, |
3755 | 7, 6, 5, 4, 3, 2, 1, 0, |
3756 | ); |
3757 | assert_eq_m128i(r, e); |
3758 | } |
3759 | |
3760 | #[simd_test(enable = "sse2" )] |
3761 | unsafe fn test_mm_set1_epi64x() { |
3762 | let r = _mm_set1_epi64x(1); |
3763 | assert_eq_m128i(r, _mm_set1_epi64x(1)); |
3764 | } |
3765 | |
3766 | #[simd_test(enable = "sse2" )] |
3767 | unsafe fn test_mm_set1_epi32() { |
3768 | let r = _mm_set1_epi32(1); |
3769 | assert_eq_m128i(r, _mm_set1_epi32(1)); |
3770 | } |
3771 | |
3772 | #[simd_test(enable = "sse2" )] |
3773 | unsafe fn test_mm_set1_epi16() { |
3774 | let r = _mm_set1_epi16(1); |
3775 | assert_eq_m128i(r, _mm_set1_epi16(1)); |
3776 | } |
3777 | |
3778 | #[simd_test(enable = "sse2" )] |
3779 | unsafe fn test_mm_set1_epi8() { |
3780 | let r = _mm_set1_epi8(1); |
3781 | assert_eq_m128i(r, _mm_set1_epi8(1)); |
3782 | } |
3783 | |
3784 | #[simd_test(enable = "sse2" )] |
3785 | unsafe fn test_mm_setr_epi32() { |
3786 | let r = _mm_setr_epi32(0, 1, 2, 3); |
3787 | assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3)); |
3788 | } |
3789 | |
3790 | #[simd_test(enable = "sse2" )] |
3791 | unsafe fn test_mm_setr_epi16() { |
3792 | let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
3793 | assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7)); |
3794 | } |
3795 | |
3796 | #[simd_test(enable = "sse2" )] |
3797 | unsafe fn test_mm_setr_epi8() { |
3798 | #[rustfmt::skip] |
3799 | let r = _mm_setr_epi8( |
3800 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
3801 | ); |
3802 | #[rustfmt::skip] |
3803 | let e = _mm_setr_epi8( |
3804 | 0, 1, 2, 3, 4, 5, 6, 7, |
3805 | 8, 9, 10, 11, 12, 13, 14, 15, |
3806 | ); |
3807 | assert_eq_m128i(r, e); |
3808 | } |
3809 | |
3810 | #[simd_test(enable = "sse2" )] |
3811 | unsafe fn test_mm_setzero_si128() { |
3812 | let r = _mm_setzero_si128(); |
3813 | assert_eq_m128i(r, _mm_set1_epi64x(0)); |
3814 | } |
3815 | |
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadl_epi64() {
        let a = _mm_setr_epi64x(6, 5);
        let r = _mm_loadl_epi64(&a as *const _);
        assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_si128() {
        let a = _mm_set_epi64x(5, 6);
        let r = _mm_load_si128(&a as *const _ as *const _);
        assert_eq_m128i(a, r);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadu_si128() {
        let a = _mm_set_epi64x(5, 6);
        let r = _mm_loadu_si128(&a as *const _ as *const _);
        assert_eq_m128i(a, r);
    }

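    // `_mm_maskmoveu_si128` stores only the bytes whose corresponding mask
    // byte has its most significant bit set; here that is byte 13 only.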
    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    // (non-temporal store)
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_maskmoveu_si128() {
        let a = _mm_set1_epi8(9);
        #[rustfmt::skip]
        let mask = _mm_set_epi8(
            0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0,
        );
        let mut r = _mm_set1_epi8(0);
        _mm_maskmoveu_si128(a, mask, &mut r as *mut _ as *mut i8);
        let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_si128() {
        let a = _mm_set1_epi8(9);
        let mut r = _mm_set1_epi8(0);
        _mm_store_si128(&mut r as *mut _ as *mut __m128i, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeu_si128() {
        let a = _mm_set1_epi8(9);
        let mut r = _mm_set1_epi8(0);
        _mm_storeu_si128(&mut r as *mut _ as *mut __m128i, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storel_epi64() {
        let a = _mm_setr_epi64x(2, 9);
        let mut r = _mm_set1_epi8(0);
        _mm_storel_epi64(&mut r as *mut _ as *mut __m128i, a);
        assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
    }

    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    // (non-temporal store)
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_stream_si128() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let mut r = _mm_undefined_si128();
        _mm_stream_si128(&mut r as *mut _, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    // (non-temporal store)
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_stream_si32() {
        let a: i32 = 7;
        let mut mem = boxed::Box::<i32>::new(-1);
        _mm_stream_si32(&mut *mem as *mut i32, a);
        assert_eq!(a, *mem);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_move_epi64() {
        let a = _mm_setr_epi64x(5, 6);
        let r = _mm_move_epi64(a);
        assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
    }

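    // The `packs_*` intrinsics narrow each lane with signed saturation:
    // out-of-range values clamp to the destination type's MIN/MAX.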
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_packs_epi16() {
        let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
        let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
        let r = _mm_packs_epi16(a, b);
        #[rustfmt::skip]
        assert_eq_m128i(
            r,
            _mm_setr_epi8(
                0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
            )
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_packs_epi32() {
        let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
        let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
        let r = _mm_packs_epi32(a, b);
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF),
        );
    }

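    // `packus_epi16` narrows with unsigned saturation: negative inputs clamp
    // to 0 and values above 255 clamp to 0xFF.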
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_packus_epi16() {
        let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
        let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
        let r = _mm_packus_epi16(a, b);
        assert_eq_m128i(
            r,
            _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0),
        );
    }

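    // Extraction zero-extends the selected 16-bit lane into an `i32`, so the
    // -1 in lane 0 reads back as 0xFFFF.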
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_extract_epi16() {
        let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
        let r1 = _mm_extract_epi16::<0>(a);
        let r2 = _mm_extract_epi16::<3>(a);
        assert_eq!(r1, 0xFFFF);
        assert_eq!(r2, 3);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_insert_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_insert_epi16::<0>(a, 9);
        let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

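    // `movemask` gathers the most significant bit of each of the 16 bytes;
    // lane i becomes bit i of the result.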
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_movemask_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
            0b0101, 0b1111_0000u8 as i8, 0, 0,
            0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101,
            0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
        );
        let r = _mm_movemask_epi8(a);
        assert_eq!(r, 0b10100110_00100101);
    }

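    // Each 2-bit field of the shuffle immediate selects a source lane, lowest
    // field first: 0b00_01_01_11 picks lanes 3, 1, 1, 0. The hi/lo variants
    // below apply the same encoding to their half of the vector.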
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shuffle_epi32() {
        let a = _mm_setr_epi32(5, 10, 15, 20);
        let r = _mm_shuffle_epi32::<0b00_01_01_11>(a);
        let e = _mm_setr_epi32(20, 10, 10, 5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shufflehi_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
        let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a);
        let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shufflelo_epi16() {
        let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
        let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a);
        let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_unpackhi_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_unpackhi_epi16(a, b);
        let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(4, 5, 6, 7);
        let r = _mm_unpackhi_epi32(a, b);
        let e = _mm_setr_epi32(2, 6, 3, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_epi64() {
        let a = _mm_setr_epi64x(0, 1);
        let b = _mm_setr_epi64x(2, 3);
        let r = _mm_unpackhi_epi64(a, b);
        let e = _mm_setr_epi64x(1, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_unpacklo_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            0, 16, 1, 17, 2, 18, 3, 19,
            4, 20, 5, 21, 6, 22, 7, 23,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_unpacklo_epi16(a, b);
        let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(4, 5, 6, 7);
        let r = _mm_unpacklo_epi32(a, b);
        let e = _mm_setr_epi32(0, 4, 1, 5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_epi64() {
        let a = _mm_setr_epi64x(0, 1);
        let b = _mm_setr_epi64x(2, 3);
        let r = _mm_unpacklo_epi64(a, b);
        let e = _mm_setr_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_add_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_add_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_div_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_div_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_div_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_div_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_max_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_max_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_max_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_max_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0));

        // Check SSE(2)-specific semantics for -0.0 handling.
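        // maxpd returns the second operand when the inputs compare equal, so
        // the sign of the zero comes from the second argument.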
        let a = _mm_setr_pd(-0.0, 0.0);
        let b = _mm_setr_pd(0.0, 0.0);
        let r1: [u8; 16] = transmute(_mm_max_pd(a, b));
        let r2: [u8; 16] = transmute(_mm_max_pd(b, a));
        let a: [u8; 16] = transmute(a);
        let b: [u8; 16] = transmute(b);
        assert_eq!(r1, b);
        assert_eq!(r2, a);
        assert_ne!(a, b); // sanity check that -0.0 is actually present
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_min_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_min_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_min_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_min_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));

        // Check SSE(2)-specific semantics for -0.0 handling.
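        // Like maxpd, minpd returns the second operand when the inputs compare
        // equal, so the sign of the zero follows the second argument.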
        let a = _mm_setr_pd(-0.0, 0.0);
        let b = _mm_setr_pd(0.0, 0.0);
        let r1: [u8; 16] = transmute(_mm_min_pd(a, b));
        let r2: [u8; 16] = transmute(_mm_min_pd(b, a));
        let a: [u8; 16] = transmute(a);
        let b: [u8; 16] = transmute(b);
        assert_eq!(r1, b);
        assert_eq!(r2, a);
        assert_ne!(a, b); // sanity check that -0.0 is actually present
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mul_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_mul_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mul_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_mul_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sqrt_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_sqrt_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sqrt_pd() {
        let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
        assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_sub_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_sub_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_and_pd() {
        let a = transmute(u64x2::splat(5));
        let b = transmute(u64x2::splat(3));
        let r = _mm_and_pd(a, b);
        let e = transmute(u64x2::splat(1));
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_andnot_pd() {
        let a = transmute(u64x2::splat(5));
        let b = transmute(u64x2::splat(3));
        let r = _mm_andnot_pd(a, b);
        let e = transmute(u64x2::splat(2));
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_or_pd() {
        let a = transmute(u64x2::splat(5));
        let b = transmute(u64x2::splat(3));
        let r = _mm_or_pd(a, b);
        let e = transmute(u64x2::splat(7));
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_xor_pd() {
        let a = transmute(u64x2::splat(5));
        let b = transmute(u64x2::splat(3));
        let r = _mm_xor_pd(a, b);
        let e = transmute(u64x2::splat(6));
        assert_eq_m128d(r, e);
    }

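    // The scalar cmp*_sd comparisons write an all-ones or all-zeros mask into
    // the low lane and pass the upper lane of `a` through unchanged, which is
    // why `e` carries `2.0f64.to_bits()` in its upper half.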
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmple_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_sd() {
        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpge_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpord_sd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpunord_sd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpneq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnlt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnle_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpngt_sd() {
        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnge_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
        assert_eq_m128i(r, e);
    }

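    // The packed cmp*_pd variants produce an independent full-width mask for
    // each lane.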
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmple_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, !0);
        let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpge_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpord_pd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpunord_pd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpneq_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnlt_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnle_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpngt_pd() {
        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnge_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b));
        assert_eq_m128i(r, e);
    }

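    // The comi* predicates compare only the low lanes and return 0 or 1; they
    // differ from the ucomi* variants further below in signaling on quiet NaNs.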
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comieq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comieq_sd(a, b) != 0);

        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comieq_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comilt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comilt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comile_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comile_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comigt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comigt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comige_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comige_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comineq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comineq_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomieq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomieq_sd(a, b) != 0);

        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
        assert!(_mm_ucomieq_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomilt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomilt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomile_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomile_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomigt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomigt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomige_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomige_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomineq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomineq_sd(a, b) == 0);
    }

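    // `movemask_pd` gathers the sign bits of the two doubles; the low lane
    // maps to bit 0.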
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_movemask_pd() {
        let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
        assert_eq!(r, 0b01);

        let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
        assert_eq!(r, 0b11);
    }

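    // 16-byte-aligned backing storage for the aligned load/store tests below.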
    #[repr(align(16))]
    struct Memory {
        data: [f64; 4],
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_pd() {
        let mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mem.data;
        let d = vals.as_ptr();

        let r = _mm_load_pd(d);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_sd() {
        let a = 1.;
        let expected = _mm_setr_pd(a, 0.);
        let r = _mm_load_sd(&a);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadh_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = 3.;
        let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
        let r = _mm_loadh_pd(a, &b);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadl_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = 3.;
        let expected = _mm_setr_pd(3., get_m128d(a, 1));
        let r = _mm_loadl_pd(a, &b);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    // (non-temporal store)
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_stream_pd() {
        #[repr(align(128))]
        struct Memory {
            pub data: [f64; 2],
        }
        let a = _mm_set1_pd(7.0);
        let mut mem = Memory { data: [-1.0; 2] };

        _mm_stream_pd(&mut mem.data[0] as *mut f64, a);
        for i in 0..2 {
            assert_eq!(mem.data[i], get_m128d(a, i));
        }
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_sd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_store_sd(&mut dest, a);
        assert_eq!(dest, _mm_cvtsd_f64(a));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store_pd(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 2.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeu_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);

        let mut ofs = 0;
        let mut p = vals.as_mut_ptr();

        // Make sure p is **not** aligned to a 16-byte boundary
        if (p as usize) & 0xf == 0 {
            ofs = 1;
            p = p.add(1);
        }

        _mm_storeu_pd(p, *black_box(&a));

        if ofs > 0 {
            assert_eq!(vals[ofs - 1], 0.0);
        }
        assert_eq!(vals[ofs], 1.0);
        assert_eq!(vals[ofs + 1], 2.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store1_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store1_pd(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 1.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_pd1() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store_pd1(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 1.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storer_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_storer_pd(d, *black_box(&a));
        assert_eq!(vals[0], 2.0);
        assert_eq!(vals[1], 1.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeh_pd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_storeh_pd(&mut dest, a);
        assert_eq!(dest, get_m128d(a, 1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storel_pd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_storel_pd(&mut dest, a);
        assert_eq!(dest, _mm_cvtsd_f64(a));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadr_pd() {
        let mut mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mut mem.data;
        let d = vals.as_ptr();

        let r = _mm_loadr_pd(d);
        assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadu_pd() {
        let mut mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mut mem.data;
        let mut d = vals.as_ptr();

        // make sure d is not aligned to a 16-byte boundary
        let mut offset = 0;
        if (d as usize) & 0xf == 0 {
            offset = 1;
            d = d.add(offset);
        }

        let r = _mm_loadu_pd(d);
        let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64));
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtpd_ps() {
        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
        assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
        assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
        assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtps_pd() {
        let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
        assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));

        let r = _mm_cvtps_pd(_mm_setr_ps(
            f32::MAX,
            f32::INFINITY,
            f32::NEG_INFINITY,
            f32::MIN,
        ));
        assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
    }

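    // Conversions that overflow `i32`, and NaN inputs, produce the "integer
    // indefinite" value `i32::MIN`.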
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtpd_epi32() {
        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
        assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
        assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_si32() {
        let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
        assert_eq!(r, -2);

        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq!(r, i32::MIN);

        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
        assert_eq!(r, i32::MIN);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_ss() {
        let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
        let b = _mm_setr_pd(2.0, -5.0);

        let r = _mm_cvtsd_ss(a, b);

        assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));

        let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
        let b = _mm_setr_pd(f64::INFINITY, -5.0);

        let r = _mm_cvtsd_ss(a, b);

        assert_eq_m128(
            r,
            _mm_setr_ps(
                f32::INFINITY,
                f32::NEG_INFINITY,
                f32::MAX,
                f32::NEG_INFINITY,
            ),
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_f64() {
        let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
        assert_eq!(r, -1.1);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtss_sd() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);

        let r = _mm_cvtss_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));

        let a = _mm_setr_pd(-1.1, f64::INFINITY);
        let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);

        let r = _mm_cvtss_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
    }

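    // The `cvtt*` variants truncate toward zero instead of rounding with the
    // current rounding mode.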
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttpd_epi32() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let r = _mm_cvttpd_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));

        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let r = _mm_cvttpd_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttsd_si32() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let r = _mm_cvttsd_si32(a);
        assert_eq!(r, -1);

        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let r = _mm_cvttsd_si32(a);
        assert_eq!(r, i32::MIN);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttps_epi32() {
        let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
        let r = _mm_cvttps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));

        let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
        let r = _mm_cvttps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_sd() {
        let r = _mm_set_sd(-1.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_pd() {
        let r = _mm_set1_pd(-1.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_pd1() {
        let r = _mm_set_pd1(-2.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_pd() {
        let r = _mm_set_pd(1.0_f64, 5.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setr_pd() {
        let r = _mm_setr_pd(1.0_f64, -5.0_f64);
        assert_eq_m128d(r, _mm_set_pd(-5.0_f64, 1.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setzero_pd() {
        let r = _mm_setzero_pd();
        assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load1_pd() {
        let d = -5.0;
        let r = _mm_load1_pd(&d);
        assert_eq_m128d(r, _mm_setr_pd(d, d));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_pd1() {
        let d = -5.0;
        let r = _mm_load_pd1(&d);
        assert_eq_m128d(r, _mm_setr_pd(d, d));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(3.0, 4.0);
        let r = _mm_unpackhi_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(3.0, 4.0);
        let r = _mm_unpacklo_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0));
    }

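    // Only the low two bits of the shuffle immediate matter here: bit 0 picks
    // the lane taken from `a`, bit 1 the lane taken from `b`.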
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shuffle_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(3., 4.);
        let expected = _mm_setr_pd(1., 3.);
        let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b);
        assert_eq_m128d(r, expected);
    }

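    // `move_sd` replaces the low lane of `a` with the low lane of `b`.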
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_move_sd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(3., 4.);
        let expected = _mm_setr_pd(3., 2.);
        let r = _mm_move_sd(a, b);
        assert_eq_m128d(r, expected);
    }

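    // The cast intrinsics only reinterpret the bit pattern of the 128-bit
    // value; they do not convert lanes.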
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castpd_ps() {
        let a = _mm_set1_pd(0.);
        let expected = _mm_set1_ps(0.);
        let r = _mm_castpd_ps(a);
        assert_eq_m128(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castpd_si128() {
        let a = _mm_set1_pd(0.);
        let expected = _mm_set1_epi64x(0);
        let r = _mm_castpd_si128(a);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castps_pd() {
        let a = _mm_set1_ps(0.);
        let expected = _mm_set1_pd(0.);
        let r = _mm_castps_pd(a);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castps_si128() {
        let a = _mm_set1_ps(0.);
        let expected = _mm_set1_epi32(0);
        let r = _mm_castps_si128(a);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castsi128_pd() {
        let a = _mm_set1_epi64x(0);
        let expected = _mm_set1_pd(0.);
        let r = _mm_castsi128_pd(a);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castsi128_ps() {
        let a = _mm_set1_epi32(0);
        let expected = _mm_set1_ps(0.);
        let r = _mm_castsi128_ps(a);
        assert_eq_m128(r, expected);
    }
}
