//! Advanced Vector Extensions 2 (AVX2)
2 | //! |
3 | //! AVX2 expands most AVX commands to 256-bit wide vector registers and |
4 | //! adds [FMA](https://en.wikipedia.org/wiki/Fused_multiply-accumulate). |
5 | //! |
6 | //! The references are: |
7 | //! |
8 | //! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: |
9 | //! Instruction Set Reference, A-Z][intel64_ref]. |
10 | //! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and |
11 | //! System Instructions][amd64_ref]. |
12 | //! |
13 | //! Wikipedia's [AVX][wiki_avx] and [FMA][wiki_fma] pages provide a quick |
14 | //! overview of the instructions available. |
15 | //! |
16 | //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf |
17 | //! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf |
18 | //! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions |
19 | //! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate |
20 | |
21 | use crate::core_arch::{simd::*, x86::*}; |
22 | use crate::intrinsics::simd::*; |
23 | |
24 | #[cfg (test)] |
25 | use stdarch_test::assert_instr; |
26 | |
27 | /// Computes the absolute values of packed 32-bit integers in `a`. |
28 | /// |
29 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi32) |
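///
/// # Examples
///
/// A minimal usage sketch with illustrative values (not from Intel's
/// documentation), gated on runtime AVX2 detection:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_setr_epi32(-1, 2, -3, 4, -5, 6, -7, 8);
///             let r = _mm256_abs_epi32(a);
///             let mut out = [0i32; 8];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [1, 2, 3, 4, 5, 6, 7, 8]);
///         }
///     }
/// }
/// ```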
30 | #[inline ] |
31 | #[target_feature (enable = "avx2" )] |
32 | #[cfg_attr (test, assert_instr(vpabsd))] |
33 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
34 | pub unsafe fn _mm256_abs_epi32(a: __m256i) -> __m256i { |
35 | let a: i32x8 = a.as_i32x8(); |
36 | let zero: i32x8 = i32x8::splat(0); |
    let r: i32x8 = simd_select::<m32x8, _>(simd_lt(a, zero), simd_neg(a), a);
    transmute(r)
39 | } |
40 | |
41 | /// Computes the absolute values of packed 16-bit integers in `a`. |
42 | /// |
43 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi16) |
44 | #[inline ] |
45 | #[target_feature (enable = "avx2" )] |
46 | #[cfg_attr (test, assert_instr(vpabsw))] |
47 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
48 | pub unsafe fn _mm256_abs_epi16(a: __m256i) -> __m256i { |
49 | let a: i16x16 = a.as_i16x16(); |
50 | let zero: i16x16 = i16x16::splat(0); |
    let r: i16x16 = simd_select::<m16x16, _>(simd_lt(a, zero), simd_neg(a), a);
    transmute(r)
53 | } |
54 | |
55 | /// Computes the absolute values of packed 8-bit integers in `a`. |
56 | /// |
57 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi8) |
58 | #[inline ] |
59 | #[target_feature (enable = "avx2" )] |
60 | #[cfg_attr (test, assert_instr(vpabsb))] |
61 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
62 | pub unsafe fn _mm256_abs_epi8(a: __m256i) -> __m256i { |
63 | let a: i8x32 = a.as_i8x32(); |
64 | let zero: i8x32 = i8x32::splat(0); |
    let r: i8x32 = simd_select::<m8x32, _>(simd_lt(a, zero), simd_neg(a), a);
    transmute(r)
67 | } |
68 | |
69 | /// Adds packed 64-bit integers in `a` and `b`. |
70 | /// |
71 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi64) |
72 | #[inline ] |
73 | #[target_feature (enable = "avx2" )] |
74 | #[cfg_attr (test, assert_instr(vpaddq))] |
75 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
76 | pub unsafe fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i { |
    transmute(simd_add(a.as_i64x4(), b.as_i64x4()))
78 | } |
79 | |
80 | /// Adds packed 32-bit integers in `a` and `b`. |
81 | /// |
82 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi32) |
83 | #[inline ] |
84 | #[target_feature (enable = "avx2" )] |
85 | #[cfg_attr (test, assert_instr(vpaddd))] |
86 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
87 | pub unsafe fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i { |
    transmute(simd_add(a.as_i32x8(), b.as_i32x8()))
89 | } |
90 | |
91 | /// Adds packed 16-bit integers in `a` and `b`. |
92 | /// |
93 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi16) |
94 | #[inline ] |
95 | #[target_feature (enable = "avx2" )] |
96 | #[cfg_attr (test, assert_instr(vpaddw))] |
97 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
98 | pub unsafe fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i { |
    transmute(simd_add(a.as_i16x16(), b.as_i16x16()))
100 | } |
101 | |
102 | /// Adds packed 8-bit integers in `a` and `b`. |
103 | /// |
104 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi8) |
105 | #[inline ] |
106 | #[target_feature (enable = "avx2" )] |
107 | #[cfg_attr (test, assert_instr(vpaddb))] |
108 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
109 | pub unsafe fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i { |
    transmute(simd_add(a.as_i8x32(), b.as_i8x32()))
111 | } |
112 | |
113 | /// Adds packed 8-bit integers in `a` and `b` using saturation. |
114 | /// |
115 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epi8) |
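///
/// # Examples
///
/// A minimal sketch with illustrative values showing the saturating behavior
/// (`100 + 100` clamps to `i8::MAX`), gated on runtime AVX2 detection:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi8(100);
///             let b = _mm256_set1_epi8(100);
///             let r = _mm256_adds_epi8(a, b);
///             let mut out = [0i8; 32];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [127i8; 32]);
///         }
///     }
/// }
/// ```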
116 | #[inline ] |
117 | #[target_feature (enable = "avx2" )] |
118 | #[cfg_attr (test, assert_instr(vpaddsb))] |
119 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
120 | pub unsafe fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i { |
    transmute(simd_saturating_add(a.as_i8x32(), b.as_i8x32()))
122 | } |
123 | |
124 | /// Adds packed 16-bit integers in `a` and `b` using saturation. |
125 | /// |
126 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epi16) |
127 | #[inline ] |
128 | #[target_feature (enable = "avx2" )] |
129 | #[cfg_attr (test, assert_instr(vpaddsw))] |
130 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
131 | pub unsafe fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i { |
    transmute(simd_saturating_add(a.as_i16x16(), b.as_i16x16()))
133 | } |
134 | |
135 | /// Adds packed unsigned 8-bit integers in `a` and `b` using saturation. |
136 | /// |
137 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epu8) |
138 | #[inline ] |
139 | #[target_feature (enable = "avx2" )] |
140 | #[cfg_attr (test, assert_instr(vpaddusb))] |
141 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
142 | pub unsafe fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i { |
    transmute(simd_saturating_add(a.as_u8x32(), b.as_u8x32()))
144 | } |
145 | |
146 | /// Adds packed unsigned 16-bit integers in `a` and `b` using saturation. |
147 | /// |
148 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epu16) |
149 | #[inline ] |
150 | #[target_feature (enable = "avx2" )] |
151 | #[cfg_attr (test, assert_instr(vpaddusw))] |
152 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
153 | pub unsafe fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i { |
    transmute(simd_saturating_add(a.as_u16x16(), b.as_u16x16()))
155 | } |
156 | |
157 | /// Concatenates pairs of 16-byte blocks in `a` and `b` into a 32-byte temporary |
158 | /// result, shifts the result right by `n` bytes, and returns the low 16 bytes. |
159 | /// |
160 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi8) |
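///
/// # Examples
///
/// A minimal sketch with illustrative values; note that the shift happens
/// independently within each 128-bit lane (runtime AVX2 detection assumed):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi8(1);
///             let b = _mm256_set1_epi8(2);
///             // Per lane: bytes 4..16 of `b`, then bytes 0..4 of `a`.
///             let r = _mm256_alignr_epi8::<4>(a, b);
///             let mut out = [0i8; 32];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             let lane = [2i8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1];
///             assert_eq!(out[..16], lane[..]);
///             assert_eq!(out[16..], lane[..]);
///         }
///     }
/// }
/// ```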
161 | #[inline ] |
162 | #[target_feature (enable = "avx2" )] |
163 | #[cfg_attr (test, assert_instr(vpalignr, IMM8 = 7))] |
164 | #[rustc_legacy_const_generics (2)] |
165 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
166 | pub unsafe fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { |
167 | static_assert_uimm_bits!(IMM8, 8); |
168 | // If palignr is shifting the pair of vectors more than the size of two |
169 | // lanes, emit zero. |
    if IMM8 >= 32 {
171 | return _mm256_set1_epi8(0); |
172 | } |
173 | // If palignr is shifting the pair of input vectors more than one lane, |
174 | // but less than two lanes, convert to shifting in zeroes. |
175 | let (a, b) = if IMM8 > 16 { |
176 | (_mm256_set1_epi8(0), a) |
177 | } else { |
178 | (a, b) |
179 | }; |
180 | |
181 | let a = a.as_i8x32(); |
182 | let b = b.as_i8x32(); |
183 | |
184 | let r: i8x32 = match IMM8 % 16 { |
185 | 0 => simd_shuffle!( |
186 | b, |
187 | a, |
188 | [ |
189 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, |
190 | 23, 24, 25, 26, 27, 28, 29, 30, 31, |
191 | ], |
192 | ), |
193 | 1 => simd_shuffle!( |
194 | b, |
195 | a, |
196 | [ |
197 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 17, 18, 19, 20, 21, 22, 23, |
198 | 24, 25, 26, 27, 28, 29, 30, 31, 48, |
199 | ], |
200 | ), |
201 | 2 => simd_shuffle!( |
202 | b, |
203 | a, |
204 | [ |
205 | 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 18, 19, 20, 21, 22, 23, 24, |
206 | 25, 26, 27, 28, 29, 30, 31, 48, 49, |
207 | ], |
208 | ), |
209 | 3 => simd_shuffle!( |
210 | b, |
211 | a, |
212 | [ |
213 | 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 19, 20, 21, 22, 23, 24, |
214 | 25, 26, 27, 28, 29, 30, 31, 48, 49, 50, |
215 | ], |
216 | ), |
217 | 4 => simd_shuffle!( |
218 | b, |
219 | a, |
220 | [ |
221 | 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 20, 21, 22, 23, 24, 25, |
222 | 26, 27, 28, 29, 30, 31, 48, 49, 50, 51, |
223 | ], |
224 | ), |
225 | 5 => simd_shuffle!( |
226 | b, |
227 | a, |
228 | [ |
229 | 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 21, 22, 23, 24, 25, 26, |
230 | 27, 28, 29, 30, 31, 48, 49, 50, 51, 52, |
231 | ], |
232 | ), |
233 | 6 => simd_shuffle!( |
234 | b, |
235 | a, |
236 | [ |
237 | 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 22, 23, 24, 25, 26, 27, |
238 | 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, |
239 | ], |
240 | ), |
241 | 7 => simd_shuffle!( |
242 | b, |
243 | a, |
244 | [ |
245 | 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 23, 24, 25, 26, 27, |
246 | 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, |
247 | ], |
248 | ), |
249 | 8 => simd_shuffle!( |
250 | b, |
251 | a, |
252 | [ |
253 | 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 24, 25, 26, 27, 28, |
254 | 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, |
255 | ], |
256 | ), |
257 | 9 => simd_shuffle!( |
258 | b, |
259 | a, |
260 | [ |
261 | 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 25, 26, 27, 28, 29, |
262 | 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, |
263 | ], |
264 | ), |
265 | 10 => simd_shuffle!( |
266 | b, |
267 | a, |
268 | [ |
269 | 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 26, 27, 28, 29, 30, |
270 | 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, |
271 | ], |
272 | ), |
273 | 11 => simd_shuffle!( |
274 | b, |
275 | a, |
276 | [ |
277 | 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 27, 28, 29, 30, 31, |
278 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, |
279 | ], |
280 | ), |
281 | 12 => simd_shuffle!( |
282 | b, |
283 | a, |
284 | [ |
285 | 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 28, 29, 30, 31, 48, |
286 | 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, |
287 | ], |
288 | ), |
289 | 13 => simd_shuffle!( |
290 | b, |
291 | a, |
292 | [ |
293 | 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 29, 30, 31, 48, 49, |
294 | 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, |
295 | ], |
296 | ), |
297 | 14 => simd_shuffle!( |
298 | b, |
299 | a, |
300 | [ |
301 | 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 30, 31, 48, 49, 50, |
302 | 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, |
303 | ], |
304 | ), |
305 | 15 => simd_shuffle!( |
306 | b, |
307 | a, |
308 | [ |
309 | 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 31, 48, 49, 50, 51, |
310 | 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, |
311 | ], |
312 | ), |
313 | _ => b, |
314 | }; |
315 | transmute(r) |
316 | } |
317 | |
318 | /// Computes the bitwise AND of 256 bits (representing integer data) |
319 | /// in `a` and `b`. |
320 | /// |
321 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_si256) |
322 | #[inline ] |
323 | #[target_feature (enable = "avx2" )] |
324 | #[cfg_attr (test, assert_instr(vandps))] |
325 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
326 | pub unsafe fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i { |
    transmute(simd_and(a.as_i64x4(), b.as_i64x4()))
328 | } |
329 | |
330 | /// Computes the bitwise NOT of 256 bits (representing integer data) |
331 | /// in `a` and then AND with `b`. |
332 | /// |
333 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_si256) |
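///
/// # Examples
///
/// A minimal sketch with illustrative bit patterns (runtime AVX2 detection
/// assumed); the result keeps only the bits of `b` that are clear in `a`:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi32(0b0110);
///             let b = _mm256_set1_epi32(0b1100);
///             let r = _mm256_andnot_si256(a, b);
///             let mut out = [0i32; 8];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [0b1000; 8]);
///         }
///     }
/// }
/// ```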
334 | #[inline ] |
335 | #[target_feature (enable = "avx2" )] |
336 | #[cfg_attr (test, assert_instr(vandnps))] |
337 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
338 | pub unsafe fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i { |
339 | let all_ones: __m256i = _mm256_set1_epi8(-1); |
    transmute(simd_and(
        simd_xor(a.as_i64x4(), all_ones.as_i64x4()),
        b.as_i64x4(),
    ))
344 | } |
345 | |
346 | /// Averages packed unsigned 16-bit integers in `a` and `b`. |
347 | /// |
348 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_avg_epu16) |
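///
/// # Examples
///
/// A minimal sketch with illustrative values; the average is rounded, i.e.
/// `(a + b + 1) >> 1` per element (runtime AVX2 detection assumed):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi16(10);
///             let b = _mm256_set1_epi16(13);
///             let r = _mm256_avg_epu16(a, b);
///             let mut out = [0u16; 16];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [12u16; 16]);
///         }
///     }
/// }
/// ```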
349 | #[inline ] |
350 | #[target_feature (enable = "avx2" )] |
351 | #[cfg_attr (test, assert_instr(vpavgw))] |
352 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
353 | pub unsafe fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i { |
354 | let a: u32x16 = simd_cast::<_, u32x16>(a.as_u16x16()); |
355 | let b: u32x16 = simd_cast::<_, u32x16>(b.as_u16x16()); |
    let r: u32x16 = simd_shr(simd_add(simd_add(a, b), u32x16::splat(1)), u32x16::splat(1));
    transmute(simd_cast::<_, u16x16>(r))
358 | } |
359 | |
360 | /// Averages packed unsigned 8-bit integers in `a` and `b`. |
361 | /// |
362 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_avg_epu8) |
363 | #[inline ] |
364 | #[target_feature (enable = "avx2" )] |
365 | #[cfg_attr (test, assert_instr(vpavgb))] |
366 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
367 | pub unsafe fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i { |
368 | let a: u16x32 = simd_cast::<_, u16x32>(a.as_u8x32()); |
369 | let b: u16x32 = simd_cast::<_, u16x32>(b.as_u8x32()); |
    let r: u16x32 = simd_shr(simd_add(simd_add(a, b), u16x32::splat(1)), u16x32::splat(1));
    transmute(simd_cast::<_, u8x32>(r))
372 | } |
373 | |
374 | /// Blends packed 32-bit integers from `a` and `b` using control mask `IMM4`. |
375 | /// |
376 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_epi32) |
377 | #[inline ] |
378 | #[target_feature (enable = "avx2" )] |
379 | #[cfg_attr (test, assert_instr(vblendps, IMM4 = 9))] |
380 | #[rustc_legacy_const_generics (2)] |
381 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
382 | pub unsafe fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i { |
383 | static_assert_uimm_bits!(IMM4, 4); |
384 | let a: i32x4 = a.as_i32x4(); |
385 | let b: i32x4 = b.as_i32x4(); |
386 | let r: i32x4 = simd_shuffle!( |
387 | a, |
388 | b, |
389 | [ |
390 | [0, 4, 0, 4][IMM4 as usize & 0b11], |
391 | [1, 1, 5, 5][IMM4 as usize & 0b11], |
392 | [2, 6, 2, 6][(IMM4 as usize >> 2) & 0b11], |
393 | [3, 3, 7, 7][(IMM4 as usize >> 2) & 0b11], |
394 | ], |
395 | ); |
    transmute(r)
397 | } |
398 | |
399 | /// Blends packed 32-bit integers from `a` and `b` using control mask `IMM8`. |
400 | /// |
401 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_epi32) |
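///
/// # Examples
///
/// A minimal sketch with illustrative values; bit `i` of `IMM8` picks element
/// `i` from `b` when set, and from `a` otherwise (runtime AVX2 detection
/// assumed):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi32(0);
///             let b = _mm256_set1_epi32(1);
///             let r = _mm256_blend_epi32::<0b1010_1010>(a, b);
///             let mut out = [0i32; 8];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [0, 1, 0, 1, 0, 1, 0, 1]);
///         }
///     }
/// }
/// ```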
402 | #[inline ] |
403 | #[target_feature (enable = "avx2" )] |
404 | #[cfg_attr (test, assert_instr(vblendps, IMM8 = 9))] |
405 | #[rustc_legacy_const_generics (2)] |
406 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
407 | pub unsafe fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { |
408 | static_assert_uimm_bits!(IMM8, 8); |
409 | let a: i32x8 = a.as_i32x8(); |
410 | let b: i32x8 = b.as_i32x8(); |
411 | let r: i32x8 = simd_shuffle!( |
412 | a, |
413 | b, |
414 | [ |
415 | [0, 8, 0, 8][IMM8 as usize & 0b11], |
416 | [1, 1, 9, 9][IMM8 as usize & 0b11], |
417 | [2, 10, 2, 10][(IMM8 as usize >> 2) & 0b11], |
418 | [3, 3, 11, 11][(IMM8 as usize >> 2) & 0b11], |
419 | [4, 12, 4, 12][(IMM8 as usize >> 4) & 0b11], |
420 | [5, 5, 13, 13][(IMM8 as usize >> 4) & 0b11], |
421 | [6, 14, 6, 14][(IMM8 as usize >> 6) & 0b11], |
422 | [7, 7, 15, 15][(IMM8 as usize >> 6) & 0b11], |
423 | ], |
424 | ); |
    transmute(r)
426 | } |
427 | |
428 | /// Blends packed 16-bit integers from `a` and `b` using control mask `IMM8`. |
429 | /// |
430 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_epi16) |
431 | #[inline ] |
432 | #[target_feature (enable = "avx2" )] |
433 | #[cfg_attr (test, assert_instr(vpblendw, IMM8 = 9))] |
434 | #[rustc_legacy_const_generics (2)] |
435 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
436 | pub unsafe fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { |
437 | static_assert_uimm_bits!(IMM8, 8); |
438 | let a = a.as_i16x16(); |
439 | let b = b.as_i16x16(); |
440 | |
441 | let r: i16x16 = simd_shuffle!( |
442 | a, |
443 | b, |
444 | [ |
445 | [0, 16, 0, 16][IMM8 as usize & 0b11], |
446 | [1, 1, 17, 17][IMM8 as usize & 0b11], |
447 | [2, 18, 2, 18][(IMM8 as usize >> 2) & 0b11], |
448 | [3, 3, 19, 19][(IMM8 as usize >> 2) & 0b11], |
449 | [4, 20, 4, 20][(IMM8 as usize >> 4) & 0b11], |
450 | [5, 5, 21, 21][(IMM8 as usize >> 4) & 0b11], |
451 | [6, 22, 6, 22][(IMM8 as usize >> 6) & 0b11], |
452 | [7, 7, 23, 23][(IMM8 as usize >> 6) & 0b11], |
453 | [8, 24, 8, 24][IMM8 as usize & 0b11], |
454 | [9, 9, 25, 25][IMM8 as usize & 0b11], |
455 | [10, 26, 10, 26][(IMM8 as usize >> 2) & 0b11], |
456 | [11, 11, 27, 27][(IMM8 as usize >> 2) & 0b11], |
457 | [12, 28, 12, 28][(IMM8 as usize >> 4) & 0b11], |
458 | [13, 13, 29, 29][(IMM8 as usize >> 4) & 0b11], |
459 | [14, 30, 14, 30][(IMM8 as usize >> 6) & 0b11], |
460 | [15, 15, 31, 31][(IMM8 as usize >> 6) & 0b11], |
461 | ], |
462 | ); |
463 | transmute(r) |
464 | } |
465 | |
466 | /// Blends packed 8-bit integers from `a` and `b` using `mask`. |
467 | /// |
468 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blendv_epi8) |
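///
/// # Examples
///
/// A minimal sketch with illustrative values; only the sign bit of each mask
/// byte matters, and a set sign bit selects from `b` (runtime AVX2 detection
/// assumed):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi8(1);
///             let b = _mm256_set1_epi8(2);
///             let mask = _mm256_set1_epi8(-1);
///             let r = _mm256_blendv_epi8(a, b, mask);
///             let mut out = [0i8; 32];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [2i8; 32]);
///         }
///     }
/// }
/// ```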
469 | #[inline ] |
470 | #[target_feature (enable = "avx2" )] |
471 | #[cfg_attr (test, assert_instr(vpblendvb))] |
472 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
473 | pub unsafe fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i { |
    let mask: i8x32 = simd_lt(mask.as_i8x32(), i8x32::splat(0));
    transmute(simd_select(mask, b.as_i8x32(), a.as_i8x32()))
476 | } |
477 | |
478 | /// Broadcasts the low packed 8-bit integer from `a` to all elements of |
479 | /// the 128-bit returned value. |
480 | /// |
481 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastb_epi8) |
482 | #[inline ] |
483 | #[target_feature (enable = "avx2" )] |
484 | #[cfg_attr (test, assert_instr(vpbroadcastb))] |
485 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
486 | pub unsafe fn _mm_broadcastb_epi8(a: __m128i) -> __m128i { |
487 | let zero: __m128i = _mm_setzero_si128(); |
488 | let ret: i8x16 = simd_shuffle!(a.as_i8x16(), zero.as_i8x16(), [0_u32; 16]); |
    transmute::<i8x16, _>(ret)
490 | } |
491 | |
492 | /// Broadcasts the low packed 8-bit integer from `a` to all elements of |
493 | /// the 256-bit returned value. |
494 | /// |
495 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastb_epi8) |
496 | #[inline ] |
497 | #[target_feature (enable = "avx2" )] |
498 | #[cfg_attr (test, assert_instr(vpbroadcastb))] |
499 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
500 | pub unsafe fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i { |
501 | let zero: __m128i = _mm_setzero_si128(); |
502 | let ret: i8x32 = simd_shuffle!(a.as_i8x16(), zero.as_i8x16(), [0_u32; 32]); |
    transmute::<i8x32, _>(ret)
504 | } |
505 | |
506 | // N.B., `simd_shuffle4` with integer data types for `a` and `b` is |
507 | // often compiled to `vbroadcastss`. |
508 | /// Broadcasts the low packed 32-bit integer from `a` to all elements of |
509 | /// the 128-bit returned value. |
510 | /// |
511 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastd_epi32) |
512 | #[inline ] |
513 | #[target_feature (enable = "avx2" )] |
514 | #[cfg_attr (test, assert_instr(vbroadcastss))] |
515 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
516 | pub unsafe fn _mm_broadcastd_epi32(a: __m128i) -> __m128i { |
517 | let zero: __m128i = _mm_setzero_si128(); |
518 | let ret: i32x4 = simd_shuffle!(a.as_i32x4(), zero.as_i32x4(), [0_u32; 4]); |
    transmute::<i32x4, _>(ret)
520 | } |
521 | |
// N.B., `simd_shuffle4` with integer data types for `a` and `b` is
523 | // often compiled to `vbroadcastss`. |
524 | /// Broadcasts the low packed 32-bit integer from `a` to all elements of |
525 | /// the 256-bit returned value. |
526 | /// |
527 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastd_epi32) |
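///
/// # Examples
///
/// A minimal sketch with illustrative values; only the lowest 32-bit element
/// of `a` is used (runtime AVX2 detection assumed):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm_setr_epi32(7, 8, 9, 10);
///             let r = _mm256_broadcastd_epi32(a);
///             let mut out = [0i32; 8];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [7; 8]);
///         }
///     }
/// }
/// ```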
528 | #[inline ] |
529 | #[target_feature (enable = "avx2" )] |
530 | #[cfg_attr (test, assert_instr(vbroadcastss))] |
531 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
532 | pub unsafe fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i { |
533 | let zero: __m128i = _mm_setzero_si128(); |
534 | let ret: i32x8 = simd_shuffle!(a.as_i32x4(), zero.as_i32x4(), [0_u32; 8]); |
    transmute::<i32x8, _>(ret)
536 | } |
537 | |
538 | /// Broadcasts the low packed 64-bit integer from `a` to all elements of |
539 | /// the 128-bit returned value. |
540 | /// |
541 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastq_epi64) |
542 | #[inline ] |
543 | #[target_feature (enable = "avx2" )] |
544 | // Emits `vmovddup` instead of `vpbroadcastq` |
545 | // See https://github.com/rust-lang/stdarch/issues/791 |
546 | #[cfg_attr (test, assert_instr(vmovddup))] |
547 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
548 | pub unsafe fn _mm_broadcastq_epi64(a: __m128i) -> __m128i { |
549 | let ret: i64x2 = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]); |
    transmute::<i64x2, _>(ret)
551 | } |
552 | |
553 | /// Broadcasts the low packed 64-bit integer from `a` to all elements of |
554 | /// the 256-bit returned value. |
555 | /// |
556 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastq_epi64) |
557 | #[inline ] |
558 | #[target_feature (enable = "avx2" )] |
559 | #[cfg_attr (test, assert_instr(vbroadcastsd))] |
560 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
561 | pub unsafe fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i { |
562 | let ret: i64x4 = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]); |
    transmute::<i64x4, _>(ret)
564 | } |
565 | |
566 | /// Broadcasts the low double-precision (64-bit) floating-point element |
567 | /// from `a` to all elements of the 128-bit returned value. |
568 | /// |
569 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastsd_pd) |
570 | #[inline ] |
571 | #[target_feature (enable = "avx2" )] |
572 | #[cfg_attr (test, assert_instr(vmovddup))] |
573 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
574 | pub unsafe fn _mm_broadcastsd_pd(a: __m128d) -> __m128d { |
575 | simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 2]) |
576 | } |
577 | |
578 | /// Broadcasts the low double-precision (64-bit) floating-point element |
579 | /// from `a` to all elements of the 256-bit returned value. |
580 | /// |
581 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastsd_pd) |
582 | #[inline ] |
583 | #[target_feature (enable = "avx2" )] |
584 | #[cfg_attr (test, assert_instr(vbroadcastsd))] |
585 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
586 | pub unsafe fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d { |
587 | simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 4]) |
588 | } |
589 | |
590 | // N.B., `broadcastsi128_si256` is often compiled to `vinsertf128` or |
591 | // `vbroadcastf128`. |
592 | /// Broadcasts 128 bits of integer data from a to all 128-bit lanes in |
593 | /// the 256-bit returned value. |
594 | /// |
595 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastsi128_si256) |
596 | #[inline ] |
597 | #[target_feature (enable = "avx2" )] |
598 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
599 | pub unsafe fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i { |
600 | let zero: __m128i = _mm_setzero_si128(); |
601 | let ret: i64x4 = simd_shuffle!(a.as_i64x2(), zero.as_i64x2(), [0, 1, 0, 1]); |
    transmute::<i64x4, _>(ret)
603 | } |
604 | |
605 | /// Broadcasts the low single-precision (32-bit) floating-point element |
606 | /// from `a` to all elements of the 128-bit returned value. |
607 | /// |
608 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastss_ps) |
609 | #[inline ] |
610 | #[target_feature (enable = "avx2" )] |
611 | #[cfg_attr (test, assert_instr(vbroadcastss))] |
612 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
613 | pub unsafe fn _mm_broadcastss_ps(a: __m128) -> __m128 { |
614 | simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 4]) |
615 | } |
616 | |
617 | /// Broadcasts the low single-precision (32-bit) floating-point element |
618 | /// from `a` to all elements of the 256-bit returned value. |
619 | /// |
620 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastss_ps) |
621 | #[inline ] |
622 | #[target_feature (enable = "avx2" )] |
623 | #[cfg_attr (test, assert_instr(vbroadcastss))] |
624 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
625 | pub unsafe fn _mm256_broadcastss_ps(a: __m128) -> __m256 { |
626 | simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 8]) |
627 | } |
628 | |
/// Broadcasts the low packed 16-bit integer from `a` to all elements of
/// the 128-bit returned value.
631 | /// |
632 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastw_epi16) |
633 | #[inline ] |
634 | #[target_feature (enable = "avx2" )] |
635 | #[cfg_attr (test, assert_instr(vpbroadcastw))] |
636 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
637 | pub unsafe fn _mm_broadcastw_epi16(a: __m128i) -> __m128i { |
638 | let zero: __m128i = _mm_setzero_si128(); |
639 | let ret: i16x8 = simd_shuffle!(a.as_i16x8(), zero.as_i16x8(), [0_u32; 8]); |
    transmute::<i16x8, _>(ret)
641 | } |
642 | |
/// Broadcasts the low packed 16-bit integer from `a` to all elements of
/// the 256-bit returned value.
645 | /// |
646 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastw_epi16) |
647 | #[inline ] |
648 | #[target_feature (enable = "avx2" )] |
649 | #[cfg_attr (test, assert_instr(vpbroadcastw))] |
650 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
651 | pub unsafe fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i { |
652 | let zero: __m128i = _mm_setzero_si128(); |
653 | let ret: i16x16 = simd_shuffle!(a.as_i16x8(), zero.as_i16x8(), [0_u32; 16]); |
    transmute::<i16x16, _>(ret)
655 | } |
656 | |
657 | /// Compares packed 64-bit integers in `a` and `b` for equality. |
658 | /// |
659 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64) |
660 | #[inline ] |
661 | #[target_feature (enable = "avx2" )] |
662 | #[cfg_attr (test, assert_instr(vpcmpeqq))] |
663 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
664 | pub unsafe fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i { |
    transmute::<i64x4, _>(simd_eq(a.as_i64x4(), b.as_i64x4()))
666 | } |
667 | |
668 | /// Compares packed 32-bit integers in `a` and `b` for equality. |
669 | /// |
670 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32) |
671 | #[inline ] |
672 | #[target_feature (enable = "avx2" )] |
673 | #[cfg_attr (test, assert_instr(vpcmpeqd))] |
674 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
675 | pub unsafe fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i { |
    transmute::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8()))
677 | } |
678 | |
679 | /// Compares packed 16-bit integers in `a` and `b` for equality. |
680 | /// |
681 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi16) |
682 | #[inline ] |
683 | #[target_feature (enable = "avx2" )] |
684 | #[cfg_attr (test, assert_instr(vpcmpeqw))] |
685 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
686 | pub unsafe fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i { |
    transmute::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16()))
688 | } |
689 | |
690 | /// Compares packed 8-bit integers in `a` and `b` for equality. |
691 | /// |
692 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi8) |
693 | #[inline ] |
694 | #[target_feature (enable = "avx2" )] |
695 | #[cfg_attr (test, assert_instr(vpcmpeqb))] |
696 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
697 | pub unsafe fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i { |
    transmute::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32()))
699 | } |
700 | |
701 | /// Compares packed 64-bit integers in `a` and `b` for greater-than. |
702 | /// |
703 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64) |
704 | #[inline ] |
705 | #[target_feature (enable = "avx2" )] |
706 | #[cfg_attr (test, assert_instr(vpcmpgtq))] |
707 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
708 | pub unsafe fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i { |
    transmute::<i64x4, _>(simd_gt(a.as_i64x4(), b.as_i64x4()))
710 | } |
711 | |
712 | /// Compares packed 32-bit integers in `a` and `b` for greater-than. |
713 | /// |
714 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32) |
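///
/// # Examples
///
/// A minimal sketch with illustrative values; each element of the result is
/// all ones (`-1`) where `a > b` and all zeros otherwise (runtime AVX2
/// detection assumed):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
///             let b = _mm256_set1_epi32(3);
///             let r = _mm256_cmpgt_epi32(a, b);
///             let mut out = [0i32; 8];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [0, 0, 0, 0, -1, -1, -1, -1]);
///         }
///     }
/// }
/// ```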
715 | #[inline ] |
716 | #[target_feature (enable = "avx2" )] |
717 | #[cfg_attr (test, assert_instr(vpcmpgtd))] |
718 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
719 | pub unsafe fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i { |
    transmute::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8()))
721 | } |
722 | |
723 | /// Compares packed 16-bit integers in `a` and `b` for greater-than. |
724 | /// |
725 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi16) |
726 | #[inline ] |
727 | #[target_feature (enable = "avx2" )] |
728 | #[cfg_attr (test, assert_instr(vpcmpgtw))] |
729 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
730 | pub unsafe fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i { |
    transmute::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16()))
732 | } |
733 | |
734 | /// Compares packed 8-bit integers in `a` and `b` for greater-than. |
735 | /// |
736 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi8) |
737 | #[inline ] |
738 | #[target_feature (enable = "avx2" )] |
739 | #[cfg_attr (test, assert_instr(vpcmpgtb))] |
740 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
741 | pub unsafe fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i { |
    transmute::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32()))
743 | } |
744 | |
745 | /// Sign-extend 16-bit integers to 32-bit integers. |
746 | /// |
747 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi32) |
748 | #[inline ] |
749 | #[target_feature (enable = "avx2" )] |
750 | #[cfg_attr (test, assert_instr(vpmovsxwd))] |
751 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
752 | pub unsafe fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i { |
    transmute::<i32x8, _>(simd_cast(a.as_i16x8()))
754 | } |
755 | |
756 | /// Sign-extend 16-bit integers to 64-bit integers. |
757 | /// |
758 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi64) |
759 | #[inline ] |
760 | #[target_feature (enable = "avx2" )] |
761 | #[cfg_attr (test, assert_instr(vpmovsxwq))] |
762 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
763 | pub unsafe fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i { |
764 | let a: i16x8 = a.as_i16x8(); |
765 | let v64: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); |
    transmute::<i64x4, _>(simd_cast(v64))
767 | } |
768 | |
769 | /// Sign-extend 32-bit integers to 64-bit integers. |
770 | /// |
771 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi64) |
772 | #[inline ] |
773 | #[target_feature (enable = "avx2" )] |
774 | #[cfg_attr (test, assert_instr(vpmovsxdq))] |
775 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
776 | pub unsafe fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i { |
    transmute::<i64x4, _>(simd_cast(a.as_i32x4()))
778 | } |
779 | |
780 | /// Sign-extend 8-bit integers to 16-bit integers. |
781 | /// |
782 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi8_epi16) |
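///
/// # Examples
///
/// A minimal sketch with illustrative values; sign extension preserves the
/// numeric value of each signed byte (runtime AVX2 detection assumed):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm_set1_epi8(-3);
///             let r = _mm256_cvtepi8_epi16(a);
///             let mut out = [0i16; 16];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [-3i16; 16]);
///         }
///     }
/// }
/// ```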
783 | #[inline ] |
784 | #[target_feature (enable = "avx2" )] |
785 | #[cfg_attr (test, assert_instr(vpmovsxbw))] |
786 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
787 | pub unsafe fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i { |
    transmute::<i16x16, _>(simd_cast(a.as_i8x16()))
789 | } |
790 | |
791 | /// Sign-extend 8-bit integers to 32-bit integers. |
792 | /// |
793 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi8_epi32) |
794 | #[inline ] |
795 | #[target_feature (enable = "avx2" )] |
796 | #[cfg_attr (test, assert_instr(vpmovsxbd))] |
797 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
798 | pub unsafe fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i { |
799 | let a: i8x16 = a.as_i8x16(); |
800 | let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); |
    transmute::<i32x8, _>(simd_cast(v64))
802 | } |
803 | |
804 | /// Sign-extend 8-bit integers to 64-bit integers. |
805 | /// |
806 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi8_epi64) |
807 | #[inline ] |
808 | #[target_feature (enable = "avx2" )] |
809 | #[cfg_attr (test, assert_instr(vpmovsxbq))] |
810 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
811 | pub unsafe fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i { |
812 | let a: i8x16 = a.as_i8x16(); |
813 | let v32: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); |
    transmute::<i64x4, _>(simd_cast(v32))
815 | } |
816 | |
/// Zero-extend unsigned 16-bit integers in `a` to 32-bit integers.
819 | /// |
820 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu16_epi32) |
821 | #[inline ] |
822 | #[target_feature (enable = "avx2" )] |
823 | #[cfg_attr (test, assert_instr(vpmovzxwd))] |
824 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
825 | pub unsafe fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i { |
    transmute::<i32x8, _>(simd_cast(a.as_u16x8()))
827 | } |
828 | |
829 | /// Zero-extend the lower four unsigned 16-bit integers in `a` to 64-bit |
830 | /// integers. The upper four elements of `a` are unused. |
831 | /// |
832 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu16_epi64) |
833 | #[inline ] |
834 | #[target_feature (enable = "avx2" )] |
835 | #[cfg_attr (test, assert_instr(vpmovzxwq))] |
836 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
837 | pub unsafe fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i { |
838 | let a: u16x8 = a.as_u16x8(); |
839 | let v64: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); |
    transmute::<i64x4, _>(simd_cast(v64))
841 | } |
842 | |
843 | /// Zero-extend unsigned 32-bit integers in `a` to 64-bit integers. |
844 | /// |
845 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_epi64) |
846 | #[inline ] |
847 | #[target_feature (enable = "avx2" )] |
848 | #[cfg_attr (test, assert_instr(vpmovzxdq))] |
849 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
850 | pub unsafe fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i { |
    transmute::<i64x4, _>(simd_cast(a.as_u32x4()))
852 | } |
853 | |
854 | /// Zero-extend unsigned 8-bit integers in `a` to 16-bit integers. |
855 | /// |
856 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu8_epi16) |
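///
/// # Examples
///
/// A minimal sketch with illustrative values; each byte is treated as
/// unsigned, so `0xFF` widens to `255` rather than `-1` (runtime AVX2
/// detection assumed):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm_set1_epi8(-1); // every byte is 0xFF
///             let r = _mm256_cvtepu8_epi16(a);
///             let mut out = [0i16; 16];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [255i16; 16]);
///         }
///     }
/// }
/// ```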
857 | #[inline ] |
858 | #[target_feature (enable = "avx2" )] |
859 | #[cfg_attr (test, assert_instr(vpmovzxbw))] |
860 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
861 | pub unsafe fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i { |
    transmute::<i16x16, _>(simd_cast(a.as_u8x16()))
863 | } |
864 | |
865 | /// Zero-extend the lower eight unsigned 8-bit integers in `a` to 32-bit |
866 | /// integers. The upper eight elements of `a` are unused. |
867 | /// |
868 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu8_epi32) |
869 | #[inline ] |
870 | #[target_feature (enable = "avx2" )] |
871 | #[cfg_attr (test, assert_instr(vpmovzxbd))] |
872 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
873 | pub unsafe fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i { |
874 | let a: u8x16 = a.as_u8x16(); |
875 | let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); |
    transmute::<i32x8, _>(simd_cast(v64))
877 | } |
878 | |
879 | /// Zero-extend the lower four unsigned 8-bit integers in `a` to 64-bit |
880 | /// integers. The upper twelve elements of `a` are unused. |
881 | /// |
882 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu8_epi64) |
883 | #[inline ] |
884 | #[target_feature (enable = "avx2" )] |
885 | #[cfg_attr (test, assert_instr(vpmovzxbq))] |
886 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
887 | pub unsafe fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i { |
888 | let a: u8x16 = a.as_u8x16(); |
889 | let v32: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); |
    transmute::<i64x4, _>(simd_cast(v32))
891 | } |
892 | |
893 | /// Extracts 128 bits (of integer data) from `a` selected with `IMM1`. |
894 | /// |
895 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti128_si256) |
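///
/// # Examples
///
/// A minimal sketch with illustrative values; `IMM1 = 1` selects the upper
/// 128-bit half (runtime AVX2 detection assumed):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_setr_epi64x(0, 1, 2, 3);
///             let hi = _mm256_extracti128_si256::<1>(a);
///             let mut out = [0i64; 2];
///             _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, hi);
///             assert_eq!(out, [2, 3]);
///         }
///     }
/// }
/// ```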
896 | #[inline ] |
897 | #[target_feature (enable = "avx2" )] |
898 | #[cfg_attr ( |
899 | all(test, not(target_os = "windows" )), |
900 | assert_instr(vextractf128, IMM1 = 1) |
901 | )] |
902 | #[rustc_legacy_const_generics (1)] |
903 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
904 | pub unsafe fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i { |
905 | static_assert_uimm_bits!(IMM1, 1); |
906 | let a: i64x4 = a.as_i64x4(); |
907 | let b: i64x4 = _mm256_undefined_si256().as_i64x4(); |
908 | let dst: i64x2 = simd_shuffle!(a, b, [[0, 1], [2, 3]][IMM1 as usize]); |
    transmute(dst)
910 | } |
911 | |
912 | /// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`. |
913 | /// |
914 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_epi16) |
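///
/// # Examples
///
/// A minimal sketch with illustrative values; within each 128-bit lane the
/// result holds four pair sums from `a` followed by four pair sums from `b`
/// (runtime AVX2 detection assumed):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi16(1);
///             let b = _mm256_set1_epi16(2);
///             let r = _mm256_hadd_epi16(a, b);
///             let mut out = [0i16; 16];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4]);
///         }
///     }
/// }
/// ```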
915 | #[inline ] |
916 | #[target_feature (enable = "avx2" )] |
917 | #[cfg_attr (test, assert_instr(vphaddw))] |
918 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
919 | pub unsafe fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i { |
    transmute(phaddw(a.as_i16x16(), b.as_i16x16()))
921 | } |
922 | |
923 | /// Horizontally adds adjacent pairs of 32-bit integers in `a` and `b`. |
924 | /// |
925 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_epi32) |
926 | #[inline ] |
927 | #[target_feature (enable = "avx2" )] |
928 | #[cfg_attr (test, assert_instr(vphaddd))] |
929 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
930 | pub unsafe fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i { |
    transmute(phaddd(a.as_i32x8(), b.as_i32x8()))
932 | } |
933 | |
934 | /// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b` |
935 | /// using saturation. |
936 | /// |
937 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadds_epi16) |
938 | #[inline ] |
939 | #[target_feature (enable = "avx2" )] |
940 | #[cfg_attr (test, assert_instr(vphaddsw))] |
941 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
942 | pub unsafe fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i { |
    transmute(phaddsw(a.as_i16x16(), b.as_i16x16()))
944 | } |
945 | |
946 | /// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`. |
947 | /// |
948 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_epi16) |
949 | #[inline ] |
950 | #[target_feature (enable = "avx2" )] |
951 | #[cfg_attr (test, assert_instr(vphsubw))] |
952 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
953 | pub unsafe fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i { |
    transmute(phsubw(a.as_i16x16(), b.as_i16x16()))
955 | } |
956 | |
957 | /// Horizontally subtract adjacent pairs of 32-bit integers in `a` and `b`. |
958 | /// |
959 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_epi32) |
960 | #[inline ] |
961 | #[target_feature (enable = "avx2" )] |
962 | #[cfg_attr (test, assert_instr(vphsubd))] |
963 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
964 | pub unsafe fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i { |
    transmute(phsubd(a.as_i32x8(), b.as_i32x8()))
966 | } |
967 | |
968 | /// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b` |
969 | /// using saturation. |
970 | /// |
971 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsubs_epi16) |
972 | #[inline ] |
973 | #[target_feature (enable = "avx2" )] |
974 | #[cfg_attr (test, assert_instr(vphsubsw))] |
975 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
976 | pub unsafe fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i { |
    transmute(phsubsw(a.as_i16x16(), b.as_i16x16()))
978 | } |
979 | |
980 | /// Returns values from `slice` at offsets determined by `offsets * scale`, |
981 | /// where |
982 | /// `scale` should be 1, 2, 4 or 8. |
983 | /// |
984 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_epi32) |
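///
/// # Examples
///
/// A minimal sketch gathering from an illustrative in-memory array; `SCALE`
/// is 4 because the elements are 4 bytes apart (runtime AVX2 detection
/// assumed):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let data: [i32; 8] = [10, 11, 12, 13, 14, 15, 16, 17];
///             let offsets = _mm_setr_epi32(0, 2, 4, 6);
///             let r = _mm_i32gather_epi32::<4>(data.as_ptr(), offsets);
///             let mut out = [0i32; 4];
///             _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///             assert_eq!(out, [10, 12, 14, 16]);
///         }
///     }
/// }
/// ```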
985 | #[inline ] |
986 | #[target_feature (enable = "avx2" )] |
987 | #[cfg_attr (test, assert_instr(vpgatherdd, SCALE = 1))] |
988 | #[rustc_legacy_const_generics (2)] |
989 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
990 | pub unsafe fn _mm_i32gather_epi32<const SCALE: i32>( |
991 | slice: *const i32, |
992 | offsets: __m128i, |
993 | ) -> __m128i { |
994 | static_assert_imm8_scale!(SCALE); |
995 | let zero: i32x4 = _mm_setzero_si128().as_i32x4(); |
996 | let neg_one: i32x4 = _mm_set1_epi32(-1).as_i32x4(); |
997 | let offsets: i32x4 = offsets.as_i32x4(); |
998 | let slice: *const i8 = slice as *const i8; |
    let r: i32x4 = pgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
1001 | } |
1002 | |
1003 | /// Returns values from `slice` at offsets determined by `offsets * scale`, |
1004 | /// where |
1005 | /// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in |
1006 | /// that position instead. |
1007 | /// |
1008 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_epi32) |
1009 | #[inline ] |
1010 | #[target_feature (enable = "avx2" )] |
1011 | #[cfg_attr (test, assert_instr(vpgatherdd, SCALE = 1))] |
1012 | #[rustc_legacy_const_generics (4)] |
1013 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1014 | pub unsafe fn _mm_mask_i32gather_epi32<const SCALE: i32>( |
1015 | src: __m128i, |
1016 | slice: *const i32, |
1017 | offsets: __m128i, |
1018 | mask: __m128i, |
1019 | ) -> __m128i { |
1020 | static_assert_imm8_scale!(SCALE); |
1021 | let src: i32x4 = src.as_i32x4(); |
1022 | let mask: i32x4 = mask.as_i32x4(); |
1023 | let offsets: i32x4 = offsets.as_i32x4(); |
1024 | let slice: *const i8 = slice as *const i8; |
1025 | let r: i32x4 = pgatherdd(src, slice, offsets, mask, SCALE as i8); |
    transmute(r)
1027 | } |
1028 | |
1029 | /// Returns values from `slice` at offsets determined by `offsets * scale`, |
1030 | /// where |
1031 | /// `scale` should be 1, 2, 4 or 8. |
1032 | /// |
1033 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_epi32) |
1034 | #[inline ] |
1035 | #[target_feature (enable = "avx2" )] |
1036 | #[cfg_attr (test, assert_instr(vpgatherdd, SCALE = 1))] |
1037 | #[rustc_legacy_const_generics (2)] |
1038 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1039 | pub unsafe fn _mm256_i32gather_epi32<const SCALE: i32>( |
1040 | slice: *const i32, |
1041 | offsets: __m256i, |
1042 | ) -> __m256i { |
1043 | static_assert_imm8_scale!(SCALE); |
1044 | let zero: i32x8 = _mm256_setzero_si256().as_i32x8(); |
1045 | let neg_one: i32x8 = _mm256_set1_epi32(-1).as_i32x8(); |
1046 | let offsets: i32x8 = offsets.as_i32x8(); |
1047 | let slice: *const i8 = slice as *const i8; |
    let r: i32x8 = vpgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
1050 | } |
1051 | |
1052 | /// Returns values from `slice` at offsets determined by `offsets * scale`, |
1053 | /// where |
1054 | /// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in |
1055 | /// that position instead. |
1056 | /// |
1057 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_epi32) |
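///
/// # Examples
///
/// A minimal sketch with an illustrative array; lanes whose mask element has
/// its sign bit set are gathered, the rest keep the corresponding element of
/// `src` (runtime AVX2 detection assumed):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let data: [i32; 8] = [10, 11, 12, 13, 14, 15, 16, 17];
///             let src = _mm256_set1_epi32(-1);
///             let offsets = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
///             let mask = _mm256_setr_epi32(-1, 0, -1, 0, -1, 0, -1, 0);
///             let r = _mm256_mask_i32gather_epi32::<4>(src, data.as_ptr(), offsets, mask);
///             let mut out = [0i32; 8];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [10, -1, 12, -1, 14, -1, 16, -1]);
///         }
///     }
/// }
/// ```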
1058 | #[inline ] |
1059 | #[target_feature (enable = "avx2" )] |
1060 | #[cfg_attr (test, assert_instr(vpgatherdd, SCALE = 1))] |
1061 | #[rustc_legacy_const_generics (4)] |
1062 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1063 | pub unsafe fn _mm256_mask_i32gather_epi32<const SCALE: i32>( |
1064 | src: __m256i, |
1065 | slice: *const i32, |
1066 | offsets: __m256i, |
1067 | mask: __m256i, |
1068 | ) -> __m256i { |
1069 | static_assert_imm8_scale!(SCALE); |
1070 | let src: i32x8 = src.as_i32x8(); |
1071 | let mask: i32x8 = mask.as_i32x8(); |
1072 | let offsets: i32x8 = offsets.as_i32x8(); |
1073 | let slice: *const i8 = slice as *const i8; |
1074 | let r: i32x8 = vpgatherdd(src, slice, offsets, mask, SCALE as i8); |
    transmute(r)
1076 | } |
1077 | |
1078 | /// Returns values from `slice` at offsets determined by `offsets * scale`, |
1079 | /// where |
1080 | /// `scale` should be 1, 2, 4 or 8. |
1081 | /// |
1082 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_ps) |
1083 | #[inline ] |
1084 | #[target_feature (enable = "avx2" )] |
1085 | #[cfg_attr (test, assert_instr(vgatherdps, SCALE = 1))] |
1086 | #[rustc_legacy_const_generics (2)] |
1087 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1088 | pub unsafe fn _mm_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 { |
1089 | static_assert_imm8_scale!(SCALE); |
1090 | let zero: __m128 = _mm_setzero_ps(); |
1091 | let neg_one: __m128 = _mm_set1_ps(-1.0); |
1092 | let offsets: i32x4 = offsets.as_i32x4(); |
1093 | let slice: *const i8 = slice as *const i8; |
    pgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
1095 | } |
1096 | |
1097 | /// Returns values from `slice` at offsets determined by `offsets * scale`, |
1098 | /// where |
1099 | /// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in |
1100 | /// that position instead. |
1101 | /// |
1102 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_ps) |
1103 | #[inline ] |
1104 | #[target_feature (enable = "avx2" )] |
1105 | #[cfg_attr (test, assert_instr(vgatherdps, SCALE = 1))] |
1106 | #[rustc_legacy_const_generics (4)] |
1107 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1108 | pub unsafe fn _mm_mask_i32gather_ps<const SCALE: i32>( |
1109 | src: __m128, |
1110 | slice: *const f32, |
1111 | offsets: __m128i, |
1112 | mask: __m128, |
1113 | ) -> __m128 { |
1114 | static_assert_imm8_scale!(SCALE); |
1115 | let offsets: i32x4 = offsets.as_i32x4(); |
1116 | let slice: *const i8 = slice as *const i8; |
1117 | pgatherdps(src, slice, offsets, mask, SCALE as i8) |
1118 | } |
1119 | |
1120 | /// Returns values from `slice` at offsets determined by `offsets * scale`, |
1121 | /// where |
1122 | /// `scale` should be 1, 2, 4 or 8. |
1123 | /// |
1124 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_ps) |
1125 | #[inline ] |
1126 | #[target_feature (enable = "avx2" )] |
1127 | #[cfg_attr (test, assert_instr(vgatherdps, SCALE = 1))] |
1128 | #[rustc_legacy_const_generics (2)] |
1129 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1130 | pub unsafe fn _mm256_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m256 { |
1131 | static_assert_imm8_scale!(SCALE); |
1132 | let zero: __m256 = _mm256_setzero_ps(); |
1133 | let neg_one: __m256 = _mm256_set1_ps(-1.0); |
1134 | let offsets: i32x8 = offsets.as_i32x8(); |
1135 | let slice: *const i8 = slice as *const i8; |
    vpgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
1137 | } |
1138 | |
1139 | /// Returns values from `slice` at offsets determined by `offsets * scale`, |
1140 | /// where |
1141 | /// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in |
1142 | /// that position instead. |
1143 | /// |
1144 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_ps) |
1145 | #[inline ] |
1146 | #[target_feature (enable = "avx2" )] |
1147 | #[cfg_attr (test, assert_instr(vgatherdps, SCALE = 1))] |
1148 | #[rustc_legacy_const_generics (4)] |
1149 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1150 | pub unsafe fn _mm256_mask_i32gather_ps<const SCALE: i32>( |
1151 | src: __m256, |
1152 | slice: *const f32, |
1153 | offsets: __m256i, |
1154 | mask: __m256, |
1155 | ) -> __m256 { |
1156 | static_assert_imm8_scale!(SCALE); |
1157 | let offsets: i32x8 = offsets.as_i32x8(); |
1158 | let slice: *const i8 = slice as *const i8; |
1159 | vpgatherdps(src, slice, offsets, mask, SCALE as i8) |
1160 | } |
1161 | |
1162 | /// Returns values from `slice` at offsets determined by `offsets * scale`, |
1163 | /// where |
1164 | /// `scale` should be 1, 2, 4 or 8. |
1165 | /// |
1166 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_epi64) |
1167 | #[inline ] |
1168 | #[target_feature (enable = "avx2" )] |
1169 | #[cfg_attr (test, assert_instr(vpgatherdq, SCALE = 1))] |
1170 | #[rustc_legacy_const_generics (2)] |
1171 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1172 | pub unsafe fn _mm_i32gather_epi64<const SCALE: i32>( |
1173 | slice: *const i64, |
1174 | offsets: __m128i, |
1175 | ) -> __m128i { |
1176 | static_assert_imm8_scale!(SCALE); |
1177 | let zero: i64x2 = _mm_setzero_si128().as_i64x2(); |
1178 | let neg_one: i64x2 = _mm_set1_epi64x(-1).as_i64x2(); |
1179 | let offsets: i32x4 = offsets.as_i32x4(); |
1180 | let slice: *const i8 = slice as *const i8; |
    let r: i64x2 = pgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
1183 | } |
1184 | |
1185 | /// Returns values from `slice` at offsets determined by `offsets * scale`, |
1186 | /// where |
1187 | /// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in |
1188 | /// that position instead. |
1189 | /// |
1190 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_epi64) |
1191 | #[inline ] |
1192 | #[target_feature (enable = "avx2" )] |
1193 | #[cfg_attr (test, assert_instr(vpgatherdq, SCALE = 1))] |
1194 | #[rustc_legacy_const_generics (4)] |
1195 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1196 | pub unsafe fn _mm_mask_i32gather_epi64<const SCALE: i32>( |
1197 | src: __m128i, |
1198 | slice: *const i64, |
1199 | offsets: __m128i, |
1200 | mask: __m128i, |
1201 | ) -> __m128i { |
1202 | static_assert_imm8_scale!(SCALE); |
1203 | let src: i64x2 = src.as_i64x2(); |
1204 | let mask: i64x2 = mask.as_i64x2(); |
1205 | let offsets: i32x4 = offsets.as_i32x4(); |
1206 | let slice: *const i8 = slice as *const i8; |
1207 | let r: i64x2 = pgatherdq(src, slice, offsets, mask, SCALE as i8); |
    transmute(r)
1209 | } |
1210 | |
1211 | /// Returns values from `slice` at offsets determined by `offsets * scale`, |
1212 | /// where |
1213 | /// `scale` should be 1, 2, 4 or 8. |
1214 | /// |
1215 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_epi64) |
1216 | #[inline ] |
1217 | #[target_feature (enable = "avx2" )] |
1218 | #[cfg_attr (test, assert_instr(vpgatherdq, SCALE = 1))] |
1219 | #[rustc_legacy_const_generics (2)] |
1220 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1221 | pub unsafe fn _mm256_i32gather_epi64<const SCALE: i32>( |
1222 | slice: *const i64, |
1223 | offsets: __m128i, |
1224 | ) -> __m256i { |
1225 | static_assert_imm8_scale!(SCALE); |
1226 | let zero: i64x4 = _mm256_setzero_si256().as_i64x4(); |
1227 | let neg_one: i64x4 = _mm256_set1_epi64x(-1).as_i64x4(); |
1228 | let offsets: i32x4 = offsets.as_i32x4(); |
1229 | let slice: *const i8 = slice as *const i8; |
    let r: i64x4 = vpgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
1232 | } |
1233 | |
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. For each element, if the highest bit
/// of the corresponding `mask` element is not set, the value is copied from
/// `src` instead of being loaded from memory.
1238 | /// |
1239 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_epi64) |
1240 | #[inline ] |
1241 | #[target_feature (enable = "avx2" )] |
1242 | #[cfg_attr (test, assert_instr(vpgatherdq, SCALE = 1))] |
1243 | #[rustc_legacy_const_generics (4)] |
1244 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1245 | pub unsafe fn _mm256_mask_i32gather_epi64<const SCALE: i32>( |
1246 | src: __m256i, |
1247 | slice: *const i64, |
1248 | offsets: __m128i, |
1249 | mask: __m256i, |
1250 | ) -> __m256i { |
1251 | static_assert_imm8_scale!(SCALE); |
1252 | let src: i64x4 = src.as_i64x4(); |
1253 | let mask: i64x4 = mask.as_i64x4(); |
1254 | let offsets: i32x4 = offsets.as_i32x4(); |
1255 | let slice: *const i8 = slice as *const i8; |
1256 | let r: i64x4 = vpgatherdq(src, slice, offsets, mask, SCALE as i8); |
transmute(r)
1258 | } |
1259 | |
1260 | /// Returns values from `slice` at offsets determined by `offsets * scale`, |
1261 | /// where |
1262 | /// `scale` should be 1, 2, 4 or 8. |
1263 | /// |
1264 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_pd) |
1265 | #[inline ] |
1266 | #[target_feature (enable = "avx2" )] |
1267 | #[cfg_attr (test, assert_instr(vgatherdpd, SCALE = 1))] |
1268 | #[rustc_legacy_const_generics (2)] |
1269 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1270 | pub unsafe fn _mm_i32gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d { |
1271 | static_assert_imm8_scale!(SCALE); |
1272 | let zero: __m128d = _mm_setzero_pd(); |
1273 | let neg_one: __m128d = _mm_set1_pd(-1.0); |
1274 | let offsets: i32x4 = offsets.as_i32x4(); |
1275 | let slice: *const i8 = slice as *const i8; |
pgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
1277 | } |
1278 | |
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. For each element, if the highest bit
/// of the corresponding `mask` element is not set, the value is copied from
/// `src` instead of being loaded from memory.
1283 | /// |
1284 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_pd) |
1285 | #[inline ] |
1286 | #[target_feature (enable = "avx2" )] |
1287 | #[cfg_attr (test, assert_instr(vgatherdpd, SCALE = 1))] |
1288 | #[rustc_legacy_const_generics (4)] |
1289 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1290 | pub unsafe fn _mm_mask_i32gather_pd<const SCALE: i32>( |
1291 | src: __m128d, |
1292 | slice: *const f64, |
1293 | offsets: __m128i, |
1294 | mask: __m128d, |
1295 | ) -> __m128d { |
1296 | static_assert_imm8_scale!(SCALE); |
1297 | let offsets: i32x4 = offsets.as_i32x4(); |
1298 | let slice: *const i8 = slice as *const i8; |
1299 | pgatherdpd(src, slice, offsets, mask, SCALE as i8) |
1300 | } |
1301 | |
1302 | /// Returns values from `slice` at offsets determined by `offsets * scale`, |
1303 | /// where |
1304 | /// `scale` should be 1, 2, 4 or 8. |
1305 | /// |
1306 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_pd) |
1307 | #[inline ] |
1308 | #[target_feature (enable = "avx2" )] |
1309 | #[cfg_attr (test, assert_instr(vgatherdpd, SCALE = 1))] |
1310 | #[rustc_legacy_const_generics (2)] |
1311 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1312 | pub unsafe fn _mm256_i32gather_pd<const SCALE: i32>( |
1313 | slice: *const f64, |
1314 | offsets: __m128i, |
1315 | ) -> __m256d { |
1316 | static_assert_imm8_scale!(SCALE); |
1317 | let zero: __m256d = _mm256_setzero_pd(); |
1318 | let neg_one: __m256d = _mm256_set1_pd(-1.0); |
1319 | let offsets: i32x4 = offsets.as_i32x4(); |
1320 | let slice: *const i8 = slice as *const i8; |
vpgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
1322 | } |
1323 | |
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. For each element, if the highest bit
/// of the corresponding `mask` element is not set, the value is copied from
/// `src` instead of being loaded from memory.
1328 | /// |
1329 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_pd) |
1330 | #[inline ] |
1331 | #[target_feature (enable = "avx2" )] |
1332 | #[cfg_attr (test, assert_instr(vgatherdpd, SCALE = 1))] |
1333 | #[rustc_legacy_const_generics (4)] |
1334 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1335 | pub unsafe fn _mm256_mask_i32gather_pd<const SCALE: i32>( |
1336 | src: __m256d, |
1337 | slice: *const f64, |
1338 | offsets: __m128i, |
1339 | mask: __m256d, |
1340 | ) -> __m256d { |
1341 | static_assert_imm8_scale!(SCALE); |
1342 | let offsets: i32x4 = offsets.as_i32x4(); |
1343 | let slice: *const i8 = slice as *const i8; |
1344 | vpgatherdpd(src, slice, offsets, mask, SCALE as i8) |
1345 | } |
1346 | |
1347 | /// Returns values from `slice` at offsets determined by `offsets * scale`, |
1348 | /// where |
1349 | /// `scale` should be 1, 2, 4 or 8. |
1350 | /// |
1351 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_epi32) |
1352 | #[inline ] |
1353 | #[target_feature (enable = "avx2" )] |
1354 | #[cfg_attr (test, assert_instr(vpgatherqd, SCALE = 1))] |
1355 | #[rustc_legacy_const_generics (2)] |
1356 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1357 | pub unsafe fn _mm_i64gather_epi32<const SCALE: i32>( |
1358 | slice: *const i32, |
1359 | offsets: __m128i, |
1360 | ) -> __m128i { |
1361 | static_assert_imm8_scale!(SCALE); |
1362 | let zero: i32x4 = _mm_setzero_si128().as_i32x4(); |
1363 | let neg_one: i32x4 = _mm_set1_epi64x(-1).as_i32x4(); |
1364 | let offsets: i64x2 = offsets.as_i64x2(); |
1365 | let slice: *const i8 = slice as *const i8; |
let r: i32x4 = pgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
transmute(r)
1368 | } |
1369 | |
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. For each element, if the highest bit
/// of the corresponding `mask` element is not set, the value is copied from
/// `src` instead of being loaded from memory.
1374 | /// |
1375 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_epi32) |
1376 | #[inline ] |
1377 | #[target_feature (enable = "avx2" )] |
1378 | #[cfg_attr (test, assert_instr(vpgatherqd, SCALE = 1))] |
1379 | #[rustc_legacy_const_generics (4)] |
1380 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1381 | pub unsafe fn _mm_mask_i64gather_epi32<const SCALE: i32>( |
1382 | src: __m128i, |
1383 | slice: *const i32, |
1384 | offsets: __m128i, |
1385 | mask: __m128i, |
1386 | ) -> __m128i { |
1387 | static_assert_imm8_scale!(SCALE); |
1388 | let src: i32x4 = src.as_i32x4(); |
1389 | let mask: i32x4 = mask.as_i32x4(); |
1390 | let offsets: i64x2 = offsets.as_i64x2(); |
1391 | let slice: *const i8 = slice as *const i8; |
1392 | let r: i32x4 = pgatherqd(src, slice, offsets, mask, SCALE as i8); |
transmute(r)
1394 | } |
1395 | |
1396 | /// Returns values from `slice` at offsets determined by `offsets * scale`, |
1397 | /// where |
1398 | /// `scale` should be 1, 2, 4 or 8. |
1399 | /// |
1400 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_epi32) |
1401 | #[inline ] |
1402 | #[target_feature (enable = "avx2" )] |
1403 | #[cfg_attr (test, assert_instr(vpgatherqd, SCALE = 1))] |
1404 | #[rustc_legacy_const_generics (2)] |
1405 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1406 | pub unsafe fn _mm256_i64gather_epi32<const SCALE: i32>( |
1407 | slice: *const i32, |
1408 | offsets: __m256i, |
1409 | ) -> __m128i { |
1410 | static_assert_imm8_scale!(SCALE); |
1411 | let zero: i32x4 = _mm_setzero_si128().as_i32x4(); |
1412 | let neg_one: i32x4 = _mm_set1_epi64x(-1).as_i32x4(); |
1413 | let offsets: i64x4 = offsets.as_i64x4(); |
1414 | let slice: *const i8 = slice as *const i8; |
let r: i32x4 = vpgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
transmute(r)
1417 | } |
1418 | |
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. For each element, if the highest bit
/// of the corresponding `mask` element is not set, the value is copied from
/// `src` instead of being loaded from memory.
1423 | /// |
1424 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_epi32) |
1425 | #[inline ] |
1426 | #[target_feature (enable = "avx2" )] |
1427 | #[cfg_attr (test, assert_instr(vpgatherqd, SCALE = 1))] |
1428 | #[rustc_legacy_const_generics (4)] |
1429 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1430 | pub unsafe fn _mm256_mask_i64gather_epi32<const SCALE: i32>( |
1431 | src: __m128i, |
1432 | slice: *const i32, |
1433 | offsets: __m256i, |
1434 | mask: __m128i, |
1435 | ) -> __m128i { |
1436 | static_assert_imm8_scale!(SCALE); |
1437 | let src: i32x4 = src.as_i32x4(); |
1438 | let mask: i32x4 = mask.as_i32x4(); |
1439 | let offsets: i64x4 = offsets.as_i64x4(); |
1440 | let slice: *const i8 = slice as *const i8; |
1441 | let r: i32x4 = vpgatherqd(src, slice, offsets, mask, SCALE as i8); |
transmute(r)
1443 | } |
1444 | |
1445 | /// Returns values from `slice` at offsets determined by `offsets * scale`, |
1446 | /// where |
1447 | /// `scale` should be 1, 2, 4 or 8. |
1448 | /// |
1449 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_ps) |
1450 | #[inline ] |
1451 | #[target_feature (enable = "avx2" )] |
1452 | #[cfg_attr (test, assert_instr(vgatherqps, SCALE = 1))] |
1453 | #[rustc_legacy_const_generics (2)] |
1454 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1455 | pub unsafe fn _mm_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 { |
1456 | static_assert_imm8_scale!(SCALE); |
1457 | let zero: __m128 = _mm_setzero_ps(); |
1458 | let neg_one: __m128 = _mm_set1_ps(-1.0); |
1459 | let offsets: i64x2 = offsets.as_i64x2(); |
1460 | let slice: *const i8 = slice as *const i8; |
pgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
1462 | } |
1463 | |
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. For each element, if the highest bit
/// of the corresponding `mask` element is not set, the value is copied from
/// `src` instead of being loaded from memory.
1468 | /// |
1469 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_ps) |
1470 | #[inline ] |
1471 | #[target_feature (enable = "avx2" )] |
1472 | #[cfg_attr (test, assert_instr(vgatherqps, SCALE = 1))] |
1473 | #[rustc_legacy_const_generics (4)] |
1474 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1475 | pub unsafe fn _mm_mask_i64gather_ps<const SCALE: i32>( |
1476 | src: __m128, |
1477 | slice: *const f32, |
1478 | offsets: __m128i, |
1479 | mask: __m128, |
1480 | ) -> __m128 { |
1481 | static_assert_imm8_scale!(SCALE); |
1482 | let offsets: i64x2 = offsets.as_i64x2(); |
1483 | let slice: *const i8 = slice as *const i8; |
1484 | pgatherqps(src, slice, offsets, mask, SCALE as i8) |
1485 | } |
1486 | |
1487 | /// Returns values from `slice` at offsets determined by `offsets * scale`, |
1488 | /// where |
1489 | /// `scale` should be 1, 2, 4 or 8. |
1490 | /// |
1491 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_ps) |
1492 | #[inline ] |
1493 | #[target_feature (enable = "avx2" )] |
1494 | #[cfg_attr (test, assert_instr(vgatherqps, SCALE = 1))] |
1495 | #[rustc_legacy_const_generics (2)] |
1496 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1497 | pub unsafe fn _mm256_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m128 { |
1498 | static_assert_imm8_scale!(SCALE); |
1499 | let zero: __m128 = _mm_setzero_ps(); |
1500 | let neg_one: __m128 = _mm_set1_ps(-1.0); |
1501 | let offsets: i64x4 = offsets.as_i64x4(); |
1502 | let slice: *const i8 = slice as *const i8; |
vpgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
1504 | } |
1505 | |
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. For each element, if the highest bit
/// of the corresponding `mask` element is not set, the value is copied from
/// `src` instead of being loaded from memory.
1510 | /// |
1511 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_ps) |
1512 | #[inline ] |
1513 | #[target_feature (enable = "avx2" )] |
1514 | #[cfg_attr (test, assert_instr(vgatherqps, SCALE = 1))] |
1515 | #[rustc_legacy_const_generics (4)] |
1516 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1517 | pub unsafe fn _mm256_mask_i64gather_ps<const SCALE: i32>( |
1518 | src: __m128, |
1519 | slice: *const f32, |
1520 | offsets: __m256i, |
1521 | mask: __m128, |
1522 | ) -> __m128 { |
1523 | static_assert_imm8_scale!(SCALE); |
1524 | let offsets: i64x4 = offsets.as_i64x4(); |
1525 | let slice: *const i8 = slice as *const i8; |
1526 | vpgatherqps(src, slice, offsets, mask, SCALE as i8) |
1527 | } |
1528 | |
1529 | /// Returns values from `slice` at offsets determined by `offsets * scale`, |
1530 | /// where |
1531 | /// `scale` should be 1, 2, 4 or 8. |
1532 | /// |
1533 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_epi64) |
1534 | #[inline ] |
1535 | #[target_feature (enable = "avx2" )] |
1536 | #[cfg_attr (test, assert_instr(vpgatherqq, SCALE = 1))] |
1537 | #[rustc_legacy_const_generics (2)] |
1538 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1539 | pub unsafe fn _mm_i64gather_epi64<const SCALE: i32>( |
1540 | slice: *const i64, |
1541 | offsets: __m128i, |
1542 | ) -> __m128i { |
1543 | static_assert_imm8_scale!(SCALE); |
1544 | let zero: i64x2 = _mm_setzero_si128().as_i64x2(); |
1545 | let neg_one: i64x2 = _mm_set1_epi64x(-1).as_i64x2(); |
1546 | let slice: *const i8 = slice as *const i8; |
1547 | let offsets: i64x2 = offsets.as_i64x2(); |
let r: i64x2 = pgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
transmute(r)
1550 | } |
1551 | |
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. For each element, if the highest bit
/// of the corresponding `mask` element is not set, the value is copied from
/// `src` instead of being loaded from memory.
1556 | /// |
1557 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_epi64) |
1558 | #[inline ] |
1559 | #[target_feature (enable = "avx2" )] |
1560 | #[cfg_attr (test, assert_instr(vpgatherqq, SCALE = 1))] |
1561 | #[rustc_legacy_const_generics (4)] |
1562 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1563 | pub unsafe fn _mm_mask_i64gather_epi64<const SCALE: i32>( |
1564 | src: __m128i, |
1565 | slice: *const i64, |
1566 | offsets: __m128i, |
1567 | mask: __m128i, |
1568 | ) -> __m128i { |
1569 | static_assert_imm8_scale!(SCALE); |
1570 | let src: i64x2 = src.as_i64x2(); |
1571 | let mask: i64x2 = mask.as_i64x2(); |
1572 | let offsets: i64x2 = offsets.as_i64x2(); |
1573 | let slice: *const i8 = slice as *const i8; |
1574 | let r: i64x2 = pgatherqq(src, slice, offsets, mask, SCALE as i8); |
transmute(r)
1576 | } |
1577 | |
1578 | /// Returns values from `slice` at offsets determined by `offsets * scale`, |
1579 | /// where |
1580 | /// `scale` should be 1, 2, 4 or 8. |
1581 | /// |
1582 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_epi64) |
1583 | #[inline ] |
1584 | #[target_feature (enable = "avx2" )] |
1585 | #[cfg_attr (test, assert_instr(vpgatherqq, SCALE = 1))] |
1586 | #[rustc_legacy_const_generics (2)] |
1587 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1588 | pub unsafe fn _mm256_i64gather_epi64<const SCALE: i32>( |
1589 | slice: *const i64, |
1590 | offsets: __m256i, |
1591 | ) -> __m256i { |
1592 | static_assert_imm8_scale!(SCALE); |
1593 | let zero: i64x4 = _mm256_setzero_si256().as_i64x4(); |
1594 | let neg_one: i64x4 = _mm256_set1_epi64x(-1).as_i64x4(); |
1595 | let slice: *const i8 = slice as *const i8; |
1596 | let offsets: i64x4 = offsets.as_i64x4(); |
let r: i64x4 = vpgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
transmute(r)
1599 | } |
1600 | |
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. For each element, if the highest bit
/// of the corresponding `mask` element is not set, the value is copied from
/// `src` instead of being loaded from memory.
1605 | /// |
1606 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_epi64) |
1607 | #[inline ] |
1608 | #[target_feature (enable = "avx2" )] |
1609 | #[cfg_attr (test, assert_instr(vpgatherqq, SCALE = 1))] |
1610 | #[rustc_legacy_const_generics (4)] |
1611 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1612 | pub unsafe fn _mm256_mask_i64gather_epi64<const SCALE: i32>( |
1613 | src: __m256i, |
1614 | slice: *const i64, |
1615 | offsets: __m256i, |
1616 | mask: __m256i, |
1617 | ) -> __m256i { |
1618 | static_assert_imm8_scale!(SCALE); |
1619 | let src: i64x4 = src.as_i64x4(); |
1620 | let mask: i64x4 = mask.as_i64x4(); |
1621 | let offsets: i64x4 = offsets.as_i64x4(); |
1622 | let slice: *const i8 = slice as *const i8; |
1623 | let r: i64x4 = vpgatherqq(src, slice, offsets, mask, SCALE as i8); |
transmute(r)
1625 | } |
1626 | |
1627 | /// Returns values from `slice` at offsets determined by `offsets * scale`, |
1628 | /// where |
1629 | /// `scale` should be 1, 2, 4 or 8. |
1630 | /// |
1631 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_pd) |
1632 | #[inline ] |
1633 | #[target_feature (enable = "avx2" )] |
1634 | #[cfg_attr (test, assert_instr(vgatherqpd, SCALE = 1))] |
1635 | #[rustc_legacy_const_generics (2)] |
1636 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1637 | pub unsafe fn _mm_i64gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d { |
1638 | static_assert_imm8_scale!(SCALE); |
1639 | let zero: __m128d = _mm_setzero_pd(); |
1640 | let neg_one: __m128d = _mm_set1_pd(-1.0); |
1641 | let slice: *const i8 = slice as *const i8; |
1642 | let offsets: i64x2 = offsets.as_i64x2(); |
pgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
1644 | } |
1645 | |
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. For each element, if the highest bit
/// of the corresponding `mask` element is not set, the value is copied from
/// `src` instead of being loaded from memory.
1650 | /// |
1651 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_pd) |
1652 | #[inline ] |
1653 | #[target_feature (enable = "avx2" )] |
1654 | #[cfg_attr (test, assert_instr(vgatherqpd, SCALE = 1))] |
1655 | #[rustc_legacy_const_generics (4)] |
1656 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1657 | pub unsafe fn _mm_mask_i64gather_pd<const SCALE: i32>( |
1658 | src: __m128d, |
1659 | slice: *const f64, |
1660 | offsets: __m128i, |
1661 | mask: __m128d, |
1662 | ) -> __m128d { |
1663 | static_assert_imm8_scale!(SCALE); |
1664 | let slice: *const i8 = slice as *const i8; |
1665 | let offsets: i64x2 = offsets.as_i64x2(); |
1666 | pgatherqpd(src, slice, offsets, mask, SCALE as i8) |
1667 | } |
1668 | |
1669 | /// Returns values from `slice` at offsets determined by `offsets * scale`, |
1670 | /// where |
1671 | /// `scale` should be 1, 2, 4 or 8. |
1672 | /// |
1673 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_pd) |
1674 | #[inline ] |
1675 | #[target_feature (enable = "avx2" )] |
1676 | #[cfg_attr (test, assert_instr(vgatherqpd, SCALE = 1))] |
1677 | #[rustc_legacy_const_generics (2)] |
1678 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1679 | pub unsafe fn _mm256_i64gather_pd<const SCALE: i32>( |
1680 | slice: *const f64, |
1681 | offsets: __m256i, |
1682 | ) -> __m256d { |
1683 | static_assert_imm8_scale!(SCALE); |
1684 | let zero: __m256d = _mm256_setzero_pd(); |
1685 | let neg_one: __m256d = _mm256_set1_pd(-1.0); |
1686 | let slice: *const i8 = slice as *const i8; |
1687 | let offsets: i64x4 = offsets.as_i64x4(); |
vpgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
1689 | } |
1690 | |
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. For each element, if the highest bit
/// of the corresponding `mask` element is not set, the value is copied from
/// `src` instead of being loaded from memory.
1695 | /// |
1696 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_pd) |
1697 | #[inline ] |
1698 | #[target_feature (enable = "avx2" )] |
1699 | #[cfg_attr (test, assert_instr(vgatherqpd, SCALE = 1))] |
1700 | #[rustc_legacy_const_generics (4)] |
1701 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1702 | pub unsafe fn _mm256_mask_i64gather_pd<const SCALE: i32>( |
1703 | src: __m256d, |
1704 | slice: *const f64, |
1705 | offsets: __m256i, |
1706 | mask: __m256d, |
1707 | ) -> __m256d { |
1708 | static_assert_imm8_scale!(SCALE); |
1709 | let slice: *const i8 = slice as *const i8; |
1710 | let offsets: i64x4 = offsets.as_i64x4(); |
1711 | vpgatherqpd(src, slice, offsets, mask, SCALE as i8) |
1712 | } |
1713 | |
/// Copies `a` to `dst`, then inserts 128 bits (of integer data) from `b` at the
/// location specified by `IMM1`.
1716 | /// |
1717 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti128_si256) |
1718 | #[inline ] |
1719 | #[target_feature (enable = "avx2" )] |
1720 | #[cfg_attr ( |
1721 | all(test, not(target_os = "windows" )), |
1722 | assert_instr(vinsertf128, IMM1 = 1) |
1723 | )] |
1724 | #[rustc_legacy_const_generics (2)] |
1725 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1726 | pub unsafe fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i { |
1727 | static_assert_uimm_bits!(IMM1, 1); |
1728 | let a: i64x4 = a.as_i64x4(); |
1729 | let b: i64x4 = _mm256_castsi128_si256(b).as_i64x4(); |
1730 | let dst: i64x4 = simd_shuffle!(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]); |
transmute(dst)
1732 | } |
1733 | |
/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Horizontally adds adjacent pairs
/// of intermediate 32-bit integers.
1737 | /// |
1738 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd_epi16) |
1739 | #[inline ] |
1740 | #[target_feature (enable = "avx2" )] |
1741 | #[cfg_attr (test, assert_instr(vpmaddwd))] |
1742 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1743 | pub unsafe fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i { |
transmute(pmaddwd(a.as_i16x16(), b.as_i16x16()))
1745 | } |
1746 | |
/// Vertically multiplies each unsigned 8-bit integer from `a` with the
/// corresponding signed 8-bit integer from `b`, producing intermediate
/// signed 16-bit integers. Horizontally adds adjacent pairs of intermediate
/// signed 16-bit integers with signed saturation.
1751 | /// |
1752 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maddubs_epi16) |
1753 | #[inline ] |
1754 | #[target_feature (enable = "avx2" )] |
1755 | #[cfg_attr (test, assert_instr(vpmaddubsw))] |
1756 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1757 | pub unsafe fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i { |
transmute(pmaddubsw(a.as_u8x32(), b.as_u8x32()))
1759 | } |
1760 | |
/// Loads packed 32-bit integers from memory pointed to by `mem_addr` using `mask`
1762 | /// (elements are zeroed out when the highest bit is not set in the |
1763 | /// corresponding element). |
1764 | /// |
1765 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_epi32) |
1766 | #[inline ] |
1767 | #[target_feature (enable = "avx2" )] |
1768 | #[cfg_attr (test, assert_instr(vpmaskmovd))] |
1769 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1770 | pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i { |
transmute(maskloadd(mem_addr as *const i8, mask.as_i32x4()))
1772 | } |
1773 | |
/// Loads packed 32-bit integers from memory pointed to by `mem_addr` using `mask`
1775 | /// (elements are zeroed out when the highest bit is not set in the |
1776 | /// corresponding element). |
1777 | /// |
1778 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_epi32) |
1779 | #[inline ] |
1780 | #[target_feature (enable = "avx2" )] |
1781 | #[cfg_attr (test, assert_instr(vpmaskmovd))] |
1782 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1783 | pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i { |
transmute(maskloadd256(mem_addr as *const i8, mask.as_i32x8()))
1785 | } |
1786 | |
/// Loads packed 64-bit integers from memory pointed to by `mem_addr` using `mask`
1788 | /// (elements are zeroed out when the highest bit is not set in the |
1789 | /// corresponding element). |
1790 | /// |
1791 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_epi64) |
1792 | #[inline ] |
1793 | #[target_feature (enable = "avx2" )] |
1794 | #[cfg_attr (test, assert_instr(vpmaskmovq))] |
1795 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1796 | pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i { |
transmute(maskloadq(mem_addr as *const i8, mask.as_i64x2()))
1798 | } |
1799 | |
/// Loads packed 64-bit integers from memory pointed to by `mem_addr` using `mask`
1801 | /// (elements are zeroed out when the highest bit is not set in the |
1802 | /// corresponding element). |
1803 | /// |
1804 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_epi64) |
1805 | #[inline ] |
1806 | #[target_feature (enable = "avx2" )] |
1807 | #[cfg_attr (test, assert_instr(vpmaskmovq))] |
1808 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1809 | pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i { |
transmute(maskloadq256(mem_addr as *const i8, mask.as_i64x4()))
1811 | } |
1812 | |
/// Stores packed 32-bit integers from `a` into memory pointed to by `mem_addr`
1814 | /// using `mask` (elements are not stored when the highest bit is not set |
1815 | /// in the corresponding element). |
1816 | /// |
1817 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_epi32) |
1818 | #[inline ] |
1819 | #[target_feature (enable = "avx2" )] |
1820 | #[cfg_attr (test, assert_instr(vpmaskmovd))] |
1821 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1822 | pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) { |
maskstored(mem_addr as *mut i8, mask.as_i32x4(), a.as_i32x4())
1824 | } |
1825 | |
/// Stores packed 32-bit integers from `a` into memory pointed to by `mem_addr`
1827 | /// using `mask` (elements are not stored when the highest bit is not set |
1828 | /// in the corresponding element). |
1829 | /// |
1830 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_epi32) |
1831 | #[inline ] |
1832 | #[target_feature (enable = "avx2" )] |
1833 | #[cfg_attr (test, assert_instr(vpmaskmovd))] |
1834 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1835 | pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) { |
maskstored256(mem_addr as *mut i8, mask.as_i32x8(), a.as_i32x8())
1837 | } |
1838 | |
/// Stores packed 64-bit integers from `a` into memory pointed to by `mem_addr`
1840 | /// using `mask` (elements are not stored when the highest bit is not set |
1841 | /// in the corresponding element). |
1842 | /// |
1843 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_epi64) |
1844 | #[inline ] |
1845 | #[target_feature (enable = "avx2" )] |
1846 | #[cfg_attr (test, assert_instr(vpmaskmovq))] |
1847 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1848 | pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) { |
maskstoreq(mem_addr as *mut i8, mask.as_i64x2(), a.as_i64x2())
1850 | } |
1851 | |
/// Stores packed 64-bit integers from `a` into memory pointed to by `mem_addr`
1853 | /// using `mask` (elements are not stored when the highest bit is not set |
1854 | /// in the corresponding element). |
1855 | /// |
1856 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_epi64) |
1857 | #[inline ] |
1858 | #[target_feature (enable = "avx2" )] |
1859 | #[cfg_attr (test, assert_instr(vpmaskmovq))] |
1860 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1861 | pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) { |
maskstoreq256(mem_addr as *mut i8, mask.as_i64x4(), a.as_i64x4())
1863 | } |
1864 | |
1865 | /// Compares packed 16-bit integers in `a` and `b`, and returns the packed |
1866 | /// maximum values. |
1867 | /// |
1868 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi16) |
1869 | #[inline ] |
1870 | #[target_feature (enable = "avx2" )] |
1871 | #[cfg_attr (test, assert_instr(vpmaxsw))] |
1872 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1873 | pub unsafe fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i { |
1874 | let a: i16x16 = a.as_i16x16(); |
1875 | let b: i16x16 = b.as_i16x16(); |
transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
1877 | } |
1878 | |
1879 | /// Compares packed 32-bit integers in `a` and `b`, and returns the packed |
1880 | /// maximum values. |
1881 | /// |
1882 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi32) |
1883 | #[inline ] |
1884 | #[target_feature (enable = "avx2" )] |
1885 | #[cfg_attr (test, assert_instr(vpmaxsd))] |
1886 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1887 | pub unsafe fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i { |
1888 | let a: i32x8 = a.as_i32x8(); |
1889 | let b: i32x8 = b.as_i32x8(); |
transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
1891 | } |
1892 | |
1893 | /// Compares packed 8-bit integers in `a` and `b`, and returns the packed |
1894 | /// maximum values. |
1895 | /// |
1896 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi8) |
1897 | #[inline ] |
1898 | #[target_feature (enable = "avx2" )] |
1899 | #[cfg_attr (test, assert_instr(vpmaxsb))] |
1900 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1901 | pub unsafe fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i { |
1902 | let a: i8x32 = a.as_i8x32(); |
1903 | let b: i8x32 = b.as_i8x32(); |
transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
1905 | } |
1906 | |
1907 | /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns |
1908 | /// the packed maximum values. |
1909 | /// |
1910 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu16) |
1911 | #[inline ] |
1912 | #[target_feature (enable = "avx2" )] |
1913 | #[cfg_attr (test, assert_instr(vpmaxuw))] |
1914 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1915 | pub unsafe fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i { |
1916 | let a: u16x16 = a.as_u16x16(); |
1917 | let b: u16x16 = b.as_u16x16(); |
transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
1919 | } |
1920 | |
1921 | /// Compares packed unsigned 32-bit integers in `a` and `b`, and returns |
1922 | /// the packed maximum values. |
1923 | /// |
1924 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu32) |
1925 | #[inline ] |
1926 | #[target_feature (enable = "avx2" )] |
1927 | #[cfg_attr (test, assert_instr(vpmaxud))] |
1928 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1929 | pub unsafe fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i { |
1930 | let a: u32x8 = a.as_u32x8(); |
1931 | let b: u32x8 = b.as_u32x8(); |
transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
1933 | } |
1934 | |
1935 | /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns |
1936 | /// the packed maximum values. |
1937 | /// |
1938 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu8) |
1939 | #[inline ] |
1940 | #[target_feature (enable = "avx2" )] |
1941 | #[cfg_attr (test, assert_instr(vpmaxub))] |
1942 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1943 | pub unsafe fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i { |
1944 | let a: u8x32 = a.as_u8x32(); |
1945 | let b: u8x32 = b.as_u8x32(); |
transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
1947 | } |
1948 | |
1949 | /// Compares packed 16-bit integers in `a` and `b`, and returns the packed |
1950 | /// minimum values. |
1951 | /// |
1952 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi16) |
1953 | #[inline ] |
1954 | #[target_feature (enable = "avx2" )] |
1955 | #[cfg_attr (test, assert_instr(vpminsw))] |
1956 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1957 | pub unsafe fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i { |
1958 | let a: i16x16 = a.as_i16x16(); |
1959 | let b: i16x16 = b.as_i16x16(); |
transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
1961 | } |
1962 | |
1963 | /// Compares packed 32-bit integers in `a` and `b`, and returns the packed |
1964 | /// minimum values. |
1965 | /// |
1966 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi32) |
1967 | #[inline ] |
1968 | #[target_feature (enable = "avx2" )] |
1969 | #[cfg_attr (test, assert_instr(vpminsd))] |
1970 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1971 | pub unsafe fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i { |
1972 | let a: i32x8 = a.as_i32x8(); |
1973 | let b: i32x8 = b.as_i32x8(); |
transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
1975 | } |
1976 | |
1977 | /// Compares packed 8-bit integers in `a` and `b`, and returns the packed |
1978 | /// minimum values. |
1979 | /// |
1980 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi8) |
1981 | #[inline ] |
1982 | #[target_feature (enable = "avx2" )] |
1983 | #[cfg_attr (test, assert_instr(vpminsb))] |
1984 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1985 | pub unsafe fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i { |
1986 | let a: i8x32 = a.as_i8x32(); |
1987 | let b: i8x32 = b.as_i8x32(); |
transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
1989 | } |
1990 | |
1991 | /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns |
1992 | /// the packed minimum values. |
1993 | /// |
1994 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu16) |
1995 | #[inline ] |
1996 | #[target_feature (enable = "avx2" )] |
1997 | #[cfg_attr (test, assert_instr(vpminuw))] |
1998 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
1999 | pub unsafe fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i { |
2000 | let a: u16x16 = a.as_u16x16(); |
2001 | let b: u16x16 = b.as_u16x16(); |
transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
2003 | } |
2004 | |
2005 | /// Compares packed unsigned 32-bit integers in `a` and `b`, and returns |
2006 | /// the packed minimum values. |
2007 | /// |
2008 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu32) |
2009 | #[inline ] |
2010 | #[target_feature (enable = "avx2" )] |
2011 | #[cfg_attr (test, assert_instr(vpminud))] |
2012 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2013 | pub unsafe fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i { |
2014 | let a: u32x8 = a.as_u32x8(); |
2015 | let b: u32x8 = b.as_u32x8(); |
transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
2017 | } |
2018 | |
2019 | /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns |
2020 | /// the packed minimum values. |
2021 | /// |
2022 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu8) |
2023 | #[inline ] |
2024 | #[target_feature (enable = "avx2" )] |
2025 | #[cfg_attr (test, assert_instr(vpminub))] |
2026 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2027 | pub unsafe fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i { |
2028 | let a: u8x32 = a.as_u8x32(); |
2029 | let b: u8x32 = b.as_u8x32(); |
transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
2031 | } |
2032 | |
/// Creates a mask from the most significant bit of each 8-bit element in `a`
/// and returns the result.
2035 | /// |
2036 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movemask_epi8) |
2037 | #[inline ] |
2038 | #[target_feature (enable = "avx2" )] |
2039 | #[cfg_attr (test, assert_instr(vpmovmskb))] |
2040 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2041 | pub unsafe fn _mm256_movemask_epi8(a: __m256i) -> i32 { |
2042 | let z: i8x32 = i8x32::splat(0); |
let m: i8x32 = simd_lt(a.as_i8x32(), z);
2044 | simd_bitmask::<_, u32>(m) as i32 |
2045 | } |
2046 | |
2047 | /// Computes the sum of absolute differences (SADs) of quadruplets of unsigned |
2048 | /// 8-bit integers in `a` compared to those in `b`, and stores the 16-bit |
2049 | /// results in dst. Eight SADs are performed for each 128-bit lane using one |
2050 | /// quadruplet from `b` and eight quadruplets from `a`. One quadruplet is |
/// selected from `b` starting at the offset specified in `imm8`. Eight
2052 | /// quadruplets are formed from sequential 8-bit integers selected from `a` |
2053 | /// starting at the offset specified in `imm8`. |
2054 | /// |
2055 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mpsadbw_epu8) |
2056 | #[inline ] |
2057 | #[target_feature (enable = "avx2" )] |
2058 | #[cfg_attr (test, assert_instr(vmpsadbw, IMM8 = 0))] |
2059 | #[rustc_legacy_const_generics (2)] |
2060 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2061 | pub unsafe fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { |
2062 | static_assert_uimm_bits!(IMM8, 8); |
transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8))
2064 | } |
2065 | |
2066 | /// Multiplies the low 32-bit integers from each packed 64-bit element in |
2067 | /// `a` and `b` |
2068 | /// |
2069 | /// Returns the 64-bit results. |
2070 | /// |
2071 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_epi32) |
2072 | #[inline ] |
2073 | #[target_feature (enable = "avx2" )] |
2074 | #[cfg_attr (test, assert_instr(vpmuldq))] |
2075 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2076 | pub unsafe fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i { |
2077 | let a: i64x4 = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(a.as_i64x4())); |
2078 | let b: i64x4 = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(b.as_i64x4())); |
transmute(simd_mul(a, b))
2080 | } |
2081 | |
2082 | /// Multiplies the low unsigned 32-bit integers from each packed 64-bit |
2083 | /// element in `a` and `b` |
2084 | /// |
2085 | /// Returns the unsigned 64-bit results. |
2086 | /// |
2087 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_epu32) |
2088 | #[inline ] |
2089 | #[target_feature (enable = "avx2" )] |
2090 | #[cfg_attr (test, assert_instr(vpmuludq))] |
2091 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2092 | pub unsafe fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i { |
2093 | let a: u64x4 = a.as_u64x4(); |
2094 | let b: u64x4 = b.as_u64x4(); |
2095 | let mask: u64x4 = u64x4::splat(u32::MAX.into()); |
transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
2097 | } |
2098 | |
2099 | /// Multiplies the packed 16-bit integers in `a` and `b`, producing |
2100 | /// intermediate 32-bit integers and returning the high 16 bits of the |
2101 | /// intermediate integers. |
2102 | /// |
2103 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhi_epi16) |
2104 | #[inline ] |
2105 | #[target_feature (enable = "avx2" )] |
2106 | #[cfg_attr (test, assert_instr(vpmulhw))] |
2107 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2108 | pub unsafe fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i { |
2109 | let a: i32x16 = simd_cast::<_, i32x16>(a.as_i16x16()); |
2110 | let b: i32x16 = simd_cast::<_, i32x16>(b.as_i16x16()); |
let r: i32x16 = simd_shr(simd_mul(a, b), i32x16::splat(16));
transmute(simd_cast::<i32x16, i16x16>(r))
2113 | } |
2114 | |
2115 | /// Multiplies the packed unsigned 16-bit integers in `a` and `b`, producing |
2116 | /// intermediate 32-bit integers and returning the high 16 bits of the |
2117 | /// intermediate integers. |
2118 | /// |
2119 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhi_epu16) |
2120 | #[inline ] |
2121 | #[target_feature (enable = "avx2" )] |
2122 | #[cfg_attr (test, assert_instr(vpmulhuw))] |
2123 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2124 | pub unsafe fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i { |
2125 | let a: u32x16 = simd_cast::<_, u32x16>(a.as_u16x16()); |
2126 | let b: u32x16 = simd_cast::<_, u32x16>(b.as_u16x16()); |
let r: u32x16 = simd_shr(simd_mul(a, b), u32x16::splat(16));
transmute(simd_cast::<u32x16, u16x16>(r))
2129 | } |
2130 | |
2131 | /// Multiplies the packed 16-bit integers in `a` and `b`, producing |
2132 | /// intermediate 32-bit integers, and returns the low 16 bits of the |
2133 | /// intermediate integers |
2134 | /// |
2135 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi16) |
2136 | #[inline ] |
2137 | #[target_feature (enable = "avx2" )] |
2138 | #[cfg_attr (test, assert_instr(vpmullw))] |
2139 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2140 | pub unsafe fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i { |
transmute(simd_mul(a.as_i16x16(), b.as_i16x16()))
2142 | } |
2143 | |
2144 | /// Multiplies the packed 32-bit integers in `a` and `b`, producing |
2145 | /// intermediate 64-bit integers, and returns the low 32 bits of the |
2146 | /// intermediate integers |
2147 | /// |
2148 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi32) |
2149 | #[inline ] |
2150 | #[target_feature (enable = "avx2" )] |
2151 | #[cfg_attr (test, assert_instr(vpmulld))] |
2152 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2153 | pub unsafe fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i { |
transmute(simd_mul(a.as_i32x8(), b.as_i32x8()))
2155 | } |
2156 | |
/// Multiplies packed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Truncates each intermediate
/// integer to the 18 most significant bits, rounds by adding 1, and
/// returns bits `[16:1]`.
2161 | /// |
2162 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhrs_epi16) |
2163 | #[inline ] |
2164 | #[target_feature (enable = "avx2" )] |
2165 | #[cfg_attr (test, assert_instr(vpmulhrsw))] |
2166 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2167 | pub unsafe fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i { |
transmute(pmulhrsw(a.as_i16x16(), b.as_i16x16()))
2169 | } |
2170 | |
2171 | /// Computes the bitwise OR of 256 bits (representing integer data) in `a` |
2172 | /// and `b` |
2173 | /// |
2174 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_si256) |
2175 | #[inline ] |
2176 | #[target_feature (enable = "avx2" )] |
2177 | #[cfg_attr (test, assert_instr(vorps))] |
2178 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2179 | pub unsafe fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i { |
transmute(simd_or(a.as_i32x8(), b.as_i32x8()))
2181 | } |
2182 | |
2183 | /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers |
2184 | /// using signed saturation |
2185 | /// |
2186 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi16) |
2187 | #[inline ] |
2188 | #[target_feature (enable = "avx2" )] |
2189 | #[cfg_attr (test, assert_instr(vpacksswb))] |
2190 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2191 | pub unsafe fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i { |
transmute(packsswb(a.as_i16x16(), b.as_i16x16()))
2193 | } |
2194 | |
2195 | /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers |
2196 | /// using signed saturation |
2197 | /// |
2198 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi32) |
2199 | #[inline ] |
2200 | #[target_feature (enable = "avx2" )] |
2201 | #[cfg_attr (test, assert_instr(vpackssdw))] |
2202 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2203 | pub unsafe fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i { |
transmute(packssdw(a.as_i32x8(), b.as_i32x8()))
2205 | } |
2206 | |
2207 | /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers |
2208 | /// using unsigned saturation |
2209 | /// |
2210 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi16) |
2211 | #[inline ] |
2212 | #[target_feature (enable = "avx2" )] |
2213 | #[cfg_attr (test, assert_instr(vpackuswb))] |
2214 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2215 | pub unsafe fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i { |
transmute(packuswb(a.as_i16x16(), b.as_i16x16()))
2217 | } |
2218 | |
2219 | /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers |
2220 | /// using unsigned saturation |
2221 | /// |
2222 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi32) |
2223 | #[inline ] |
2224 | #[target_feature (enable = "avx2" )] |
2225 | #[cfg_attr (test, assert_instr(vpackusdw))] |
2226 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2227 | pub unsafe fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i { |
transmute(packusdw(a.as_i32x8(), b.as_i32x8()))
2229 | } |
2230 | |
2231 | /// Permutes packed 32-bit integers from `a` according to the content of `b`. |
2232 | /// |
/// The lowest 3 bits of each integer of `b` are used as addresses into the 8
/// integers of `a`.
2235 | /// |
2236 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_epi32) |
2237 | #[inline ] |
2238 | #[target_feature (enable = "avx2" )] |
2239 | #[cfg_attr (test, assert_instr(vpermps))] |
2240 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2241 | pub unsafe fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i { |
transmute(permd(a.as_u32x8(), b.as_u32x8()))
2243 | } |
2244 | |
2245 | /// Permutes 64-bit integers from `a` using control mask `imm8`. |
2246 | /// |
2247 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute4x64_epi64) |
2248 | #[inline ] |
2249 | #[target_feature (enable = "avx2" )] |
2250 | #[cfg_attr (test, assert_instr(vpermpd, IMM8 = 9))] |
2251 | #[rustc_legacy_const_generics (1)] |
2252 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2253 | pub unsafe fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i { |
2254 | static_assert_uimm_bits!(IMM8, 8); |
2255 | let zero: i64x4 = _mm256_setzero_si256().as_i64x4(); |
2256 | let r: i64x4 = simd_shuffle!( |
2257 | a.as_i64x4(), |
2258 | zero, |
2259 | [ |
2260 | IMM8 as u32 & 0b11, |
2261 | (IMM8 as u32 >> 2) & 0b11, |
2262 | (IMM8 as u32 >> 4) & 0b11, |
2263 | (IMM8 as u32 >> 6) & 0b11, |
2264 | ], |
2265 | ); |
transmute(r)
2267 | } |
2268 | |
2269 | /// Shuffles 128-bits of integer data selected by `imm8` from `a` and `b`. |
2270 | /// |
2271 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2x128_si256) |
2272 | #[inline ] |
2273 | #[target_feature (enable = "avx2" )] |
2274 | #[cfg_attr (test, assert_instr(vperm2f128, IMM8 = 9))] |
2275 | #[rustc_legacy_const_generics (2)] |
2276 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2277 | pub unsafe fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { |
2278 | static_assert_uimm_bits!(IMM8, 8); |
transmute(vperm2i128(a.as_i64x4(), b.as_i64x4(), IMM8 as i8))
2280 | } |
2281 | |
2282 | /// Shuffles 64-bit floating-point elements in `a` across lanes using the |
2283 | /// control in `imm8`. |
2284 | /// |
2285 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute4x64_pd) |
2286 | #[inline ] |
2287 | #[target_feature (enable = "avx2" )] |
2288 | #[cfg_attr (test, assert_instr(vpermpd, IMM8 = 1))] |
2289 | #[rustc_legacy_const_generics (1)] |
2290 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2291 | pub unsafe fn _mm256_permute4x64_pd<const IMM8: i32>(a: __m256d) -> __m256d { |
2292 | static_assert_uimm_bits!(IMM8, 8); |
2293 | simd_shuffle!( |
2294 | a, |
2295 | _mm256_undefined_pd(), |
2296 | [ |
2297 | IMM8 as u32 & 0b11, |
2298 | (IMM8 as u32 >> 2) & 0b11, |
2299 | (IMM8 as u32 >> 4) & 0b11, |
2300 | (IMM8 as u32 >> 6) & 0b11, |
2301 | ], |
2302 | ) |
2303 | } |
2304 | |
2305 | /// Shuffles eight 32-bit floating-point elements in `a` across lanes using |
2306 | /// the corresponding 32-bit integer index in `idx`. |
2307 | /// |
2308 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_ps) |
2309 | #[inline ] |
2310 | #[target_feature (enable = "avx2" )] |
2311 | #[cfg_attr (test, assert_instr(vpermps))] |
2312 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2313 | pub unsafe fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 { |
permps(a, idx.as_i32x8())
2315 | } |
2316 | |
/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
/// and `b`, then horizontally sums each consecutive group of 8 differences to
/// produce four unsigned 16-bit integers, and packs these sums into the low
/// 16 bits of the four 64-bit elements of the return value
2321 | /// |
2322 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sad_epu8) |
2323 | #[inline ] |
2324 | #[target_feature (enable = "avx2" )] |
2325 | #[cfg_attr (test, assert_instr(vpsadbw))] |
2326 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2327 | pub unsafe fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i { |
transmute(psadbw(a.as_u8x32(), b.as_u8x32()))
2329 | } |
2330 | |
2331 | /// Shuffles bytes from `a` according to the content of `b`. |
2332 | /// |
/// For each of the 128-bit low and high halves of the vectors, the lowest
/// 4 bits of each byte of `b` are used as addresses into the respective
2335 | /// low or high 16 bytes of `a`. That is, the halves are shuffled separately. |
2336 | /// |
/// In addition, if the most significant bit of a byte of `b` is set, the
/// respective destination byte is set to 0.
2339 | /// |
2340 | /// Picturing `a` and `b` as `[u8; 32]`, `_mm256_shuffle_epi8` is logically |
2341 | /// equivalent to: |
2342 | /// |
2343 | /// ``` |
2344 | /// fn mm256_shuffle_epi8(a: [u8; 32], b: [u8; 32]) -> [u8; 32] { |
2345 | /// let mut r = [0; 32]; |
2346 | /// for i in 0..16 { |
2347 | /// // if the most significant bit of b is set, |
2348 | /// // then the destination byte is set to 0. |
2349 | /// if b[i] & 0x80 == 0u8 { |
2350 | /// r[i] = a[(b[i] % 16) as usize]; |
2351 | /// } |
2352 | /// if b[i + 16] & 0x80 == 0u8 { |
2353 | /// r[i + 16] = a[(b[i + 16] % 16 + 16) as usize]; |
2354 | /// } |
2355 | /// } |
2356 | /// r |
2357 | /// } |
2358 | /// ``` |
2359 | /// |
2360 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_epi8) |
2361 | #[inline ] |
2362 | #[target_feature (enable = "avx2" )] |
2363 | #[cfg_attr (test, assert_instr(vpshufb))] |
2364 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2365 | pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i { |
transmute(pshufb(a.as_u8x32(), b.as_u8x32()))
2367 | } |
2368 | |
2369 | /// Shuffles 32-bit integers in 128-bit lanes of `a` using the control in |
2370 | /// `imm8`. |
2371 | /// |
2372 | /// ```rust |
2373 | /// #[cfg(target_arch = "x86" )] |
2374 | /// use std::arch::x86::*; |
2375 | /// #[cfg(target_arch = "x86_64" )] |
2376 | /// use std::arch::x86_64::*; |
2377 | /// |
2378 | /// # fn main() { |
2379 | /// # if is_x86_feature_detected!("avx2" ) { |
2380 | /// # #[target_feature (enable = "avx2" )] |
2381 | /// # unsafe fn worker() { |
2382 | /// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); |
2383 | /// |
2384 | /// let c1 = _mm256_shuffle_epi32(a, 0b00_11_10_01); |
2385 | /// let c2 = _mm256_shuffle_epi32(a, 0b01_00_10_11); |
2386 | /// |
2387 | /// let expected1 = _mm256_setr_epi32(1, 2, 3, 0, 5, 6, 7, 4); |
2388 | /// let expected2 = _mm256_setr_epi32(3, 2, 0, 1, 7, 6, 4, 5); |
2389 | /// |
2390 | /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c1, expected1)), !0); |
2391 | /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c2, expected2)), !0); |
2392 | /// # } |
2393 | /// # unsafe { worker(); } |
2394 | /// # } |
2395 | /// # } |
2396 | /// ``` |
2397 | /// |
2398 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_epi32) |
2399 | #[inline ] |
2400 | #[target_feature (enable = "avx2" )] |
2401 | #[cfg_attr (test, assert_instr(vshufps, MASK = 9))] |
2402 | #[rustc_legacy_const_generics (1)] |
2403 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2404 | pub unsafe fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i { |
2405 | static_assert_uimm_bits!(MASK, 8); |
2406 | let r: i32x8 = simd_shuffle!( |
2407 | a.as_i32x8(), |
2408 | a.as_i32x8(), |
2409 | [ |
2410 | MASK as u32 & 0b11, |
2411 | (MASK as u32 >> 2) & 0b11, |
2412 | (MASK as u32 >> 4) & 0b11, |
2413 | (MASK as u32 >> 6) & 0b11, |
2414 | (MASK as u32 & 0b11) + 4, |
2415 | ((MASK as u32 >> 2) & 0b11) + 4, |
2416 | ((MASK as u32 >> 4) & 0b11) + 4, |
2417 | ((MASK as u32 >> 6) & 0b11) + 4, |
2418 | ], |
2419 | ); |
    transmute(r)
2421 | } |
2422 | |
2423 | /// Shuffles 16-bit integers in the high 64 bits of 128-bit lanes of `a` using |
2424 | /// the control in `imm8`. The low 64 bits of 128-bit lanes of `a` are copied |
2425 | /// to the output. |
2426 | /// |
2427 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shufflehi_epi16) |
2428 | #[inline ] |
2429 | #[target_feature (enable = "avx2" )] |
2430 | #[cfg_attr (test, assert_instr(vpshufhw, IMM8 = 9))] |
2431 | #[rustc_legacy_const_generics (1)] |
2432 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2433 | pub unsafe fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i { |
2434 | static_assert_uimm_bits!(IMM8, 8); |
2435 | let a = a.as_i16x16(); |
2436 | let r: i16x16 = simd_shuffle!( |
2437 | a, |
2438 | a, |
2439 | [ |
2440 | 0, |
2441 | 1, |
2442 | 2, |
2443 | 3, |
2444 | 4 + (IMM8 as u32 & 0b11), |
2445 | 4 + ((IMM8 as u32 >> 2) & 0b11), |
2446 | 4 + ((IMM8 as u32 >> 4) & 0b11), |
2447 | 4 + ((IMM8 as u32 >> 6) & 0b11), |
2448 | 8, |
2449 | 9, |
2450 | 10, |
2451 | 11, |
2452 | 12 + (IMM8 as u32 & 0b11), |
2453 | 12 + ((IMM8 as u32 >> 2) & 0b11), |
2454 | 12 + ((IMM8 as u32 >> 4) & 0b11), |
2455 | 12 + ((IMM8 as u32 >> 6) & 0b11), |
2456 | ], |
2457 | ); |
2458 | transmute(r) |
2459 | } |
2460 | |
2461 | /// Shuffles 16-bit integers in the low 64 bits of 128-bit lanes of `a` using |
2462 | /// the control in `imm8`. The high 64 bits of 128-bit lanes of `a` are copied |
2463 | /// to the output. |
2464 | /// |
2465 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shufflelo_epi16) |
2466 | #[inline ] |
2467 | #[target_feature (enable = "avx2" )] |
2468 | #[cfg_attr (test, assert_instr(vpshuflw, IMM8 = 9))] |
2469 | #[rustc_legacy_const_generics (1)] |
2470 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2471 | pub unsafe fn _mm256_shufflelo_epi16<const IMM8: i32>(a: __m256i) -> __m256i { |
2472 | static_assert_uimm_bits!(IMM8, 8); |
2473 | let a = a.as_i16x16(); |
2474 | let r: i16x16 = simd_shuffle!( |
2475 | a, |
2476 | a, |
2477 | [ |
2478 | 0 + (IMM8 as u32 & 0b11), |
2479 | 0 + ((IMM8 as u32 >> 2) & 0b11), |
2480 | 0 + ((IMM8 as u32 >> 4) & 0b11), |
2481 | 0 + ((IMM8 as u32 >> 6) & 0b11), |
2482 | 4, |
2483 | 5, |
2484 | 6, |
2485 | 7, |
2486 | 8 + (IMM8 as u32 & 0b11), |
2487 | 8 + ((IMM8 as u32 >> 2) & 0b11), |
2488 | 8 + ((IMM8 as u32 >> 4) & 0b11), |
2489 | 8 + ((IMM8 as u32 >> 6) & 0b11), |
2490 | 12, |
2491 | 13, |
2492 | 14, |
2493 | 15, |
2494 | ], |
2495 | ); |
2496 | transmute(r) |
2497 | } |
2498 | |
2499 | /// Negates packed 16-bit integers in `a` when the corresponding signed |
2500 | /// 16-bit integer in `b` is negative, and returns the results. |
2501 | /// Results are zeroed out when the corresponding element in `b` is zero. |
2502 | /// |
2503 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sign_epi16) |
2504 | #[inline ] |
2505 | #[target_feature (enable = "avx2" )] |
2506 | #[cfg_attr (test, assert_instr(vpsignw))] |
2507 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2508 | pub unsafe fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i { |
    transmute(psignw(a.as_i16x16(), b.as_i16x16()))
2510 | } |
2511 | |
2512 | /// Negates packed 32-bit integers in `a` when the corresponding signed |
2513 | /// 32-bit integer in `b` is negative, and returns the results. |
2514 | /// Results are zeroed out when the corresponding element in `b` is zero. |
2515 | /// |
2516 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sign_epi32) |
2517 | #[inline ] |
2518 | #[target_feature (enable = "avx2" )] |
2519 | #[cfg_attr (test, assert_instr(vpsignd))] |
2520 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2521 | pub unsafe fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i { |
    transmute(psignd(a.as_i32x8(), b.as_i32x8()))
2523 | } |
2524 | |
2525 | /// Negates packed 8-bit integers in `a` when the corresponding signed |
2526 | /// 8-bit integer in `b` is negative, and returns the results. |
2527 | /// Results are zeroed out when the corresponding element in `b` is zero. |
2528 | /// |
2529 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sign_epi8) |
2530 | #[inline ] |
2531 | #[target_feature (enable = "avx2" )] |
2532 | #[cfg_attr (test, assert_instr(vpsignb))] |
2533 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2534 | pub unsafe fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i { |
    transmute(psignb(a.as_i8x32(), b.as_i8x32()))
2536 | } |
2537 | |
2538 | /// Shifts packed 16-bit integers in `a` left by `count` while |
/// shifting in zeros, and returns the result.
2540 | /// |
2541 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sll_epi16) |
2542 | #[inline ] |
2543 | #[target_feature (enable = "avx2" )] |
2544 | #[cfg_attr (test, assert_instr(vpsllw))] |
2545 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2546 | pub unsafe fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i { |
    transmute(psllw(a.as_i16x16(), count.as_i16x8()))
2548 | } |
2549 | |
2550 | /// Shifts packed 32-bit integers in `a` left by `count` while |
/// shifting in zeros, and returns the result.
2552 | /// |
2553 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sll_epi32) |
2554 | #[inline ] |
2555 | #[target_feature (enable = "avx2" )] |
2556 | #[cfg_attr (test, assert_instr(vpslld))] |
2557 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2558 | pub unsafe fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i { |
    transmute(pslld(a.as_i32x8(), count.as_i32x4()))
2560 | } |
2561 | |
2562 | /// Shifts packed 64-bit integers in `a` left by `count` while |
/// shifting in zeros, and returns the result.
2564 | /// |
2565 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sll_epi64) |
2566 | #[inline ] |
2567 | #[target_feature (enable = "avx2" )] |
2568 | #[cfg_attr (test, assert_instr(vpsllq))] |
2569 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2570 | pub unsafe fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i { |
    transmute(psllq(a.as_i64x4(), count.as_i64x2()))
2572 | } |
2573 | |
2574 | /// Shifts packed 16-bit integers in `a` left by `IMM8` while |
/// shifting in zeros, and returns the results.
2576 | /// |
2577 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi16) |
2578 | #[inline ] |
2579 | #[target_feature (enable = "avx2" )] |
2580 | #[cfg_attr (test, assert_instr(vpsllw, IMM8 = 7))] |
2581 | #[rustc_legacy_const_generics (1)] |
2582 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2583 | pub unsafe fn _mm256_slli_epi16<const IMM8: i32>(a: __m256i) -> __m256i { |
2584 | static_assert_uimm_bits!(IMM8, 8); |
2585 | if IMM8 >= 16 { |
2586 | _mm256_setzero_si256() |
2587 | } else { |
        transmute(simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
2589 | } |
2590 | } |
2591 | |
2592 | /// Shifts packed 32-bit integers in `a` left by `IMM8` while |
/// shifting in zeros, and returns the results.
2594 | /// |
2595 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi32) |
2596 | #[inline ] |
2597 | #[target_feature (enable = "avx2" )] |
2598 | #[cfg_attr (test, assert_instr(vpslld, IMM8 = 7))] |
2599 | #[rustc_legacy_const_generics (1)] |
2600 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2601 | pub unsafe fn _mm256_slli_epi32<const IMM8: i32>(a: __m256i) -> __m256i { |
2602 | static_assert_uimm_bits!(IMM8, 8); |
2603 | if IMM8 >= 32 { |
2604 | _mm256_setzero_si256() |
2605 | } else { |
        transmute(simd_shl(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
2607 | } |
2608 | } |
2609 | |
2610 | /// Shifts packed 64-bit integers in `a` left by `IMM8` while |
/// shifting in zeros, and returns the results.
2612 | /// |
2613 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi64) |
2614 | #[inline ] |
2615 | #[target_feature (enable = "avx2" )] |
2616 | #[cfg_attr (test, assert_instr(vpsllq, IMM8 = 7))] |
2617 | #[rustc_legacy_const_generics (1)] |
2618 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2619 | pub unsafe fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i { |
2620 | static_assert_uimm_bits!(IMM8, 8); |
2621 | if IMM8 >= 64 { |
2622 | _mm256_setzero_si256() |
2623 | } else { |
        transmute(simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
2625 | } |
2626 | } |
2627 | |
2628 | /// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros. |
2629 | /// |
2630 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_si256) |
2631 | #[inline ] |
2632 | #[target_feature (enable = "avx2" )] |
2633 | #[cfg_attr (test, assert_instr(vpslldq, IMM8 = 3))] |
2634 | #[rustc_legacy_const_generics (1)] |
2635 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2636 | pub unsafe fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i { |
2637 | static_assert_uimm_bits!(IMM8, 8); |
2638 | _mm256_bslli_epi128::<IMM8>(a) |
2639 | } |
2640 | |
2641 | /// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros. |
2642 | /// |
2643 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bslli_epi128) |
2644 | #[inline ] |
2645 | #[target_feature (enable = "avx2" )] |
2646 | #[cfg_attr (test, assert_instr(vpslldq, IMM8 = 3))] |
2647 | #[rustc_legacy_const_generics (1)] |
2648 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2649 | pub unsafe fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i { |
2650 | static_assert_uimm_bits!(IMM8, 8); |
2651 | const fn mask(shift: i32, i: u32) -> u32 { |
2652 | let shift = shift as u32 & 0xff; |
2653 | if shift > 15 || i % 16 < shift { |
2654 | 0 |
2655 | } else { |
2656 | 32 + (i - shift) |
2657 | } |
2658 | } |
2659 | let a = a.as_i8x32(); |
2660 | let zero = _mm256_setzero_si256().as_i8x32(); |
2661 | let r: i8x32 = simd_shuffle!( |
2662 | zero, |
2663 | a, |
2664 | [ |
2665 | mask(IMM8, 0), |
2666 | mask(IMM8, 1), |
2667 | mask(IMM8, 2), |
2668 | mask(IMM8, 3), |
2669 | mask(IMM8, 4), |
2670 | mask(IMM8, 5), |
2671 | mask(IMM8, 6), |
2672 | mask(IMM8, 7), |
2673 | mask(IMM8, 8), |
2674 | mask(IMM8, 9), |
2675 | mask(IMM8, 10), |
2676 | mask(IMM8, 11), |
2677 | mask(IMM8, 12), |
2678 | mask(IMM8, 13), |
2679 | mask(IMM8, 14), |
2680 | mask(IMM8, 15), |
2681 | mask(IMM8, 16), |
2682 | mask(IMM8, 17), |
2683 | mask(IMM8, 18), |
2684 | mask(IMM8, 19), |
2685 | mask(IMM8, 20), |
2686 | mask(IMM8, 21), |
2687 | mask(IMM8, 22), |
2688 | mask(IMM8, 23), |
2689 | mask(IMM8, 24), |
2690 | mask(IMM8, 25), |
2691 | mask(IMM8, 26), |
2692 | mask(IMM8, 27), |
2693 | mask(IMM8, 28), |
2694 | mask(IMM8, 29), |
2695 | mask(IMM8, 30), |
2696 | mask(IMM8, 31), |
2697 | ], |
2698 | ); |
2699 | transmute(r) |
2700 | } |
2701 | |
2702 | /// Shifts packed 32-bit integers in `a` left by the amount |
2703 | /// specified by the corresponding element in `count` while |
2704 | /// shifting in zeros, and returns the result. |
2705 | /// |
2706 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi32) |
2707 | #[inline ] |
2708 | #[target_feature (enable = "avx2" )] |
2709 | #[cfg_attr (test, assert_instr(vpsllvd))] |
2710 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2711 | pub unsafe fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i { |
    transmute(psllvd(a.as_i32x4(), count.as_i32x4()))
2713 | } |
2714 | |
2715 | /// Shifts packed 32-bit integers in `a` left by the amount |
2716 | /// specified by the corresponding element in `count` while |
2717 | /// shifting in zeros, and returns the result. |
2718 | /// |
2719 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi32) |
2720 | #[inline ] |
2721 | #[target_feature (enable = "avx2" )] |
2722 | #[cfg_attr (test, assert_instr(vpsllvd))] |
2723 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2724 | pub unsafe fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i { |
    transmute(psllvd256(a.as_i32x8(), count.as_i32x8()))
2726 | } |
2727 | |
2728 | /// Shifts packed 64-bit integers in `a` left by the amount |
2729 | /// specified by the corresponding element in `count` while |
2730 | /// shifting in zeros, and returns the result. |
2731 | /// |
2732 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi64) |
2733 | #[inline ] |
2734 | #[target_feature (enable = "avx2" )] |
2735 | #[cfg_attr (test, assert_instr(vpsllvq))] |
2736 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2737 | pub unsafe fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i { |
    transmute(psllvq(a.as_i64x2(), count.as_i64x2()))
2739 | } |
2740 | |
2741 | /// Shifts packed 64-bit integers in `a` left by the amount |
2742 | /// specified by the corresponding element in `count` while |
2743 | /// shifting in zeros, and returns the result. |
2744 | /// |
2745 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi64) |
2746 | #[inline ] |
2747 | #[target_feature (enable = "avx2" )] |
2748 | #[cfg_attr (test, assert_instr(vpsllvq))] |
2749 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2750 | pub unsafe fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i { |
    transmute(psllvq256(a.as_i64x4(), count.as_i64x4()))
2752 | } |
2753 | |
2754 | /// Shifts packed 16-bit integers in `a` right by `count` while |
2755 | /// shifting in sign bits. |
2756 | /// |
2757 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi16) |
2758 | #[inline ] |
2759 | #[target_feature (enable = "avx2" )] |
2760 | #[cfg_attr (test, assert_instr(vpsraw))] |
2761 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2762 | pub unsafe fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i { |
    transmute(psraw(a.as_i16x16(), count.as_i16x8()))
2764 | } |
2765 | |
2766 | /// Shifts packed 32-bit integers in `a` right by `count` while |
2767 | /// shifting in sign bits. |
2768 | /// |
2769 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi32) |
2770 | #[inline ] |
2771 | #[target_feature (enable = "avx2" )] |
2772 | #[cfg_attr (test, assert_instr(vpsrad))] |
2773 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2774 | pub unsafe fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i { |
    transmute(psrad(a.as_i32x8(), count.as_i32x4()))
2776 | } |
2777 | |
2778 | /// Shifts packed 16-bit integers in `a` right by `IMM8` while |
2779 | /// shifting in sign bits. |
2780 | /// |
2781 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi16) |
2782 | #[inline ] |
2783 | #[target_feature (enable = "avx2" )] |
2784 | #[cfg_attr (test, assert_instr(vpsraw, IMM8 = 7))] |
2785 | #[rustc_legacy_const_generics (1)] |
2786 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2787 | pub unsafe fn _mm256_srai_epi16<const IMM8: i32>(a: __m256i) -> __m256i { |
2788 | static_assert_uimm_bits!(IMM8, 8); |
    transmute(simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16)))
2790 | } |
2791 | |
2792 | /// Shifts packed 32-bit integers in `a` right by `IMM8` while |
2793 | /// shifting in sign bits. |
2794 | /// |
2795 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi32) |
2796 | #[inline ] |
2797 | #[target_feature (enable = "avx2" )] |
2798 | #[cfg_attr (test, assert_instr(vpsrad, IMM8 = 7))] |
2799 | #[rustc_legacy_const_generics (1)] |
2800 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2801 | pub unsafe fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i { |
2802 | static_assert_uimm_bits!(IMM8, 8); |
    transmute(simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31))))
2804 | } |
2805 | |
2806 | /// Shifts packed 32-bit integers in `a` right by the amount specified by the |
2807 | /// corresponding element in `count` while shifting in sign bits. |
2808 | /// |
2809 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi32) |
2810 | #[inline ] |
2811 | #[target_feature (enable = "avx2" )] |
2812 | #[cfg_attr (test, assert_instr(vpsravd))] |
2813 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2814 | pub unsafe fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i { |
    transmute(psravd(a.as_i32x4(), count.as_i32x4()))
2816 | } |
2817 | |
2818 | /// Shifts packed 32-bit integers in `a` right by the amount specified by the |
2819 | /// corresponding element in `count` while shifting in sign bits. |
2820 | /// |
2821 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi32) |
2822 | #[inline ] |
2823 | #[target_feature (enable = "avx2" )] |
2824 | #[cfg_attr (test, assert_instr(vpsravd))] |
2825 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2826 | pub unsafe fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i { |
    transmute(psravd256(a.as_i32x8(), count.as_i32x8()))
2828 | } |
2829 | |
2830 | /// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros. |
2831 | /// |
2832 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_si256) |
2833 | #[inline ] |
2834 | #[target_feature (enable = "avx2" )] |
2835 | #[cfg_attr (test, assert_instr(vpsrldq, IMM8 = 1))] |
2836 | #[rustc_legacy_const_generics (1)] |
2837 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2838 | pub unsafe fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i { |
2839 | static_assert_uimm_bits!(IMM8, 8); |
2840 | _mm256_bsrli_epi128::<IMM8>(a) |
2841 | } |
2842 | |
2843 | /// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros. |
2844 | /// |
2845 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bsrli_epi128) |
2846 | #[inline ] |
2847 | #[target_feature (enable = "avx2" )] |
2848 | #[cfg_attr (test, assert_instr(vpsrldq, IMM8 = 1))] |
2849 | #[rustc_legacy_const_generics (1)] |
2850 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2851 | pub unsafe fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i { |
2852 | static_assert_uimm_bits!(IMM8, 8); |
2853 | let a = a.as_i8x32(); |
2854 | let zero = _mm256_setzero_si256().as_i8x32(); |
2855 | let r: i8x32 = match IMM8 % 16 { |
2856 | 0 => simd_shuffle!( |
2857 | a, |
2858 | zero, |
2859 | [ |
2860 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, |
2861 | 23, 24, 25, 26, 27, 28, 29, 30, 31, |
2862 | ], |
2863 | ), |
2864 | 1 => simd_shuffle!( |
2865 | a, |
2866 | zero, |
2867 | [ |
2868 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 17, 18, 19, 20, 21, 22, 23, |
2869 | 24, 25, 26, 27, 28, 29, 30, 31, 32, |
2870 | ], |
2871 | ), |
2872 | 2 => simd_shuffle!( |
2873 | a, |
2874 | zero, |
2875 | [ |
2876 | 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 18, 19, 20, 21, 22, 23, 24, |
2877 | 25, 26, 27, 28, 29, 30, 31, 32, 32, |
2878 | ], |
2879 | ), |
2880 | 3 => simd_shuffle!( |
2881 | a, |
2882 | zero, |
2883 | [ |
2884 | 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 19, 20, 21, 22, 23, 24, |
2885 | 25, 26, 27, 28, 29, 30, 31, 32, 32, 32, |
2886 | ], |
2887 | ), |
2888 | 4 => simd_shuffle!( |
2889 | a, |
2890 | zero, |
2891 | [ |
2892 | 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 20, 21, 22, 23, 24, 25, |
2893 | 26, 27, 28, 29, 30, 31, 32, 32, 32, 32, |
2894 | ], |
2895 | ), |
2896 | 5 => simd_shuffle!( |
2897 | a, |
2898 | zero, |
2899 | [ |
2900 | 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 21, 22, 23, 24, 25, 26, |
2901 | 27, 28, 29, 30, 31, 32, 32, 32, 32, 32, |
2902 | ], |
2903 | ), |
2904 | 6 => simd_shuffle!( |
2905 | a, |
2906 | zero, |
2907 | [ |
2908 | 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 22, 23, 24, 25, 26, 27, |
2909 | 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, |
2910 | ], |
2911 | ), |
2912 | 7 => simd_shuffle!( |
2913 | a, |
2914 | zero, |
2915 | [ |
2916 | 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 23, 24, 25, 26, 27, |
2917 | 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, |
2918 | ], |
2919 | ), |
2920 | 8 => simd_shuffle!( |
2921 | a, |
2922 | zero, |
2923 | [ |
2924 | 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 24, 25, 26, 27, 28, |
2925 | 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, 32, |
2926 | ], |
2927 | ), |
2928 | 9 => simd_shuffle!( |
2929 | a, |
2930 | zero, |
2931 | [ |
2932 | 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 25, 26, 27, 28, 29, |
2933 | 30, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, |
2934 | ], |
2935 | ), |
2936 | 10 => simd_shuffle!( |
2937 | a, |
2938 | zero, |
2939 | [ |
2940 | 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 26, 27, 28, 29, 30, |
2941 | 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, |
2942 | ], |
2943 | ), |
2944 | 11 => simd_shuffle!( |
2945 | a, |
2946 | zero, |
2947 | [ |
2948 | 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 27, 28, 29, 30, 31, |
2949 | 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, |
2950 | ], |
2951 | ), |
2952 | 12 => simd_shuffle!( |
2953 | a, |
2954 | zero, |
2955 | [ |
2956 | 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 28, 29, 30, 31, 32, |
2957 | 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, |
2958 | ], |
2959 | ), |
2960 | 13 => simd_shuffle!( |
2961 | a, |
2962 | zero, |
2963 | [ |
2964 | 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 29, 30, 31, 32, 32, |
2965 | 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, |
2966 | ], |
2967 | ), |
2968 | 14 => simd_shuffle!( |
2969 | a, |
2970 | zero, |
2971 | [ |
2972 | 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 30, 31, 32, 32, 32, |
2973 | 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, |
2974 | ], |
2975 | ), |
2976 | 15 => simd_shuffle!( |
2977 | a, |
2978 | zero, |
2979 | [ |
2980 | 14, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, |
2981 | 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, |
2982 | ], |
2983 | ), |
2984 | _ => zero, |
2985 | }; |
2986 | transmute(r) |
2987 | } |
2988 | |
2989 | /// Shifts packed 16-bit integers in `a` right by `count` while shifting in |
2990 | /// zeros. |
2991 | /// |
2992 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srl_epi16) |
2993 | #[inline ] |
2994 | #[target_feature (enable = "avx2" )] |
2995 | #[cfg_attr (test, assert_instr(vpsrlw))] |
2996 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
2997 | pub unsafe fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i { |
    transmute(psrlw(a.as_i16x16(), count.as_i16x8()))
2999 | } |
3000 | |
3001 | /// Shifts packed 32-bit integers in `a` right by `count` while shifting in |
3002 | /// zeros. |
3003 | /// |
3004 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srl_epi32) |
3005 | #[inline ] |
3006 | #[target_feature (enable = "avx2" )] |
3007 | #[cfg_attr (test, assert_instr(vpsrld))] |
3008 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3009 | pub unsafe fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i { |
    transmute(psrld(a.as_i32x8(), count.as_i32x4()))
3011 | } |
3012 | |
3013 | /// Shifts packed 64-bit integers in `a` right by `count` while shifting in |
3014 | /// zeros. |
3015 | /// |
3016 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srl_epi64) |
3017 | #[inline ] |
3018 | #[target_feature (enable = "avx2" )] |
3019 | #[cfg_attr (test, assert_instr(vpsrlq))] |
3020 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3021 | pub unsafe fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i { |
    transmute(psrlq(a.as_i64x4(), count.as_i64x2()))
3023 | } |
3024 | |
3025 | /// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in |
/// zeros.
3027 | /// |
3028 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi16) |
3029 | #[inline ] |
3030 | #[target_feature (enable = "avx2" )] |
3031 | #[cfg_attr (test, assert_instr(vpsrlw, IMM8 = 7))] |
3032 | #[rustc_legacy_const_generics (1)] |
3033 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3034 | pub unsafe fn _mm256_srli_epi16<const IMM8: i32>(a: __m256i) -> __m256i { |
3035 | static_assert_uimm_bits!(IMM8, 8); |
3036 | if IMM8 >= 16 { |
3037 | _mm256_setzero_si256() |
3038 | } else { |
        transmute(simd_shr(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
3040 | } |
3041 | } |
3042 | |
3043 | /// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in |
/// zeros.
3045 | /// |
3046 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi32) |
3047 | #[inline ] |
3048 | #[target_feature (enable = "avx2" )] |
3049 | #[cfg_attr (test, assert_instr(vpsrld, IMM8 = 7))] |
3050 | #[rustc_legacy_const_generics (1)] |
3051 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3052 | pub unsafe fn _mm256_srli_epi32<const IMM8: i32>(a: __m256i) -> __m256i { |
3053 | static_assert_uimm_bits!(IMM8, 8); |
3054 | if IMM8 >= 32 { |
3055 | _mm256_setzero_si256() |
3056 | } else { |
        transmute(simd_shr(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
3058 | } |
3059 | } |
3060 | |
3061 | /// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in |
/// zeros.
3063 | /// |
3064 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi64) |
3065 | #[inline ] |
3066 | #[target_feature (enable = "avx2" )] |
3067 | #[cfg_attr (test, assert_instr(vpsrlq, IMM8 = 7))] |
3068 | #[rustc_legacy_const_generics (1)] |
3069 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3070 | pub unsafe fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i { |
3071 | static_assert_uimm_bits!(IMM8, 8); |
3072 | if IMM8 >= 64 { |
3073 | _mm256_setzero_si256() |
3074 | } else { |
        transmute(simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
3076 | } |
3077 | } |
3078 | |
3079 | /// Shifts packed 32-bit integers in `a` right by the amount specified by |
/// the corresponding element in `count` while shifting in zeros.
3081 | /// |
3082 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi32) |
3083 | #[inline ] |
3084 | #[target_feature (enable = "avx2" )] |
3085 | #[cfg_attr (test, assert_instr(vpsrlvd))] |
3086 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3087 | pub unsafe fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i { |
    transmute(psrlvd(a.as_i32x4(), count.as_i32x4()))
3089 | } |
3090 | |
3091 | /// Shifts packed 32-bit integers in `a` right by the amount specified by |
/// the corresponding element in `count` while shifting in zeros.
3093 | /// |
3094 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi32) |
3095 | #[inline ] |
3096 | #[target_feature (enable = "avx2" )] |
3097 | #[cfg_attr (test, assert_instr(vpsrlvd))] |
3098 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3099 | pub unsafe fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i { |
    transmute(psrlvd256(a.as_i32x8(), count.as_i32x8()))
3101 | } |
3102 | |
3103 | /// Shifts packed 64-bit integers in `a` right by the amount specified by |
/// the corresponding element in `count` while shifting in zeros.
3105 | /// |
3106 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi64) |
3107 | #[inline ] |
3108 | #[target_feature (enable = "avx2" )] |
3109 | #[cfg_attr (test, assert_instr(vpsrlvq))] |
3110 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3111 | pub unsafe fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i { |
    transmute(psrlvq(a.as_i64x2(), count.as_i64x2()))
3113 | } |
3114 | |
3115 | /// Shifts packed 64-bit integers in `a` right by the amount specified by |
/// the corresponding element in `count` while shifting in zeros.
3117 | /// |
3118 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi64) |
3119 | #[inline ] |
3120 | #[target_feature (enable = "avx2" )] |
3121 | #[cfg_attr (test, assert_instr(vpsrlvq))] |
3122 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3123 | pub unsafe fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i { |
    transmute(psrlvq256(a.as_i64x4(), count.as_i64x4()))
3125 | } |
3126 | |
3127 | // TODO _mm256_stream_load_si256 (__m256i const* mem_addr) |
3128 | |
/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`.
3130 | /// |
3131 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi16) |
3132 | #[inline ] |
3133 | #[target_feature (enable = "avx2" )] |
3134 | #[cfg_attr (test, assert_instr(vpsubw))] |
3135 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3136 | pub unsafe fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i { |
    transmute(simd_sub(a.as_i16x16(), b.as_i16x16()))
3138 | } |
3139 | |
/// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in `a`.
3141 | /// |
3142 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi32) |
3143 | #[inline ] |
3144 | #[target_feature (enable = "avx2" )] |
3145 | #[cfg_attr (test, assert_instr(vpsubd))] |
3146 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3147 | pub unsafe fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i { |
    transmute(simd_sub(a.as_i32x8(), b.as_i32x8()))
3149 | } |
3150 | |
/// Subtracts packed 64-bit integers in `b` from packed 64-bit integers in `a`.
3152 | /// |
3153 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi64) |
3154 | #[inline ] |
3155 | #[target_feature (enable = "avx2" )] |
3156 | #[cfg_attr (test, assert_instr(vpsubq))] |
3157 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3158 | pub unsafe fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i { |
    transmute(simd_sub(a.as_i64x4(), b.as_i64x4()))
3160 | } |
3161 | |
/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`.
3163 | /// |
3164 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi8) |
3165 | #[inline ] |
3166 | #[target_feature (enable = "avx2" )] |
3167 | #[cfg_attr (test, assert_instr(vpsubb))] |
3168 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3169 | pub unsafe fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i { |
    transmute(simd_sub(a.as_i8x32(), b.as_i8x32()))
3171 | } |
3172 | |
/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in
3174 | /// `a` using saturation. |
3175 | /// |
3176 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epi16) |
3177 | #[inline ] |
3178 | #[target_feature (enable = "avx2" )] |
3179 | #[cfg_attr (test, assert_instr(vpsubsw))] |
3180 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3181 | pub unsafe fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i { |
    transmute(simd_saturating_sub(a.as_i16x16(), b.as_i16x16()))
3183 | } |
3184 | |
/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in
3186 | /// `a` using saturation. |
3187 | /// |
3188 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epi8) |
3189 | #[inline ] |
3190 | #[target_feature (enable = "avx2" )] |
3191 | #[cfg_attr (test, assert_instr(vpsubsb))] |
3192 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3193 | pub unsafe fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i { |
    transmute(simd_saturating_sub(a.as_i8x32(), b.as_i8x32()))
3195 | } |
3196 | |
/// Subtracts packed unsigned 16-bit integers in `b` from packed 16-bit
3198 | /// integers in `a` using saturation. |
3199 | /// |
3200 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epu16) |
3201 | #[inline ] |
3202 | #[target_feature (enable = "avx2" )] |
3203 | #[cfg_attr (test, assert_instr(vpsubusw))] |
3204 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3205 | pub unsafe fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i { |
    transmute(simd_saturating_sub(a.as_u16x16(), b.as_u16x16()))
3207 | } |
3208 | |
/// Subtracts packed unsigned 8-bit integers in `b` from packed 8-bit
3210 | /// integers in `a` using saturation. |
3211 | /// |
3212 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epu8) |
3213 | #[inline ] |
3214 | #[target_feature (enable = "avx2" )] |
3215 | #[cfg_attr (test, assert_instr(vpsubusb))] |
3216 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3217 | pub unsafe fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i { |
    transmute(simd_saturating_sub(a.as_u8x32(), b.as_u8x32()))
3219 | } |
3220 | |
/// Unpacks and interleaves 8-bit integers from the high half of each
3222 | /// 128-bit lane in `a` and `b`. |
3223 | /// |
3224 | /// ```rust |
3225 | /// #[cfg(target_arch = "x86" )] |
3226 | /// use std::arch::x86::*; |
3227 | /// #[cfg(target_arch = "x86_64" )] |
3228 | /// use std::arch::x86_64::*; |
3229 | /// |
3230 | /// # fn main() { |
3231 | /// # if is_x86_feature_detected!("avx2" ) { |
3232 | /// # #[target_feature (enable = "avx2" )] |
3233 | /// # unsafe fn worker() { |
3234 | /// let a = _mm256_setr_epi8( |
3235 | /// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, |
3236 | /// 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
3237 | /// ); |
3238 | /// let b = _mm256_setr_epi8( |
3239 | /// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, |
3240 | /// -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, |
3241 | /// -30, -31, |
3242 | /// ); |
3243 | /// |
3244 | /// let c = _mm256_unpackhi_epi8(a, b); |
3245 | /// |
3246 | /// let expected = _mm256_setr_epi8( |
3247 | /// 8, -8, 9, -9, 10, -10, 11, -11, 12, -12, 13, -13, 14, -14, 15, -15, |
3248 | /// 24, -24, 25, -25, 26, -26, 27, -27, 28, -28, 29, -29, 30, -30, 31, |
3249 | /// -31, |
3250 | /// ); |
3251 | /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0); |
3252 | /// |
3253 | /// # } |
3254 | /// # unsafe { worker(); } |
3255 | /// # } |
3256 | /// # } |
3257 | /// ``` |
3258 | /// |
3259 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi8) |
3260 | #[inline ] |
3261 | #[target_feature (enable = "avx2" )] |
3262 | #[cfg_attr (test, assert_instr(vpunpckhbw))] |
3263 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3264 | pub unsafe fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i { |
3265 | #[rustfmt::skip] |
3266 | let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [ |
3267 | 8, 40, 9, 41, 10, 42, 11, 43, |
3268 | 12, 44, 13, 45, 14, 46, 15, 47, |
3269 | 24, 56, 25, 57, 26, 58, 27, 59, |
3270 | 28, 60, 29, 61, 30, 62, 31, 63, |
3271 | ]); |
    transmute(r)
3273 | } |
3274 | |
/// Unpacks and interleaves 8-bit integers from the low half of each
3276 | /// 128-bit lane of `a` and `b`. |
3277 | /// |
3278 | /// ```rust |
3279 | /// #[cfg(target_arch = "x86" )] |
3280 | /// use std::arch::x86::*; |
3281 | /// #[cfg(target_arch = "x86_64" )] |
3282 | /// use std::arch::x86_64::*; |
3283 | /// |
3284 | /// # fn main() { |
3285 | /// # if is_x86_feature_detected!("avx2" ) { |
3286 | /// # #[target_feature (enable = "avx2" )] |
3287 | /// # unsafe fn worker() { |
3288 | /// let a = _mm256_setr_epi8( |
3289 | /// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, |
3290 | /// 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
3291 | /// ); |
3292 | /// let b = _mm256_setr_epi8( |
3293 | /// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, |
3294 | /// -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, |
3295 | /// -30, -31, |
3296 | /// ); |
3297 | /// |
3298 | /// let c = _mm256_unpacklo_epi8(a, b); |
3299 | /// |
3300 | /// let expected = _mm256_setr_epi8( |
3301 | /// 0, 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 16, -16, 17, |
3302 | /// -17, 18, -18, 19, -19, 20, -20, 21, -21, 22, -22, 23, -23, |
3303 | /// ); |
3304 | /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0); |
3305 | /// |
3306 | /// # } |
3307 | /// # unsafe { worker(); } |
3308 | /// # } |
3309 | /// # } |
3310 | /// ``` |
3311 | /// |
3312 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi8) |
3313 | #[inline ] |
3314 | #[target_feature (enable = "avx2" )] |
3315 | #[cfg_attr (test, assert_instr(vpunpcklbw))] |
3316 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3317 | pub unsafe fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i { |
3318 | #[rustfmt::skip] |
3319 | let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [ |
3320 | 0, 32, 1, 33, 2, 34, 3, 35, |
3321 | 4, 36, 5, 37, 6, 38, 7, 39, |
3322 | 16, 48, 17, 49, 18, 50, 19, 51, |
3323 | 20, 52, 21, 53, 22, 54, 23, 55, |
3324 | ]); |
    transmute(r)
3326 | } |
3327 | |
/// Unpacks and interleaves 16-bit integers from the high half of each
3329 | /// 128-bit lane of `a` and `b`. |
3330 | /// |
3331 | /// ```rust |
3332 | /// #[cfg(target_arch = "x86" )] |
3333 | /// use std::arch::x86::*; |
3334 | /// #[cfg(target_arch = "x86_64" )] |
3335 | /// use std::arch::x86_64::*; |
3336 | /// |
3337 | /// # fn main() { |
3338 | /// # if is_x86_feature_detected!("avx2" ) { |
3339 | /// # #[target_feature (enable = "avx2" )] |
3340 | /// # unsafe fn worker() { |
3341 | /// let a = _mm256_setr_epi16( |
3342 | /// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
3343 | /// ); |
3344 | /// let b = _mm256_setr_epi16( |
3345 | /// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, |
3346 | /// ); |
3347 | /// |
3348 | /// let c = _mm256_unpackhi_epi16(a, b); |
3349 | /// |
3350 | /// let expected = _mm256_setr_epi16( |
3351 | /// 4, -4, 5, -5, 6, -6, 7, -7, 12, -12, 13, -13, 14, -14, 15, -15, |
3352 | /// ); |
3353 | /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0); |
3354 | /// |
3355 | /// # } |
3356 | /// # unsafe { worker(); } |
3357 | /// # } |
3358 | /// # } |
3359 | /// ``` |
3360 | /// |
3361 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi16) |
3362 | #[inline ] |
3363 | #[target_feature (enable = "avx2" )] |
3364 | #[cfg_attr (test, assert_instr(vpunpckhwd))] |
3365 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3366 | pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i { |
3367 | let r: i16x16 = simd_shuffle!( |
3368 | a.as_i16x16(), |
3369 | b.as_i16x16(), |
3370 | [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31], |
3371 | ); |
    transmute(r)
3373 | } |
3374 | |
/// Unpacks and interleaves 16-bit integers from the low half of each
3376 | /// 128-bit lane of `a` and `b`. |
3377 | /// |
3378 | /// ```rust |
3379 | /// #[cfg(target_arch = "x86" )] |
3380 | /// use std::arch::x86::*; |
3381 | /// #[cfg(target_arch = "x86_64" )] |
3382 | /// use std::arch::x86_64::*; |
3383 | /// |
3384 | /// # fn main() { |
3385 | /// # if is_x86_feature_detected!("avx2" ) { |
3386 | /// # #[target_feature (enable = "avx2" )] |
3387 | /// # unsafe fn worker() { |
3388 | /// |
3389 | /// let a = _mm256_setr_epi16( |
3390 | /// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
3391 | /// ); |
3392 | /// let b = _mm256_setr_epi16( |
3393 | /// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, |
3394 | /// ); |
3395 | /// |
3396 | /// let c = _mm256_unpacklo_epi16(a, b); |
3397 | /// |
3398 | /// let expected = _mm256_setr_epi16( |
3399 | /// 0, 0, 1, -1, 2, -2, 3, -3, 8, -8, 9, -9, 10, -10, 11, -11, |
3400 | /// ); |
3401 | /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0); |
3402 | /// |
3403 | /// # } |
3404 | /// # unsafe { worker(); } |
3405 | /// # } |
3406 | /// # } |
3407 | /// ``` |
3408 | /// |
3409 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi16) |
3410 | #[inline ] |
3411 | #[target_feature (enable = "avx2" )] |
3412 | #[cfg_attr (test, assert_instr(vpunpcklwd))] |
3413 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3414 | pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i { |
3415 | let r: i16x16 = simd_shuffle!( |
3416 | a.as_i16x16(), |
3417 | b.as_i16x16(), |
3418 | [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27], |
3419 | ); |
    transmute(r)
3421 | } |
3422 | |
/// Unpacks and interleaves 32-bit integers from the high half of each
3424 | /// 128-bit lane of `a` and `b`. |
3425 | /// |
3426 | /// ```rust |
3427 | /// #[cfg(target_arch = "x86" )] |
3428 | /// use std::arch::x86::*; |
3429 | /// #[cfg(target_arch = "x86_64" )] |
3430 | /// use std::arch::x86_64::*; |
3431 | /// |
3432 | /// # fn main() { |
3433 | /// # if is_x86_feature_detected!("avx2" ) { |
3434 | /// # #[target_feature (enable = "avx2" )] |
3435 | /// # unsafe fn worker() { |
3436 | /// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); |
3437 | /// let b = _mm256_setr_epi32(0, -1, -2, -3, -4, -5, -6, -7); |
3438 | /// |
3439 | /// let c = _mm256_unpackhi_epi32(a, b); |
3440 | /// |
3441 | /// let expected = _mm256_setr_epi32(2, -2, 3, -3, 6, -6, 7, -7); |
3442 | /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0); |
3443 | /// |
3444 | /// # } |
3445 | /// # unsafe { worker(); } |
3446 | /// # } |
3447 | /// # } |
3448 | /// ``` |
3449 | /// |
3450 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi32) |
3451 | #[inline ] |
3452 | #[target_feature (enable = "avx2" )] |
3453 | #[cfg_attr (test, assert_instr(vunpckhps))] |
3454 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3455 | pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i { |
3456 | let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]); |
    transmute(r)
3458 | } |
3459 | |
/// Unpacks and interleaves 32-bit integers from the low half of each
3461 | /// 128-bit lane of `a` and `b`. |
3462 | /// |
3463 | /// ```rust |
3464 | /// #[cfg(target_arch = "x86" )] |
3465 | /// use std::arch::x86::*; |
3466 | /// #[cfg(target_arch = "x86_64" )] |
3467 | /// use std::arch::x86_64::*; |
3468 | /// |
3469 | /// # fn main() { |
3470 | /// # if is_x86_feature_detected!("avx2" ) { |
3471 | /// # #[target_feature (enable = "avx2" )] |
3472 | /// # unsafe fn worker() { |
3473 | /// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); |
3474 | /// let b = _mm256_setr_epi32(0, -1, -2, -3, -4, -5, -6, -7); |
3475 | /// |
3476 | /// let c = _mm256_unpacklo_epi32(a, b); |
3477 | /// |
3478 | /// let expected = _mm256_setr_epi32(0, 0, 1, -1, 4, -4, 5, -5); |
3479 | /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0); |
3480 | /// |
3481 | /// # } |
3482 | /// # unsafe { worker(); } |
3483 | /// # } |
3484 | /// # } |
3485 | /// ``` |
3486 | /// |
3487 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi32) |
3488 | #[inline ] |
3489 | #[target_feature (enable = "avx2" )] |
3490 | #[cfg_attr (test, assert_instr(vunpcklps))] |
3491 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3492 | pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i { |
3493 | let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]); |
    transmute(r)
3495 | } |
3496 | |
/// Unpacks and interleaves 64-bit integers from the high half of each
3498 | /// 128-bit lane of `a` and `b`. |
3499 | /// |
3500 | /// ```rust |
3501 | /// #[cfg(target_arch = "x86" )] |
3502 | /// use std::arch::x86::*; |
3503 | /// #[cfg(target_arch = "x86_64" )] |
3504 | /// use std::arch::x86_64::*; |
3505 | /// |
3506 | /// # fn main() { |
3507 | /// # if is_x86_feature_detected!("avx2" ) { |
3508 | /// # #[target_feature (enable = "avx2" )] |
3509 | /// # unsafe fn worker() { |
3510 | /// let a = _mm256_setr_epi64x(0, 1, 2, 3); |
3511 | /// let b = _mm256_setr_epi64x(0, -1, -2, -3); |
3512 | /// |
3513 | /// let c = _mm256_unpackhi_epi64(a, b); |
3514 | /// |
3515 | /// let expected = _mm256_setr_epi64x(1, -1, 3, -3); |
3516 | /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0); |
3517 | /// |
3518 | /// # } |
3519 | /// # unsafe { worker(); } |
3520 | /// # } |
3521 | /// # } |
3522 | /// ``` |
3523 | /// |
3524 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi64) |
3525 | #[inline ] |
3526 | #[target_feature (enable = "avx2" )] |
3527 | #[cfg_attr (test, assert_instr(vunpckhpd))] |
3528 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3529 | pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i { |
3530 | let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]); |
    transmute(r)
3532 | } |
3533 | |
/// Unpacks and interleaves 64-bit integers from the low half of each
3535 | /// 128-bit lane of `a` and `b`. |
3536 | /// |
3537 | /// ```rust |
3538 | /// #[cfg(target_arch = "x86" )] |
3539 | /// use std::arch::x86::*; |
3540 | /// #[cfg(target_arch = "x86_64" )] |
3541 | /// use std::arch::x86_64::*; |
3542 | /// |
3543 | /// # fn main() { |
3544 | /// # if is_x86_feature_detected!("avx2" ) { |
3545 | /// # #[target_feature (enable = "avx2" )] |
3546 | /// # unsafe fn worker() { |
3547 | /// let a = _mm256_setr_epi64x(0, 1, 2, 3); |
3548 | /// let b = _mm256_setr_epi64x(0, -1, -2, -3); |
3549 | /// |
3550 | /// let c = _mm256_unpacklo_epi64(a, b); |
3551 | /// |
3552 | /// let expected = _mm256_setr_epi64x(0, 0, 2, -2); |
3553 | /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0); |
3554 | /// |
3555 | /// # } |
3556 | /// # unsafe { worker(); } |
3557 | /// # } |
3558 | /// # } |
3559 | /// ``` |
3560 | /// |
3561 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi64) |
3562 | #[inline ] |
3563 | #[target_feature (enable = "avx2" )] |
3564 | #[cfg_attr (test, assert_instr(vunpcklpd))] |
3565 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3566 | pub unsafe fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i { |
3567 | let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]); |
    transmute(r)
3569 | } |
3570 | |
3571 | /// Computes the bitwise XOR of 256 bits (representing integer data) |
3572 | /// in `a` and `b` |
3573 | /// |
3574 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_si256) |
3575 | #[inline ] |
3576 | #[target_feature (enable = "avx2" )] |
3577 | #[cfg_attr (test, assert_instr(vxorps))] |
3578 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3579 | pub unsafe fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i { |
    transmute(simd_xor(a.as_i64x4(), b.as_i64x4()))
3581 | } |
3582 | |
3583 | /// Extracts an 8-bit integer from `a`, selected with `INDEX`. Returns a 32-bit |
3584 | /// integer containing the zero-extended integer data. |
3585 | /// |
3586 | /// See [LLVM commit D20468](https://reviews.llvm.org/D20468). |
3587 | /// |
3588 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi8) |
3589 | #[inline ] |
3590 | #[target_feature (enable = "avx2" )] |
3591 | // This intrinsic has no corresponding instruction. |
3592 | #[rustc_legacy_const_generics (1)] |
3593 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3594 | pub unsafe fn _mm256_extract_epi8<const INDEX: i32>(a: __m256i) -> i32 { |
3595 | static_assert_uimm_bits!(INDEX, 5); |
3596 | simd_extract!(a.as_u8x32(), INDEX as u32, u8) as i32 |
3597 | } |
3598 | |
3599 | /// Extracts a 16-bit integer from `a`, selected with `INDEX`. Returns a 32-bit |
3600 | /// integer containing the zero-extended integer data. |
3601 | /// |
3602 | /// See [LLVM commit D20468](https://reviews.llvm.org/D20468). |
3603 | /// |
3604 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi16) |
3605 | #[inline ] |
3606 | #[target_feature (enable = "avx2" )] |
3607 | // This intrinsic has no corresponding instruction. |
3608 | #[rustc_legacy_const_generics (1)] |
3609 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3610 | pub unsafe fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 { |
3611 | static_assert_uimm_bits!(INDEX, 4); |
3612 | simd_extract!(a.as_u16x16(), INDEX as u32, u16) as i32 |
3613 | } |
3614 | |
3615 | /// Extracts a 32-bit integer from `a`, selected with `INDEX`. |
3616 | /// |
3617 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi32) |
3618 | #[inline ] |
3619 | #[target_feature (enable = "avx2" )] |
3620 | // This intrinsic has no corresponding instruction. |
3621 | #[rustc_legacy_const_generics (1)] |
3622 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3623 | pub unsafe fn _mm256_extract_epi32<const INDEX: i32>(a: __m256i) -> i32 { |
3624 | static_assert_uimm_bits!(INDEX, 3); |
3625 | simd_extract!(a.as_i32x8(), INDEX as u32) |
3626 | } |
3627 | |
3628 | /// Returns the first element of the input vector of `[4 x double]`. |
3629 | /// |
3630 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsd_f64) |
3631 | #[inline ] |
3632 | #[target_feature (enable = "avx2" )] |
3633 | //#[cfg_attr(test, assert_instr(movsd))] FIXME |
3634 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3635 | pub unsafe fn _mm256_cvtsd_f64(a: __m256d) -> f64 { |
3636 | simd_extract!(a, 0) |
3637 | } |
3638 | |
3639 | /// Returns the first element of the input vector of `[8 x i32]`. |
3640 | /// |
3641 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsi256_si32) |
3642 | #[inline ] |
3643 | #[target_feature (enable = "avx2" )] |
3644 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
3645 | pub unsafe fn _mm256_cvtsi256_si32(a: __m256i) -> i32 { |
3646 | simd_extract!(a.as_i32x8(), 0) |
3647 | } |
3648 | |
3649 | #[allow (improper_ctypes)] |
3650 | extern "C" { |
3651 | #[link_name = "llvm.x86.avx2.phadd.w" ] |
3652 | fn phaddw(a: i16x16, b: i16x16) -> i16x16; |
3653 | #[link_name = "llvm.x86.avx2.phadd.d" ] |
3654 | fn phaddd(a: i32x8, b: i32x8) -> i32x8; |
3655 | #[link_name = "llvm.x86.avx2.phadd.sw" ] |
3656 | fn phaddsw(a: i16x16, b: i16x16) -> i16x16; |
3657 | #[link_name = "llvm.x86.avx2.phsub.w" ] |
3658 | fn phsubw(a: i16x16, b: i16x16) -> i16x16; |
3659 | #[link_name = "llvm.x86.avx2.phsub.d" ] |
3660 | fn phsubd(a: i32x8, b: i32x8) -> i32x8; |
3661 | #[link_name = "llvm.x86.avx2.phsub.sw" ] |
3662 | fn phsubsw(a: i16x16, b: i16x16) -> i16x16; |
3663 | #[link_name = "llvm.x86.avx2.pmadd.wd" ] |
3664 | fn pmaddwd(a: i16x16, b: i16x16) -> i32x8; |
3665 | #[link_name = "llvm.x86.avx2.pmadd.ub.sw" ] |
3666 | fn pmaddubsw(a: u8x32, b: u8x32) -> i16x16; |
3667 | #[link_name = "llvm.x86.avx2.maskload.d" ] |
3668 | fn maskloadd(mem_addr: *const i8, mask: i32x4) -> i32x4; |
3669 | #[link_name = "llvm.x86.avx2.maskload.d.256" ] |
3670 | fn maskloadd256(mem_addr: *const i8, mask: i32x8) -> i32x8; |
3671 | #[link_name = "llvm.x86.avx2.maskload.q" ] |
3672 | fn maskloadq(mem_addr: *const i8, mask: i64x2) -> i64x2; |
3673 | #[link_name = "llvm.x86.avx2.maskload.q.256" ] |
3674 | fn maskloadq256(mem_addr: *const i8, mask: i64x4) -> i64x4; |
3675 | #[link_name = "llvm.x86.avx2.maskstore.d" ] |
3676 | fn maskstored(mem_addr: *mut i8, mask: i32x4, a: i32x4); |
3677 | #[link_name = "llvm.x86.avx2.maskstore.d.256" ] |
3678 | fn maskstored256(mem_addr: *mut i8, mask: i32x8, a: i32x8); |
3679 | #[link_name = "llvm.x86.avx2.maskstore.q" ] |
3680 | fn maskstoreq(mem_addr: *mut i8, mask: i64x2, a: i64x2); |
3681 | #[link_name = "llvm.x86.avx2.maskstore.q.256" ] |
3682 | fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4); |
3683 | #[link_name = "llvm.x86.avx2.mpsadbw" ] |
3684 | fn mpsadbw(a: u8x32, b: u8x32, imm8: i32) -> u16x16; |
3685 | #[link_name = "llvm.x86.avx2.pmul.hr.sw" ] |
3686 | fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16; |
3687 | #[link_name = "llvm.x86.avx2.packsswb" ] |
3688 | fn packsswb(a: i16x16, b: i16x16) -> i8x32; |
3689 | #[link_name = "llvm.x86.avx2.packssdw" ] |
3690 | fn packssdw(a: i32x8, b: i32x8) -> i16x16; |
3691 | #[link_name = "llvm.x86.avx2.packuswb" ] |
3692 | fn packuswb(a: i16x16, b: i16x16) -> u8x32; |
3693 | #[link_name = "llvm.x86.avx2.packusdw" ] |
3694 | fn packusdw(a: i32x8, b: i32x8) -> u16x16; |
3695 | #[link_name = "llvm.x86.avx2.psad.bw" ] |
3696 | fn psadbw(a: u8x32, b: u8x32) -> u64x4; |
3697 | #[link_name = "llvm.x86.avx2.psign.b" ] |
3698 | fn psignb(a: i8x32, b: i8x32) -> i8x32; |
3699 | #[link_name = "llvm.x86.avx2.psign.w" ] |
3700 | fn psignw(a: i16x16, b: i16x16) -> i16x16; |
3701 | #[link_name = "llvm.x86.avx2.psign.d" ] |
3702 | fn psignd(a: i32x8, b: i32x8) -> i32x8; |
3703 | #[link_name = "llvm.x86.avx2.psll.w" ] |
3704 | fn psllw(a: i16x16, count: i16x8) -> i16x16; |
3705 | #[link_name = "llvm.x86.avx2.psll.d" ] |
3706 | fn pslld(a: i32x8, count: i32x4) -> i32x8; |
3707 | #[link_name = "llvm.x86.avx2.psll.q" ] |
3708 | fn psllq(a: i64x4, count: i64x2) -> i64x4; |
3709 | #[link_name = "llvm.x86.avx2.psllv.d" ] |
3710 | fn psllvd(a: i32x4, count: i32x4) -> i32x4; |
3711 | #[link_name = "llvm.x86.avx2.psllv.d.256" ] |
3712 | fn psllvd256(a: i32x8, count: i32x8) -> i32x8; |
3713 | #[link_name = "llvm.x86.avx2.psllv.q" ] |
3714 | fn psllvq(a: i64x2, count: i64x2) -> i64x2; |
3715 | #[link_name = "llvm.x86.avx2.psllv.q.256" ] |
3716 | fn psllvq256(a: i64x4, count: i64x4) -> i64x4; |
3717 | #[link_name = "llvm.x86.avx2.psra.w" ] |
3718 | fn psraw(a: i16x16, count: i16x8) -> i16x16; |
3719 | #[link_name = "llvm.x86.avx2.psra.d" ] |
3720 | fn psrad(a: i32x8, count: i32x4) -> i32x8; |
3721 | #[link_name = "llvm.x86.avx2.psrav.d" ] |
3722 | fn psravd(a: i32x4, count: i32x4) -> i32x4; |
3723 | #[link_name = "llvm.x86.avx2.psrav.d.256" ] |
3724 | fn psravd256(a: i32x8, count: i32x8) -> i32x8; |
3725 | #[link_name = "llvm.x86.avx2.psrl.w" ] |
3726 | fn psrlw(a: i16x16, count: i16x8) -> i16x16; |
3727 | #[link_name = "llvm.x86.avx2.psrl.d" ] |
3728 | fn psrld(a: i32x8, count: i32x4) -> i32x8; |
3729 | #[link_name = "llvm.x86.avx2.psrl.q" ] |
3730 | fn psrlq(a: i64x4, count: i64x2) -> i64x4; |
3731 | #[link_name = "llvm.x86.avx2.psrlv.d" ] |
3732 | fn psrlvd(a: i32x4, count: i32x4) -> i32x4; |
3733 | #[link_name = "llvm.x86.avx2.psrlv.d.256" ] |
3734 | fn psrlvd256(a: i32x8, count: i32x8) -> i32x8; |
3735 | #[link_name = "llvm.x86.avx2.psrlv.q" ] |
3736 | fn psrlvq(a: i64x2, count: i64x2) -> i64x2; |
3737 | #[link_name = "llvm.x86.avx2.psrlv.q.256" ] |
3738 | fn psrlvq256(a: i64x4, count: i64x4) -> i64x4; |
3739 | #[link_name = "llvm.x86.avx2.pshuf.b" ] |
3740 | fn pshufb(a: u8x32, b: u8x32) -> u8x32; |
3741 | #[link_name = "llvm.x86.avx2.permd" ] |
3742 | fn permd(a: u32x8, b: u32x8) -> u32x8; |
3743 | #[link_name = "llvm.x86.avx2.permps" ] |
3744 | fn permps(a: __m256, b: i32x8) -> __m256; |
3745 | #[link_name = "llvm.x86.avx2.vperm2i128" ] |
3746 | fn vperm2i128(a: i64x4, b: i64x4, imm8: i8) -> i64x4; |
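    // Note on the gather declarations that follow: they mirror the
    // VPGATHER*/VGATHER* operand layout. `src` supplies the fallback value
    // per element, `slice` is the base pointer, `offsets` holds per-element
    // indices, an element is gathered only when the most significant bit of
    // its `mask` element is set, and `scale` (1, 2, 4 or 8) multiplies each
    // offset to produce a byte displacement.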
3747 | #[link_name = "llvm.x86.avx2.gather.d.d" ] |
3748 | fn pgatherdd(src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4; |
3749 | #[link_name = "llvm.x86.avx2.gather.d.d.256" ] |
3750 | fn vpgatherdd(src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8; |
3751 | #[link_name = "llvm.x86.avx2.gather.d.q" ] |
3752 | fn pgatherdq(src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2; |
3753 | #[link_name = "llvm.x86.avx2.gather.d.q.256" ] |
3754 | fn vpgatherdq(src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4; |
3755 | #[link_name = "llvm.x86.avx2.gather.q.d" ] |
3756 | fn pgatherqd(src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4; |
3757 | #[link_name = "llvm.x86.avx2.gather.q.d.256" ] |
3758 | fn vpgatherqd(src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4; |
3759 | #[link_name = "llvm.x86.avx2.gather.q.q" ] |
3760 | fn pgatherqq(src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2; |
3761 | #[link_name = "llvm.x86.avx2.gather.q.q.256" ] |
3762 | fn vpgatherqq(src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4; |
3763 | #[link_name = "llvm.x86.avx2.gather.d.pd" ] |
3764 | fn pgatherdpd( |
3765 | src: __m128d, |
3766 | slice: *const i8, |
3767 | offsets: i32x4, |
3768 | mask: __m128d, |
3769 | scale: i8, |
3770 | ) -> __m128d; |
3771 | #[link_name = "llvm.x86.avx2.gather.d.pd.256" ] |
3772 | fn vpgatherdpd( |
3773 | src: __m256d, |
3774 | slice: *const i8, |
3775 | offsets: i32x4, |
3776 | mask: __m256d, |
3777 | scale: i8, |
3778 | ) -> __m256d; |
3779 | #[link_name = "llvm.x86.avx2.gather.q.pd" ] |
3780 | fn pgatherqpd( |
3781 | src: __m128d, |
3782 | slice: *const i8, |
3783 | offsets: i64x2, |
3784 | mask: __m128d, |
3785 | scale: i8, |
3786 | ) -> __m128d; |
3787 | #[link_name = "llvm.x86.avx2.gather.q.pd.256" ] |
3788 | fn vpgatherqpd( |
3789 | src: __m256d, |
3790 | slice: *const i8, |
3791 | offsets: i64x4, |
3792 | mask: __m256d, |
3793 | scale: i8, |
3794 | ) -> __m256d; |
3795 | #[link_name = "llvm.x86.avx2.gather.d.ps" ] |
3796 | fn pgatherdps(src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8) |
3797 | -> __m128; |
3798 | #[link_name = "llvm.x86.avx2.gather.d.ps.256" ] |
3799 | fn vpgatherdps( |
3800 | src: __m256, |
3801 | slice: *const i8, |
3802 | offsets: i32x8, |
3803 | mask: __m256, |
3804 | scale: i8, |
3805 | ) -> __m256; |
3806 | #[link_name = "llvm.x86.avx2.gather.q.ps" ] |
3807 | fn pgatherqps(src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8) |
3808 | -> __m128; |
3809 | #[link_name = "llvm.x86.avx2.gather.q.ps.256" ] |
3810 | fn vpgatherqps( |
3811 | src: __m128, |
3812 | slice: *const i8, |
3813 | offsets: i64x4, |
3814 | mask: __m128, |
3815 | scale: i8, |
3816 | ) -> __m128; |
3817 | #[link_name = "llvm.x86.avx2.psll.dq" ] |
3818 | fn vpslldq(a: i64x4, b: i32) -> i64x4; |
3819 | #[link_name = "llvm.x86.avx2.psrl.dq" ] |
3820 | fn vpsrldq(a: i64x4, b: i32) -> i64x4; |
3821 | } |
3822 | |
3823 | #[cfg (test)] |
3824 | mod tests { |
3825 | |
3826 | use stdarch_test::simd_test; |
3827 | |
3828 | use crate::core_arch::x86::*; |
3829 | |
3830 | #[simd_test(enable = "avx2" )] |
3831 | unsafe fn test_mm256_abs_epi32() { |
3832 | #[rustfmt::skip] |
3833 | let a = _mm256_setr_epi32( |
3834 | 0, 1, -1, i32::MAX, |
3835 | i32::MIN, 100, -100, -32, |
3836 | ); |
3837 | let r = _mm256_abs_epi32(a); |
3838 | #[rustfmt::skip] |
3839 | let e = _mm256_setr_epi32( |
3840 | 0, 1, 1, i32::MAX, |
3841 | i32::MAX.wrapping_add(1), 100, 100, 32, |
3842 | ); |
3843 | assert_eq_m256i(r, e); |
3844 | } |
3845 | |
3846 | #[simd_test(enable = "avx2" )] |
3847 | unsafe fn test_mm256_abs_epi16() { |
3848 | #[rustfmt::skip] |
3849 | let a = _mm256_setr_epi16( |
3850 | 0, 1, -1, 2, -2, 3, -3, 4, |
3851 | -4, 5, -5, i16::MAX, i16::MIN, 100, -100, -32, |
3852 | ); |
3853 | let r = _mm256_abs_epi16(a); |
3854 | #[rustfmt::skip] |
3855 | let e = _mm256_setr_epi16( |
3856 | 0, 1, 1, 2, 2, 3, 3, 4, |
3857 | 4, 5, 5, i16::MAX, i16::MAX.wrapping_add(1), 100, 100, 32, |
3858 | ); |
3859 | assert_eq_m256i(r, e); |
3860 | } |
3861 | |
3862 | #[simd_test(enable = "avx2" )] |
3863 | unsafe fn test_mm256_abs_epi8() { |
3864 | #[rustfmt::skip] |
3865 | let a = _mm256_setr_epi8( |
3866 | 0, 1, -1, 2, -2, 3, -3, 4, |
3867 | -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32, |
3868 | 0, 1, -1, 2, -2, 3, -3, 4, |
3869 | -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32, |
3870 | ); |
3871 | let r = _mm256_abs_epi8(a); |
3872 | #[rustfmt::skip] |
3873 | let e = _mm256_setr_epi8( |
3874 | 0, 1, 1, 2, 2, 3, 3, 4, |
3875 | 4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32, |
3876 | 0, 1, 1, 2, 2, 3, 3, 4, |
3877 | 4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32, |
3878 | ); |
3879 | assert_eq_m256i(r, e); |
3880 | } |
3881 | |
3882 | #[simd_test(enable = "avx2" )] |
3883 | unsafe fn test_mm256_add_epi64() { |
3884 | let a = _mm256_setr_epi64x(-10, 0, 100, 1_000_000_000); |
3885 | let b = _mm256_setr_epi64x(-1, 0, 1, 2); |
3886 | let r = _mm256_add_epi64(a, b); |
3887 | let e = _mm256_setr_epi64x(-11, 0, 101, 1_000_000_002); |
3888 | assert_eq_m256i(r, e); |
3889 | } |
3890 | |
3891 | #[simd_test(enable = "avx2" )] |
3892 | unsafe fn test_mm256_add_epi32() { |
3893 | let a = _mm256_setr_epi32(-1, 0, 1, 2, 3, 4, 5, 6); |
3894 | let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
3895 | let r = _mm256_add_epi32(a, b); |
3896 | let e = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14); |
3897 | assert_eq_m256i(r, e); |
3898 | } |
3899 | |
3900 | #[simd_test(enable = "avx2" )] |
3901 | unsafe fn test_mm256_add_epi16() { |
3902 | #[rustfmt::skip] |
3903 | let a = _mm256_setr_epi16( |
3904 | 0, 1, 2, 3, 4, 5, 6, 7, |
3905 | 8, 9, 10, 11, 12, 13, 14, 15, |
3906 | ); |
3907 | #[rustfmt::skip] |
3908 | let b = _mm256_setr_epi16( |
3909 | 0, 1, 2, 3, 4, 5, 6, 7, |
3910 | 8, 9, 10, 11, 12, 13, 14, 15, |
3911 | ); |
3912 | let r = _mm256_add_epi16(a, b); |
3913 | #[rustfmt::skip] |
3914 | let e = _mm256_setr_epi16( |
3915 | 0, 2, 4, 6, 8, 10, 12, 14, |
3916 | 16, 18, 20, 22, 24, 26, 28, 30, |
3917 | ); |
3918 | assert_eq_m256i(r, e); |
3919 | } |
3920 | |
3921 | #[simd_test(enable = "avx2" )] |
3922 | unsafe fn test_mm256_add_epi8() { |
3923 | #[rustfmt::skip] |
3924 | let a = _mm256_setr_epi8( |
3925 | 0, 1, 2, 3, 4, 5, 6, 7, |
3926 | 8, 9, 10, 11, 12, 13, 14, 15, |
3927 | 16, 17, 18, 19, 20, 21, 22, 23, |
3928 | 24, 25, 26, 27, 28, 29, 30, 31, |
3929 | ); |
3930 | #[rustfmt::skip] |
3931 | let b = _mm256_setr_epi8( |
3932 | 0, 1, 2, 3, 4, 5, 6, 7, |
3933 | 8, 9, 10, 11, 12, 13, 14, 15, |
3934 | 16, 17, 18, 19, 20, 21, 22, 23, |
3935 | 24, 25, 26, 27, 28, 29, 30, 31, |
3936 | ); |
3937 | let r = _mm256_add_epi8(a, b); |
3938 | #[rustfmt::skip] |
3939 | let e = _mm256_setr_epi8( |
3940 | 0, 2, 4, 6, 8, 10, 12, 14, |
3941 | 16, 18, 20, 22, 24, 26, 28, 30, |
3942 | 32, 34, 36, 38, 40, 42, 44, 46, |
3943 | 48, 50, 52, 54, 56, 58, 60, 62, |
3944 | ); |
3945 | assert_eq_m256i(r, e); |
3946 | } |
3947 | |
3948 | #[simd_test(enable = "avx2" )] |
3949 | unsafe fn test_mm256_adds_epi8() { |
3950 | #[rustfmt::skip] |
3951 | let a = _mm256_setr_epi8( |
3952 | 0, 1, 2, 3, 4, 5, 6, 7, |
3953 | 8, 9, 10, 11, 12, 13, 14, 15, |
3954 | 16, 17, 18, 19, 20, 21, 22, 23, |
3955 | 24, 25, 26, 27, 28, 29, 30, 31, |
3956 | ); |
3957 | #[rustfmt::skip] |
3958 | let b = _mm256_setr_epi8( |
3959 | 32, 33, 34, 35, 36, 37, 38, 39, |
3960 | 40, 41, 42, 43, 44, 45, 46, 47, |
3961 | 48, 49, 50, 51, 52, 53, 54, 55, |
3962 | 56, 57, 58, 59, 60, 61, 62, 63, |
3963 | ); |
3964 | let r = _mm256_adds_epi8(a, b); |
3965 | #[rustfmt::skip] |
3966 | let e = _mm256_setr_epi8( |
3967 | 32, 34, 36, 38, 40, 42, 44, 46, |
3968 | 48, 50, 52, 54, 56, 58, 60, 62, |
3969 | 64, 66, 68, 70, 72, 74, 76, 78, |
3970 | 80, 82, 84, 86, 88, 90, 92, 94, |
3971 | ); |
3972 | assert_eq_m256i(r, e); |
3973 | } |
3974 | |
3975 | #[simd_test(enable = "avx2" )] |
3976 | unsafe fn test_mm256_adds_epi8_saturate_positive() { |
3977 | let a = _mm256_set1_epi8(0x7F); |
3978 | let b = _mm256_set1_epi8(1); |
3979 | let r = _mm256_adds_epi8(a, b); |
3980 | assert_eq_m256i(r, a); |
3981 | } |
3982 | |
3983 | #[simd_test(enable = "avx2" )] |
3984 | unsafe fn test_mm256_adds_epi8_saturate_negative() { |
3985 | let a = _mm256_set1_epi8(-0x80); |
3986 | let b = _mm256_set1_epi8(-1); |
3987 | let r = _mm256_adds_epi8(a, b); |
3988 | assert_eq_m256i(r, a); |
3989 | } |
3990 | |
3991 | #[simd_test(enable = "avx2" )] |
3992 | unsafe fn test_mm256_adds_epi16() { |
3993 | #[rustfmt::skip] |
3994 | let a = _mm256_setr_epi16( |
3995 | 0, 1, 2, 3, 4, 5, 6, 7, |
3996 | 8, 9, 10, 11, 12, 13, 14, 15, |
3997 | ); |
3998 | #[rustfmt::skip] |
3999 | let b = _mm256_setr_epi16( |
4000 | 32, 33, 34, 35, 36, 37, 38, 39, |
4001 | 40, 41, 42, 43, 44, 45, 46, 47, |
4002 | ); |
4003 | let r = _mm256_adds_epi16(a, b); |
4004 | #[rustfmt::skip] |
4005 | let e = _mm256_setr_epi16( |
4006 | 32, 34, 36, 38, 40, 42, 44, 46, |
4007 | 48, 50, 52, 54, 56, 58, 60, 62, |
4008 | ); |
4009 | |
4010 | assert_eq_m256i(r, e); |
4011 | } |
4012 | |
4013 | #[simd_test(enable = "avx2" )] |
4014 | unsafe fn test_mm256_adds_epi16_saturate_positive() { |
4015 | let a = _mm256_set1_epi16(0x7FFF); |
4016 | let b = _mm256_set1_epi16(1); |
4017 | let r = _mm256_adds_epi16(a, b); |
4018 | assert_eq_m256i(r, a); |
4019 | } |
4020 | |
4021 | #[simd_test(enable = "avx2" )] |
4022 | unsafe fn test_mm256_adds_epi16_saturate_negative() { |
4023 | let a = _mm256_set1_epi16(-0x8000); |
4024 | let b = _mm256_set1_epi16(-1); |
4025 | let r = _mm256_adds_epi16(a, b); |
4026 | assert_eq_m256i(r, a); |
4027 | } |
4028 | |
4029 | #[simd_test(enable = "avx2" )] |
4030 | unsafe fn test_mm256_adds_epu8() { |
4031 | #[rustfmt::skip] |
4032 | let a = _mm256_setr_epi8( |
4033 | 0, 1, 2, 3, 4, 5, 6, 7, |
4034 | 8, 9, 10, 11, 12, 13, 14, 15, |
4035 | 16, 17, 18, 19, 20, 21, 22, 23, |
4036 | 24, 25, 26, 27, 28, 29, 30, 31, |
4037 | ); |
4038 | #[rustfmt::skip] |
4039 | let b = _mm256_setr_epi8( |
4040 | 32, 33, 34, 35, 36, 37, 38, 39, |
4041 | 40, 41, 42, 43, 44, 45, 46, 47, |
4042 | 48, 49, 50, 51, 52, 53, 54, 55, |
4043 | 56, 57, 58, 59, 60, 61, 62, 63, |
4044 | ); |
4045 | let r = _mm256_adds_epu8(a, b); |
4046 | #[rustfmt::skip] |
4047 | let e = _mm256_setr_epi8( |
4048 | 32, 34, 36, 38, 40, 42, 44, 46, |
4049 | 48, 50, 52, 54, 56, 58, 60, 62, |
4050 | 64, 66, 68, 70, 72, 74, 76, 78, |
4051 | 80, 82, 84, 86, 88, 90, 92, 94, |
4052 | ); |
4053 | assert_eq_m256i(r, e); |
4054 | } |
4055 | |
4056 | #[simd_test(enable = "avx2" )] |
4057 | unsafe fn test_mm256_adds_epu8_saturate() { |
4058 | let a = _mm256_set1_epi8(!0); |
4059 | let b = _mm256_set1_epi8(1); |
4060 | let r = _mm256_adds_epu8(a, b); |
4061 | assert_eq_m256i(r, a); |
4062 | } |
4063 | |
4064 | #[simd_test(enable = "avx2" )] |
4065 | unsafe fn test_mm256_adds_epu16() { |
4066 | #[rustfmt::skip] |
4067 | let a = _mm256_setr_epi16( |
4068 | 0, 1, 2, 3, 4, 5, 6, 7, |
4069 | 8, 9, 10, 11, 12, 13, 14, 15, |
4070 | ); |
4071 | #[rustfmt::skip] |
4072 | let b = _mm256_setr_epi16( |
4073 | 32, 33, 34, 35, 36, 37, 38, 39, |
4074 | 40, 41, 42, 43, 44, 45, 46, 47, |
4075 | ); |
4076 | let r = _mm256_adds_epu16(a, b); |
4077 | #[rustfmt::skip] |
4078 | let e = _mm256_setr_epi16( |
4079 | 32, 34, 36, 38, 40, 42, 44, 46, |
4080 | 48, 50, 52, 54, 56, 58, 60, 62, |
4081 | ); |
4082 | |
4083 | assert_eq_m256i(r, e); |
4084 | } |
4085 | |
4086 | #[simd_test(enable = "avx2" )] |
4087 | unsafe fn test_mm256_adds_epu16_saturate() { |
4088 | let a = _mm256_set1_epi16(!0); |
4089 | let b = _mm256_set1_epi16(1); |
4090 | let r = _mm256_adds_epu16(a, b); |
4091 | assert_eq_m256i(r, a); |
4092 | } |
4093 | |
4094 | #[simd_test(enable = "avx2" )] |
4095 | unsafe fn test_mm256_and_si256() { |
4096 | let a = _mm256_set1_epi8(5); |
4097 | let b = _mm256_set1_epi8(3); |
4098 | let got = _mm256_and_si256(a, b); |
4099 | assert_eq_m256i(got, _mm256_set1_epi8(1)); |
4100 | } |
4101 | |
4102 | #[simd_test(enable = "avx2" )] |
4103 | unsafe fn test_mm256_andnot_si256() { |
4104 | let a = _mm256_set1_epi8(5); |
4105 | let b = _mm256_set1_epi8(3); |
4106 | let got = _mm256_andnot_si256(a, b); |
4107 | assert_eq_m256i(got, _mm256_set1_epi8(2)); |
4108 | } |
4109 | |
4110 | #[simd_test(enable = "avx2" )] |
4111 | unsafe fn test_mm256_avg_epu8() { |
4112 | let (a, b) = (_mm256_set1_epi8(3), _mm256_set1_epi8(9)); |
4113 | let r = _mm256_avg_epu8(a, b); |
4114 | assert_eq_m256i(r, _mm256_set1_epi8(6)); |
4115 | } |
4116 | |
4117 | #[simd_test(enable = "avx2" )] |
4118 | unsafe fn test_mm256_avg_epu16() { |
4119 | let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9)); |
4120 | let r = _mm256_avg_epu16(a, b); |
4121 | assert_eq_m256i(r, _mm256_set1_epi16(6)); |
4122 | } |
4123 | |
4124 | #[simd_test(enable = "avx2" )] |
4125 | unsafe fn test_mm_blend_epi32() { |
4126 | let (a, b) = (_mm_set1_epi32(3), _mm_set1_epi32(9)); |
4127 | let e = _mm_setr_epi32(9, 3, 3, 3); |
4128 | let r = _mm_blend_epi32::<0x01>(a, b); |
4129 | assert_eq_m128i(r, e); |
4130 | |
4131 | let r = _mm_blend_epi32::<0x0E>(b, a); |
4132 | assert_eq_m128i(r, e); |
4133 | } |
4134 | |
4135 | #[simd_test(enable = "avx2" )] |
4136 | unsafe fn test_mm256_blend_epi32() { |
4137 | let (a, b) = (_mm256_set1_epi32(3), _mm256_set1_epi32(9)); |
4138 | let e = _mm256_setr_epi32(9, 3, 3, 3, 3, 3, 3, 3); |
4139 | let r = _mm256_blend_epi32::<0x01>(a, b); |
4140 | assert_eq_m256i(r, e); |
4141 | |
4142 | let e = _mm256_setr_epi32(3, 9, 3, 3, 3, 3, 3, 9); |
4143 | let r = _mm256_blend_epi32::<0x82>(a, b); |
4144 | assert_eq_m256i(r, e); |
4145 | |
4146 | let e = _mm256_setr_epi32(3, 3, 9, 9, 9, 9, 9, 3); |
4147 | let r = _mm256_blend_epi32::<0x7C>(a, b); |
4148 | assert_eq_m256i(r, e); |
4149 | } |
4150 | |
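    // Note: the 8-bit immediate of `_mm256_blend_epi16` is applied to both
    // 128-bit lanes, so IMM 0x01 picks element 0 *and* element 8 from `b`.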
4151 | #[simd_test(enable = "avx2" )] |
4152 | unsafe fn test_mm256_blend_epi16() { |
4153 | let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9)); |
4154 | let e = _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3); |
4155 | let r = _mm256_blend_epi16::<0x01>(a, b); |
4156 | assert_eq_m256i(r, e); |
4157 | |
4158 | let r = _mm256_blend_epi16::<0xFE>(b, a); |
4159 | assert_eq_m256i(r, e); |
4160 | } |
4161 | |
4162 | #[simd_test(enable = "avx2" )] |
4163 | unsafe fn test_mm256_blendv_epi8() { |
4164 | let (a, b) = (_mm256_set1_epi8(4), _mm256_set1_epi8(2)); |
4165 | let mask = _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), -1); |
4166 | let e = _mm256_insert_epi8::<2>(_mm256_set1_epi8(4), 2); |
4167 | let r = _mm256_blendv_epi8(a, b, mask); |
4168 | assert_eq_m256i(r, e); |
4169 | } |
4170 | |
4171 | #[simd_test(enable = "avx2" )] |
4172 | unsafe fn test_mm_broadcastb_epi8() { |
4173 | let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a); |
4174 | let res = _mm_broadcastb_epi8(a); |
4175 | assert_eq_m128i(res, _mm_set1_epi8(0x2a)); |
4176 | } |
4177 | |
4178 | #[simd_test(enable = "avx2" )] |
4179 | unsafe fn test_mm256_broadcastb_epi8() { |
4180 | let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a); |
4181 | let res = _mm256_broadcastb_epi8(a); |
4182 | assert_eq_m256i(res, _mm256_set1_epi8(0x2a)); |
4183 | } |
4184 | |
4185 | #[simd_test(enable = "avx2" )] |
4186 | unsafe fn test_mm_broadcastd_epi32() { |
4187 | let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0); |
4188 | let res = _mm_broadcastd_epi32(a); |
4189 | assert_eq_m128i(res, _mm_set1_epi32(0x2a)); |
4190 | } |
4191 | |
4192 | #[simd_test(enable = "avx2" )] |
4193 | unsafe fn test_mm256_broadcastd_epi32() { |
4194 | let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0); |
4195 | let res = _mm256_broadcastd_epi32(a); |
4196 | assert_eq_m256i(res, _mm256_set1_epi32(0x2a)); |
4197 | } |
4198 | |
4199 | #[simd_test(enable = "avx2" )] |
4200 | unsafe fn test_mm_broadcastq_epi64() { |
4201 | let a = _mm_setr_epi64x(0x1ffffffff, 0); |
4202 | let res = _mm_broadcastq_epi64(a); |
4203 | assert_eq_m128i(res, _mm_set1_epi64x(0x1ffffffff)); |
4204 | } |
4205 | |
4206 | #[simd_test(enable = "avx2" )] |
4207 | unsafe fn test_mm256_broadcastq_epi64() { |
4208 | let a = _mm_setr_epi64x(0x1ffffffff, 0); |
4209 | let res = _mm256_broadcastq_epi64(a); |
4210 | assert_eq_m256i(res, _mm256_set1_epi64x(0x1ffffffff)); |
4211 | } |
4212 | |
4213 | #[simd_test(enable = "avx2" )] |
4214 | unsafe fn test_mm_broadcastsd_pd() { |
4215 | let a = _mm_setr_pd(6.88, 3.44); |
4216 | let res = _mm_broadcastsd_pd(a); |
4217 | assert_eq_m128d(res, _mm_set1_pd(6.88)); |
4218 | } |
4219 | |
4220 | #[simd_test(enable = "avx2" )] |
4221 | unsafe fn test_mm256_broadcastsd_pd() { |
4222 | let a = _mm_setr_pd(6.88, 3.44); |
4223 | let res = _mm256_broadcastsd_pd(a); |
4224 | assert_eq_m256d(res, _mm256_set1_pd(6.88f64)); |
4225 | } |
4226 | |
4227 | #[simd_test(enable = "avx2" )] |
4228 | unsafe fn test_mm256_broadcastsi128_si256() { |
4229 | let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210); |
4230 | let res = _mm256_broadcastsi128_si256(a); |
4231 | let retval = _mm256_setr_epi64x( |
4232 | 0x0987654321012334, |
4233 | 0x5678909876543210, |
4234 | 0x0987654321012334, |
4235 | 0x5678909876543210, |
4236 | ); |
4237 | assert_eq_m256i(res, retval); |
4238 | } |
4239 | |
4240 | #[simd_test(enable = "avx2" )] |
4241 | unsafe fn test_mm_broadcastss_ps() { |
4242 | let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0); |
4243 | let res = _mm_broadcastss_ps(a); |
4244 | assert_eq_m128(res, _mm_set1_ps(6.88)); |
4245 | } |
4246 | |
4247 | #[simd_test(enable = "avx2" )] |
4248 | unsafe fn test_mm256_broadcastss_ps() { |
4249 | let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0); |
4250 | let res = _mm256_broadcastss_ps(a); |
4251 | assert_eq_m256(res, _mm256_set1_ps(6.88)); |
4252 | } |
4253 | |
4254 | #[simd_test(enable = "avx2" )] |
4255 | unsafe fn test_mm_broadcastw_epi16() { |
4256 | let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b); |
4257 | let res = _mm_broadcastw_epi16(a); |
4258 | assert_eq_m128i(res, _mm_set1_epi16(0x22b)); |
4259 | } |
4260 | |
4261 | #[simd_test(enable = "avx2" )] |
4262 | unsafe fn test_mm256_broadcastw_epi16() { |
4263 | let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b); |
4264 | let res = _mm256_broadcastw_epi16(a); |
4265 | assert_eq_m256i(res, _mm256_set1_epi16(0x22b)); |
4266 | } |
4267 | |
4268 | #[simd_test(enable = "avx2" )] |
4269 | unsafe fn test_mm256_cmpeq_epi8() { |
4270 | #[rustfmt::skip] |
4271 | let a = _mm256_setr_epi8( |
4272 | 0, 1, 2, 3, 4, 5, 6, 7, |
4273 | 8, 9, 10, 11, 12, 13, 14, 15, |
4274 | 16, 17, 18, 19, 20, 21, 22, 23, |
4275 | 24, 25, 26, 27, 28, 29, 30, 31, |
4276 | ); |
4277 | #[rustfmt::skip] |
4278 | let b = _mm256_setr_epi8( |
4279 | 31, 30, 2, 28, 27, 26, 25, 24, |
4280 | 23, 22, 21, 20, 19, 18, 17, 16, |
4281 | 15, 14, 13, 12, 11, 10, 9, 8, |
4282 | 7, 6, 5, 4, 3, 2, 1, 0, |
4283 | ); |
4284 | let r = _mm256_cmpeq_epi8(a, b); |
4285 | assert_eq_m256i(r, _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), !0)); |
4286 | } |
4287 | |
4288 | #[simd_test(enable = "avx2" )] |
4289 | unsafe fn test_mm256_cmpeq_epi16() { |
4290 | #[rustfmt::skip] |
4291 | let a = _mm256_setr_epi16( |
4292 | 0, 1, 2, 3, 4, 5, 6, 7, |
4293 | 8, 9, 10, 11, 12, 13, 14, 15, |
4294 | ); |
4295 | #[rustfmt::skip] |
4296 | let b = _mm256_setr_epi16( |
4297 | 15, 14, 2, 12, 11, 10, 9, 8, |
4298 | 7, 6, 5, 4, 3, 2, 1, 0, |
4299 | ); |
4300 | let r = _mm256_cmpeq_epi16(a, b); |
4301 | assert_eq_m256i(r, _mm256_insert_epi16::<2>(_mm256_set1_epi16(0), !0)); |
4302 | } |
4303 | |
4304 | #[simd_test(enable = "avx2" )] |
4305 | unsafe fn test_mm256_cmpeq_epi32() { |
4306 | let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); |
4307 | let b = _mm256_setr_epi32(7, 6, 2, 4, 3, 2, 1, 0); |
4308 | let r = _mm256_cmpeq_epi32(a, b); |
4309 | let e = _mm256_set1_epi32(0); |
4310 | let e = _mm256_insert_epi32::<2>(e, !0); |
4311 | assert_eq_m256i(r, e); |
4312 | } |
4313 | |
4314 | #[simd_test(enable = "avx2" )] |
4315 | unsafe fn test_mm256_cmpeq_epi64() { |
4316 | let a = _mm256_setr_epi64x(0, 1, 2, 3); |
4317 | let b = _mm256_setr_epi64x(3, 2, 2, 0); |
4318 | let r = _mm256_cmpeq_epi64(a, b); |
4319 | assert_eq_m256i(r, _mm256_insert_epi64::<2>(_mm256_set1_epi64x(0), !0)); |
4320 | } |
4321 | |
4322 | #[simd_test(enable = "avx2" )] |
4323 | unsafe fn test_mm256_cmpgt_epi8() { |
4324 | let a = _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), 5); |
4325 | let b = _mm256_set1_epi8(0); |
4326 | let r = _mm256_cmpgt_epi8(a, b); |
4327 | assert_eq_m256i(r, _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), !0)); |
4328 | } |
4329 | |
4330 | #[simd_test(enable = "avx2" )] |
4331 | unsafe fn test_mm256_cmpgt_epi16() { |
4332 | let a = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 5); |
4333 | let b = _mm256_set1_epi16(0); |
4334 | let r = _mm256_cmpgt_epi16(a, b); |
4335 | assert_eq_m256i(r, _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), !0)); |
4336 | } |
4337 | |
4338 | #[simd_test(enable = "avx2" )] |
4339 | unsafe fn test_mm256_cmpgt_epi32() { |
4340 | let a = _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), 5); |
4341 | let b = _mm256_set1_epi32(0); |
4342 | let r = _mm256_cmpgt_epi32(a, b); |
4343 | assert_eq_m256i(r, _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), !0)); |
4344 | } |
4345 | |
4346 | #[simd_test(enable = "avx2" )] |
4347 | unsafe fn test_mm256_cmpgt_epi64() { |
4348 | let a = _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), 5); |
4349 | let b = _mm256_set1_epi64x(0); |
4350 | let r = _mm256_cmpgt_epi64(a, b); |
4351 | assert_eq_m256i(r, _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), !0)); |
4352 | } |
4353 | |
4354 | #[simd_test(enable = "avx2" )] |
4355 | unsafe fn test_mm256_cvtepi8_epi16() { |
4356 | #[rustfmt::skip] |
4357 | let a = _mm_setr_epi8( |
4358 | 0, 0, -1, 1, -2, 2, -3, 3, |
4359 | -4, 4, -5, 5, -6, 6, -7, 7, |
4360 | ); |
4361 | #[rustfmt::skip] |
4362 | let r = _mm256_setr_epi16( |
4363 | 0, 0, -1, 1, -2, 2, -3, 3, |
4364 | -4, 4, -5, 5, -6, 6, -7, 7, |
4365 | ); |
4366 | assert_eq_m256i(r, _mm256_cvtepi8_epi16(a)); |
4367 | } |
4368 | |
4369 | #[simd_test(enable = "avx2" )] |
4370 | unsafe fn test_mm256_cvtepi8_epi32() { |
4371 | #[rustfmt::skip] |
4372 | let a = _mm_setr_epi8( |
4373 | 0, 0, -1, 1, -2, 2, -3, 3, |
4374 | -4, 4, -5, 5, -6, 6, -7, 7, |
4375 | ); |
4376 | let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3); |
4377 | assert_eq_m256i(r, _mm256_cvtepi8_epi32(a)); |
4378 | } |
4379 | |
4380 | #[simd_test(enable = "avx2" )] |
4381 | unsafe fn test_mm256_cvtepi8_epi64() { |
4382 | #[rustfmt::skip] |
4383 | let a = _mm_setr_epi8( |
4384 | 0, 0, -1, 1, -2, 2, -3, 3, |
4385 | -4, 4, -5, 5, -6, 6, -7, 7, |
4386 | ); |
4387 | let r = _mm256_setr_epi64x(0, 0, -1, 1); |
4388 | assert_eq_m256i(r, _mm256_cvtepi8_epi64(a)); |
4389 | } |
4390 | |
4391 | #[simd_test(enable = "avx2" )] |
4392 | unsafe fn test_mm256_cvtepi16_epi32() { |
4393 | let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3); |
4394 | let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3); |
4395 | assert_eq_m256i(r, _mm256_cvtepi16_epi32(a)); |
4396 | } |
4397 | |
4398 | #[simd_test(enable = "avx2" )] |
4399 | unsafe fn test_mm256_cvtepi16_epi64() { |
4400 | let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3); |
4401 | let r = _mm256_setr_epi64x(0, 0, -1, 1); |
4402 | assert_eq_m256i(r, _mm256_cvtepi16_epi64(a)); |
4403 | } |
4404 | |
4405 | #[simd_test(enable = "avx2" )] |
4406 | unsafe fn test_mm256_cvtepi32_epi64() { |
4407 | let a = _mm_setr_epi32(0, 0, -1, 1); |
4408 | let r = _mm256_setr_epi64x(0, 0, -1, 1); |
4409 | assert_eq_m256i(r, _mm256_cvtepi32_epi64(a)); |
4410 | } |
4411 | |
4412 | #[simd_test(enable = "avx2" )] |
4413 | unsafe fn test_mm256_cvtepu16_epi32() { |
4414 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
4415 | let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); |
4416 | assert_eq_m256i(r, _mm256_cvtepu16_epi32(a)); |
4417 | } |
4418 | |
4419 | #[simd_test(enable = "avx2" )] |
4420 | unsafe fn test_mm256_cvtepu16_epi64() { |
4421 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
4422 | let r = _mm256_setr_epi64x(0, 1, 2, 3); |
4423 | assert_eq_m256i(r, _mm256_cvtepu16_epi64(a)); |
4424 | } |
4425 | |
4426 | #[simd_test(enable = "avx2" )] |
4427 | unsafe fn test_mm256_cvtepu32_epi64() { |
4428 | let a = _mm_setr_epi32(0, 1, 2, 3); |
4429 | let r = _mm256_setr_epi64x(0, 1, 2, 3); |
4430 | assert_eq_m256i(r, _mm256_cvtepu32_epi64(a)); |
4431 | } |
4432 | |
4433 | #[simd_test(enable = "avx2" )] |
4434 | unsafe fn test_mm256_cvtepu8_epi16() { |
4435 | #[rustfmt::skip] |
4436 | let a = _mm_setr_epi8( |
4437 | 0, 1, 2, 3, 4, 5, 6, 7, |
4438 | 8, 9, 10, 11, 12, 13, 14, 15, |
4439 | ); |
4440 | #[rustfmt::skip] |
4441 | let r = _mm256_setr_epi16( |
4442 | 0, 1, 2, 3, 4, 5, 6, 7, |
4443 | 8, 9, 10, 11, 12, 13, 14, 15, |
4444 | ); |
4445 | assert_eq_m256i(r, _mm256_cvtepu8_epi16(a)); |
4446 | } |
4447 | |
4448 | #[simd_test(enable = "avx2" )] |
4449 | unsafe fn test_mm256_cvtepu8_epi32() { |
4450 | #[rustfmt::skip] |
4451 | let a = _mm_setr_epi8( |
4452 | 0, 1, 2, 3, 4, 5, 6, 7, |
4453 | 8, 9, 10, 11, 12, 13, 14, 15, |
4454 | ); |
4455 | let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); |
4456 | assert_eq_m256i(r, _mm256_cvtepu8_epi32(a)); |
4457 | } |
4458 | |
4459 | #[simd_test(enable = "avx2" )] |
4460 | unsafe fn test_mm256_cvtepu8_epi64() { |
4461 | #[rustfmt::skip] |
4462 | let a = _mm_setr_epi8( |
4463 | 0, 1, 2, 3, 4, 5, 6, 7, |
4464 | 8, 9, 10, 11, 12, 13, 14, 15, |
4465 | ); |
4466 | let r = _mm256_setr_epi64x(0, 1, 2, 3); |
4467 | assert_eq_m256i(r, _mm256_cvtepu8_epi64(a)); |
4468 | } |
4469 | |
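    // Illustrative coverage (test names chosen here, not part of the original
    // suite) for the two scalar extractors defined earlier in this file; both
    // simply read element 0 of their input vector.
    #[simd_test(enable = "avx2" )]
    unsafe fn test_mm256_cvtsd_f64_lane0() {
        let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
        let r = _mm256_cvtsd_f64(a);
        assert_eq!(r, 1.0);
    }

    #[simd_test(enable = "avx2" )]
    unsafe fn test_mm256_cvtsi256_si32_lane0() {
        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_cvtsi256_si32(a);
        assert_eq!(r, 1);
    }
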
4470 | #[simd_test(enable = "avx2" )] |
4471 | unsafe fn test_mm256_extracti128_si256() { |
4472 | let a = _mm256_setr_epi64x(1, 2, 3, 4); |
4473 | let r = _mm256_extracti128_si256::<1>(a); |
4474 | let e = _mm_setr_epi64x(3, 4); |
4475 | assert_eq_m128i(r, e); |
4476 | } |
4477 | |
4478 | #[simd_test(enable = "avx2" )] |
4479 | unsafe fn test_mm256_hadd_epi16() { |
4480 | let a = _mm256_set1_epi16(2); |
4481 | let b = _mm256_set1_epi16(4); |
4482 | let r = _mm256_hadd_epi16(a, b); |
4483 | let e = _mm256_setr_epi16(4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8); |
4484 | assert_eq_m256i(r, e); |
4485 | } |
4486 | |
4487 | #[simd_test(enable = "avx2" )] |
4488 | unsafe fn test_mm256_hadd_epi32() { |
4489 | let a = _mm256_set1_epi32(2); |
4490 | let b = _mm256_set1_epi32(4); |
4491 | let r = _mm256_hadd_epi32(a, b); |
4492 | let e = _mm256_setr_epi32(4, 4, 8, 8, 4, 4, 8, 8); |
4493 | assert_eq_m256i(r, e); |
4494 | } |
4495 | |
4496 | #[simd_test(enable = "avx2" )] |
4497 | unsafe fn test_mm256_hadds_epi16() { |
4498 | let a = _mm256_set1_epi16(2); |
4499 | let a = _mm256_insert_epi16::<0>(a, 0x7fff); |
4500 | let a = _mm256_insert_epi16::<1>(a, 1); |
4501 | let b = _mm256_set1_epi16(4); |
4502 | let r = _mm256_hadds_epi16(a, b); |
4503 | #[rustfmt::skip] |
4504 | let e = _mm256_setr_epi16( |
4505 | 0x7FFF, 4, 4, 4, 8, 8, 8, 8, |
4506 | 4, 4, 4, 4, 8, 8, 8, 8, |
4507 | ); |
4508 | assert_eq_m256i(r, e); |
4509 | } |
4510 | |
4511 | #[simd_test(enable = "avx2" )] |
4512 | unsafe fn test_mm256_hsub_epi16() { |
4513 | let a = _mm256_set1_epi16(2); |
4514 | let b = _mm256_set1_epi16(4); |
4515 | let r = _mm256_hsub_epi16(a, b); |
4516 | let e = _mm256_set1_epi16(0); |
4517 | assert_eq_m256i(r, e); |
4518 | } |
4519 | |
4520 | #[simd_test(enable = "avx2" )] |
4521 | unsafe fn test_mm256_hsub_epi32() { |
4522 | let a = _mm256_set1_epi32(2); |
4523 | let b = _mm256_set1_epi32(4); |
4524 | let r = _mm256_hsub_epi32(a, b); |
4525 | let e = _mm256_set1_epi32(0); |
4526 | assert_eq_m256i(r, e); |
4527 | } |
4528 | |
4529 | #[simd_test(enable = "avx2" )] |
4530 | unsafe fn test_mm256_hsubs_epi16() { |
4531 | let a = _mm256_set1_epi16(2); |
4532 | let a = _mm256_insert_epi16::<0>(a, 0x7fff); |
4533 | let a = _mm256_insert_epi16::<1>(a, -1); |
4534 | let b = _mm256_set1_epi16(4); |
4535 | let r = _mm256_hsubs_epi16(a, b); |
4536 | let e = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 0x7FFF); |
4537 | assert_eq_m256i(r, e); |
4538 | } |
4539 | |
4540 | #[simd_test(enable = "avx2" )] |
4541 | unsafe fn test_mm256_madd_epi16() { |
4542 | let a = _mm256_set1_epi16(2); |
4543 | let b = _mm256_set1_epi16(4); |
4544 | let r = _mm256_madd_epi16(a, b); |
4545 | let e = _mm256_set1_epi32(16); |
4546 | assert_eq_m256i(r, e); |
4547 | } |
4548 | |
4549 | #[simd_test(enable = "avx2" )] |
4550 | unsafe fn test_mm256_inserti128_si256() { |
4551 | let a = _mm256_setr_epi64x(1, 2, 3, 4); |
4552 | let b = _mm_setr_epi64x(7, 8); |
4553 | let r = _mm256_inserti128_si256::<1>(a, b); |
4554 | let e = _mm256_setr_epi64x(1, 2, 7, 8); |
4555 | assert_eq_m256i(r, e); |
4556 | } |
4557 | |
4558 | #[simd_test(enable = "avx2" )] |
4559 | unsafe fn test_mm256_maddubs_epi16() { |
4560 | let a = _mm256_set1_epi8(2); |
4561 | let b = _mm256_set1_epi8(4); |
4562 | let r = _mm256_maddubs_epi16(a, b); |
4563 | let e = _mm256_set1_epi16(16); |
4564 | assert_eq_m256i(r, e); |
4565 | } |
4566 | |
4567 | #[simd_test(enable = "avx2" )] |
4568 | unsafe fn test_mm_maskload_epi32() { |
4569 | let nums = [1, 2, 3, 4]; |
4570 | let a = &nums as *const i32; |
4571 | let mask = _mm_setr_epi32(-1, 0, 0, -1); |
4572 | let r = _mm_maskload_epi32(a, mask); |
4573 | let e = _mm_setr_epi32(1, 0, 0, 4); |
4574 | assert_eq_m128i(r, e); |
4575 | } |
4576 | |
4577 | #[simd_test(enable = "avx2" )] |
4578 | unsafe fn test_mm256_maskload_epi32() { |
4579 | let nums = [1, 2, 3, 4, 5, 6, 7, 8]; |
4580 | let a = &nums as *const i32; |
4581 | let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0); |
4582 | let r = _mm256_maskload_epi32(a, mask); |
4583 | let e = _mm256_setr_epi32(1, 0, 0, 4, 0, 6, 7, 0); |
4584 | assert_eq_m256i(r, e); |
4585 | } |
4586 | |
4587 | #[simd_test(enable = "avx2" )] |
4588 | unsafe fn test_mm_maskload_epi64() { |
4589 | let nums = [1_i64, 2_i64]; |
4590 | let a = &nums as *const i64; |
4591 | let mask = _mm_setr_epi64x(0, -1); |
4592 | let r = _mm_maskload_epi64(a, mask); |
4593 | let e = _mm_setr_epi64x(0, 2); |
4594 | assert_eq_m128i(r, e); |
4595 | } |
4596 | |
4597 | #[simd_test(enable = "avx2" )] |
4598 | unsafe fn test_mm256_maskload_epi64() { |
4599 | let nums = [1_i64, 2_i64, 3_i64, 4_i64]; |
4600 | let a = &nums as *const i64; |
4601 | let mask = _mm256_setr_epi64x(0, -1, -1, 0); |
4602 | let r = _mm256_maskload_epi64(a, mask); |
4603 | let e = _mm256_setr_epi64x(0, 2, 3, 0); |
4604 | assert_eq_m256i(r, e); |
4605 | } |
4606 | |
4607 | #[simd_test(enable = "avx2" )] |
4608 | unsafe fn test_mm_maskstore_epi32() { |
4609 | let a = _mm_setr_epi32(1, 2, 3, 4); |
4610 | let mut arr = [-1, -1, -1, -1]; |
4611 | let mask = _mm_setr_epi32(-1, 0, 0, -1); |
4612 | _mm_maskstore_epi32(arr.as_mut_ptr(), mask, a); |
4613 | let e = [1, -1, -1, 4]; |
4614 | assert_eq!(arr, e); |
4615 | } |
4616 | |
4617 | #[simd_test(enable = "avx2" )] |
4618 | unsafe fn test_mm256_maskstore_epi32() { |
4619 | let a = _mm256_setr_epi32(1, 0x6d726f, 3, 42, 0x777161, 6, 7, 8); |
4620 | let mut arr = [-1, -1, -1, 0x776173, -1, 0x68657265, -1, -1]; |
4621 | let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0); |
4622 | _mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a); |
4623 | let e = [1, -1, -1, 42, -1, 6, 7, -1]; |
4624 | assert_eq!(arr, e); |
4625 | } |
4626 | |
4627 | #[simd_test(enable = "avx2" )] |
4628 | unsafe fn test_mm_maskstore_epi64() { |
4629 | let a = _mm_setr_epi64x(1_i64, 2_i64); |
4630 | let mut arr = [-1_i64, -1_i64]; |
4631 | let mask = _mm_setr_epi64x(0, -1); |
4632 | _mm_maskstore_epi64(arr.as_mut_ptr(), mask, a); |
4633 | let e = [-1, 2]; |
4634 | assert_eq!(arr, e); |
4635 | } |
4636 | |
4637 | #[simd_test(enable = "avx2" )] |
4638 | unsafe fn test_mm256_maskstore_epi64() { |
4639 | let a = _mm256_setr_epi64x(1_i64, 2_i64, 3_i64, 4_i64); |
4640 | let mut arr = [-1_i64, -1_i64, -1_i64, -1_i64]; |
4641 | let mask = _mm256_setr_epi64x(0, -1, -1, 0); |
4642 | _mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a); |
4643 | let e = [-1, 2, 3, -1]; |
4644 | assert_eq!(arr, e); |
4645 | } |
4646 | |
4647 | #[simd_test(enable = "avx2" )] |
4648 | unsafe fn test_mm256_max_epi16() { |
4649 | let a = _mm256_set1_epi16(2); |
4650 | let b = _mm256_set1_epi16(4); |
4651 | let r = _mm256_max_epi16(a, b); |
4652 | assert_eq_m256i(r, b); |
4653 | } |
4654 | |
4655 | #[simd_test(enable = "avx2" )] |
4656 | unsafe fn test_mm256_max_epi32() { |
4657 | let a = _mm256_set1_epi32(2); |
4658 | let b = _mm256_set1_epi32(4); |
4659 | let r = _mm256_max_epi32(a, b); |
4660 | assert_eq_m256i(r, b); |
4661 | } |
4662 | |
4663 | #[simd_test(enable = "avx2" )] |
4664 | unsafe fn test_mm256_max_epi8() { |
4665 | let a = _mm256_set1_epi8(2); |
4666 | let b = _mm256_set1_epi8(4); |
4667 | let r = _mm256_max_epi8(a, b); |
4668 | assert_eq_m256i(r, b); |
4669 | } |
4670 | |
4671 | #[simd_test(enable = "avx2" )] |
4672 | unsafe fn test_mm256_max_epu16() { |
4673 | let a = _mm256_set1_epi16(2); |
4674 | let b = _mm256_set1_epi16(4); |
4675 | let r = _mm256_max_epu16(a, b); |
4676 | assert_eq_m256i(r, b); |
4677 | } |
4678 | |
4679 | #[simd_test(enable = "avx2" )] |
4680 | unsafe fn test_mm256_max_epu32() { |
4681 | let a = _mm256_set1_epi32(2); |
4682 | let b = _mm256_set1_epi32(4); |
4683 | let r = _mm256_max_epu32(a, b); |
4684 | assert_eq_m256i(r, b); |
4685 | } |
4686 | |
4687 | #[simd_test(enable = "avx2" )] |
4688 | unsafe fn test_mm256_max_epu8() { |
4689 | let a = _mm256_set1_epi8(2); |
4690 | let b = _mm256_set1_epi8(4); |
4691 | let r = _mm256_max_epu8(a, b); |
4692 | assert_eq_m256i(r, b); |
4693 | } |
4694 | |
4695 | #[simd_test(enable = "avx2" )] |
4696 | unsafe fn test_mm256_min_epi16() { |
4697 | let a = _mm256_set1_epi16(2); |
4698 | let b = _mm256_set1_epi16(4); |
4699 | let r = _mm256_min_epi16(a, b); |
4700 | assert_eq_m256i(r, a); |
4701 | } |
4702 | |
4703 | #[simd_test(enable = "avx2" )] |
4704 | unsafe fn test_mm256_min_epi32() { |
4705 | let a = _mm256_set1_epi32(2); |
4706 | let b = _mm256_set1_epi32(4); |
4707 | let r = _mm256_min_epi32(a, b); |
4708 | assert_eq_m256i(r, a); |
4709 | } |
4710 | |
4711 | #[simd_test(enable = "avx2" )] |
4712 | unsafe fn test_mm256_min_epi8() { |
4713 | let a = _mm256_set1_epi8(2); |
4714 | let b = _mm256_set1_epi8(4); |
4715 | let r = _mm256_min_epi8(a, b); |
4716 | assert_eq_m256i(r, a); |
4717 | } |
4718 | |
4719 | #[simd_test(enable = "avx2" )] |
4720 | unsafe fn test_mm256_min_epu16() { |
4721 | let a = _mm256_set1_epi16(2); |
4722 | let b = _mm256_set1_epi16(4); |
4723 | let r = _mm256_min_epu16(a, b); |
4724 | assert_eq_m256i(r, a); |
4725 | } |
4726 | |
4727 | #[simd_test(enable = "avx2" )] |
4728 | unsafe fn test_mm256_min_epu32() { |
4729 | let a = _mm256_set1_epi32(2); |
4730 | let b = _mm256_set1_epi32(4); |
4731 | let r = _mm256_min_epu32(a, b); |
4732 | assert_eq_m256i(r, a); |
4733 | } |
4734 | |
4735 | #[simd_test(enable = "avx2" )] |
4736 | unsafe fn test_mm256_min_epu8() { |
4737 | let a = _mm256_set1_epi8(2); |
4738 | let b = _mm256_set1_epi8(4); |
4739 | let r = _mm256_min_epu8(a, b); |
4740 | assert_eq_m256i(r, a); |
4741 | } |
4742 | |
4743 | #[simd_test(enable = "avx2" )] |
4744 | unsafe fn test_mm256_movemask_epi8() { |
4745 | let a = _mm256_set1_epi8(-1); |
4746 | let r = _mm256_movemask_epi8(a); |
4747 | let e = -1; |
4748 | assert_eq!(r, e); |
4749 | } |
4750 | |
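    // Illustrative extra check (test name chosen here, not part of the
    // original suite): movemask packs the sign bit of each of the 32 bytes
    // into result bits 0..=31, so a single negative byte at index 2 should
    // set only bit 2.
    #[simd_test(enable = "avx2" )]
    unsafe fn test_mm256_movemask_epi8_single_bit() {
        let a = _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), -1);
        let r = _mm256_movemask_epi8(a);
        assert_eq!(r, 1 << 2);
    }
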
4751 | #[simd_test(enable = "avx2" )] |
4752 | unsafe fn test_mm256_mpsadbw_epu8() { |
4753 | let a = _mm256_set1_epi8(2); |
4754 | let b = _mm256_set1_epi8(4); |
4755 | let r = _mm256_mpsadbw_epu8::<0>(a, b); |
4756 | let e = _mm256_set1_epi16(8); |
4757 | assert_eq_m256i(r, e); |
4758 | } |
4759 | |
4760 | #[simd_test(enable = "avx2" )] |
4761 | unsafe fn test_mm256_mul_epi32() { |
4762 | let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2); |
4763 | let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
4764 | let r = _mm256_mul_epi32(a, b); |
4765 | let e = _mm256_setr_epi64x(0, 0, 10, 14); |
4766 | assert_eq_m256i(r, e); |
4767 | } |
4768 | |
4769 | #[simd_test(enable = "avx2" )] |
4770 | unsafe fn test_mm256_mul_epu32() { |
4771 | let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2); |
4772 | let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
4773 | let r = _mm256_mul_epu32(a, b); |
4774 | let e = _mm256_setr_epi64x(0, 0, 10, 14); |
4775 | assert_eq_m256i(r, e); |
4776 | } |
4777 | |
4778 | #[simd_test(enable = "avx2" )] |
4779 | unsafe fn test_mm256_mulhi_epi16() { |
4780 | let a = _mm256_set1_epi16(6535); |
4781 | let b = _mm256_set1_epi16(6535); |
4782 | let r = _mm256_mulhi_epi16(a, b); |
4783 | let e = _mm256_set1_epi16(651); |
4784 | assert_eq_m256i(r, e); |
4785 | } |
4786 | |
4787 | #[simd_test(enable = "avx2" )] |
4788 | unsafe fn test_mm256_mulhi_epu16() { |
4789 | let a = _mm256_set1_epi16(6535); |
4790 | let b = _mm256_set1_epi16(6535); |
4791 | let r = _mm256_mulhi_epu16(a, b); |
4792 | let e = _mm256_set1_epi16(651); |
4793 | assert_eq_m256i(r, e); |
4794 | } |
4795 | |
4796 | #[simd_test(enable = "avx2" )] |
4797 | unsafe fn test_mm256_mullo_epi16() { |
4798 | let a = _mm256_set1_epi16(2); |
4799 | let b = _mm256_set1_epi16(4); |
4800 | let r = _mm256_mullo_epi16(a, b); |
4801 | let e = _mm256_set1_epi16(8); |
4802 | assert_eq_m256i(r, e); |
4803 | } |
4804 | |
4805 | #[simd_test(enable = "avx2" )] |
4806 | unsafe fn test_mm256_mullo_epi32() { |
4807 | let a = _mm256_set1_epi32(2); |
4808 | let b = _mm256_set1_epi32(4); |
4809 | let r = _mm256_mullo_epi32(a, b); |
4810 | let e = _mm256_set1_epi32(8); |
4811 | assert_eq_m256i(r, e); |
4812 | } |
4813 | |
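    // `_mm256_mulhrs_epi16` computes (((a * b) >> 14) + 1) >> 1 per element,
    // i.e. a Q15 fixed-point multiply with rounding; the test below checks
    // 0.5 * 0.25 = 0.125 in Q15.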
4814 | #[simd_test(enable = "avx2" )] |
4815 | unsafe fn test_mm256_mulhrs_epi16() { |
        let a = _mm256_set1_epi16(16384); // 0.5 in Q15
        let b = _mm256_set1_epi16(8192); // 0.25 in Q15
        let r = _mm256_mulhrs_epi16(a, b);
        let e = _mm256_set1_epi16(4096); // (((16384 * 8192) >> 14) + 1) >> 1
4820 | assert_eq_m256i(r, e); |
4821 | } |
4822 | |
4823 | #[simd_test(enable = "avx2" )] |
4824 | unsafe fn test_mm256_or_si256() { |
4825 | let a = _mm256_set1_epi8(-1); |
4826 | let b = _mm256_set1_epi8(0); |
4827 | let r = _mm256_or_si256(a, b); |
4828 | assert_eq_m256i(r, a); |
4829 | } |
4830 | |
4831 | #[simd_test(enable = "avx2" )] |
4832 | unsafe fn test_mm256_packs_epi16() { |
4833 | let a = _mm256_set1_epi16(2); |
4834 | let b = _mm256_set1_epi16(4); |
4835 | let r = _mm256_packs_epi16(a, b); |
4836 | #[rustfmt::skip] |
4837 | let e = _mm256_setr_epi8( |
4838 | 2, 2, 2, 2, 2, 2, 2, 2, |
4839 | 4, 4, 4, 4, 4, 4, 4, 4, |
4840 | 2, 2, 2, 2, 2, 2, 2, 2, |
4841 | 4, 4, 4, 4, 4, 4, 4, 4, |
4842 | ); |
4843 | |
4844 | assert_eq_m256i(r, e); |
4845 | } |
4846 | |
4847 | #[simd_test(enable = "avx2" )] |
4848 | unsafe fn test_mm256_packs_epi32() { |
4849 | let a = _mm256_set1_epi32(2); |
4850 | let b = _mm256_set1_epi32(4); |
4851 | let r = _mm256_packs_epi32(a, b); |
4852 | let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4); |
4853 | |
4854 | assert_eq_m256i(r, e); |
4855 | } |
4856 | |
4857 | #[simd_test(enable = "avx2" )] |
4858 | unsafe fn test_mm256_packus_epi16() { |
4859 | let a = _mm256_set1_epi16(2); |
4860 | let b = _mm256_set1_epi16(4); |
4861 | let r = _mm256_packus_epi16(a, b); |
4862 | #[rustfmt::skip] |
4863 | let e = _mm256_setr_epi8( |
4864 | 2, 2, 2, 2, 2, 2, 2, 2, |
4865 | 4, 4, 4, 4, 4, 4, 4, 4, |
4866 | 2, 2, 2, 2, 2, 2, 2, 2, |
4867 | 4, 4, 4, 4, 4, 4, 4, 4, |
4868 | ); |
4869 | |
4870 | assert_eq_m256i(r, e); |
4871 | } |
4872 | |
4873 | #[simd_test(enable = "avx2" )] |
4874 | unsafe fn test_mm256_packus_epi32() { |
4875 | let a = _mm256_set1_epi32(2); |
4876 | let b = _mm256_set1_epi32(4); |
4877 | let r = _mm256_packus_epi32(a, b); |
4878 | let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4); |
4879 | |
4880 | assert_eq_m256i(r, e); |
4881 | } |
4882 | |
4883 | #[simd_test(enable = "avx2" )] |
4884 | unsafe fn test_mm256_sad_epu8() { |
4885 | let a = _mm256_set1_epi8(2); |
4886 | let b = _mm256_set1_epi8(4); |
4887 | let r = _mm256_sad_epu8(a, b); |
4888 | let e = _mm256_set1_epi64x(16); |
4889 | assert_eq_m256i(r, e); |
4890 | } |
4891 | |
4892 | #[simd_test(enable = "avx2" )] |
4893 | unsafe fn test_mm256_shufflehi_epi16() { |
4894 | #[rustfmt::skip] |
4895 | let a = _mm256_setr_epi16( |
4896 | 0, 1, 2, 3, 11, 22, 33, 44, |
4897 | 4, 5, 6, 7, 55, 66, 77, 88, |
4898 | ); |
4899 | #[rustfmt::skip] |
4900 | let e = _mm256_setr_epi16( |
4901 | 0, 1, 2, 3, 44, 22, 22, 11, |
4902 | 4, 5, 6, 7, 88, 66, 66, 55, |
4903 | ); |
4904 | let r = _mm256_shufflehi_epi16::<0b00_01_01_11>(a); |
4905 | assert_eq_m256i(r, e); |
4906 | } |
4907 | |
4908 | #[simd_test(enable = "avx2" )] |
4909 | unsafe fn test_mm256_shufflelo_epi16() { |
4910 | #[rustfmt::skip] |
4911 | let a = _mm256_setr_epi16( |
4912 | 11, 22, 33, 44, 0, 1, 2, 3, |
4913 | 55, 66, 77, 88, 4, 5, 6, 7, |
4914 | ); |
4915 | #[rustfmt::skip] |
4916 | let e = _mm256_setr_epi16( |
4917 | 44, 22, 22, 11, 0, 1, 2, 3, |
4918 | 88, 66, 66, 55, 4, 5, 6, 7, |
4919 | ); |
4920 | let r = _mm256_shufflelo_epi16::<0b00_01_01_11>(a); |
4921 | assert_eq_m256i(r, e); |
4922 | } |
4923 | |
4924 | #[simd_test(enable = "avx2" )] |
4925 | unsafe fn test_mm256_sign_epi16() { |
4926 | let a = _mm256_set1_epi16(2); |
4927 | let b = _mm256_set1_epi16(-1); |
4928 | let r = _mm256_sign_epi16(a, b); |
4929 | let e = _mm256_set1_epi16(-2); |
4930 | assert_eq_m256i(r, e); |
4931 | } |
4932 | |
4933 | #[simd_test(enable = "avx2" )] |
4934 | unsafe fn test_mm256_sign_epi32() { |
4935 | let a = _mm256_set1_epi32(2); |
4936 | let b = _mm256_set1_epi32(-1); |
4937 | let r = _mm256_sign_epi32(a, b); |
4938 | let e = _mm256_set1_epi32(-2); |
4939 | assert_eq_m256i(r, e); |
4940 | } |
4941 | |
4942 | #[simd_test(enable = "avx2" )] |
4943 | unsafe fn test_mm256_sign_epi8() { |
4944 | let a = _mm256_set1_epi8(2); |
4945 | let b = _mm256_set1_epi8(-1); |
4946 | let r = _mm256_sign_epi8(a, b); |
4947 | let e = _mm256_set1_epi8(-2); |
4948 | assert_eq_m256i(r, e); |
4949 | } |
4950 | |
4951 | #[simd_test(enable = "avx2" )] |
4952 | unsafe fn test_mm256_sll_epi16() { |
4953 | let a = _mm256_set1_epi16(0xFF); |
4954 | let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4); |
4955 | let r = _mm256_sll_epi16(a, b); |
4956 | assert_eq_m256i(r, _mm256_set1_epi16(0xFF0)); |
4957 | } |
4958 | |
4959 | #[simd_test(enable = "avx2" )] |
4960 | unsafe fn test_mm256_sll_epi32() { |
4961 | let a = _mm256_set1_epi32(0xFFFF); |
4962 | let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4); |
4963 | let r = _mm256_sll_epi32(a, b); |
4964 | assert_eq_m256i(r, _mm256_set1_epi32(0xFFFF0)); |
4965 | } |
4966 | |
4967 | #[simd_test(enable = "avx2" )] |
4968 | unsafe fn test_mm256_sll_epi64() { |
4969 | let a = _mm256_set1_epi64x(0xFFFFFFFF); |
4970 | let b = _mm_insert_epi64::<0>(_mm_set1_epi64x(0), 4); |
4971 | let r = _mm256_sll_epi64(a, b); |
4972 | assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF0)); |
4973 | } |
4974 | |
4975 | #[simd_test(enable = "avx2" )] |
4976 | unsafe fn test_mm256_slli_epi16() { |
4977 | assert_eq_m256i( |
4978 | _mm256_slli_epi16::<4>(_mm256_set1_epi16(0xFF)), |
4979 | _mm256_set1_epi16(0xFF0), |
4980 | ); |
4981 | } |
4982 | |
4983 | #[simd_test(enable = "avx2" )] |
4984 | unsafe fn test_mm256_slli_epi32() { |
4985 | assert_eq_m256i( |
4986 | _mm256_slli_epi32::<4>(_mm256_set1_epi32(0xFFFF)), |
4987 | _mm256_set1_epi32(0xFFFF0), |
4988 | ); |
4989 | } |
4990 | |
4991 | #[simd_test(enable = "avx2" )] |
4992 | unsafe fn test_mm256_slli_epi64() { |
4993 | assert_eq_m256i( |
4994 | _mm256_slli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)), |
4995 | _mm256_set1_epi64x(0xFFFFFFFF0), |
4996 | ); |
4997 | } |
4998 | |
4999 | #[simd_test(enable = "avx2" )] |
5000 | unsafe fn test_mm256_slli_si256() { |
5001 | let a = _mm256_set1_epi64x(0xFFFFFFFF); |
5002 | let r = _mm256_slli_si256::<3>(a); |
5003 | assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF000000)); |
5004 | } |
5005 | |
5006 | #[simd_test(enable = "avx2" )] |
5007 | unsafe fn test_mm_sllv_epi32() { |
5008 | let a = _mm_set1_epi32(2); |
5009 | let b = _mm_set1_epi32(1); |
5010 | let r = _mm_sllv_epi32(a, b); |
5011 | let e = _mm_set1_epi32(4); |
5012 | assert_eq_m128i(r, e); |
5013 | } |
5014 | |
5015 | #[simd_test(enable = "avx2" )] |
5016 | unsafe fn test_mm256_sllv_epi32() { |
5017 | let a = _mm256_set1_epi32(2); |
5018 | let b = _mm256_set1_epi32(1); |
5019 | let r = _mm256_sllv_epi32(a, b); |
5020 | let e = _mm256_set1_epi32(4); |
5021 | assert_eq_m256i(r, e); |
5022 | } |
5023 | |
5024 | #[simd_test(enable = "avx2" )] |
5025 | unsafe fn test_mm_sllv_epi64() { |
5026 | let a = _mm_set1_epi64x(2); |
5027 | let b = _mm_set1_epi64x(1); |
5028 | let r = _mm_sllv_epi64(a, b); |
5029 | let e = _mm_set1_epi64x(4); |
5030 | assert_eq_m128i(r, e); |
5031 | } |
5032 | |
5033 | #[simd_test(enable = "avx2" )] |
5034 | unsafe fn test_mm256_sllv_epi64() { |
5035 | let a = _mm256_set1_epi64x(2); |
5036 | let b = _mm256_set1_epi64x(1); |
5037 | let r = _mm256_sllv_epi64(a, b); |
5038 | let e = _mm256_set1_epi64x(4); |
5039 | assert_eq_m256i(r, e); |
5040 | } |
5041 | |
5042 | #[simd_test(enable = "avx2" )] |
5043 | unsafe fn test_mm256_sra_epi16() { |
5044 | let a = _mm256_set1_epi16(-1); |
5045 | let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0); |
5046 | let r = _mm256_sra_epi16(a, b); |
5047 | assert_eq_m256i(r, _mm256_set1_epi16(-1)); |
5048 | } |
5049 | |
5050 | #[simd_test(enable = "avx2" )] |
5051 | unsafe fn test_mm256_sra_epi32() { |
5052 | let a = _mm256_set1_epi32(-1); |
5053 | let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 1); |
5054 | let r = _mm256_sra_epi32(a, b); |
5055 | assert_eq_m256i(r, _mm256_set1_epi32(-1)); |
5056 | } |
5057 | |
5058 | #[simd_test(enable = "avx2" )] |
5059 | unsafe fn test_mm256_srai_epi16() { |
5060 | assert_eq_m256i( |
5061 | _mm256_srai_epi16::<1>(_mm256_set1_epi16(-1)), |
5062 | _mm256_set1_epi16(-1), |
5063 | ); |
5064 | } |
5065 | |
5066 | #[simd_test(enable = "avx2" )] |
5067 | unsafe fn test_mm256_srai_epi32() { |
5068 | assert_eq_m256i( |
5069 | _mm256_srai_epi32::<1>(_mm256_set1_epi32(-1)), |
5070 | _mm256_set1_epi32(-1), |
5071 | ); |
5072 | } |
5073 | |
5074 | #[simd_test(enable = "avx2" )] |
5075 | unsafe fn test_mm_srav_epi32() { |
5076 | let a = _mm_set1_epi32(4); |
5077 | let count = _mm_set1_epi32(1); |
5078 | let r = _mm_srav_epi32(a, count); |
5079 | let e = _mm_set1_epi32(2); |
5080 | assert_eq_m128i(r, e); |
5081 | } |
5082 | |
5083 | #[simd_test(enable = "avx2" )] |
5084 | unsafe fn test_mm256_srav_epi32() { |
5085 | let a = _mm256_set1_epi32(4); |
5086 | let count = _mm256_set1_epi32(1); |
5087 | let r = _mm256_srav_epi32(a, count); |
5088 | let e = _mm256_set1_epi32(2); |
5089 | assert_eq_m256i(r, e); |
5090 | } |
5091 | |
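    // `_mm256_srli_si256` shifts bytes right within each 128-bit lane
    // independently, which is why zeros are shifted in at positions 13..=15
    // and 29..=31 below.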
5092 | #[simd_test(enable = "avx2" )] |
5093 | unsafe fn test_mm256_srli_si256() { |
5094 | #[rustfmt::skip] |
5095 | let a = _mm256_setr_epi8( |
5096 | 1, 2, 3, 4, 5, 6, 7, 8, |
5097 | 9, 10, 11, 12, 13, 14, 15, 16, |
5098 | 17, 18, 19, 20, 21, 22, 23, 24, |
5099 | 25, 26, 27, 28, 29, 30, 31, 32, |
5100 | ); |
5101 | let r = _mm256_srli_si256::<3>(a); |
5102 | #[rustfmt::skip] |
5103 | let e = _mm256_setr_epi8( |
5104 | 4, 5, 6, 7, 8, 9, 10, 11, |
5105 | 12, 13, 14, 15, 16, 0, 0, 0, |
5106 | 20, 21, 22, 23, 24, 25, 26, 27, |
5107 | 28, 29, 30, 31, 32, 0, 0, 0, |
5108 | ); |
5109 | assert_eq_m256i(r, e); |
5110 | } |
5111 | |
5112 | #[simd_test(enable = "avx2" )] |
5113 | unsafe fn test_mm256_srl_epi16() { |
5114 | let a = _mm256_set1_epi16(0xFF); |
5115 | let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4); |
5116 | let r = _mm256_srl_epi16(a, b); |
5117 | assert_eq_m256i(r, _mm256_set1_epi16(0xF)); |
5118 | } |
5119 | |
5120 | #[simd_test(enable = "avx2" )] |
5121 | unsafe fn test_mm256_srl_epi32() { |
5122 | let a = _mm256_set1_epi32(0xFFFF); |
5123 | let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4); |
5124 | let r = _mm256_srl_epi32(a, b); |
5125 | assert_eq_m256i(r, _mm256_set1_epi32(0xFFF)); |
5126 | } |
5127 | |
5128 | #[simd_test(enable = "avx2" )] |
5129 | unsafe fn test_mm256_srl_epi64() { |
5130 | let a = _mm256_set1_epi64x(0xFFFFFFFF); |
5131 | let b = _mm_setr_epi64x(4, 0); |
5132 | let r = _mm256_srl_epi64(a, b); |
5133 | assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFF)); |
5134 | } |
5135 | |
5136 | #[simd_test(enable = "avx2" )] |
5137 | unsafe fn test_mm256_srli_epi16() { |
5138 | assert_eq_m256i( |
5139 | _mm256_srli_epi16::<4>(_mm256_set1_epi16(0xFF)), |
5140 | _mm256_set1_epi16(0xF), |
5141 | ); |
5142 | } |
5143 | |
5144 | #[simd_test(enable = "avx2" )] |
5145 | unsafe fn test_mm256_srli_epi32() { |
5146 | assert_eq_m256i( |
5147 | _mm256_srli_epi32::<4>(_mm256_set1_epi32(0xFFFF)), |
5148 | _mm256_set1_epi32(0xFFF), |
5149 | ); |
5150 | } |
5151 | |
5152 | #[simd_test(enable = "avx2" )] |
5153 | unsafe fn test_mm256_srli_epi64() { |
5154 | assert_eq_m256i( |
5155 | _mm256_srli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)), |
5156 | _mm256_set1_epi64x(0xFFFFFFF), |
5157 | ); |
5158 | } |
5159 | |
5160 | #[simd_test(enable = "avx2" )] |
5161 | unsafe fn test_mm_srlv_epi32() { |
5162 | let a = _mm_set1_epi32(2); |
5163 | let count = _mm_set1_epi32(1); |
5164 | let r = _mm_srlv_epi32(a, count); |
5165 | let e = _mm_set1_epi32(1); |
5166 | assert_eq_m128i(r, e); |
5167 | } |
5168 | |
5169 | #[simd_test(enable = "avx2" )] |
5170 | unsafe fn test_mm256_srlv_epi32() { |
5171 | let a = _mm256_set1_epi32(2); |
5172 | let count = _mm256_set1_epi32(1); |
5173 | let r = _mm256_srlv_epi32(a, count); |
5174 | let e = _mm256_set1_epi32(1); |
5175 | assert_eq_m256i(r, e); |
5176 | } |
5177 | |
5178 | #[simd_test(enable = "avx2" )] |
5179 | unsafe fn test_mm_srlv_epi64() { |
5180 | let a = _mm_set1_epi64x(2); |
5181 | let count = _mm_set1_epi64x(1); |
5182 | let r = _mm_srlv_epi64(a, count); |
5183 | let e = _mm_set1_epi64x(1); |
5184 | assert_eq_m128i(r, e); |
5185 | } |
5186 | |
5187 | #[simd_test(enable = "avx2" )] |
5188 | unsafe fn test_mm256_srlv_epi64() { |
5189 | let a = _mm256_set1_epi64x(2); |
5190 | let count = _mm256_set1_epi64x(1); |
5191 | let r = _mm256_srlv_epi64(a, count); |
5192 | let e = _mm256_set1_epi64x(1); |
5193 | assert_eq_m256i(r, e); |
5194 | } |
5195 | |
5196 | #[simd_test(enable = "avx2" )] |
5197 | unsafe fn test_mm256_sub_epi16() { |
5198 | let a = _mm256_set1_epi16(4); |
5199 | let b = _mm256_set1_epi16(2); |
5200 | let r = _mm256_sub_epi16(a, b); |
5201 | assert_eq_m256i(r, b); |
5202 | } |
5203 | |
5204 | #[simd_test(enable = "avx2" )] |
5205 | unsafe fn test_mm256_sub_epi32() { |
5206 | let a = _mm256_set1_epi32(4); |
5207 | let b = _mm256_set1_epi32(2); |
5208 | let r = _mm256_sub_epi32(a, b); |
5209 | assert_eq_m256i(r, b); |
5210 | } |
5211 | |
5212 | #[simd_test(enable = "avx2" )] |
5213 | unsafe fn test_mm256_sub_epi64() { |
5214 | let a = _mm256_set1_epi64x(4); |
5215 | let b = _mm256_set1_epi64x(2); |
5216 | let r = _mm256_sub_epi64(a, b); |
5217 | assert_eq_m256i(r, b); |
5218 | } |
5219 | |
5220 | #[simd_test(enable = "avx2" )] |
5221 | unsafe fn test_mm256_sub_epi8() { |
5222 | let a = _mm256_set1_epi8(4); |
5223 | let b = _mm256_set1_epi8(2); |
5224 | let r = _mm256_sub_epi8(a, b); |
5225 | assert_eq_m256i(r, b); |
5226 | } |
5227 | |
5228 | #[simd_test(enable = "avx2" )] |
5229 | unsafe fn test_mm256_subs_epi16() { |
5230 | let a = _mm256_set1_epi16(4); |
5231 | let b = _mm256_set1_epi16(2); |
5232 | let r = _mm256_subs_epi16(a, b); |
5233 | assert_eq_m256i(r, b); |
5234 | } |
5235 | |
5236 | #[simd_test(enable = "avx2" )] |
5237 | unsafe fn test_mm256_subs_epi8() { |
5238 | let a = _mm256_set1_epi8(4); |
5239 | let b = _mm256_set1_epi8(2); |
5240 | let r = _mm256_subs_epi8(a, b); |
5241 | assert_eq_m256i(r, b); |
5242 | } |
5243 | |
5244 | #[simd_test(enable = "avx2" )] |
5245 | unsafe fn test_mm256_subs_epu16() { |
5246 | let a = _mm256_set1_epi16(4); |
5247 | let b = _mm256_set1_epi16(2); |
5248 | let r = _mm256_subs_epu16(a, b); |
5249 | assert_eq_m256i(r, b); |
5250 | } |
5251 | |
5252 | #[simd_test(enable = "avx2" )] |
5253 | unsafe fn test_mm256_subs_epu8() { |
5254 | let a = _mm256_set1_epi8(4); |
5255 | let b = _mm256_set1_epi8(2); |
5256 | let r = _mm256_subs_epu8(a, b); |
5257 | assert_eq_m256i(r, b); |
5258 | } |
5259 | |
5260 | #[simd_test(enable = "avx2" )] |
5261 | unsafe fn test_mm256_xor_si256() { |
5262 | let a = _mm256_set1_epi8(5); |
5263 | let b = _mm256_set1_epi8(3); |
5264 | let r = _mm256_xor_si256(a, b); |
5265 | assert_eq_m256i(r, _mm256_set1_epi8(6)); |
5266 | } |
5267 | |
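    // `_mm256_alignr_epi8::<N>` concatenates the corresponding 128-bit lanes
    // of `a` (high) and `b` (low), shifts the 32-byte value right by N bytes
    // and keeps the low 16 bytes: N = 0 yields `b`, N = 16 yields `a`, and
    // N >= 32 yields zero.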
5268 | #[simd_test(enable = "avx2" )] |
5269 | unsafe fn test_mm256_alignr_epi8() { |
5270 | #[rustfmt::skip] |
5271 | let a = _mm256_setr_epi8( |
5272 | 1, 2, 3, 4, 5, 6, 7, 8, |
5273 | 9, 10, 11, 12, 13, 14, 15, 16, |
5274 | 17, 18, 19, 20, 21, 22, 23, 24, |
5275 | 25, 26, 27, 28, 29, 30, 31, 32, |
5276 | ); |
5277 | #[rustfmt::skip] |
5278 | let b = _mm256_setr_epi8( |
5279 | -1, -2, -3, -4, -5, -6, -7, -8, |
5280 | -9, -10, -11, -12, -13, -14, -15, -16, |
5281 | -17, -18, -19, -20, -21, -22, -23, -24, |
5282 | -25, -26, -27, -28, -29, -30, -31, -32, |
5283 | ); |
5284 | let r = _mm256_alignr_epi8::<33>(a, b); |
5285 | assert_eq_m256i(r, _mm256_set1_epi8(0)); |
5286 | |
5287 | let r = _mm256_alignr_epi8::<17>(a, b); |
5288 | #[rustfmt::skip] |
5289 | let expected = _mm256_setr_epi8( |
5290 | 2, 3, 4, 5, 6, 7, 8, 9, |
5291 | 10, 11, 12, 13, 14, 15, 16, 0, |
5292 | 18, 19, 20, 21, 22, 23, 24, 25, |
5293 | 26, 27, 28, 29, 30, 31, 32, 0, |
5294 | ); |
5295 | assert_eq_m256i(r, expected); |
5296 | |
5297 | let r = _mm256_alignr_epi8::<4>(a, b); |
5298 | #[rustfmt::skip] |
5299 | let expected = _mm256_setr_epi8( |
5300 | -5, -6, -7, -8, -9, -10, -11, -12, |
5301 | -13, -14, -15, -16, 1, 2, 3, 4, |
5302 | -21, -22, -23, -24, -25, -26, -27, -28, |
5303 | -29, -30, -31, -32, 17, 18, 19, 20, |
5304 | ); |
5305 | assert_eq_m256i(r, expected); |
5306 | |
5307 | #[rustfmt::skip] |
5308 | let expected = _mm256_setr_epi8( |
5309 | -1, -2, -3, -4, -5, -6, -7, -8, |
5310 | -9, -10, -11, -12, -13, -14, -15, -16, -17, |
5311 | -18, -19, -20, -21, -22, -23, -24, -25, |
5312 | -26, -27, -28, -29, -30, -31, -32, |
5313 | ); |
5314 | let r = _mm256_alignr_epi8::<16>(a, b); |
5315 | assert_eq_m256i(r, expected); |
5316 | |
5317 | let r = _mm256_alignr_epi8::<15>(a, b); |
5318 | #[rustfmt::skip] |
5319 | let expected = _mm256_setr_epi8( |
5320 | -16, 1, 2, 3, 4, 5, 6, 7, |
5321 | 8, 9, 10, 11, 12, 13, 14, 15, |
5322 | -32, 17, 18, 19, 20, 21, 22, 23, |
5323 | 24, 25, 26, 27, 28, 29, 30, 31, |
5324 | ); |
5325 | assert_eq_m256i(r, expected); |
5326 | |
5327 | let r = _mm256_alignr_epi8::<0>(a, b); |
5328 | assert_eq_m256i(r, b); |
5329 | } |
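
// Note (added for clarity, not in the original suite): `_mm256_alignr_epi8`
// operates on each 128-bit lane independently. For every lane the result is
// the low 16 bytes of `(a_lane : b_lane) >> (8 * IMM8)`, which is why an
// offset of 0 returns `b`, an offset of 16 returns `a`, and offsets of 32 or
// more return zero, as the cases above demonstrate.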
5330 | |
5331 | #[simd_test(enable = "avx2" )] |
5332 | unsafe fn test_mm256_shuffle_epi8() { |
5333 | #[rustfmt::skip] |
5334 | let a = _mm256_setr_epi8( |
5335 | 1, 2, 3, 4, 5, 6, 7, 8, |
5336 | 9, 10, 11, 12, 13, 14, 15, 16, |
5337 | 17, 18, 19, 20, 21, 22, 23, 24, |
5338 | 25, 26, 27, 28, 29, 30, 31, 32, |
5339 | ); |
5340 | #[rustfmt::skip] |
5341 | let b = _mm256_setr_epi8( |
5342 | 4, 128u8 as i8, 4, 3, 24, 12, 6, 19, |
5343 | 12, 5, 5, 10, 4, 1, 8, 0, |
5344 | 4, 128u8 as i8, 4, 3, 24, 12, 6, 19, |
5345 | 12, 5, 5, 10, 4, 1, 8, 0, |
5346 | ); |
5347 | #[rustfmt::skip] |
5348 | let expected = _mm256_setr_epi8( |
5349 | 5, 0, 5, 4, 9, 13, 7, 4, |
5350 | 13, 6, 6, 11, 5, 2, 9, 1, |
5351 | 21, 0, 21, 20, 25, 29, 23, 20, |
5352 | 29, 22, 22, 27, 21, 18, 25, 17, |
5353 | ); |
5354 | let r = _mm256_shuffle_epi8(a, b); |
5355 | assert_eq_m256i(r, expected); |
5356 | } |
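
// Note (added for clarity): each control byte in `b` selects a byte from the
// *same* 128-bit lane of `a` using its low four bits, and a set sign bit
// (the `128u8 as i8` entries above) forces that output byte to zero; hence
// the second half of `expected` repeats the first half shifted up by 16.
// The sketch below (name ours, not from the original suite) isolates the
// zeroing behaviour.
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_shuffle_epi8_zeroing() {
    let a = _mm256_set1_epi8(42);
    // 0x80 in every control byte zeroes every output byte.
    let r = _mm256_shuffle_epi8(a, _mm256_set1_epi8(-128));
    assert_eq_m256i(r, _mm256_set1_epi8(0));
}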
5357 | |
5358 | #[simd_test(enable = "avx2" )] |
5359 | unsafe fn test_mm256_permutevar8x32_epi32() { |
5360 | let a = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800); |
5361 | let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4); |
5362 | let expected = _mm256_setr_epi32(600, 100, 600, 200, 800, 700, 400, 500); |
5363 | let r = _mm256_permutevar8x32_epi32(a, b); |
5364 | assert_eq_m256i(r, expected); |
5365 | } |
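
// Note (added for clarity): unlike `_mm256_shuffle_epi8`, the indices here
// may cross the 128-bit lane boundary; only the low three bits of each
// 32-bit index are used.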
5366 | |
5367 | #[simd_test(enable = "avx2" )] |
5368 | unsafe fn test_mm256_permute4x64_epi64() { |
5369 | let a = _mm256_setr_epi64x(100, 200, 300, 400); |
5370 | let expected = _mm256_setr_epi64x(400, 100, 200, 100); |
5371 | let r = _mm256_permute4x64_epi64::<0b00010011>(a); |
5372 | assert_eq_m256i(r, expected); |
5373 | } |
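
// Note (added for clarity): the immediate is decoded two bits at a time from
// the least significant end, so 0b00_01_00_11 selects source elements 3, 0,
// 1, 0 for result elements 0..=3, giving (400, 100, 200, 100) above. The
// `_mm256_permute4x64_pd` test below uses the same encoding.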
5374 | |
5375 | #[simd_test(enable = "avx2" )] |
5376 | unsafe fn test_mm256_permute2x128_si256() { |
5377 | let a = _mm256_setr_epi64x(100, 200, 500, 600); |
5378 | let b = _mm256_setr_epi64x(300, 400, 700, 800); |
5379 | let r = _mm256_permute2x128_si256::<0b00_01_00_11>(a, b); |
5380 | let e = _mm256_setr_epi64x(700, 800, 500, 600); |
5381 | assert_eq_m256i(r, e); |
5382 | } |
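
// Note (added for clarity): each nibble of the control selects a 128-bit
// half (0 = low of `a`, 1 = high of `a`, 2 = low of `b`, 3 = high of `b`;
// bit 3 of a nibble zeroes that half of the result). 0b00_01_00_11 therefore
// places the high half of `b` in the low half of the result and the high
// half of `a` in the high half, i.e. (700, 800, 500, 600).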
5383 | |
5384 | #[simd_test(enable = "avx2" )] |
5385 | unsafe fn test_mm256_permute4x64_pd() { |
5386 | let a = _mm256_setr_pd(1., 2., 3., 4.); |
5387 | let r = _mm256_permute4x64_pd::<0b00_01_00_11>(a); |
5388 | let e = _mm256_setr_pd(4., 1., 2., 1.); |
5389 | assert_eq_m256d(r, e); |
5390 | } |
5391 | |
5392 | #[simd_test(enable = "avx2" )] |
5393 | unsafe fn test_mm256_permutevar8x32_ps() { |
5394 | let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
5395 | let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4); |
5396 | let r = _mm256_permutevar8x32_ps(a, b); |
5397 | let e = _mm256_setr_ps(6., 1., 6., 2., 8., 7., 4., 5.); |
5398 | assert_eq_m256(r, e); |
5399 | } |
5400 | |
5401 | #[simd_test(enable = "avx2" )] |
5402 | unsafe fn test_mm_i32gather_epi32() { |
5403 | let arr: [i32; 128] = core::array::from_fn(|i| i as i32); |
5404 | // A multiplier of 4 is word-addressing |
5405 | let r = _mm_i32gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)); |
5406 | assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48)); |
5407 | } |
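
// Note (added for clarity): the const SCALE parameter is a byte multiplier
// (1, 2, 4 or 8) applied to every index, so with 4-byte elements a scale of
// 4 turns indices into element offsets. The sketch below (name ours, not
// from the original suite) uses a scale of 1 to address raw bytes instead.
#[simd_test(enable = "avx2")]
unsafe fn test_mm_i32gather_epi32_byte_scale() {
    let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
    // With SCALE = 1 the indices are byte offsets, so offset 16 reads arr[4].
    let r = _mm_i32gather_epi32::<1>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
    assert_eq_m128i(r, _mm_setr_epi32(0, 4, 8, 12));
}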
5408 | |
5409 | #[simd_test(enable = "avx2" )] |
5410 | unsafe fn test_mm_mask_i32gather_epi32() { |
5411 | let arr: [i32; 128] = core::array::from_fn(|i| i as i32); |
5412 | // A multiplier of 4 is word-addressing |
5413 | let r = _mm_mask_i32gather_epi32::<4>( |
5414 | _mm_set1_epi32(256), |
5415 | arr.as_ptr(), |
5416 | _mm_setr_epi32(0, 16, 64, 96), |
5417 | _mm_setr_epi32(-1, -1, -1, 0), |
5418 | ); |
5419 | assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256)); |
5420 | } |
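
// Note (added for clarity): in the masked gathers only elements whose mask
// value has its sign bit set are loaded from memory; the remaining result
// elements are taken from the `src` argument (256 above) and their memory
// locations are not accessed.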
5421 | |
5422 | #[simd_test(enable = "avx2" )] |
5423 | unsafe fn test_mm256_i32gather_epi32() { |
5424 | let arr: [i32; 128] = core::array::from_fn(|i| i as i32); |
5425 | // A multiplier of 4 is word-addressing |
5426 | let r = |
5427 | _mm256_i32gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4)); |
5428 | assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4)); |
5429 | } |
5430 | |
5431 | #[simd_test(enable = "avx2" )] |
5432 | unsafe fn test_mm256_mask_i32gather_epi32() { |
5433 | let arr: [i32; 128] = core::array::from_fn(|i| i as i32); |
5434 | // A multiplier of 4 is word-addressing |
5435 | let r = _mm256_mask_i32gather_epi32::<4>( |
5436 | _mm256_set1_epi32(256), |
5437 | arr.as_ptr(), |
5438 | _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0), |
5439 | _mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0), |
5440 | ); |
5441 | assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256)); |
5442 | } |
5443 | |
5444 | #[simd_test(enable = "avx2" )] |
5445 | unsafe fn test_mm_i32gather_ps() { |
5446 | let arr: [f32; 128] = core::array::from_fn(|i| i as f32); |
5447 | // A multiplier of 4 is word-addressing for f32s |
5448 | let r = _mm_i32gather_ps::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)); |
5449 | assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0)); |
5450 | } |
5451 | |
5452 | #[simd_test(enable = "avx2" )] |
5453 | unsafe fn test_mm_mask_i32gather_ps() { |
5454 | let arr: [f32; 128] = core::array::from_fn(|i| i as f32); |
5455 | // A multiplier of 4 is word-addressing for f32s |
5456 | let r = _mm_mask_i32gather_ps::<4>( |
5457 | _mm_set1_ps(256.0), |
5458 | arr.as_ptr(), |
5459 | _mm_setr_epi32(0, 16, 64, 96), |
5460 | _mm_setr_ps(-1.0, -1.0, -1.0, 0.0), |
5461 | ); |
5462 | assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0)); |
5463 | } |
5464 | |
5465 | #[simd_test(enable = "avx2" )] |
5466 | unsafe fn test_mm256_i32gather_ps() { |
5467 | let arr: [f32; 128] = core::array::from_fn(|i| i as f32); |
5468 | // A multiplier of 4 is word-addressing for f32s |
5469 | let r = |
5470 | _mm256_i32gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4)); |
5471 | assert_eq_m256(r, _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0)); |
5472 | } |
5473 | |
5474 | #[simd_test(enable = "avx2" )] |
5475 | unsafe fn test_mm256_mask_i32gather_ps() { |
5476 | let arr: [f32; 128] = core::array::from_fn(|i| i as f32); |
5477 | // A multiplier of 4 is word-addressing for f32s |
5478 | let r = _mm256_mask_i32gather_ps::<4>( |
5479 | _mm256_set1_ps(256.0), |
5480 | arr.as_ptr(), |
5481 | _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0), |
5482 | _mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0), |
5483 | ); |
5484 | assert_eq_m256( |
5485 | r, |
5486 | _mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0), |
5487 | ); |
5488 | } |
5489 | |
5490 | #[simd_test(enable = "avx2" )] |
5491 | unsafe fn test_mm_i32gather_epi64() { |
5492 | let arr: [i64; 128] = core::array::from_fn(|i| i as i64); |
5493 | // A multiplier of 8 is word-addressing for i64s |
5494 | let r = _mm_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0)); |
5495 | assert_eq_m128i(r, _mm_setr_epi64x(0, 16)); |
5496 | } |
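
// Note (added for clarity): when gathering 64-bit elements with 32-bit
// indices, only the low half of the index vector is used (two indices for a
// 128-bit result, four for a 256-bit result); the remaining index lanes are
// ignored.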
5497 | |
5498 | #[simd_test(enable = "avx2" )] |
5499 | unsafe fn test_mm_mask_i32gather_epi64() { |
5500 | let arr: [i64; 128] = core::array::from_fn(|i| i as i64); |
5501 | // A multiplier of 8 is word-addressing for i64s |
5502 | let r = _mm_mask_i32gather_epi64::<8>( |
5503 | _mm_set1_epi64x(256), |
5504 | arr.as_ptr(), |
5505 | _mm_setr_epi32(16, 16, 16, 16), |
5506 | _mm_setr_epi64x(-1, 0), |
5507 | ); |
5508 | assert_eq_m128i(r, _mm_setr_epi64x(16, 256)); |
5509 | } |
5510 | |
5511 | #[simd_test(enable = "avx2" )] |
5512 | unsafe fn test_mm256_i32gather_epi64() { |
5513 | let arr: [i64; 128] = core::array::from_fn(|i| i as i64); |
5514 | // A multiplier of 8 is word-addressing for i64s |
5515 | let r = _mm256_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)); |
5516 | assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48)); |
5517 | } |
5518 | |
5519 | #[simd_test(enable = "avx2" )] |
5520 | unsafe fn test_mm256_mask_i32gather_epi64() { |
5521 | let arr: [i64; 128] = core::array::from_fn(|i| i as i64); |
5522 | // A multiplier of 8 is word-addressing for i64s |
5523 | let r = _mm256_mask_i32gather_epi64::<8>( |
5524 | _mm256_set1_epi64x(256), |
5525 | arr.as_ptr(), |
5526 | _mm_setr_epi32(0, 16, 64, 96), |
5527 | _mm256_setr_epi64x(-1, -1, -1, 0), |
5528 | ); |
5529 | assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256)); |
5530 | } |
5531 | |
5532 | #[simd_test(enable = "avx2" )] |
5533 | unsafe fn test_mm_i32gather_pd() { |
5534 | let arr: [f64; 128] = core::array::from_fn(|i| i as f64); |
5535 | // A multiplier of 8 is word-addressing for f64s |
5536 | let r = _mm_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0)); |
5537 | assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0)); |
5538 | } |
5539 | |
5540 | #[simd_test(enable = "avx2" )] |
5541 | unsafe fn test_mm_mask_i32gather_pd() { |
5542 | let arr: [f64; 128] = core::array::from_fn(|i| i as f64); |
5543 | // A multiplier of 8 is word-addressing for f64s |
5544 | let r = _mm_mask_i32gather_pd::<8>( |
5545 | _mm_set1_pd(256.0), |
5546 | arr.as_ptr(), |
5547 | _mm_setr_epi32(16, 16, 16, 16), |
5548 | _mm_setr_pd(-1.0, 0.0), |
5549 | ); |
5550 | assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0)); |
5551 | } |
5552 | |
5553 | #[simd_test(enable = "avx2" )] |
5554 | unsafe fn test_mm256_i32gather_pd() { |
5555 | let arr: [f64; 128] = core::array::from_fn(|i| i as f64); |
5556 | // A multiplier of 8 is word-addressing for f64s |
5557 | let r = _mm256_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)); |
5558 | assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0)); |
5559 | } |
5560 | |
5561 | #[simd_test(enable = "avx2" )] |
5562 | unsafe fn test_mm256_mask_i32gather_pd() { |
5563 | let arr: [f64; 128] = core::array::from_fn(|i| i as f64); |
5564 | // A multiplier of 8 is word-addressing for f64s |
5565 | let r = _mm256_mask_i32gather_pd::<8>( |
5566 | _mm256_set1_pd(256.0), |
5567 | arr.as_ptr(), |
5568 | _mm_setr_epi32(0, 16, 64, 96), |
5569 | _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0), |
5570 | ); |
5571 | assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0)); |
5572 | } |
5573 | |
5574 | #[simd_test(enable = "avx2" )] |
5575 | unsafe fn test_mm_i64gather_epi32() { |
5576 | let arr: [i32; 128] = core::array::from_fn(|i| i as i32); |
5577 | // A multiplier of 4 is word-addressing |
5578 | let r = _mm_i64gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16)); |
5579 | assert_eq_m128i(r, _mm_setr_epi32(0, 16, 0, 0)); |
5580 | } |
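
// Note (added for clarity): with 64-bit indices the situation is reversed:
// two i64 indices produce only two i32 results, and the upper lanes of the
// 128-bit result are zeroed, as the (0, 16, 0, 0) expectation above shows.
// The 256-bit index variants below likewise return a 128-bit result.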
5581 | |
5582 | #[simd_test(enable = "avx2" )] |
5583 | unsafe fn test_mm_mask_i64gather_epi32() { |
5584 | let arr: [i32; 128] = core::array::from_fn(|i| i as i32); |
5585 | // A multiplier of 4 is word-addressing |
5586 | let r = _mm_mask_i64gather_epi32::<4>( |
5587 | _mm_set1_epi32(256), |
5588 | arr.as_ptr(), |
5589 | _mm_setr_epi64x(0, 16), |
5590 | _mm_setr_epi32(-1, 0, -1, 0), |
5591 | ); |
5592 | assert_eq_m128i(r, _mm_setr_epi32(0, 256, 0, 0)); |
5593 | } |
5594 | |
5595 | #[simd_test(enable = "avx2" )] |
5596 | unsafe fn test_mm256_i64gather_epi32() { |
5597 | let arr: [i32; 128] = core::array::from_fn(|i| i as i32); |
5598 | // A multiplier of 4 is word-addressing |
5599 | let r = _mm256_i64gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)); |
5600 | assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48)); |
5601 | } |
5602 | |
5603 | #[simd_test(enable = "avx2" )] |
5604 | unsafe fn test_mm256_mask_i64gather_epi32() { |
5605 | let arr: [i32; 128] = core::array::from_fn(|i| i as i32); |
5606 | // A multiplier of 4 is word-addressing |
5607 | let r = _mm256_mask_i64gather_epi32::<4>( |
5608 | _mm_set1_epi32(256), |
5609 | arr.as_ptr(), |
5610 | _mm256_setr_epi64x(0, 16, 64, 96), |
5611 | _mm_setr_epi32(-1, -1, -1, 0), |
5612 | ); |
5613 | assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256)); |
5614 | } |
5615 | |
5616 | #[simd_test(enable = "avx2" )] |
5617 | unsafe fn test_mm_i64gather_ps() { |
5618 | let arr: [f32; 128] = core::array::from_fn(|i| i as f32); |
5619 | // A multiplier of 4 is word-addressing for f32s |
5620 | let r = _mm_i64gather_ps::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16)); |
5621 | assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 0.0, 0.0)); |
5622 | } |
5623 | |
5624 | #[simd_test(enable = "avx2" )] |
5625 | unsafe fn test_mm_mask_i64gather_ps() { |
5626 | let arr: [f32; 128] = core::array::from_fn(|i| i as f32); |
5627 | // A multiplier of 4 is word-addressing for f32s |
5628 | let r = _mm_mask_i64gather_ps::<4>( |
5629 | _mm_set1_ps(256.0), |
5630 | arr.as_ptr(), |
5631 | _mm_setr_epi64x(0, 16), |
5632 | _mm_setr_ps(-1.0, 0.0, -1.0, 0.0), |
5633 | ); |
5634 | assert_eq_m128(r, _mm_setr_ps(0.0, 256.0, 0.0, 0.0)); |
5635 | } |
5636 | |
5637 | #[simd_test(enable = "avx2" )] |
5638 | unsafe fn test_mm256_i64gather_ps() { |
5639 | let arr: [f32; 128] = core::array::from_fn(|i| i as f32); |
5640 | // A multiplier of 4 is word-addressing for f32s |
5641 | let r = _mm256_i64gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)); |
5642 | assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0)); |
5643 | } |
5644 | |
5645 | #[simd_test(enable = "avx2" )] |
5646 | unsafe fn test_mm256_mask_i64gather_ps() { |
5647 | let arr: [f32; 128] = core::array::from_fn(|i| i as f32); |
5648 | // A multiplier of 4 is word-addressing for f32s |
5649 | let r = _mm256_mask_i64gather_ps::<4>( |
5650 | _mm_set1_ps(256.0), |
5651 | arr.as_ptr(), |
5652 | _mm256_setr_epi64x(0, 16, 64, 96), |
5653 | _mm_setr_ps(-1.0, -1.0, -1.0, 0.0), |
5654 | ); |
5655 | assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0)); |
5656 | } |
5657 | |
5658 | #[simd_test(enable = "avx2" )] |
5659 | unsafe fn test_mm_i64gather_epi64() { |
5660 | let arr: [i64; 128] = core::array::from_fn(|i| i as i64); |
5661 | // A multiplier of 8 is word-addressing for i64s |
5662 | let r = _mm_i64gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16)); |
5663 | assert_eq_m128i(r, _mm_setr_epi64x(0, 16)); |
5664 | } |
5665 | |
5666 | #[simd_test(enable = "avx2" )] |
5667 | unsafe fn test_mm_mask_i64gather_epi64() { |
5668 | let arr: [i64; 128] = core::array::from_fn(|i| i as i64); |
5669 | // A multiplier of 8 is word-addressing for i64s |
5670 | let r = _mm_mask_i64gather_epi64::<8>( |
5671 | _mm_set1_epi64x(256), |
5672 | arr.as_ptr(), |
5673 | _mm_setr_epi64x(16, 16), |
5674 | _mm_setr_epi64x(-1, 0), |
5675 | ); |
5676 | assert_eq_m128i(r, _mm_setr_epi64x(16, 256)); |
5677 | } |
5678 | |
5679 | #[simd_test(enable = "avx2" )] |
5680 | unsafe fn test_mm256_i64gather_epi64() { |
5681 | let arr: [i64; 128] = core::array::from_fn(|i| i as i64); |
5682 | // A multiplier of 8 is word-addressing for i64s |
5683 | let r = _mm256_i64gather_epi64::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)); |
5684 | assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48)); |
5685 | } |
5686 | |
5687 | #[simd_test(enable = "avx2" )] |
5688 | unsafe fn test_mm256_mask_i64gather_epi64() { |
5689 | let arr: [i64; 128] = core::array::from_fn(|i| i as i64); |
5690 | // A multiplier of 8 is word-addressing for i64s |
5691 | let r = _mm256_mask_i64gather_epi64::<8>( |
5692 | _mm256_set1_epi64x(256), |
5693 | arr.as_ptr(), |
5694 | _mm256_setr_epi64x(0, 16, 64, 96), |
5695 | _mm256_setr_epi64x(-1, -1, -1, 0), |
5696 | ); |
5697 | assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256)); |
5698 | } |
5699 | |
5700 | #[simd_test(enable = "avx2" )] |
5701 | unsafe fn test_mm_i64gather_pd() { |
5702 | let arr: [f64; 128] = core::array::from_fn(|i| i as f64); |
5703 | // A multiplier of 8 is word-addressing for f64s |
5704 | let r = _mm_i64gather_pd::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16)); |
5705 | assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0)); |
5706 | } |
5707 | |
5708 | #[simd_test(enable = "avx2" )] |
5709 | unsafe fn test_mm_mask_i64gather_pd() { |
5710 | let arr: [f64; 128] = core::array::from_fn(|i| i as f64); |
5711 | // A multiplier of 8 is word-addressing for f64s |
5712 | let r = _mm_mask_i64gather_pd::<8>( |
5713 | _mm_set1_pd(256.0), |
5714 | arr.as_ptr(), |
5715 | _mm_setr_epi64x(16, 16), |
5716 | _mm_setr_pd(-1.0, 0.0), |
5717 | ); |
5718 | assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0)); |
5719 | } |
5720 | |
5721 | #[simd_test(enable = "avx2" )] |
5722 | unsafe fn test_mm256_i64gather_pd() { |
5723 | let arr: [f64; 128] = core::array::from_fn(|i| i as f64); |
5724 | // A multiplier of 8 is word-addressing for f64s |
5725 | let r = _mm256_i64gather_pd::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)); |
5726 | assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0)); |
5727 | } |
5728 | |
5729 | #[simd_test(enable = "avx2" )] |
5730 | unsafe fn test_mm256_mask_i64gather_pd() { |
5731 | let arr: [f64; 128] = core::array::from_fn(|i| i as f64); |
5732 | // A multiplier of 8 is word-addressing for f64s |
5733 | let r = _mm256_mask_i64gather_pd::<8>( |
5734 | _mm256_set1_pd(256.0), |
5735 | arr.as_ptr(), |
5736 | _mm256_setr_epi64x(0, 16, 64, 96), |
5737 | _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0), |
5738 | ); |
5739 | assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0)); |
5740 | } |
5741 | |
#[simd_test(enable = "avx2")]
5743 | unsafe fn test_mm256_extract_epi8() { |
5744 | #[rustfmt::skip] |
5745 | let a = _mm256_setr_epi8( |
5746 | -1, 1, 2, 3, 4, 5, 6, 7, |
5747 | 8, 9, 10, 11, 12, 13, 14, 15, |
5748 | 16, 17, 18, 19, 20, 21, 22, 23, |
5749 | 24, 25, 26, 27, 28, 29, 30, 31 |
5750 | ); |
5751 | let r1 = _mm256_extract_epi8::<0>(a); |
5752 | let r2 = _mm256_extract_epi8::<3>(a); |
5753 | assert_eq!(r1, 0xFF); |
5754 | assert_eq!(r2, 3); |
5755 | } |
5756 | |
5757 | #[simd_test(enable = "avx2" )] |
5758 | unsafe fn test_mm256_extract_epi16() { |
5759 | #[rustfmt::skip] |
5760 | let a = _mm256_setr_epi16( |
5761 | -1, 1, 2, 3, 4, 5, 6, 7, |
5762 | 8, 9, 10, 11, 12, 13, 14, 15, |
5763 | ); |
5764 | let r1 = _mm256_extract_epi16::<0>(a); |
5765 | let r2 = _mm256_extract_epi16::<3>(a); |
5766 | assert_eq!(r1, 0xFFFF); |
5767 | assert_eq!(r2, 3); |
5768 | } |
5769 | |
5770 | #[simd_test(enable = "avx2" )] |
5771 | unsafe fn test_mm256_extract_epi32() { |
5772 | let a = _mm256_setr_epi32(-1, 1, 2, 3, 4, 5, 6, 7); |
5773 | let r1 = _mm256_extract_epi32::<0>(a); |
5774 | let r2 = _mm256_extract_epi32::<3>(a); |
5775 | assert_eq!(r1, -1); |
5776 | assert_eq!(r2, 3); |
5777 | } |
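
// Note (added for clarity): `_mm256_extract_epi8` and `_mm256_extract_epi16`
// zero-extend the selected element into the returned i32, so the -1 lanes
// above read back as 0xFF and 0xFFFF, while `_mm256_extract_epi32` returns
// the element value itself (-1).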
5778 | |
5779 | #[simd_test(enable = "avx2" )] |
5780 | unsafe fn test_mm256_cvtsd_f64() { |
5781 | let a = _mm256_setr_pd(1., 2., 3., 4.); |
5782 | let r = _mm256_cvtsd_f64(a); |
5783 | assert_eq!(r, 1.); |
5784 | } |
5785 | |
5786 | #[simd_test(enable = "avx2" )] |
5787 | unsafe fn test_mm256_cvtsi256_si32() { |
5788 | let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
5789 | let r = _mm256_cvtsi256_si32(a); |
5790 | assert_eq!(r, 1); |
5791 | } |
5792 | } |
5793 | |