1 | use crate::{ |
2 | arch::asm, |
3 | core_arch::{simd::*, simd_llvm::*, x86::*}, |
4 | mem::{self, transmute}, |
5 | ptr, |
6 | }; |
7 | |
#[cfg(test)]
9 | use stdarch_test::assert_instr; |
10 | |
11 | use super::avx512f::{vpl, vps}; |
12 | |
13 | /// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst. |
14 | /// |
15 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi16&expand=30) |
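///
/// A minimal usage sketch with made-up values (the fence is `ignore` because
/// running it needs AVX-512BW hardware and the unstable AVX-512 intrinsics):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Safety: the caller must have verified that the CPU supports avx512bw.
/// unsafe fn demo() {
///     let a = _mm512_set1_epi16(-7);
///     let r = _mm512_abs_epi16(a);
///     // every 16-bit lane of `r` now holds 7
/// }
/// ```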
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm512_abs_epi16(a: __m512i) -> __m512i {
    let a: i16x32 = a.as_i16x32();
    // all-0 is a properly initialized i16x32
    let zero: i16x32 = mem::zeroed();
    let sub: i16x32 = simd_sub(zero, a);
    let cmp: i16x32 = simd_gt(a, zero);
    transmute(simd_select(cmp, a, sub))
}
27 | |
28 | /// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
29 | /// |
30 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi16&expand=31) |
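///
/// Illustrative sketch of the writemask behaviour (hypothetical values; `ignore`
/// fence since it needs AVX-512BW hardware):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe fn demo() {
///     let src = _mm512_set1_epi16(100);
///     let a = _mm512_set1_epi16(-7);
///     // bit i of the mask selects |a[i]|; a zero bit keeps src[i]
///     let r = _mm512_mask_abs_epi16(src, 0b01010101_01010101_01010101_01010101, a);
/// }
/// ```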
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm512_mask_abs_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    let abs: i16x32 = _mm512_abs_epi16(a).as_i16x32();
    transmute(simd_select_bitmask(k, abs, src.as_i16x32()))
}
38 | |
39 | /// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
40 | /// |
41 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi16&expand=32) |
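///
/// Illustrative sketch of the zeromask behaviour (hypothetical values; `ignore`
/// fence for the same hardware/feature reasons as above):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe fn demo() {
///     let a = _mm512_set1_epi16(-7);
///     // lanes whose mask bit is 0 become 0, the rest become |a|
///     let r = _mm512_maskz_abs_epi16(0xFFFF_0000, a);
/// }
/// ```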
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm512_maskz_abs_epi16(k: __mmask32, a: __m512i) -> __m512i {
    let abs: i16x32 = _mm512_abs_epi16(a).as_i16x32();
    let zero: i16x32 = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, abs, zero))
}
50 | |
51 | /// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
52 | /// |
53 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi16&expand=28) |
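///
/// Same pattern as the 512-bit form, on a 256-bit vector with a 16-bit mask;
/// sketch only (requires both avx512bw and avx512vl):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe fn demo() {
///     let src = _mm256_set1_epi16(0);
///     let a = _mm256_set1_epi16(-5);
///     let r = _mm256_mask_abs_epi16(src, 0xFF00, a);
///     // lanes 8..=15 hold 5, lanes 0..=7 keep the 0 from src
/// }
/// ```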
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm256_mask_abs_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    let abs: i16x16 = _mm256_abs_epi16(a).as_i16x16();
    transmute(simd_select_bitmask(k, abs, src.as_i16x16()))
}
61 | |
62 | /// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
63 | /// |
64 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi16&expand=29) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm256_maskz_abs_epi16(k: __mmask16, a: __m256i) -> __m256i {
    let abs: i16x16 = _mm256_abs_epi16(a).as_i16x16();
    let zero: i16x16 = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, abs, zero))
}
73 | |
74 | /// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
75 | /// |
76 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi16&expand=25) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm_mask_abs_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let abs: i16x8 = _mm_abs_epi16(a).as_i16x8();
    transmute(simd_select_bitmask(k, abs, src.as_i16x8()))
}
84 | |
85 | /// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
86 | /// |
87 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi16&expand=26) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm_maskz_abs_epi16(k: __mmask8, a: __m128i) -> __m128i {
    let abs: i16x8 = _mm_abs_epi16(a).as_i16x8();
    let zero: i16x8 = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, abs, zero))
}
96 | |
97 | /// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst. |
98 | /// |
99 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi8&expand=57) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm512_abs_epi8(a: __m512i) -> __m512i {
    let a: i8x64 = a.as_i8x64();
    // all-0 is a properly initialized i8x64
    let zero: i8x64 = mem::zeroed();
    let sub: i8x64 = simd_sub(zero, a);
    let cmp: i8x64 = simd_gt(a, zero);
    transmute(simd_select(cmp, a, sub))
}
111 | |
112 | /// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
113 | /// |
114 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi8&expand=58) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm512_mask_abs_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    let abs: i8x64 = _mm512_abs_epi8(a).as_i8x64();
    transmute(simd_select_bitmask(k, abs, src.as_i8x64()))
}
122 | |
123 | /// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
124 | /// |
125 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi8&expand=59) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm512_maskz_abs_epi8(k: __mmask64, a: __m512i) -> __m512i {
    let abs: i8x64 = _mm512_abs_epi8(a).as_i8x64();
    let zero: i8x64 = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, abs, zero))
}
134 | |
135 | /// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
136 | /// |
137 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi8&expand=55) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm256_mask_abs_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    let abs: i8x32 = _mm256_abs_epi8(a).as_i8x32();
    transmute(simd_select_bitmask(k, abs, src.as_i8x32()))
}
145 | |
146 | /// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
147 | /// |
148 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi8&expand=56) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm256_maskz_abs_epi8(k: __mmask32, a: __m256i) -> __m256i {
    let abs: i8x32 = _mm256_abs_epi8(a).as_i8x32();
    let zero: i8x32 = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, abs, zero))
}
157 | |
/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
159 | /// |
160 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi8&expand=52) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm_mask_abs_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    let abs: i8x16 = _mm_abs_epi8(a).as_i8x16();
    transmute(simd_select_bitmask(k, abs, src.as_i8x16()))
}
168 | |
169 | /// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
170 | /// |
171 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi8&expand=53) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm_maskz_abs_epi8(k: __mmask16, a: __m128i) -> __m128i {
    let abs: i8x16 = _mm_abs_epi8(a).as_i8x16();
    let zero: i8x16 = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, abs, zero))
}
180 | |
181 | /// Add packed 16-bit integers in a and b, and store the results in dst. |
182 | /// |
183 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi16&expand=91) |
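///
/// Wrapping (non-saturating) behaviour sketch with made-up values (`ignore`
/// fence; needs AVX-512BW hardware):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe fn demo() {
///     let a = _mm512_set1_epi16(i16::MAX);
///     let b = _mm512_set1_epi16(1);
///     let r = _mm512_add_epi16(a, b);
///     // every lane wraps around to i16::MIN
/// }
/// ```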
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm512_add_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_add(a.as_i16x32(), b.as_i16x32()))
}
190 | |
191 | /// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
192 | /// |
193 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi16&expand=92) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm512_mask_add_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let add: i16x32 = _mm512_add_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, add, src.as_i16x32()))
}
201 | |
202 | /// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
203 | /// |
204 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi16&expand=93) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm512_maskz_add_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let add: i16x32 = _mm512_add_epi16(a, b).as_i16x32();
    let zero: i16x32 = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, add, zero))
}
213 | |
214 | /// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
215 | /// |
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi16&expand=89)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm256_mask_add_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let add: i16x16 = _mm256_add_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, add, src.as_i16x16()))
}
224 | |
225 | /// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
226 | /// |
227 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi16&expand=90) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm256_maskz_add_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let add: i16x16 = _mm256_add_epi16(a, b).as_i16x16();
    let zero: i16x16 = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, add, zero))
}
236 | |
237 | /// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
238 | /// |
239 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi16&expand=86) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm_mask_add_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let add: i16x8 = _mm_add_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, add, src.as_i16x8()))
}
247 | |
248 | /// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
249 | /// |
250 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi16&expand=87) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm_maskz_add_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let add: i16x8 = _mm_add_epi16(a, b).as_i16x8();
    let zero: i16x8 = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, add, zero))
}
259 | |
260 | /// Add packed 8-bit integers in a and b, and store the results in dst. |
261 | /// |
262 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi8&expand=118) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm512_add_epi8(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_add(a.as_i8x64(), b.as_i8x64()))
}
269 | |
270 | /// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
271 | /// |
272 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi8&expand=119) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm512_mask_add_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let add: i8x64 = _mm512_add_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, add, src.as_i8x64()))
}
280 | |
281 | /// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
282 | /// |
283 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi8&expand=120) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm512_maskz_add_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let add: i8x64 = _mm512_add_epi8(a, b).as_i8x64();
    let zero: i8x64 = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, add, zero))
}
292 | |
293 | /// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
294 | /// |
295 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi8&expand=116) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm256_mask_add_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let add: i8x32 = _mm256_add_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, add, src.as_i8x32()))
}
303 | |
304 | /// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
305 | /// |
306 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi8&expand=117) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm256_maskz_add_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let add: i8x32 = _mm256_add_epi8(a, b).as_i8x32();
    let zero: i8x32 = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, add, zero))
}
315 | |
316 | /// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
317 | /// |
318 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi8&expand=113) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm_mask_add_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let add: i8x16 = _mm_add_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, add, src.as_i8x16()))
}
326 | |
327 | /// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
328 | /// |
329 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi8&expand=114) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm_maskz_add_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let add: i8x16 = _mm_add_epi8(a, b).as_i8x16();
    let zero: i8x16 = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, add, zero))
}
338 | |
339 | /// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst. |
340 | /// |
341 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu16&expand=197) |
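///
/// Unsigned-saturation sketch with made-up values (`ignore` fence; needs
/// AVX-512BW hardware):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe fn demo() {
///     let a = _mm512_set1_epi16(-1); // 0xFFFF = 65535 when viewed as unsigned
///     let b = _mm512_set1_epi16(1);
///     let r = _mm512_adds_epu16(a, b);
///     // every lane clamps at 65535 instead of wrapping to 0
/// }
/// ```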
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm512_adds_epu16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddusw(
        a.as_u16x32(),
        b.as_u16x32(),
        _mm512_setzero_si512().as_u16x32(),
        0b11111111_11111111_11111111_11111111,
    ))
}
353 | |
354 | /// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
355 | /// |
356 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu16&expand=198) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm512_mask_adds_epu16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    transmute(vpaddusw(a.as_u16x32(), b.as_u16x32(), src.as_u16x32(), k))
}
368 | |
369 | /// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
370 | /// |
371 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu16&expand=199) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm512_maskz_adds_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddusw(
        a.as_u16x32(),
        b.as_u16x32(),
        _mm512_setzero_si512().as_u16x32(),
        k,
    ))
}
383 | |
384 | /// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
385 | /// |
386 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu16&expand=195) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm256_mask_adds_epu16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    transmute(vpaddusw256(
        a.as_u16x16(),
        b.as_u16x16(),
        src.as_u16x16(),
        k,
    ))
}
403 | |
404 | /// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
405 | /// |
406 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu16&expand=196) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm256_maskz_adds_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpaddusw256(
        a.as_u16x16(),
        b.as_u16x16(),
        _mm256_setzero_si256().as_u16x16(),
        k,
    ))
}
418 | |
419 | /// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
420 | /// |
421 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu16&expand=192) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm_mask_adds_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpaddusw128(a.as_u16x8(), b.as_u16x8(), src.as_u16x8(), k))
}
428 | |
429 | /// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
430 | /// |
431 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu16&expand=193) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm_maskz_adds_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpaddusw128(
        a.as_u16x8(),
        b.as_u16x8(),
        _mm_setzero_si128().as_u16x8(),
        k,
    ))
}
443 | |
444 | /// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst. |
445 | /// |
446 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu8&expand=206) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm512_adds_epu8(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddusb(
        a.as_u8x64(),
        b.as_u8x64(),
        _mm512_setzero_si512().as_u8x64(),
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
    ))
}
458 | |
459 | /// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
460 | /// |
461 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu8&expand=207) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddusb(a.as_u8x64(), b.as_u8x64(), src.as_u8x64(), k))
}
468 | |
469 | /// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
470 | /// |
471 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu8&expand=208) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddusb(
        a.as_u8x64(),
        b.as_u8x64(),
        _mm512_setzero_si512().as_u8x64(),
        k,
    ))
}
483 | |
484 | /// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
485 | /// |
486 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu8&expand=204) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm256_mask_adds_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpaddusb256(a.as_u8x32(), b.as_u8x32(), src.as_u8x32(), k))
}
493 | |
494 | /// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
495 | /// |
496 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu8&expand=205) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm256_maskz_adds_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpaddusb256(
        a.as_u8x32(),
        b.as_u8x32(),
        _mm256_setzero_si256().as_u8x32(),
        k,
    ))
}
508 | |
509 | /// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
510 | /// |
511 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu8&expand=201) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm_mask_adds_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpaddusb128(a.as_u8x16(), b.as_u8x16(), src.as_u8x16(), k))
}
518 | |
519 | /// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
520 | /// |
521 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu8&expand=202) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm_maskz_adds_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpaddusb128(
        a.as_u8x16(),
        b.as_u8x16(),
        _mm_setzero_si128().as_u8x16(),
        k,
    ))
}
533 | |
534 | /// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst. |
535 | /// |
536 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi16&expand=179) |
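///
/// Signed-saturation sketch with made-up values (`ignore` fence; needs
/// AVX-512BW hardware):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe fn demo() {
///     let a = _mm512_set1_epi16(i16::MAX);
///     let b = _mm512_set1_epi16(10);
///     let r = _mm512_adds_epi16(a, b);
///     // every lane clamps at i16::MAX instead of wrapping
/// }
/// ```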
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm512_adds_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddsw(
        a.as_i16x32(),
        b.as_i16x32(),
        _mm512_setzero_si512().as_i16x32(),
        0b11111111_11111111_11111111_11111111,
    ))
}
548 | |
549 | /// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
550 | /// |
551 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi16&expand=180) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm512_mask_adds_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    transmute(vpaddsw(a.as_i16x32(), b.as_i16x32(), src.as_i16x32(), k))
}
563 | |
564 | /// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
565 | /// |
566 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi16&expand=181) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm512_maskz_adds_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddsw(
        a.as_i16x32(),
        b.as_i16x32(),
        _mm512_setzero_si512().as_i16x32(),
        k,
    ))
}
578 | |
579 | /// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
580 | /// |
581 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi16&expand=177) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm256_mask_adds_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    transmute(vpaddsw256(a.as_i16x16(), b.as_i16x16(), src.as_i16x16(), k))
}
593 | |
594 | /// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
595 | /// |
596 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi16&expand=178) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm256_maskz_adds_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpaddsw256(
        a.as_i16x16(),
        b.as_i16x16(),
        _mm256_setzero_si256().as_i16x16(),
        k,
    ))
}
608 | |
609 | /// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
610 | /// |
611 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi16&expand=174) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm_mask_adds_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpaddsw128(a.as_i16x8(), b.as_i16x8(), src.as_i16x8(), k))
}
618 | |
619 | /// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
620 | /// |
621 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi16&expand=175) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm_maskz_adds_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpaddsw128(
        a.as_i16x8(),
        b.as_i16x8(),
        _mm_setzero_si128().as_i16x8(),
        k,
    ))
}
633 | |
634 | /// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst. |
635 | /// |
636 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi8&expand=188) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm512_adds_epi8(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddsb(
        a.as_i8x64(),
        b.as_i8x64(),
        _mm512_setzero_si512().as_i8x64(),
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
    ))
}
648 | |
649 | /// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
650 | /// |
651 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi8&expand=189) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddsb(a.as_i8x64(), b.as_i8x64(), src.as_i8x64(), k))
}
658 | |
659 | /// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
660 | /// |
661 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi8&expand=190) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddsb(
        a.as_i8x64(),
        b.as_i8x64(),
        _mm512_setzero_si512().as_i8x64(),
        k,
    ))
}
673 | |
674 | /// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
675 | /// |
676 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi8&expand=186) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm256_mask_adds_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpaddsb256(a.as_i8x32(), b.as_i8x32(), src.as_i8x32(), k))
}
683 | |
684 | /// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
685 | /// |
686 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi8&expand=187) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm256_maskz_adds_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpaddsb256(
        a.as_i8x32(),
        b.as_i8x32(),
        _mm256_setzero_si256().as_i8x32(),
        k,
    ))
}
698 | |
699 | /// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
700 | /// |
701 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi8&expand=183) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm_mask_adds_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpaddsb128(a.as_i8x16(), b.as_i8x16(), src.as_i8x16(), k))
}
708 | |
709 | /// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
710 | /// |
711 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi8&expand=184) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm_maskz_adds_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpaddsb128(
        a.as_i8x16(),
        b.as_i8x16(),
        _mm_setzero_si128().as_i8x16(),
        k,
    ))
}
723 | |
724 | /// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst. |
725 | /// |
726 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi16&expand=5685) |
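///
/// Wrapping subtraction sketch with made-up values (`ignore` fence; needs
/// AVX-512BW hardware):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe fn demo() {
///     let a = _mm512_set1_epi16(3);
///     let b = _mm512_set1_epi16(5);
///     let r = _mm512_sub_epi16(a, b);
///     // every lane holds -2
/// }
/// ```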
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm512_sub_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_sub(a.as_i16x32(), b.as_i16x32()))
}
733 | |
734 | /// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
735 | /// |
736 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi16&expand=5683) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm512_mask_sub_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let sub: i16x32 = _mm512_sub_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
}
744 | |
745 | /// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
746 | /// |
747 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi16&expand=5684) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm512_maskz_sub_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let sub: i16x32 = _mm512_sub_epi16(a, b).as_i16x32();
    let zero: i16x32 = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, sub, zero))
}
756 | |
757 | /// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
758 | /// |
759 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi16&expand=5680) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm256_mask_sub_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let sub: i16x16 = _mm256_sub_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
}
767 | |
768 | /// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
769 | /// |
770 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi16&expand=5681) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm256_maskz_sub_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let sub: i16x16 = _mm256_sub_epi16(a, b).as_i16x16();
    let zero: i16x16 = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, sub, zero))
}
779 | |
780 | /// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
781 | /// |
782 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi16&expand=5677) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm_mask_sub_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let sub: i16x8 = _mm_sub_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
}
790 | |
791 | /// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
792 | /// |
793 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi16&expand=5678) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm_maskz_sub_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let sub: i16x8 = _mm_sub_epi16(a, b).as_i16x8();
    let zero: i16x8 = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, sub, zero))
}
802 | |
803 | /// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst. |
804 | /// |
805 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi8&expand=5712) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm512_sub_epi8(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_sub(a.as_i8x64(), b.as_i8x64()))
}
812 | |
813 | /// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
814 | /// |
815 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi8&expand=5710) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm512_mask_sub_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let sub: i8x64 = _mm512_sub_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
}
823 | |
824 | /// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
825 | /// |
826 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi8&expand=5711) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm512_maskz_sub_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let sub: i8x64 = _mm512_sub_epi8(a, b).as_i8x64();
    let zero: i8x64 = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, sub, zero))
}
835 | |
836 | /// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
837 | /// |
838 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi8&expand=5707) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm256_mask_sub_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let sub: i8x32 = _mm256_sub_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
}
846 | |
847 | /// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
848 | /// |
849 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi8&expand=5708) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm256_maskz_sub_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let sub: i8x32 = _mm256_sub_epi8(a, b).as_i8x32();
    let zero: i8x32 = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, sub, zero))
}
858 | |
859 | /// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
860 | /// |
861 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi8&expand=5704) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm_mask_sub_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let sub: i8x16 = _mm_sub_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
}
869 | |
870 | /// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
871 | /// |
872 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi8&expand=5705) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm_maskz_sub_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let sub: i8x16 = _mm_sub_epi8(a, b).as_i8x16();
    let zero: i8x16 = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, sub, zero))
}
881 | |
882 | /// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst. |
883 | /// |
884 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu16&expand=5793) |
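///
/// Unsigned-saturating subtraction sketch with made-up values (`ignore` fence;
/// needs AVX-512BW hardware):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe fn demo() {
///     let a = _mm512_set1_epi16(3);
///     let b = _mm512_set1_epi16(5);
///     let r = _mm512_subs_epu16(a, b);
///     // 3 - 5 saturates to 0 in every (unsigned) lane
/// }
/// ```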
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm512_subs_epu16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpsubusw(
        a.as_u16x32(),
        b.as_u16x32(),
        _mm512_setzero_si512().as_u16x32(),
        0b11111111_11111111_11111111_11111111,
    ))
}
896 | |
897 | /// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
898 | /// |
899 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu16&expand=5791) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm512_mask_subs_epu16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    transmute(vpsubusw(a.as_u16x32(), b.as_u16x32(), src.as_u16x32(), k))
}
911 | |
912 | /// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
913 | /// |
914 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu16&expand=5792) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm512_maskz_subs_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpsubusw(
        a.as_u16x32(),
        b.as_u16x32(),
        _mm512_setzero_si512().as_u16x32(),
        k,
    ))
}
926 | |
927 | /// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
928 | /// |
929 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu16&expand=5788) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm256_mask_subs_epu16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    transmute(vpsubusw256(
        a.as_u16x16(),
        b.as_u16x16(),
        src.as_u16x16(),
        k,
    ))
}
946 | |
947 | /// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
948 | /// |
949 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu16&expand=5789) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm256_maskz_subs_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpsubusw256(
        a.as_u16x16(),
        b.as_u16x16(),
        _mm256_setzero_si256().as_u16x16(),
        k,
    ))
}
961 | |
962 | /// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
963 | /// |
964 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu16&expand=5785) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm_mask_subs_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpsubusw128(a.as_u16x8(), b.as_u16x8(), src.as_u16x8(), k))
}
971 | |
972 | /// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
973 | /// |
974 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu16&expand=5786) |
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm_maskz_subs_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpsubusw128(
        a.as_u16x8(),
        b.as_u16x8(),
        _mm_setzero_si128().as_u16x8(),
        k,
    ))
}
986 | |
987 | /// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst. |
988 | /// |
989 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu8&expand=5802) |
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpsubusb(
        a.as_u8x64(),
        b.as_u8x64(),
        _mm512_setzero_si512().as_u8x64(),
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
    ))
}
1001 | |
1002 | /// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1003 | /// |
1004 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu8&expand=5800) |
1005 | #[inline ] |
1006 | #[target_feature (enable = "avx512bw" )] |
1007 | #[cfg_attr (test, assert_instr(vpsubusb))] |
1008 | pub unsafe fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { |
transmute(vpsubusb(a.as_u8x64(), b.as_u8x64(), src.as_u8x64(), k))
1010 | } |
1011 | |
1012 | /// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1013 | /// |
1014 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu8&expand=5801) |
1015 | #[inline ] |
1016 | #[target_feature (enable = "avx512bw" )] |
1017 | #[cfg_attr (test, assert_instr(vpsubusb))] |
1018 | pub unsafe fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { |
transmute(vpsubusb(a.as_u8x64(), b.as_u8x64(), _mm512_setzero_si512().as_u8x64(), k))
1025 | } |
1026 | |
1027 | /// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1028 | /// |
1029 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu8&expand=5797) |
1030 | #[inline ] |
1031 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1032 | #[cfg_attr (test, assert_instr(vpsubusb))] |
1033 | pub unsafe fn _mm256_mask_subs_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { |
transmute(vpsubusb256(a.as_u8x32(), b.as_u8x32(), src.as_u8x32(), k))
1035 | } |
1036 | |
1037 | /// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1038 | /// |
1039 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu8&expand=5798) |
1040 | #[inline ] |
1041 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1042 | #[cfg_attr (test, assert_instr(vpsubusb))] |
1043 | pub unsafe fn _mm256_maskz_subs_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { |
transmute(vpsubusb256(a.as_u8x32(), b.as_u8x32(), _mm256_setzero_si256().as_u8x32(), k))
1050 | } |
1051 | |
1052 | /// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1053 | /// |
1054 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu8&expand=5794) |
1055 | #[inline ] |
1056 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1057 | #[cfg_attr (test, assert_instr(vpsubusb))] |
1058 | pub unsafe fn _mm_mask_subs_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
transmute(vpsubusb128(a.as_u8x16(), b.as_u8x16(), src.as_u8x16(), k))
1060 | } |
1061 | |
1062 | /// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1063 | /// |
1064 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu8&expand=5795) |
1065 | #[inline ] |
1066 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1067 | #[cfg_attr (test, assert_instr(vpsubusb))] |
1068 | pub unsafe fn _mm_maskz_subs_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
transmute(vpsubusb128(a.as_u8x16(), b.as_u8x16(), _mm_setzero_si128().as_u8x16(), k))
1075 | } |
1076 | |
1077 | /// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst. |
1078 | /// |
1079 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi16&expand=5775) |
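///
/// A minimal illustrative sketch (not part of the original documentation; it assumes `avx512bw`
/// support and the AVX-512F `_mm512_set1_epi16` helper):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi16(i16::MAX);
///     let b = _mm512_set1_epi16(-1);
///     // Signed saturation: 32767 - (-1) clamps to 32767 instead of wrapping to -32768.
///     let r = _mm512_subs_epi16(a, b);
/// }
/// ```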
1080 | #[inline ] |
1081 | #[target_feature (enable = "avx512bw" )] |
1082 | #[cfg_attr (test, assert_instr(vpsubsw))] |
1083 | pub unsafe fn _mm512_subs_epi16(a: __m512i, b: __m512i) -> __m512i { |
transmute(vpsubsw(
a.as_i16x32(),
b.as_i16x32(),
_mm512_setzero_si512().as_i16x32(),
0b11111111_11111111_11111111_11111111,
))
1090 | } |
1091 | |
1092 | /// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1093 | /// |
1094 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi16&expand=5773) |
1095 | #[inline ] |
1096 | #[target_feature (enable = "avx512bw" )] |
1097 | #[cfg_attr (test, assert_instr(vpsubsw))] |
1098 | pub unsafe fn _mm512_mask_subs_epi16( |
1099 | src: __m512i, |
1100 | k: __mmask32, |
1101 | a: __m512i, |
1102 | b: __m512i, |
1103 | ) -> __m512i { |
transmute(vpsubsw(a.as_i16x32(), b.as_i16x32(), src.as_i16x32(), k))
1105 | } |
1106 | |
1107 | /// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1108 | /// |
1109 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi16&expand=5774) |
1110 | #[inline ] |
1111 | #[target_feature (enable = "avx512bw" )] |
1112 | #[cfg_attr (test, assert_instr(vpsubsw))] |
1113 | pub unsafe fn _mm512_maskz_subs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { |
transmute(vpsubsw(a.as_i16x32(), b.as_i16x32(), _mm512_setzero_si512().as_i16x32(), k))
1120 | } |
1121 | |
1122 | /// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1123 | /// |
1124 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi16&expand=5770) |
1125 | #[inline ] |
1126 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1127 | #[cfg_attr (test, assert_instr(vpsubsw))] |
1128 | pub unsafe fn _mm256_mask_subs_epi16( |
1129 | src: __m256i, |
1130 | k: __mmask16, |
1131 | a: __m256i, |
1132 | b: __m256i, |
1133 | ) -> __m256i { |
transmute(vpsubsw256(a.as_i16x16(), b.as_i16x16(), src.as_i16x16(), k))
1135 | } |
1136 | |
1137 | /// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1138 | /// |
1139 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi16&expand=5771) |
1140 | #[inline ] |
1141 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1142 | #[cfg_attr (test, assert_instr(vpsubsw))] |
1143 | pub unsafe fn _mm256_maskz_subs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { |
transmute(vpsubsw256(a.as_i16x16(), b.as_i16x16(), _mm256_setzero_si256().as_i16x16(), k))
1150 | } |
1151 | |
1152 | /// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1153 | /// |
1154 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi16&expand=5767) |
1155 | #[inline ] |
1156 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1157 | #[cfg_attr (test, assert_instr(vpsubsw))] |
1158 | pub unsafe fn _mm_mask_subs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
transmute(vpsubsw128(a.as_i16x8(), b.as_i16x8(), src.as_i16x8(), k))
1160 | } |
1161 | |
1162 | /// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1163 | /// |
1164 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi16&expand=5768) |
1165 | #[inline ] |
1166 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1167 | #[cfg_attr (test, assert_instr(vpsubsw))] |
1168 | pub unsafe fn _mm_maskz_subs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
transmute(vpsubsw128(a.as_i16x8(), b.as_i16x8(), _mm_setzero_si128().as_i16x8(), k))
1175 | } |
1176 | |
1177 | /// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst. |
1178 | /// |
1179 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi8&expand=5784) |
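///
/// A minimal illustrative sketch (not part of the original documentation; it assumes `avx512bw`
/// support and the AVX-512F `_mm512_set1_epi8` helper):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi8(i8::MIN);
///     let b = _mm512_set1_epi8(1);
///     // Signed saturation: -128 - 1 clamps to -128 instead of wrapping to 127.
///     let r = _mm512_subs_epi8(a, b);
/// }
/// ```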
1180 | #[inline ] |
1181 | #[target_feature (enable = "avx512bw" )] |
1182 | #[cfg_attr (test, assert_instr(vpsubsb))] |
1183 | pub unsafe fn _mm512_subs_epi8(a: __m512i, b: __m512i) -> __m512i { |
transmute(vpsubsb(
a.as_i8x64(),
b.as_i8x64(),
_mm512_setzero_si512().as_i8x64(),
0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
))
1190 | } |
1191 | |
1192 | /// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1193 | /// |
1194 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi8&expand=5782) |
1195 | #[inline ] |
1196 | #[target_feature (enable = "avx512bw" )] |
1197 | #[cfg_attr (test, assert_instr(vpsubsb))] |
1198 | pub unsafe fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { |
transmute(vpsubsb(a.as_i8x64(), b.as_i8x64(), src.as_i8x64(), k))
1200 | } |
1201 | |
1202 | /// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1203 | /// |
1204 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi8&expand=5783) |
1205 | #[inline ] |
1206 | #[target_feature (enable = "avx512bw" )] |
1207 | #[cfg_attr (test, assert_instr(vpsubsb))] |
1208 | pub unsafe fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { |
transmute(vpsubsb(a.as_i8x64(), b.as_i8x64(), _mm512_setzero_si512().as_i8x64(), k))
1215 | } |
1216 | |
1217 | /// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1218 | /// |
1219 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi8&expand=5779) |
1220 | #[inline ] |
1221 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1222 | #[cfg_attr (test, assert_instr(vpsubsb))] |
1223 | pub unsafe fn _mm256_mask_subs_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { |
transmute(vpsubsb256(a.as_i8x32(), b.as_i8x32(), src.as_i8x32(), k))
1225 | } |
1226 | |
1227 | /// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1228 | /// |
1229 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi8&expand=5780) |
1230 | #[inline ] |
1231 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1232 | #[cfg_attr (test, assert_instr(vpsubsb))] |
1233 | pub unsafe fn _mm256_maskz_subs_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { |
transmute(vpsubsb256(a.as_i8x32(), b.as_i8x32(), _mm256_setzero_si256().as_i8x32(), k))
1240 | } |
1241 | |
1242 | /// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1243 | /// |
1244 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi8&expand=5776) |
1245 | #[inline ] |
1246 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1247 | #[cfg_attr (test, assert_instr(vpsubsb))] |
1248 | pub unsafe fn _mm_mask_subs_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
transmute(vpsubsb128(a.as_i8x16(), b.as_i8x16(), src.as_i8x16(), k))
1250 | } |
1251 | |
1252 | /// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1253 | /// |
1254 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi8&expand=5777) |
1255 | #[inline ] |
1256 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1257 | #[cfg_attr (test, assert_instr(vpsubsb))] |
1258 | pub unsafe fn _mm_maskz_subs_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
transmute(vpsubsb128(a.as_i8x16(), b.as_i8x16(), _mm_setzero_si128().as_i8x16(), k))
1265 | } |
1266 | |
1267 | /// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst. |
1268 | /// |
1269 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epu16&expand=3973) |
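///
/// A minimal illustrative sketch (not part of the original documentation; it assumes `avx512bw`
/// support). Each lane is read as unsigned, so the bit pattern 0x8000 counts as 32768:
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi16(-32768); // bit pattern 0x8000, i.e. 32768 unsigned
///     let b = _mm512_set1_epi16(2);
///     // 32768 * 2 = 0x1_0000, so the high 16 bits of each product are 1.
///     let r = _mm512_mulhi_epu16(a, b);
/// }
/// ```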
1270 | #[inline ] |
1271 | #[target_feature (enable = "avx512bw" )] |
1272 | #[cfg_attr (test, assert_instr(vpmulhuw))] |
1273 | pub unsafe fn _mm512_mulhi_epu16(a: __m512i, b: __m512i) -> __m512i { |
transmute(vpmulhuw(a.as_u16x32(), b.as_u16x32()))
1275 | } |
1276 | |
1277 | /// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1278 | /// |
1279 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epu16&expand=3971) |
1280 | #[inline ] |
1281 | #[target_feature (enable = "avx512bw" )] |
1282 | #[cfg_attr (test, assert_instr(vpmulhuw))] |
1283 | pub unsafe fn _mm512_mask_mulhi_epu16( |
1284 | src: __m512i, |
1285 | k: __mmask32, |
1286 | a: __m512i, |
1287 | b: __m512i, |
1288 | ) -> __m512i { |
1289 | let mul: u16x32 = _mm512_mulhi_epu16(a, b).as_u16x32(); |
transmute(simd_select_bitmask(k, mul, src.as_u16x32()))
1291 | } |
1292 | |
1293 | /// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1294 | /// |
1295 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epu16&expand=3972) |
1296 | #[inline ] |
1297 | #[target_feature (enable = "avx512bw" )] |
1298 | #[cfg_attr (test, assert_instr(vpmulhuw))] |
1299 | pub unsafe fn _mm512_maskz_mulhi_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { |
1300 | let mul: u16x32 = _mm512_mulhi_epu16(a, b).as_u16x32(); |
1301 | let zero: u16x32 = _mm512_setzero_si512().as_u16x32(); |
transmute(simd_select_bitmask(k, mul, zero))
1303 | } |
1304 | |
1305 | /// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1306 | /// |
1307 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epu16&expand=3968) |
1308 | #[inline ] |
1309 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1310 | #[cfg_attr (test, assert_instr(vpmulhuw))] |
1311 | pub unsafe fn _mm256_mask_mulhi_epu16( |
1312 | src: __m256i, |
1313 | k: __mmask16, |
1314 | a: __m256i, |
1315 | b: __m256i, |
1316 | ) -> __m256i { |
1317 | let mul: u16x16 = _mm256_mulhi_epu16(a, b).as_u16x16(); |
transmute(simd_select_bitmask(k, mul, src.as_u16x16()))
1319 | } |
1320 | |
1321 | /// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1322 | /// |
1323 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epu16&expand=3969) |
1324 | #[inline ] |
1325 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1326 | #[cfg_attr (test, assert_instr(vpmulhuw))] |
1327 | pub unsafe fn _mm256_maskz_mulhi_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { |
1328 | let mul: u16x16 = _mm256_mulhi_epu16(a, b).as_u16x16(); |
1329 | let zero: u16x16 = _mm256_setzero_si256().as_u16x16(); |
transmute(simd_select_bitmask(k, mul, zero))
1331 | } |
1332 | |
1333 | /// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1334 | /// |
1335 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epu16&expand=3965) |
1336 | #[inline ] |
1337 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1338 | #[cfg_attr (test, assert_instr(vpmulhuw))] |
1339 | pub unsafe fn _mm_mask_mulhi_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
1340 | let mul: u16x8 = _mm_mulhi_epu16(a, b).as_u16x8(); |
transmute(simd_select_bitmask(k, mul, src.as_u16x8()))
1342 | } |
1343 | |
1344 | /// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1345 | /// |
1346 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epu16&expand=3966) |
1347 | #[inline ] |
1348 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1349 | #[cfg_attr (test, assert_instr(vpmulhuw))] |
1350 | pub unsafe fn _mm_maskz_mulhi_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
1351 | let mul: u16x8 = _mm_mulhi_epu16(a, b).as_u16x8(); |
1352 | let zero: u16x8 = _mm_setzero_si128().as_u16x8(); |
transmute(simd_select_bitmask(k, mul, zero))
1354 | } |
1355 | |
1356 | /// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst. |
1357 | /// |
1358 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epi16&expand=3962) |
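///
/// A minimal illustrative sketch (not part of the original documentation; it assumes `avx512bw`
/// support):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi16(1000);
///     let b = _mm512_set1_epi16(1000);
///     // 1000 * 1000 = 1_000_000 = 0x000F_4240, so each result lane is 0x000F = 15.
///     let r = _mm512_mulhi_epi16(a, b);
/// }
/// ```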
1359 | #[inline ] |
1360 | #[target_feature (enable = "avx512bw" )] |
1361 | #[cfg_attr (test, assert_instr(vpmulhw))] |
1362 | pub unsafe fn _mm512_mulhi_epi16(a: __m512i, b: __m512i) -> __m512i { |
transmute(vpmulhw(a.as_i16x32(), b.as_i16x32()))
1364 | } |
1365 | |
1366 | /// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1367 | /// |
1368 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epi16&expand=3960) |
1369 | #[inline ] |
1370 | #[target_feature (enable = "avx512bw" )] |
1371 | #[cfg_attr (test, assert_instr(vpmulhw))] |
1372 | pub unsafe fn _mm512_mask_mulhi_epi16( |
1373 | src: __m512i, |
1374 | k: __mmask32, |
1375 | a: __m512i, |
1376 | b: __m512i, |
1377 | ) -> __m512i { |
1378 | let mul: i16x32 = _mm512_mulhi_epi16(a, b).as_i16x32(); |
transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
1380 | } |
1381 | |
1382 | /// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1383 | /// |
1384 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epi16&expand=3961) |
1385 | #[inline ] |
1386 | #[target_feature (enable = "avx512bw" )] |
1387 | #[cfg_attr (test, assert_instr(vpmulhw))] |
1388 | pub unsafe fn _mm512_maskz_mulhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { |
1389 | let mul: i16x32 = _mm512_mulhi_epi16(a, b).as_i16x32(); |
1390 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
transmute(simd_select_bitmask(k, mul, zero))
1392 | } |
1393 | |
1394 | /// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1395 | /// |
1396 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epi16&expand=3957) |
1397 | #[inline ] |
1398 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1399 | #[cfg_attr (test, assert_instr(vpmulhw))] |
1400 | pub unsafe fn _mm256_mask_mulhi_epi16( |
1401 | src: __m256i, |
1402 | k: __mmask16, |
1403 | a: __m256i, |
1404 | b: __m256i, |
1405 | ) -> __m256i { |
1406 | let mul: i16x16 = _mm256_mulhi_epi16(a, b).as_i16x16(); |
transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
1408 | } |
1409 | |
1410 | /// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1411 | /// |
1412 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epi16&expand=3958) |
1413 | #[inline ] |
1414 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1415 | #[cfg_attr (test, assert_instr(vpmulhw))] |
1416 | pub unsafe fn _mm256_maskz_mulhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { |
1417 | let mul: i16x16 = _mm256_mulhi_epi16(a, b).as_i16x16(); |
1418 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
transmute(simd_select_bitmask(k, mul, zero))
1420 | } |
1421 | |
1422 | /// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1423 | /// |
1424 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epi16&expand=3954) |
1425 | #[inline ] |
1426 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1427 | #[cfg_attr (test, assert_instr(vpmulhw))] |
1428 | pub unsafe fn _mm_mask_mulhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
1429 | let mul: i16x8 = _mm_mulhi_epi16(a, b).as_i16x8(); |
transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
1431 | } |
1432 | |
1433 | /// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1434 | /// |
1435 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epi16&expand=3955) |
1436 | #[inline ] |
1437 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1438 | #[cfg_attr (test, assert_instr(vpmulhw))] |
1439 | pub unsafe fn _mm_maskz_mulhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
1440 | let mul: i16x8 = _mm_mulhi_epi16(a, b).as_i16x8(); |
1441 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
transmute(simd_select_bitmask(k, mul, zero))
1443 | } |
1444 | |
1445 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst. |
1446 | /// |
1447 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhrs_epi16&expand=3986) |
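///
/// In fixed-point terms this is a Q15 multiply with rounding. A minimal illustrative sketch
/// (not part of the original documentation; it assumes `avx512bw` support):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi16(0x4000); // 0.5 in Q15
///     let b = _mm512_set1_epi16(0x2000); // 0.25 in Q15
///     // (0x4000 * 0x2000) >> 14 = 0x2000; adding 1 and shifting right once gives
///     // 0x1000, i.e. 0.125 in Q15.
///     let r = _mm512_mulhrs_epi16(a, b);
/// }
/// ```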
1448 | #[inline ] |
1449 | #[target_feature (enable = "avx512bw" )] |
1450 | #[cfg_attr (test, assert_instr(vpmulhrsw))] |
1451 | pub unsafe fn _mm512_mulhrs_epi16(a: __m512i, b: __m512i) -> __m512i { |
transmute(vpmulhrsw(a.as_i16x32(), b.as_i16x32()))
1453 | } |
1454 | |
1455 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1456 | /// |
1457 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhrs_epi16&expand=3984) |
1458 | #[inline ] |
1459 | #[target_feature (enable = "avx512bw" )] |
1460 | #[cfg_attr (test, assert_instr(vpmulhrsw))] |
1461 | pub unsafe fn _mm512_mask_mulhrs_epi16( |
1462 | src: __m512i, |
1463 | k: __mmask32, |
1464 | a: __m512i, |
1465 | b: __m512i, |
1466 | ) -> __m512i { |
1467 | let mul: i16x32 = _mm512_mulhrs_epi16(a, b).as_i16x32(); |
transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
1469 | } |
1470 | |
1471 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1472 | /// |
1473 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhrs_epi16&expand=3985) |
1474 | #[inline ] |
1475 | #[target_feature (enable = "avx512bw" )] |
1476 | #[cfg_attr (test, assert_instr(vpmulhrsw))] |
1477 | pub unsafe fn _mm512_maskz_mulhrs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { |
1478 | let mul: i16x32 = _mm512_mulhrs_epi16(a, b).as_i16x32(); |
1479 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
transmute(simd_select_bitmask(k, mul, zero))
1481 | } |
1482 | |
1483 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1484 | /// |
1485 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhrs_epi16&expand=3981) |
1486 | #[inline ] |
1487 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1488 | #[cfg_attr (test, assert_instr(vpmulhrsw))] |
1489 | pub unsafe fn _mm256_mask_mulhrs_epi16( |
1490 | src: __m256i, |
1491 | k: __mmask16, |
1492 | a: __m256i, |
1493 | b: __m256i, |
1494 | ) -> __m256i { |
1495 | let mul: i16x16 = _mm256_mulhrs_epi16(a, b).as_i16x16(); |
transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
1497 | } |
1498 | |
1499 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1500 | /// |
1501 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhrs_epi16&expand=3982) |
1502 | #[inline ] |
1503 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1504 | #[cfg_attr (test, assert_instr(vpmulhrsw))] |
1505 | pub unsafe fn _mm256_maskz_mulhrs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { |
1506 | let mul: i16x16 = _mm256_mulhrs_epi16(a, b).as_i16x16(); |
1507 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
transmute(simd_select_bitmask(k, mul, zero))
1509 | } |
1510 | |
1511 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1512 | /// |
1513 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhrs_epi16&expand=3978) |
1514 | #[inline ] |
1515 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1516 | #[cfg_attr (test, assert_instr(vpmulhrsw))] |
1517 | pub unsafe fn _mm_mask_mulhrs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
1518 | let mul: i16x8 = _mm_mulhrs_epi16(a, b).as_i16x8(); |
transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
1520 | } |
1521 | |
1522 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1523 | /// |
1524 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhrs_epi16&expand=3979) |
1525 | #[inline ] |
1526 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1527 | #[cfg_attr (test, assert_instr(vpmulhrsw))] |
1528 | pub unsafe fn _mm_maskz_mulhrs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
1529 | let mul: i16x8 = _mm_mulhrs_epi16(a, b).as_i16x8(); |
1530 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
transmute(simd_select_bitmask(k, mul, zero))
1532 | } |
1533 | |
1534 | /// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst. |
1535 | /// |
1536 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi16&expand=3996) |
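///
/// A minimal illustrative sketch (not part of the original documentation; it assumes `avx512bw`
/// support):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi16(300);
///     let b = _mm512_set1_epi16(400);
///     // 300 * 400 = 120_000; the low 16 bits are 120_000 - 65_536 = 54_464 (-11_072 as i16).
///     let r = _mm512_mullo_epi16(a, b);
/// }
/// ```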
1537 | #[inline ] |
1538 | #[target_feature (enable = "avx512bw" )] |
1539 | #[cfg_attr (test, assert_instr(vpmullw))] |
1540 | pub unsafe fn _mm512_mullo_epi16(a: __m512i, b: __m512i) -> __m512i { |
transmute(simd_mul(a.as_i16x32(), b.as_i16x32()))
1542 | } |
1543 | |
1544 | /// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1545 | /// |
1546 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi16&expand=3994) |
1547 | #[inline ] |
1548 | #[target_feature (enable = "avx512bw" )] |
1549 | #[cfg_attr (test, assert_instr(vpmullw))] |
1550 | pub unsafe fn _mm512_mask_mullo_epi16( |
1551 | src: __m512i, |
1552 | k: __mmask32, |
1553 | a: __m512i, |
1554 | b: __m512i, |
1555 | ) -> __m512i { |
1556 | let mul: i16x32 = _mm512_mullo_epi16(a, b).as_i16x32(); |
transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
1558 | } |
1559 | |
1560 | /// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1561 | /// |
1562 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi16&expand=3995) |
1563 | #[inline ] |
1564 | #[target_feature (enable = "avx512bw" )] |
1565 | #[cfg_attr (test, assert_instr(vpmullw))] |
1566 | pub unsafe fn _mm512_maskz_mullo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { |
1567 | let mul: i16x32 = _mm512_mullo_epi16(a, b).as_i16x32(); |
1568 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
transmute(simd_select_bitmask(k, mul, zero))
1570 | } |
1571 | |
1572 | /// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1573 | /// |
1574 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi16&expand=3991) |
1575 | #[inline ] |
1576 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1577 | #[cfg_attr (test, assert_instr(vpmullw))] |
1578 | pub unsafe fn _mm256_mask_mullo_epi16( |
1579 | src: __m256i, |
1580 | k: __mmask16, |
1581 | a: __m256i, |
1582 | b: __m256i, |
1583 | ) -> __m256i { |
1584 | let mul: i16x16 = _mm256_mullo_epi16(a, b).as_i16x16(); |
transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
1586 | } |
1587 | |
1588 | /// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1589 | /// |
1590 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi16&expand=3992) |
1591 | #[inline ] |
1592 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1593 | #[cfg_attr (test, assert_instr(vpmullw))] |
1594 | pub unsafe fn _mm256_maskz_mullo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { |
1595 | let mul: i16x16 = _mm256_mullo_epi16(a, b).as_i16x16(); |
1596 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
transmute(simd_select_bitmask(k, mul, zero))
1598 | } |
1599 | |
1600 | /// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1601 | /// |
1602 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi16&expand=3988) |
1603 | #[inline ] |
1604 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1605 | #[cfg_attr (test, assert_instr(vpmullw))] |
1606 | pub unsafe fn _mm_mask_mullo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
1607 | let mul: i16x8 = _mm_mullo_epi16(a, b).as_i16x8(); |
transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
1609 | } |
1610 | |
1611 | /// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1612 | /// |
1613 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi16&expand=3989) |
1614 | #[inline ] |
1615 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1616 | #[cfg_attr (test, assert_instr(vpmullw))] |
1617 | pub unsafe fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
1618 | let mul: i16x8 = _mm_mullo_epi16(a, b).as_i16x8(); |
1619 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
transmute(simd_select_bitmask(k, mul, zero))
1621 | } |
1622 | |
1623 | /// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst. |
1624 | /// |
1625 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu16&expand=3609) |
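///
/// A minimal illustrative sketch (not part of the original documentation; it assumes `avx512bw`
/// support). The comparison is unsigned, so an all-ones lane wins:
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi16(-1); // bit pattern 0xFFFF, i.e. 65535 unsigned
///     let b = _mm512_set1_epi16(1);
///     // Unsigned compare: 65535 > 1, so every result lane is 0xFFFF.
///     let r = _mm512_max_epu16(a, b);
/// }
/// ```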
1626 | #[inline ] |
1627 | #[target_feature (enable = "avx512bw" )] |
1628 | #[cfg_attr (test, assert_instr(vpmaxuw))] |
1629 | pub unsafe fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i { |
transmute(vpmaxuw(a.as_u16x32(), b.as_u16x32()))
1631 | } |
1632 | |
1633 | /// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1634 | /// |
1635 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu16&expand=3607) |
1636 | #[inline ] |
1637 | #[target_feature (enable = "avx512bw" )] |
1638 | #[cfg_attr (test, assert_instr(vpmaxuw))] |
1639 | pub unsafe fn _mm512_mask_max_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { |
1640 | let max: u16x32 = _mm512_max_epu16(a, b).as_u16x32(); |
transmute(simd_select_bitmask(k, max, src.as_u16x32()))
1642 | } |
1643 | |
1644 | /// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1645 | /// |
1646 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu16&expand=3608) |
1647 | #[inline ] |
1648 | #[target_feature (enable = "avx512bw" )] |
1649 | #[cfg_attr (test, assert_instr(vpmaxuw))] |
1650 | pub unsafe fn _mm512_maskz_max_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { |
1651 | let max: u16x32 = _mm512_max_epu16(a, b).as_u16x32(); |
1652 | let zero: u16x32 = _mm512_setzero_si512().as_u16x32(); |
transmute(simd_select_bitmask(k, max, zero))
1654 | } |
1655 | |
1656 | /// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1657 | /// |
1658 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu16&expand=3604) |
1659 | #[inline ] |
1660 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1661 | #[cfg_attr (test, assert_instr(vpmaxuw))] |
1662 | pub unsafe fn _mm256_mask_max_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { |
1663 | let max: u16x16 = _mm256_max_epu16(a, b).as_u16x16(); |
transmute(simd_select_bitmask(k, max, src.as_u16x16()))
1665 | } |
1666 | |
1667 | /// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1668 | /// |
1669 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu16&expand=3605) |
1670 | #[inline ] |
1671 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1672 | #[cfg_attr (test, assert_instr(vpmaxuw))] |
1673 | pub unsafe fn _mm256_maskz_max_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { |
1674 | let max: u16x16 = _mm256_max_epu16(a, b).as_u16x16(); |
1675 | let zero: u16x16 = _mm256_setzero_si256().as_u16x16(); |
transmute(simd_select_bitmask(k, max, zero))
1677 | } |
1678 | |
1679 | /// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1680 | /// |
1681 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu16&expand=3601) |
1682 | #[inline ] |
1683 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1684 | #[cfg_attr (test, assert_instr(vpmaxuw))] |
1685 | pub unsafe fn _mm_mask_max_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
1686 | let max: u16x8 = _mm_max_epu16(a, b).as_u16x8(); |
transmute(simd_select_bitmask(k, max, src.as_u16x8()))
1688 | } |
1689 | |
1690 | /// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1691 | /// |
1692 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu16&expand=3602) |
1693 | #[inline ] |
1694 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1695 | #[cfg_attr (test, assert_instr(vpmaxuw))] |
1696 | pub unsafe fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
1697 | let max: u16x8 = _mm_max_epu16(a, b).as_u16x8(); |
1698 | let zero: u16x8 = _mm_setzero_si128().as_u16x8(); |
transmute(simd_select_bitmask(k, max, zero))
1700 | } |
1701 | |
1702 | /// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst. |
1703 | /// |
1704 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu8&expand=3636) |
1705 | #[inline ] |
1706 | #[target_feature (enable = "avx512bw" )] |
1707 | #[cfg_attr (test, assert_instr(vpmaxub))] |
1708 | pub unsafe fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i { |
transmute(vpmaxub(a.as_u8x64(), b.as_u8x64()))
1710 | } |
1711 | |
1712 | /// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1713 | /// |
1714 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu8&expand=3634) |
1715 | #[inline ] |
1716 | #[target_feature (enable = "avx512bw" )] |
1717 | #[cfg_attr (test, assert_instr(vpmaxub))] |
1718 | pub unsafe fn _mm512_mask_max_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { |
1719 | let max: u8x64 = _mm512_max_epu8(a, b).as_u8x64(); |
transmute(simd_select_bitmask(k, max, src.as_u8x64()))
1721 | } |
1722 | |
1723 | /// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1724 | /// |
1725 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu8&expand=3635) |
1726 | #[inline ] |
1727 | #[target_feature (enable = "avx512bw" )] |
1728 | #[cfg_attr (test, assert_instr(vpmaxub))] |
1729 | pub unsafe fn _mm512_maskz_max_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { |
1730 | let max: u8x64 = _mm512_max_epu8(a, b).as_u8x64(); |
1731 | let zero: u8x64 = _mm512_setzero_si512().as_u8x64(); |
transmute(simd_select_bitmask(k, max, zero))
1733 | } |
1734 | |
1735 | /// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1736 | /// |
1737 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu8&expand=3631) |
1738 | #[inline ] |
1739 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1740 | #[cfg_attr (test, assert_instr(vpmaxub))] |
1741 | pub unsafe fn _mm256_mask_max_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { |
1742 | let max: u8x32 = _mm256_max_epu8(a, b).as_u8x32(); |
transmute(simd_select_bitmask(k, max, src.as_u8x32()))
1744 | } |
1745 | |
1746 | /// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1747 | /// |
1748 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu8&expand=3632) |
1749 | #[inline ] |
1750 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1751 | #[cfg_attr (test, assert_instr(vpmaxub))] |
1752 | pub unsafe fn _mm256_maskz_max_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { |
1753 | let max: u8x32 = _mm256_max_epu8(a, b).as_u8x32(); |
1754 | let zero: u8x32 = _mm256_setzero_si256().as_u8x32(); |
transmute(simd_select_bitmask(k, max, zero))
1756 | } |
1757 | |
1758 | /// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1759 | /// |
1760 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu8&expand=3628) |
1761 | #[inline ] |
1762 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1763 | #[cfg_attr (test, assert_instr(vpmaxub))] |
1764 | pub unsafe fn _mm_mask_max_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
1765 | let max: u8x16 = _mm_max_epu8(a, b).as_u8x16(); |
transmute(simd_select_bitmask(k, max, src.as_u8x16()))
1767 | } |
1768 | |
1769 | /// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1770 | /// |
1771 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu8&expand=3629) |
1772 | #[inline ] |
1773 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1774 | #[cfg_attr (test, assert_instr(vpmaxub))] |
1775 | pub unsafe fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
1776 | let max: u8x16 = _mm_max_epu8(a, b).as_u8x16(); |
1777 | let zero: u8x16 = _mm_setzero_si128().as_u8x16(); |
transmute(simd_select_bitmask(k, max, zero))
1779 | } |
1780 | |
1781 | /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst. |
1782 | /// |
1783 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi16&expand=3573) |
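///
/// A minimal illustrative sketch (not part of the original documentation; it assumes `avx512bw`
/// support). Unlike `_mm512_max_epu16`, the comparison here is signed:
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi16(-1);
///     let b = _mm512_set1_epi16(1);
///     // Signed compare: 1 > -1, so every result lane is 1.
///     let r = _mm512_max_epi16(a, b);
/// }
/// ```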
1784 | #[inline ] |
1785 | #[target_feature (enable = "avx512bw" )] |
1786 | #[cfg_attr (test, assert_instr(vpmaxsw))] |
1787 | pub unsafe fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i { |
transmute(vpmaxsw(a.as_i16x32(), b.as_i16x32()))
1789 | } |
1790 | |
1791 | /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1792 | /// |
1793 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi16&expand=3571) |
1794 | #[inline ] |
1795 | #[target_feature (enable = "avx512bw" )] |
1796 | #[cfg_attr (test, assert_instr(vpmaxsw))] |
1797 | pub unsafe fn _mm512_mask_max_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { |
1798 | let max: i16x32 = _mm512_max_epi16(a, b).as_i16x32(); |
transmute(simd_select_bitmask(k, max, src.as_i16x32()))
1800 | } |
1801 | |
1802 | /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1803 | /// |
1804 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi16&expand=3572) |
1805 | #[inline ] |
1806 | #[target_feature (enable = "avx512bw" )] |
1807 | #[cfg_attr (test, assert_instr(vpmaxsw))] |
1808 | pub unsafe fn _mm512_maskz_max_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { |
1809 | let max: i16x32 = _mm512_max_epi16(a, b).as_i16x32(); |
1810 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
transmute(simd_select_bitmask(k, max, zero))
1812 | } |
1813 | |
1814 | /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1815 | /// |
1816 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi16&expand=3568) |
1817 | #[inline ] |
1818 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1819 | #[cfg_attr (test, assert_instr(vpmaxsw))] |
1820 | pub unsafe fn _mm256_mask_max_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { |
1821 | let max: i16x16 = _mm256_max_epi16(a, b).as_i16x16(); |
transmute(simd_select_bitmask(k, max, src.as_i16x16()))
1823 | } |
1824 | |
1825 | /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1826 | /// |
1827 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi16&expand=3569) |
1828 | #[inline ] |
1829 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1830 | #[cfg_attr (test, assert_instr(vpmaxsw))] |
1831 | pub unsafe fn _mm256_maskz_max_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { |
1832 | let max: i16x16 = _mm256_max_epi16(a, b).as_i16x16(); |
1833 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
transmute(simd_select_bitmask(k, max, zero))
1835 | } |
1836 | |
1837 | /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1838 | /// |
1839 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi16&expand=3565) |
1840 | #[inline ] |
1841 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1842 | #[cfg_attr (test, assert_instr(vpmaxsw))] |
1843 | pub unsafe fn _mm_mask_max_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
1844 | let max: i16x8 = _mm_max_epi16(a, b).as_i16x8(); |
transmute(simd_select_bitmask(k, max, src.as_i16x8()))
1846 | } |
1847 | |
1848 | /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1849 | /// |
1850 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi16&expand=3566) |
1851 | #[inline ] |
1852 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1853 | #[cfg_attr (test, assert_instr(vpmaxsw))] |
1854 | pub unsafe fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
1855 | let max: i16x8 = _mm_max_epi16(a, b).as_i16x8(); |
1856 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
transmute(simd_select_bitmask(k, max, zero))
1858 | } |
1859 | |
1860 | /// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst. |
1861 | /// |
1862 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi8&expand=3600) |
1863 | #[inline ] |
1864 | #[target_feature (enable = "avx512bw" )] |
1865 | #[cfg_attr (test, assert_instr(vpmaxsb))] |
1866 | pub unsafe fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i { |
transmute(vpmaxsb(a.as_i8x64(), b.as_i8x64()))
1868 | } |
1869 | |
1870 | /// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1871 | /// |
1872 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi8&expand=3598) |
1873 | #[inline ] |
1874 | #[target_feature (enable = "avx512bw" )] |
1875 | #[cfg_attr (test, assert_instr(vpmaxsb))] |
1876 | pub unsafe fn _mm512_mask_max_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { |
1877 | let max: i8x64 = _mm512_max_epi8(a, b).as_i8x64(); |
transmute(simd_select_bitmask(k, max, src.as_i8x64()))
1879 | } |
1880 | |
1881 | /// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1882 | /// |
1883 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi8&expand=3599) |
1884 | #[inline ] |
1885 | #[target_feature (enable = "avx512bw" )] |
1886 | #[cfg_attr (test, assert_instr(vpmaxsb))] |
1887 | pub unsafe fn _mm512_maskz_max_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { |
1888 | let max: i8x64 = _mm512_max_epi8(a, b).as_i8x64(); |
1889 | let zero: i8x64 = _mm512_setzero_si512().as_i8x64(); |
transmute(simd_select_bitmask(k, max, zero))
1891 | } |
1892 | |
1893 | /// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1894 | /// |
1895 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi8&expand=3595) |
1896 | #[inline ] |
1897 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1898 | #[cfg_attr (test, assert_instr(vpmaxsb))] |
1899 | pub unsafe fn _mm256_mask_max_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { |
1900 | let max: i8x32 = _mm256_max_epi8(a, b).as_i8x32(); |
transmute(simd_select_bitmask(k, max, src.as_i8x32()))
1902 | } |
1903 | |
1904 | /// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1905 | /// |
1906 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi8&expand=3596) |
1907 | #[inline ] |
1908 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1909 | #[cfg_attr (test, assert_instr(vpmaxsb))] |
1910 | pub unsafe fn _mm256_maskz_max_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { |
1911 | let max: i8x32 = _mm256_max_epi8(a, b).as_i8x32(); |
1912 | let zero: i8x32 = _mm256_setzero_si256().as_i8x32(); |
transmute(simd_select_bitmask(k, max, zero))
1914 | } |
1915 | |
1916 | /// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1917 | /// |
1918 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi8&expand=3592) |
1919 | #[inline ] |
1920 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1921 | #[cfg_attr (test, assert_instr(vpmaxsb))] |
1922 | pub unsafe fn _mm_mask_max_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
1923 | let max: i8x16 = _mm_max_epi8(a, b).as_i8x16(); |
transmute(simd_select_bitmask(k, max, src.as_i8x16()))
1925 | } |
1926 | |
1927 | /// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1928 | /// |
1929 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi8&expand=3593) |
1930 | #[inline ] |
1931 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1932 | #[cfg_attr (test, assert_instr(vpmaxsb))] |
1933 | pub unsafe fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
1934 | let max: i8x16 = _mm_max_epi8(a, b).as_i8x16(); |
1935 | let zero: i8x16 = _mm_setzero_si128().as_i8x16(); |
transmute(simd_select_bitmask(k, max, zero))
1937 | } |
1938 | |
1939 | /// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst. |
1940 | /// |
1941 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu16&expand=3723) |
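///
/// # Example
///
/// An illustrative sketch (values chosen arbitrarily), assuming `avx512bw` is available
/// at runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
/// unsafe {
///     let a = _mm512_set1_epi16(7);
///     let b = _mm512_set1_epi16(3);
///     // every unsigned 16-bit lane of `r` holds 3, the smaller input
///     let r = _mm512_min_epu16(a, b);
/// }
/// ```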
1942 | #[inline ] |
1943 | #[target_feature (enable = "avx512bw" )] |
1944 | #[cfg_attr (test, assert_instr(vpminuw))] |
1945 | pub unsafe fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i { |
transmute(vpminuw(a.as_u16x32(), b.as_u16x32()))
1947 | } |
1948 | |
1949 | /// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1950 | /// |
1951 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu16&expand=3721) |
1952 | #[inline ] |
1953 | #[target_feature (enable = "avx512bw" )] |
1954 | #[cfg_attr (test, assert_instr(vpminuw))] |
1955 | pub unsafe fn _mm512_mask_min_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { |
1956 | let min: u16x32 = _mm512_min_epu16(a, b).as_u16x32(); |
transmute(simd_select_bitmask(k, min, src.as_u16x32()))
1958 | } |
1959 | |
1960 | /// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1961 | /// |
1962 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu16&expand=3722) |
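///
/// # Example
///
/// A sketch of the zeromask behaviour (hypothetical values, assuming `avx512bw` support):
///
/// ```ignore
/// use core::arch::x86_64::*;
/// unsafe {
///     let a = _mm512_set1_epi16(7);
///     let b = _mm512_set1_epi16(3);
///     // lanes whose mask bit is set receive min(a, b) = 3; the rest are zeroed
///     let r = _mm512_maskz_min_epu16(0b1111, a, b);
/// }
/// ```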
1963 | #[inline ] |
1964 | #[target_feature (enable = "avx512bw" )] |
1965 | #[cfg_attr (test, assert_instr(vpminuw))] |
1966 | pub unsafe fn _mm512_maskz_min_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { |
1967 | let min: u16x32 = _mm512_min_epu16(a, b).as_u16x32(); |
1968 | let zero: u16x32 = _mm512_setzero_si512().as_u16x32(); |
transmute(simd_select_bitmask(k, min, zero))
1970 | } |
1971 | |
1972 | /// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1973 | /// |
1974 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu16&expand=3718) |
1975 | #[inline ] |
1976 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1977 | #[cfg_attr (test, assert_instr(vpminuw))] |
1978 | pub unsafe fn _mm256_mask_min_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { |
1979 | let min: u16x16 = _mm256_min_epu16(a, b).as_u16x16(); |
transmute(simd_select_bitmask(k, min, src.as_u16x16()))
1981 | } |
1982 | |
1983 | /// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1984 | /// |
1985 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu16&expand=3719) |
1986 | #[inline ] |
1987 | #[target_feature (enable = "avx512bw,avx512vl" )] |
1988 | #[cfg_attr (test, assert_instr(vpminuw))] |
1989 | pub unsafe fn _mm256_maskz_min_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { |
1990 | let min: u16x16 = _mm256_min_epu16(a, b).as_u16x16(); |
1991 | let zero: u16x16 = _mm256_setzero_si256().as_u16x16(); |
transmute(simd_select_bitmask(k, min, zero))
1993 | } |
1994 | |
1995 | /// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
1996 | /// |
1997 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu16&expand=3715) |
1998 | #[inline ] |
1999 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2000 | #[cfg_attr (test, assert_instr(vpminuw))] |
2001 | pub unsafe fn _mm_mask_min_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
2002 | let min: u16x8 = _mm_min_epu16(a, b).as_u16x8(); |
transmute(simd_select_bitmask(k, min, src.as_u16x8()))
2004 | } |
2005 | |
2006 | /// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2007 | /// |
2008 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu16&expand=3716) |
2009 | #[inline ] |
2010 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2011 | #[cfg_attr (test, assert_instr(vpminuw))] |
2012 | pub unsafe fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
2013 | let min: u16x8 = _mm_min_epu16(a, b).as_u16x8(); |
2014 | let zero: u16x8 = _mm_setzero_si128().as_u16x8(); |
transmute(simd_select_bitmask(k, min, zero))
2016 | } |
2017 | |
2018 | /// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst. |
2019 | /// |
2020 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu8&expand=3750) |
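///
/// # Example
///
/// A brief usage sketch (inputs are made up, `avx512bw` assumed available):
///
/// ```ignore
/// use core::arch::x86_64::*;
/// unsafe {
///     let a = _mm512_set1_epi8(7);
///     let b = _mm512_set1_epi8(3);
///     // every unsigned 8-bit lane of `r` holds 3, the smaller input
///     let r = _mm512_min_epu8(a, b);
/// }
/// ```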
2021 | #[inline ] |
2022 | #[target_feature (enable = "avx512bw" )] |
2023 | #[cfg_attr (test, assert_instr(vpminub))] |
2024 | pub unsafe fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i { |
transmute(vpminub(a.as_u8x64(), b.as_u8x64()))
2026 | } |
2027 | |
2028 | /// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
2029 | /// |
2030 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu8&expand=3748) |
2031 | #[inline ] |
2032 | #[target_feature (enable = "avx512bw" )] |
2033 | #[cfg_attr (test, assert_instr(vpminub))] |
2034 | pub unsafe fn _mm512_mask_min_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { |
2035 | let min: u8x64 = _mm512_min_epu8(a, b).as_u8x64(); |
transmute(simd_select_bitmask(k, min, src.as_u8x64()))
2037 | } |
2038 | |
2039 | /// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2040 | /// |
2041 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu8&expand=3749) |
2042 | #[inline ] |
2043 | #[target_feature (enable = "avx512bw" )] |
2044 | #[cfg_attr (test, assert_instr(vpminub))] |
2045 | pub unsafe fn _mm512_maskz_min_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { |
2046 | let min: u8x64 = _mm512_min_epu8(a, b).as_u8x64(); |
2047 | let zero: u8x64 = _mm512_setzero_si512().as_u8x64(); |
transmute(simd_select_bitmask(k, min, zero))
2049 | } |
2050 | |
2051 | /// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
2052 | /// |
2053 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu8&expand=3745) |
2054 | #[inline ] |
2055 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2056 | #[cfg_attr (test, assert_instr(vpminub))] |
2057 | pub unsafe fn _mm256_mask_min_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { |
2058 | let min: u8x32 = _mm256_min_epu8(a, b).as_u8x32(); |
transmute(simd_select_bitmask(k, min, src.as_u8x32()))
2060 | } |
2061 | |
2062 | /// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2063 | /// |
2064 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu8&expand=3746) |
2065 | #[inline ] |
2066 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2067 | #[cfg_attr (test, assert_instr(vpminub))] |
2068 | pub unsafe fn _mm256_maskz_min_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { |
2069 | let min: u8x32 = _mm256_min_epu8(a, b).as_u8x32(); |
2070 | let zero: u8x32 = _mm256_setzero_si256().as_u8x32(); |
transmute(simd_select_bitmask(k, min, zero))
2072 | } |
2073 | |
2074 | /// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
2075 | /// |
2076 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu8&expand=3742) |
2077 | #[inline ] |
2078 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2079 | #[cfg_attr (test, assert_instr(vpminub))] |
2080 | pub unsafe fn _mm_mask_min_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
2081 | let min: u8x16 = _mm_min_epu8(a, b).as_u8x16(); |
transmute(simd_select_bitmask(k, min, src.as_u8x16()))
2083 | } |
2084 | |
2085 | /// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2086 | /// |
2087 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu8&expand=3743) |
2088 | #[inline ] |
2089 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2090 | #[cfg_attr (test, assert_instr(vpminub))] |
2091 | pub unsafe fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
2092 | let min: u8x16 = _mm_min_epu8(a, b).as_u8x16(); |
2093 | let zero: u8x16 = _mm_setzero_si128().as_u8x16(); |
transmute(simd_select_bitmask(k, min, zero))
2095 | } |
2096 | |
2097 | /// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst. |
2098 | /// |
2099 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi16&expand=3687) |
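///
/// # Example
///
/// A brief usage sketch (inputs are made up, `avx512bw` assumed available); note that
/// the comparison is signed:
///
/// ```ignore
/// use core::arch::x86_64::*;
/// unsafe {
///     let a = _mm512_set1_epi16(-3);
///     let b = _mm512_set1_epi16(5);
///     // every signed 16-bit lane of `r` holds -3
///     let r = _mm512_min_epi16(a, b);
/// }
/// ```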
2100 | #[inline ] |
2101 | #[target_feature (enable = "avx512bw" )] |
2102 | #[cfg_attr (test, assert_instr(vpminsw))] |
2103 | pub unsafe fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i { |
transmute(vpminsw(a.as_i16x32(), b.as_i16x32()))
2105 | } |
2106 | |
2107 | /// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
2108 | /// |
2109 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi16&expand=3685) |
2110 | #[inline ] |
2111 | #[target_feature (enable = "avx512bw" )] |
2112 | #[cfg_attr (test, assert_instr(vpminsw))] |
2113 | pub unsafe fn _mm512_mask_min_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { |
2114 | let min: i16x32 = _mm512_min_epi16(a, b).as_i16x32(); |
transmute(simd_select_bitmask(k, min, src.as_i16x32()))
2116 | } |
2117 | |
2118 | /// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2119 | /// |
2120 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi16&expand=3686) |
2121 | #[inline ] |
2122 | #[target_feature (enable = "avx512bw" )] |
2123 | #[cfg_attr (test, assert_instr(vpminsw))] |
2124 | pub unsafe fn _mm512_maskz_min_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { |
2125 | let min: i16x32 = _mm512_min_epi16(a, b).as_i16x32(); |
2126 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
transmute(simd_select_bitmask(k, min, zero))
2128 | } |
2129 | |
2130 | /// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
2131 | /// |
2132 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi16&expand=3682) |
2133 | #[inline ] |
2134 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2135 | #[cfg_attr (test, assert_instr(vpminsw))] |
2136 | pub unsafe fn _mm256_mask_min_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { |
2137 | let min: i16x16 = _mm256_min_epi16(a, b).as_i16x16(); |
transmute(simd_select_bitmask(k, min, src.as_i16x16()))
2139 | } |
2140 | |
2141 | /// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2142 | /// |
2143 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi16&expand=3683) |
2144 | #[inline ] |
2145 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2146 | #[cfg_attr (test, assert_instr(vpminsw))] |
2147 | pub unsafe fn _mm256_maskz_min_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { |
2148 | let min: i16x16 = _mm256_min_epi16(a, b).as_i16x16(); |
2149 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
transmute(simd_select_bitmask(k, min, zero))
2151 | } |
2152 | |
2153 | /// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
2154 | /// |
2155 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi16&expand=3679) |
2156 | #[inline ] |
2157 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2158 | #[cfg_attr (test, assert_instr(vpminsw))] |
2159 | pub unsafe fn _mm_mask_min_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
2160 | let min: i16x8 = _mm_min_epi16(a, b).as_i16x8(); |
transmute(simd_select_bitmask(k, min, src.as_i16x8()))
2162 | } |
2163 | |
2164 | /// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2165 | /// |
2166 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi16&expand=3680) |
2167 | #[inline ] |
2168 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2169 | #[cfg_attr (test, assert_instr(vpminsw))] |
2170 | pub unsafe fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
2171 | let min: i16x8 = _mm_min_epi16(a, b).as_i16x8(); |
2172 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
transmute(simd_select_bitmask(k, min, zero))
2174 | } |
2175 | |
2176 | /// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst. |
2177 | /// |
2178 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi8&expand=3714) |
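///
/// # Example
///
/// A brief usage sketch (inputs are made up, `avx512bw` assumed available); the comparison
/// is signed, so -3 compares below 5:
///
/// ```ignore
/// use core::arch::x86_64::*;
/// unsafe {
///     let a = _mm512_set1_epi8(-3);
///     let b = _mm512_set1_epi8(5);
///     // every signed 8-bit lane of `r` holds -3
///     let r = _mm512_min_epi8(a, b);
/// }
/// ```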
2179 | #[inline ] |
2180 | #[target_feature (enable = "avx512bw" )] |
2181 | #[cfg_attr (test, assert_instr(vpminsb))] |
2182 | pub unsafe fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i { |
transmute(vpminsb(a.as_i8x64(), b.as_i8x64()))
2184 | } |
2185 | |
2186 | /// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
2187 | /// |
2188 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi8&expand=3712) |
2189 | #[inline ] |
2190 | #[target_feature (enable = "avx512bw" )] |
2191 | #[cfg_attr (test, assert_instr(vpminsb))] |
2192 | pub unsafe fn _mm512_mask_min_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { |
2193 | let min: i8x64 = _mm512_min_epi8(a, b).as_i8x64(); |
transmute(simd_select_bitmask(k, min, src.as_i8x64()))
2195 | } |
2196 | |
2197 | /// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2198 | /// |
2199 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi8&expand=3713) |
2200 | #[inline ] |
2201 | #[target_feature (enable = "avx512bw" )] |
2202 | #[cfg_attr (test, assert_instr(vpminsb))] |
2203 | pub unsafe fn _mm512_maskz_min_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { |
2204 | let min: i8x64 = _mm512_min_epi8(a, b).as_i8x64(); |
2205 | let zero: i8x64 = _mm512_setzero_si512().as_i8x64(); |
transmute(simd_select_bitmask(k, min, zero))
2207 | } |
2208 | |
2209 | /// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
2210 | /// |
2211 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi8&expand=3709) |
2212 | #[inline ] |
2213 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2214 | #[cfg_attr (test, assert_instr(vpminsb))] |
2215 | pub unsafe fn _mm256_mask_min_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { |
2216 | let min: i8x32 = _mm256_min_epi8(a, b).as_i8x32(); |
transmute(simd_select_bitmask(k, min, src.as_i8x32()))
2218 | } |
2219 | |
2220 | /// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2221 | /// |
2222 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi8&expand=3710) |
2223 | #[inline ] |
2224 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2225 | #[cfg_attr (test, assert_instr(vpminsb))] |
2226 | pub unsafe fn _mm256_maskz_min_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { |
2227 | let min: i8x32 = _mm256_min_epi8(a, b).as_i8x32(); |
2228 | let zero: i8x32 = _mm256_setzero_si256().as_i8x32(); |
transmute(simd_select_bitmask(k, min, zero))
2230 | } |
2231 | |
2232 | /// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
2233 | /// |
2234 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi8&expand=3706) |
2235 | #[inline ] |
2236 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2237 | #[cfg_attr (test, assert_instr(vpminsb))] |
2238 | pub unsafe fn _mm_mask_min_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
2239 | let min: i8x16 = _mm_min_epi8(a, b).as_i8x16(); |
transmute(simd_select_bitmask(k, min, src.as_i8x16()))
2241 | } |
2242 | |
2243 | /// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2244 | /// |
2245 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi8&expand=3707) |
2246 | #[inline ] |
2247 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2248 | #[cfg_attr (test, assert_instr(vpminsb))] |
2249 | pub unsafe fn _mm_maskz_min_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
2250 | let min: i8x16 = _mm_min_epi8(a, b).as_i8x16(); |
2251 | let zero: i8x16 = _mm_setzero_si128().as_i8x16(); |
transmute(simd_select_bitmask(k, min, zero))
2253 | } |
2254 | |
2255 | /// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k. |
2256 | /// |
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu16_mask&expand=1050)
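///
/// # Example
///
/// An illustrative sketch (made-up inputs, assuming `avx512bw`); the returned `__mmask32`
/// has one bit per 16-bit lane:
///
/// ```ignore
/// use core::arch::x86_64::*;
/// unsafe {
///     let a = _mm512_set1_epi16(1);
///     let b = _mm512_set1_epi16(2);
///     // 1 < 2 in every lane, so all 32 mask bits are set
///     let m = _mm512_cmplt_epu16_mask(a, b);
///     assert_eq!(m, u32::MAX);
/// }
/// ```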
2258 | #[inline ] |
2259 | #[target_feature (enable = "avx512bw" )] |
2260 | #[cfg_attr (test, assert_instr(vpcmp))] |
2261 | pub unsafe fn _mm512_cmplt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { |
simd_bitmask::<u16x32, _>(simd_lt(a.as_u16x32(), b.as_u16x32()))
2263 | } |
2264 | |
2265 | /// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2266 | /// |
2267 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu16_mask&expand=1051) |
2268 | #[inline ] |
2269 | #[target_feature (enable = "avx512bw" )] |
2270 | #[cfg_attr (test, assert_instr(vpcmp))] |
2271 | pub unsafe fn _mm512_mask_cmplt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { |
2272 | _mm512_cmplt_epu16_mask(a, b) & k1 |
2273 | } |
2274 | |
2275 | /// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k. |
2276 | /// |
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu16_mask&expand=1050)
2278 | #[inline ] |
2279 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2280 | #[cfg_attr (test, assert_instr(vpcmp))] |
2281 | pub unsafe fn _mm256_cmplt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { |
simd_bitmask::<u16x16, _>(simd_lt(a.as_u16x16(), b.as_u16x16()))
2283 | } |
2284 | |
2285 | /// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2286 | /// |
2287 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu16_mask&expand=1049) |
2288 | #[inline ] |
2289 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2290 | #[cfg_attr (test, assert_instr(vpcmp))] |
2291 | pub unsafe fn _mm256_mask_cmplt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { |
2292 | _mm256_cmplt_epu16_mask(a, b) & k1 |
2293 | } |
2294 | |
2295 | /// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k. |
2296 | /// |
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu16_mask&expand=1018)
2298 | #[inline ] |
2299 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2300 | #[cfg_attr (test, assert_instr(vpcmp))] |
2301 | pub unsafe fn _mm_cmplt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { |
simd_bitmask::<u16x8, _>(simd_lt(a.as_u16x8(), b.as_u16x8()))
2303 | } |
2304 | |
2305 | /// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2306 | /// |
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu16_mask&expand=1019)
2308 | #[inline ] |
2309 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2310 | #[cfg_attr (test, assert_instr(vpcmp))] |
2311 | pub unsafe fn _mm_mask_cmplt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { |
2312 | _mm_cmplt_epu16_mask(a, b) & k1 |
2313 | } |
2314 | |
2315 | /// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k. |
2316 | /// |
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu8_mask&expand=1068)
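///
/// # Example
///
/// An illustrative sketch (made-up inputs, assuming `avx512bw`); the returned `__mmask64`
/// has one bit per 8-bit lane:
///
/// ```ignore
/// use core::arch::x86_64::*;
/// unsafe {
///     let a = _mm512_set1_epi8(1);
///     let b = _mm512_set1_epi8(2);
///     // 1 < 2 (unsigned) in every lane, so all 64 mask bits are set
///     let m = _mm512_cmplt_epu8_mask(a, b);
///     assert_eq!(m, u64::MAX);
/// }
/// ```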
2318 | #[inline ] |
2319 | #[target_feature (enable = "avx512bw" )] |
2320 | #[cfg_attr (test, assert_instr(vpcmp))] |
2321 | pub unsafe fn _mm512_cmplt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { |
simd_bitmask::<u8x64, _>(simd_lt(a.as_u8x64(), b.as_u8x64()))
2323 | } |
2324 | |
2325 | /// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2326 | /// |
2327 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu8_mask&expand=1069) |
2328 | #[inline ] |
2329 | #[target_feature (enable = "avx512bw" )] |
2330 | #[cfg_attr (test, assert_instr(vpcmp))] |
2331 | pub unsafe fn _mm512_mask_cmplt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { |
2332 | _mm512_cmplt_epu8_mask(a, b) & k1 |
2333 | } |
2334 | |
2335 | /// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k. |
2336 | /// |
2337 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu8_mask&expand=1066) |
2338 | #[inline ] |
2339 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2340 | #[cfg_attr (test, assert_instr(vpcmp))] |
2341 | pub unsafe fn _mm256_cmplt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { |
simd_bitmask::<u8x32, _>(simd_lt(a.as_u8x32(), b.as_u8x32()))
2343 | } |
2344 | |
2345 | /// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2346 | /// |
2347 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu8_mask&expand=1067) |
2348 | #[inline ] |
2349 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2350 | #[cfg_attr (test, assert_instr(vpcmp))] |
2351 | pub unsafe fn _mm256_mask_cmplt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { |
2352 | _mm256_cmplt_epu8_mask(a, b) & k1 |
2353 | } |
2354 | |
2355 | /// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k. |
2356 | /// |
2357 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu8_mask&expand=1064) |
2358 | #[inline ] |
2359 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2360 | #[cfg_attr (test, assert_instr(vpcmp))] |
2361 | pub unsafe fn _mm_cmplt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { |
simd_bitmask::<u8x16, _>(simd_lt(a.as_u8x16(), b.as_u8x16()))
2363 | } |
2364 | |
2365 | /// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2366 | /// |
2367 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu8_mask&expand=1065) |
2368 | #[inline ] |
2369 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2370 | #[cfg_attr (test, assert_instr(vpcmp))] |
2371 | pub unsafe fn _mm_mask_cmplt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { |
2372 | _mm_cmplt_epu8_mask(a, b) & k1 |
2373 | } |
2374 | |
2375 | /// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k. |
2376 | /// |
2377 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi16_mask&expand=1022) |
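///
/// # Example
///
/// A sketch with made-up inputs (assuming `avx512bw`); the comparison is signed, so -1
/// compares below 0:
///
/// ```ignore
/// use core::arch::x86_64::*;
/// unsafe {
///     let a = _mm512_set1_epi16(-1);
///     let b = _mm512_set1_epi16(0);
///     // -1 < 0 in every lane, so all 32 mask bits are set
///     let m = _mm512_cmplt_epi16_mask(a, b);
///     assert_eq!(m, u32::MAX);
/// }
/// ```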
2378 | #[inline ] |
2379 | #[target_feature (enable = "avx512bw" )] |
2380 | #[cfg_attr (test, assert_instr(vpcmp))] |
2381 | pub unsafe fn _mm512_cmplt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { |
simd_bitmask::<i16x32, _>(simd_lt(a.as_i16x32(), b.as_i16x32()))
2383 | } |
2384 | |
2385 | /// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2386 | /// |
2387 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi16_mask&expand=1023) |
2388 | #[inline ] |
2389 | #[target_feature (enable = "avx512bw" )] |
2390 | #[cfg_attr (test, assert_instr(vpcmp))] |
2391 | pub unsafe fn _mm512_mask_cmplt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { |
2392 | _mm512_cmplt_epi16_mask(a, b) & k1 |
2393 | } |
2394 | |
2395 | /// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k. |
2396 | /// |
2397 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi16_mask&expand=1020) |
2398 | #[inline ] |
2399 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2400 | #[cfg_attr (test, assert_instr(vpcmp))] |
2401 | pub unsafe fn _mm256_cmplt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { |
simd_bitmask::<i16x16, _>(simd_lt(a.as_i16x16(), b.as_i16x16()))
2403 | } |
2404 | |
2405 | /// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2406 | /// |
2407 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi16_mask&expand=1021) |
2408 | #[inline ] |
2409 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2410 | #[cfg_attr (test, assert_instr(vpcmp))] |
2411 | pub unsafe fn _mm256_mask_cmplt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { |
2412 | _mm256_cmplt_epi16_mask(a, b) & k1 |
2413 | } |
2414 | |
2415 | /// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k. |
2416 | /// |
2417 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16_mask&expand=1018) |
2418 | #[inline ] |
2419 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2420 | #[cfg_attr (test, assert_instr(vpcmp))] |
2421 | pub unsafe fn _mm_cmplt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { |
simd_bitmask::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8()))
2423 | } |
2424 | |
2425 | /// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2426 | /// |
2427 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi16_mask&expand=1019) |
2428 | #[inline ] |
2429 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2430 | #[cfg_attr (test, assert_instr(vpcmp))] |
2431 | pub unsafe fn _mm_mask_cmplt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { |
2432 | _mm_cmplt_epi16_mask(a, b) & k1 |
2433 | } |
2434 | |
2435 | /// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k. |
2436 | /// |
2437 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi8_mask&expand=1044) |
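///
/// # Example
///
/// A sketch with made-up inputs (assuming `avx512bw`); the signed comparison treats -1 as
/// smaller than 0:
///
/// ```ignore
/// use core::arch::x86_64::*;
/// unsafe {
///     let a = _mm512_set1_epi8(-1);
///     let b = _mm512_set1_epi8(0);
///     // -1 < 0 in every lane, so all 64 mask bits are set
///     let m = _mm512_cmplt_epi8_mask(a, b);
///     assert_eq!(m, u64::MAX);
/// }
/// ```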
2438 | #[inline ] |
2439 | #[target_feature (enable = "avx512bw" )] |
2440 | #[cfg_attr (test, assert_instr(vpcmp))] |
2441 | pub unsafe fn _mm512_cmplt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { |
simd_bitmask::<i8x64, _>(simd_lt(a.as_i8x64(), b.as_i8x64()))
2443 | } |
2444 | |
2445 | /// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2446 | /// |
2447 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi8_mask&expand=1045) |
2448 | #[inline ] |
2449 | #[target_feature (enable = "avx512bw" )] |
2450 | #[cfg_attr (test, assert_instr(vpcmp))] |
2451 | pub unsafe fn _mm512_mask_cmplt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { |
2452 | _mm512_cmplt_epi8_mask(a, b) & k1 |
2453 | } |
2454 | |
2455 | /// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k. |
2456 | /// |
2457 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi8_mask&expand=1042) |
2458 | #[inline ] |
2459 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2460 | #[cfg_attr (test, assert_instr(vpcmp))] |
2461 | pub unsafe fn _mm256_cmplt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { |
simd_bitmask::<i8x32, _>(simd_lt(a.as_i8x32(), b.as_i8x32()))
2463 | } |
2464 | |
2465 | /// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2466 | /// |
2467 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi8_mask&expand=1043) |
2468 | #[inline ] |
2469 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2470 | #[cfg_attr (test, assert_instr(vpcmp))] |
2471 | pub unsafe fn _mm256_mask_cmplt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { |
2472 | _mm256_cmplt_epi8_mask(a, b) & k1 |
2473 | } |
2474 | |
2475 | /// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k. |
2476 | /// |
2477 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8_mask&expand=1040) |
2478 | #[inline ] |
2479 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2480 | #[cfg_attr (test, assert_instr(vpcmp))] |
2481 | pub unsafe fn _mm_cmplt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { |
simd_bitmask::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16()))
2483 | } |
2484 | |
2485 | /// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2486 | /// |
2487 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi8_mask&expand=1041) |
2488 | #[inline ] |
2489 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2490 | #[cfg_attr (test, assert_instr(vpcmp))] |
2491 | pub unsafe fn _mm_mask_cmplt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { |
2492 | _mm_cmplt_epi8_mask(a, b) & k1 |
2493 | } |
2494 | |
2495 | /// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k. |
2496 | /// |
2497 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu16_mask&expand=927) |
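///
/// # Example
///
/// An illustrative sketch (made-up inputs, assuming `avx512bw`):
///
/// ```ignore
/// use core::arch::x86_64::*;
/// unsafe {
///     let a = _mm512_set1_epi16(2);
///     let b = _mm512_set1_epi16(1);
///     // 2 > 1 (unsigned) in every lane, so all 32 mask bits are set
///     let m = _mm512_cmpgt_epu16_mask(a, b);
///     assert_eq!(m, u32::MAX);
/// }
/// ```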
2498 | #[inline ] |
2499 | #[target_feature (enable = "avx512bw" )] |
2500 | #[cfg_attr (test, assert_instr(vpcmp))] |
2501 | pub unsafe fn _mm512_cmpgt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { |
simd_bitmask::<u16x32, _>(simd_gt(a.as_u16x32(), b.as_u16x32()))
2503 | } |
2504 | |
2505 | /// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2506 | /// |
2507 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu16_mask&expand=928) |
2508 | #[inline ] |
2509 | #[target_feature (enable = "avx512bw" )] |
2510 | #[cfg_attr (test, assert_instr(vpcmp))] |
2511 | pub unsafe fn _mm512_mask_cmpgt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { |
2512 | _mm512_cmpgt_epu16_mask(a, b) & k1 |
2513 | } |
2514 | |
2515 | /// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k. |
2516 | /// |
2517 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu16_mask&expand=925) |
2518 | #[inline ] |
2519 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2520 | #[cfg_attr (test, assert_instr(vpcmp))] |
2521 | pub unsafe fn _mm256_cmpgt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { |
simd_bitmask::<u16x16, _>(simd_gt(a.as_u16x16(), b.as_u16x16()))
2523 | } |
2524 | |
2525 | /// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2526 | /// |
2527 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu16_mask&expand=926) |
2528 | #[inline ] |
2529 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2530 | #[cfg_attr (test, assert_instr(vpcmp))] |
2531 | pub unsafe fn _mm256_mask_cmpgt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { |
2532 | _mm256_cmpgt_epu16_mask(a, b) & k1 |
2533 | } |
2534 | |
2535 | /// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k. |
2536 | /// |
2537 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu16_mask&expand=923) |
2538 | #[inline ] |
2539 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2540 | #[cfg_attr (test, assert_instr(vpcmp))] |
2541 | pub unsafe fn _mm_cmpgt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { |
simd_bitmask::<u16x8, _>(simd_gt(a.as_u16x8(), b.as_u16x8()))
2543 | } |
2544 | |
2545 | /// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2546 | /// |
2547 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu16_mask&expand=924) |
2548 | #[inline ] |
2549 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2550 | #[cfg_attr (test, assert_instr(vpcmp))] |
2551 | pub unsafe fn _mm_mask_cmpgt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { |
2552 | _mm_cmpgt_epu16_mask(a, b) & k1 |
2553 | } |
2554 | |
2555 | /// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k. |
2556 | /// |
2557 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu8_mask&expand=945) |
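///
/// # Example
///
/// An illustrative sketch (made-up inputs, assuming `avx512bw`):
///
/// ```ignore
/// use core::arch::x86_64::*;
/// unsafe {
///     let a = _mm512_set1_epi8(2);
///     let b = _mm512_set1_epi8(1);
///     // 2 > 1 (unsigned) in every lane, so all 64 mask bits are set
///     let m = _mm512_cmpgt_epu8_mask(a, b);
///     assert_eq!(m, u64::MAX);
/// }
/// ```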
2558 | #[inline ] |
2559 | #[target_feature (enable = "avx512bw" )] |
2560 | #[cfg_attr (test, assert_instr(vpcmp))] |
2561 | pub unsafe fn _mm512_cmpgt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { |
simd_bitmask::<u8x64, _>(simd_gt(a.as_u8x64(), b.as_u8x64()))
2563 | } |
2564 | |
2565 | /// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2566 | /// |
2567 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu8_mask&expand=946) |
2568 | #[inline ] |
2569 | #[target_feature (enable = "avx512bw" )] |
2570 | #[cfg_attr (test, assert_instr(vpcmp))] |
2571 | pub unsafe fn _mm512_mask_cmpgt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { |
2572 | _mm512_cmpgt_epu8_mask(a, b) & k1 |
2573 | } |
2574 | |
2575 | /// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k. |
2576 | /// |
2577 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu8_mask&expand=943) |
2578 | #[inline ] |
2579 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2580 | #[cfg_attr (test, assert_instr(vpcmp))] |
2581 | pub unsafe fn _mm256_cmpgt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { |
simd_bitmask::<u8x32, _>(simd_gt(a.as_u8x32(), b.as_u8x32()))
2583 | } |
2584 | |
2585 | /// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2586 | /// |
2587 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu8_mask&expand=944) |
2588 | #[inline ] |
2589 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2590 | #[cfg_attr (test, assert_instr(vpcmp))] |
2591 | pub unsafe fn _mm256_mask_cmpgt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { |
2592 | _mm256_cmpgt_epu8_mask(a, b) & k1 |
2593 | } |
2594 | |
2595 | /// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k. |
2596 | /// |
2597 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu8_mask&expand=941) |
2598 | #[inline ] |
2599 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2600 | #[cfg_attr (test, assert_instr(vpcmp))] |
2601 | pub unsafe fn _mm_cmpgt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { |
simd_bitmask::<u8x16, _>(simd_gt(a.as_u8x16(), b.as_u8x16()))
2603 | } |
2604 | |
2605 | /// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2606 | /// |
2607 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu8_mask&expand=942) |
2608 | #[inline ] |
2609 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2610 | #[cfg_attr (test, assert_instr(vpcmp))] |
2611 | pub unsafe fn _mm_mask_cmpgt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { |
2612 | _mm_cmpgt_epu8_mask(a, b) & k1 |
2613 | } |
2614 | |
2615 | /// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k. |
2616 | /// |
2617 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi16_mask&expand=897) |
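///
/// # Example
///
/// A sketch with made-up inputs (assuming `avx512bw`); the signed comparison treats 0 as
/// greater than -1:
///
/// ```ignore
/// use core::arch::x86_64::*;
/// unsafe {
///     let a = _mm512_set1_epi16(0);
///     let b = _mm512_set1_epi16(-1);
///     // 0 > -1 in every lane, so all 32 mask bits are set
///     let m = _mm512_cmpgt_epi16_mask(a, b);
///     assert_eq!(m, u32::MAX);
/// }
/// ```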
2618 | #[inline ] |
2619 | #[target_feature (enable = "avx512bw" )] |
2620 | #[cfg_attr (test, assert_instr(vpcmp))] |
2621 | pub unsafe fn _mm512_cmpgt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { |
simd_bitmask::<i16x32, _>(simd_gt(a.as_i16x32(), b.as_i16x32()))
2623 | } |
2624 | |
2625 | /// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2626 | /// |
2627 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi16_mask&expand=898) |
2628 | #[inline ] |
2629 | #[target_feature (enable = "avx512bw" )] |
2630 | #[cfg_attr (test, assert_instr(vpcmp))] |
2631 | pub unsafe fn _mm512_mask_cmpgt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { |
2632 | _mm512_cmpgt_epi16_mask(a, b) & k1 |
2633 | } |
2634 | |
2635 | /// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k. |
2636 | /// |
2637 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi16_mask&expand=895) |
2638 | #[inline ] |
2639 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2640 | #[cfg_attr (test, assert_instr(vpcmp))] |
2641 | pub unsafe fn _mm256_cmpgt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { |
simd_bitmask::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16()))
2643 | } |
2644 | |
2645 | /// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2646 | /// |
2647 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi16_mask&expand=896) |
2648 | #[inline ] |
2649 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2650 | #[cfg_attr (test, assert_instr(vpcmp))] |
2651 | pub unsafe fn _mm256_mask_cmpgt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { |
2652 | _mm256_cmpgt_epi16_mask(a, b) & k1 |
2653 | } |
2654 | |
2655 | /// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k. |
2656 | /// |
2657 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16_mask&expand=893) |
2658 | #[inline ] |
2659 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2660 | #[cfg_attr (test, assert_instr(vpcmp))] |
2661 | pub unsafe fn _mm_cmpgt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { |
simd_bitmask::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8()))
2663 | } |
2664 | |
2665 | /// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2666 | /// |
2667 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi16_mask&expand=894) |
2668 | #[inline ] |
2669 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2670 | #[cfg_attr (test, assert_instr(vpcmp))] |
2671 | pub unsafe fn _mm_mask_cmpgt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { |
2672 | _mm_cmpgt_epi16_mask(a, b) & k1 |
2673 | } |
2674 | |
2675 | /// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k. |
2676 | /// |
2677 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi8_mask&expand=921) |
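///
/// # Example
///
/// A sketch with made-up inputs (assuming `avx512bw`); the comparison is signed:
///
/// ```ignore
/// use core::arch::x86_64::*;
/// unsafe {
///     let a = _mm512_set1_epi8(0);
///     let b = _mm512_set1_epi8(-1);
///     // 0 > -1 in every lane, so all 64 mask bits are set
///     let m = _mm512_cmpgt_epi8_mask(a, b);
///     assert_eq!(m, u64::MAX);
/// }
/// ```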
2678 | #[inline ] |
2679 | #[target_feature (enable = "avx512bw" )] |
2680 | #[cfg_attr (test, assert_instr(vpcmp))] |
2681 | pub unsafe fn _mm512_cmpgt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { |
simd_bitmask::<i8x64, _>(simd_gt(a.as_i8x64(), b.as_i8x64()))
2683 | } |
2684 | |
2685 | /// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2686 | /// |
2687 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi8_mask&expand=922) |
2688 | #[inline ] |
2689 | #[target_feature (enable = "avx512bw" )] |
2690 | #[cfg_attr (test, assert_instr(vpcmp))] |
2691 | pub unsafe fn _mm512_mask_cmpgt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { |
2692 | _mm512_cmpgt_epi8_mask(a, b) & k1 |
2693 | } |
2694 | |
2695 | /// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k. |
2696 | /// |
2697 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi8_mask&expand=919) |
2698 | #[inline ] |
2699 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2700 | #[cfg_attr (test, assert_instr(vpcmp))] |
2701 | pub unsafe fn _mm256_cmpgt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { |
simd_bitmask::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32()))
2703 | } |
2704 | |
2705 | /// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2706 | /// |
2707 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi8_mask&expand=920) |
2708 | #[inline ] |
2709 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2710 | #[cfg_attr (test, assert_instr(vpcmp))] |
2711 | pub unsafe fn _mm256_mask_cmpgt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { |
2712 | _mm256_cmpgt_epi8_mask(a, b) & k1 |
2713 | } |
2714 | |
2715 | /// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k. |
2716 | /// |
2717 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8_mask&expand=917) |
2718 | #[inline ] |
2719 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2720 | #[cfg_attr (test, assert_instr(vpcmp))] |
2721 | pub unsafe fn _mm_cmpgt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { |
simd_bitmask::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16()))
2723 | } |
2724 | |
2725 | /// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2726 | /// |
2727 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi8_mask&expand=918) |
2728 | #[inline ] |
2729 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2730 | #[cfg_attr (test, assert_instr(vpcmp))] |
2731 | pub unsafe fn _mm_mask_cmpgt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { |
2732 | _mm_cmpgt_epi8_mask(a, b) & k1 |
2733 | } |
2734 | |
2735 | /// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. |
2736 | /// |
2737 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu16_mask&expand=989) |
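///
/// # Example
///
/// An illustrative sketch (made-up inputs, assuming `avx512bw`); equal lanes satisfy
/// less-than-or-equal:
///
/// ```ignore
/// use core::arch::x86_64::*;
/// unsafe {
///     let a = _mm512_set1_epi16(1);
///     let b = _mm512_set1_epi16(1);
///     // 1 <= 1 in every lane, so all 32 mask bits are set
///     let m = _mm512_cmple_epu16_mask(a, b);
///     assert_eq!(m, u32::MAX);
/// }
/// ```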
2738 | #[inline ] |
2739 | #[target_feature (enable = "avx512bw" )] |
2740 | #[cfg_attr (test, assert_instr(vpcmp))] |
2741 | pub unsafe fn _mm512_cmple_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { |
simd_bitmask::<u16x32, _>(simd_le(a.as_u16x32(), b.as_u16x32()))
2743 | } |
2744 | |
2745 | /// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2746 | /// |
2747 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu16_mask&expand=990) |
2748 | #[inline ] |
2749 | #[target_feature (enable = "avx512bw" )] |
2750 | #[cfg_attr (test, assert_instr(vpcmp))] |
2751 | pub unsafe fn _mm512_mask_cmple_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { |
2752 | _mm512_cmple_epu16_mask(a, b) & k1 |
2753 | } |
2754 | |
2755 | /// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. |
2756 | /// |
2757 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu16_mask&expand=987) |
2758 | #[inline ] |
2759 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2760 | #[cfg_attr (test, assert_instr(vpcmp))] |
2761 | pub unsafe fn _mm256_cmple_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { |
simd_bitmask::<u16x16, _>(simd_le(a.as_u16x16(), b.as_u16x16()))
2763 | } |
2764 | |
2765 | /// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2766 | /// |
2767 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu16_mask&expand=988) |
2768 | #[inline ] |
2769 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2770 | #[cfg_attr (test, assert_instr(vpcmp))] |
2771 | pub unsafe fn _mm256_mask_cmple_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { |
2772 | _mm256_cmple_epu16_mask(a, b) & k1 |
2773 | } |
2774 | |
2775 | /// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. |
2776 | /// |
2777 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu16_mask&expand=985) |
2778 | #[inline ] |
2779 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2780 | #[cfg_attr (test, assert_instr(vpcmp))] |
2781 | pub unsafe fn _mm_cmple_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { |
simd_bitmask::<u16x8, _>(simd_le(a.as_u16x8(), b.as_u16x8()))
2783 | } |
2784 | |
2785 | /// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2786 | /// |
2787 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu16_mask&expand=986) |
2788 | #[inline ] |
2789 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2790 | #[cfg_attr (test, assert_instr(vpcmp))] |
2791 | pub unsafe fn _mm_mask_cmple_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { |
2792 | _mm_cmple_epu16_mask(a, b) & k1 |
2793 | } |
2794 | |
2795 | /// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. |
2796 | /// |
2797 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu8_mask&expand=1007) |
2798 | #[inline ] |
2799 | #[target_feature (enable = "avx512bw" )] |
2800 | #[cfg_attr (test, assert_instr(vpcmp))] |
2801 | pub unsafe fn _mm512_cmple_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { |
simd_bitmask::<u8x64, _>(simd_le(a.as_u8x64(), b.as_u8x64()))
2803 | } |
2804 | |
2805 | /// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2806 | /// |
2807 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu8_mask&expand=1008) |
2808 | #[inline ] |
2809 | #[target_feature (enable = "avx512bw" )] |
2810 | #[cfg_attr (test, assert_instr(vpcmp))] |
2811 | pub unsafe fn _mm512_mask_cmple_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { |
2812 | _mm512_cmple_epu8_mask(a, b) & k1 |
2813 | } |
2814 | |
2815 | /// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. |
2816 | /// |
2817 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu8_mask&expand=1005) |
2818 | #[inline ] |
2819 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2820 | #[cfg_attr (test, assert_instr(vpcmp))] |
2821 | pub unsafe fn _mm256_cmple_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { |
simd_bitmask::<u8x32, _>(simd_le(a.as_u8x32(), b.as_u8x32()))
2823 | } |
2824 | |
2825 | /// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2826 | /// |
2827 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu8_mask&expand=1006) |
2828 | #[inline ] |
2829 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2830 | #[cfg_attr (test, assert_instr(vpcmp))] |
2831 | pub unsafe fn _mm256_mask_cmple_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { |
2832 | _mm256_cmple_epu8_mask(a, b) & k1 |
2833 | } |
2834 | |
2835 | /// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. |
2836 | /// |
2837 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu8_mask&expand=1003) |
2838 | #[inline ] |
2839 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2840 | #[cfg_attr (test, assert_instr(vpcmp))] |
2841 | pub unsafe fn _mm_cmple_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { |
simd_bitmask::<u8x16, _>(simd_le(a.as_u8x16(), b.as_u8x16()))
2843 | } |
2844 | |
2845 | /// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2846 | /// |
2847 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu8_mask&expand=1004) |
2848 | #[inline ] |
2849 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2850 | #[cfg_attr (test, assert_instr(vpcmp))] |
2851 | pub unsafe fn _mm_mask_cmple_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { |
2852 | _mm_cmple_epu8_mask(a, b) & k1 |
2853 | } |
2854 | |
2855 | /// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. |
2856 | /// |
2857 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi16_mask&expand=965) |
2858 | #[inline ] |
2859 | #[target_feature (enable = "avx512bw" )] |
2860 | #[cfg_attr (test, assert_instr(vpcmp))] |
2861 | pub unsafe fn _mm512_cmple_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { |
simd_bitmask::<i16x32, _>(simd_le(a.as_i16x32(), b.as_i16x32()))
2863 | } |
2864 | |
2865 | /// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2866 | /// |
2867 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi16_mask&expand=966) |
2868 | #[inline ] |
2869 | #[target_feature (enable = "avx512bw" )] |
2870 | #[cfg_attr (test, assert_instr(vpcmp))] |
2871 | pub unsafe fn _mm512_mask_cmple_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { |
2872 | _mm512_cmple_epi16_mask(a, b) & k1 |
2873 | } |
2874 | |
2875 | /// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. |
2876 | /// |
2877 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi16_mask&expand=963) |
2878 | #[inline ] |
2879 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2880 | #[cfg_attr (test, assert_instr(vpcmp))] |
2881 | pub unsafe fn _mm256_cmple_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { |
simd_bitmask::<i16x16, _>(simd_le(a.as_i16x16(), b.as_i16x16()))
2883 | } |
2884 | |
2885 | /// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2886 | /// |
2887 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi16_mask&expand=964) |
2888 | #[inline ] |
2889 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2890 | #[cfg_attr (test, assert_instr(vpcmp))] |
2891 | pub unsafe fn _mm256_mask_cmple_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { |
2892 | _mm256_cmple_epi16_mask(a, b) & k1 |
2893 | } |
2894 | |
2895 | /// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. |
2896 | /// |
2897 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi16_mask&expand=961) |
2898 | #[inline ] |
2899 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2900 | #[cfg_attr (test, assert_instr(vpcmp))] |
2901 | pub unsafe fn _mm_cmple_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { |
simd_bitmask::<i16x8, _>(simd_le(a.as_i16x8(), b.as_i16x8()))
2903 | } |
2904 | |
2905 | /// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2906 | /// |
2907 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi16_mask&expand=962) |
2908 | #[inline ] |
2909 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2910 | #[cfg_attr (test, assert_instr(vpcmp))] |
2911 | pub unsafe fn _mm_mask_cmple_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { |
2912 | _mm_cmple_epi16_mask(a, b) & k1 |
2913 | } |
2914 | |
2915 | /// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. |
2916 | /// |
2917 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi8_mask&expand=983) |
2918 | #[inline ] |
2919 | #[target_feature (enable = "avx512bw" )] |
2920 | #[cfg_attr (test, assert_instr(vpcmp))] |
2921 | pub unsafe fn _mm512_cmple_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { |
simd_bitmask::<i8x64, _>(simd_le(a.as_i8x64(), b.as_i8x64()))
2923 | } |
2924 | |
2925 | /// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2926 | /// |
2927 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi8_mask&expand=984) |
2928 | #[inline ] |
2929 | #[target_feature (enable = "avx512bw" )] |
2930 | #[cfg_attr (test, assert_instr(vpcmp))] |
2931 | pub unsafe fn _mm512_mask_cmple_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { |
2932 | _mm512_cmple_epi8_mask(a, b) & k1 |
2933 | } |
2934 | |
2935 | /// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. |
2936 | /// |
2937 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi8_mask&expand=981) |
2938 | #[inline ] |
2939 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2940 | #[cfg_attr (test, assert_instr(vpcmp))] |
2941 | pub unsafe fn _mm256_cmple_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { |
simd_bitmask::<i8x32, _>(simd_le(a.as_i8x32(), b.as_i8x32()))
2943 | } |
2944 | |
2945 | /// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2946 | /// |
2947 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi8_mask&expand=982) |
2948 | #[inline ] |
2949 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2950 | #[cfg_attr (test, assert_instr(vpcmp))] |
2951 | pub unsafe fn _mm256_mask_cmple_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { |
2952 | _mm256_cmple_epi8_mask(a, b) & k1 |
2953 | } |
2954 | |
2955 | /// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. |
2956 | /// |
2957 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi8_mask&expand=979) |
2958 | #[inline ] |
2959 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2960 | #[cfg_attr (test, assert_instr(vpcmp))] |
2961 | pub unsafe fn _mm_cmple_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { |
simd_bitmask::<i8x16, _>(simd_le(a.as_i8x16(), b.as_i8x16()))
2963 | } |
2964 | |
2965 | /// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2966 | /// |
2967 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi8_mask&expand=980) |
2968 | #[inline ] |
2969 | #[target_feature (enable = "avx512bw,avx512vl" )] |
2970 | #[cfg_attr (test, assert_instr(vpcmp))] |
2971 | pub unsafe fn _mm_mask_cmple_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { |
2972 | _mm_cmple_epi8_mask(a, b) & k1 |
2973 | } |
2974 | |
2975 | /// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. |
2976 | /// |
2977 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu16_mask&expand=867) |
2978 | #[inline ] |
2979 | #[target_feature (enable = "avx512bw" )] |
2980 | #[cfg_attr (test, assert_instr(vpcmp))] |
2981 | pub unsafe fn _mm512_cmpge_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { |
simd_bitmask::<u16x32, _>(simd_ge(a.as_u16x32(), b.as_u16x32()))
2983 | } |
2984 | |
2985 | /// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
2986 | /// |
2987 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu16_mask&expand=868) |
2988 | #[inline ] |
2989 | #[target_feature (enable = "avx512bw" )] |
2990 | #[cfg_attr (test, assert_instr(vpcmp))] |
2991 | pub unsafe fn _mm512_mask_cmpge_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { |
2992 | _mm512_cmpge_epu16_mask(a, b) & k1 |
2993 | } |
2994 | |
2995 | /// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. |
2996 | /// |
2997 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu16_mask&expand=865) |
2998 | #[inline ] |
2999 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3000 | #[cfg_attr (test, assert_instr(vpcmp))] |
3001 | pub unsafe fn _mm256_cmpge_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { |
simd_bitmask::<u16x16, _>(simd_ge(a.as_u16x16(), b.as_u16x16()))
3003 | } |
3004 | |
3005 | /// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3006 | /// |
3007 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu16_mask&expand=866) |
3008 | #[inline ] |
3009 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3010 | #[cfg_attr (test, assert_instr(vpcmp))] |
3011 | pub unsafe fn _mm256_mask_cmpge_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { |
3012 | _mm256_cmpge_epu16_mask(a, b) & k1 |
3013 | } |
3014 | |
3015 | /// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. |
3016 | /// |
3017 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu16_mask&expand=863) |
3018 | #[inline ] |
3019 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3020 | #[cfg_attr (test, assert_instr(vpcmp))] |
3021 | pub unsafe fn _mm_cmpge_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { |
simd_bitmask::<u16x8, _>(simd_ge(a.as_u16x8(), b.as_u16x8()))
3023 | } |
3024 | |
3025 | /// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3026 | /// |
3027 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu16_mask&expand=864) |
3028 | #[inline ] |
3029 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3030 | #[cfg_attr (test, assert_instr(vpcmp))] |
3031 | pub unsafe fn _mm_mask_cmpge_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { |
3032 | _mm_cmpge_epu16_mask(a, b) & k1 |
3033 | } |
3034 | |
3035 | /// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. |
3036 | /// |
3037 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu8_mask&expand=885) |
3038 | #[inline ] |
3039 | #[target_feature (enable = "avx512bw" )] |
3040 | #[cfg_attr (test, assert_instr(vpcmp))] |
3041 | pub unsafe fn _mm512_cmpge_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { |
simd_bitmask::<u8x64, _>(simd_ge(a.as_u8x64(), b.as_u8x64()))
3043 | } |
3044 | |
3045 | /// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3046 | /// |
3047 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu8_mask&expand=886) |
3048 | #[inline ] |
3049 | #[target_feature (enable = "avx512bw" )] |
3050 | #[cfg_attr (test, assert_instr(vpcmp))] |
3051 | pub unsafe fn _mm512_mask_cmpge_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { |
3052 | _mm512_cmpge_epu8_mask(a, b) & k1 |
3053 | } |
3054 | |
3055 | /// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. |
3056 | /// |
3057 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu8_mask&expand=883) |
3058 | #[inline ] |
3059 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3060 | #[cfg_attr (test, assert_instr(vpcmp))] |
3061 | pub unsafe fn _mm256_cmpge_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { |
simd_bitmask::<u8x32, _>(simd_ge(a.as_u8x32(), b.as_u8x32()))
3063 | } |
3064 | |
3065 | /// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3066 | /// |
3067 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu8_mask&expand=884) |
3068 | #[inline ] |
3069 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3070 | #[cfg_attr (test, assert_instr(vpcmp))] |
3071 | pub unsafe fn _mm256_mask_cmpge_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { |
3072 | _mm256_cmpge_epu8_mask(a, b) & k1 |
3073 | } |
3074 | |
3075 | /// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. |
3076 | /// |
3077 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu8_mask&expand=881) |
3078 | #[inline ] |
3079 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3080 | #[cfg_attr (test, assert_instr(vpcmp))] |
3081 | pub unsafe fn _mm_cmpge_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { |
simd_bitmask::<u8x16, _>(simd_ge(a.as_u8x16(), b.as_u8x16()))
3083 | } |
3084 | |
3085 | /// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3086 | /// |
3087 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu8_mask&expand=882) |
3088 | #[inline ] |
3089 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3090 | #[cfg_attr (test, assert_instr(vpcmp))] |
3091 | pub unsafe fn _mm_mask_cmpge_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { |
3092 | _mm_cmpge_epu8_mask(a, b) & k1 |
3093 | } |
3094 | |
3095 | /// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. |
3096 | /// |
3097 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi16_mask&expand=843) |
3098 | #[inline ] |
3099 | #[target_feature (enable = "avx512bw" )] |
3100 | #[cfg_attr (test, assert_instr(vpcmp))] |
3101 | pub unsafe fn _mm512_cmpge_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { |
simd_bitmask::<i16x32, _>(simd_ge(a.as_i16x32(), b.as_i16x32()))
3103 | } |
3104 | |
3105 | /// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3106 | /// |
3107 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi16_mask&expand=844) |
3108 | #[inline ] |
3109 | #[target_feature (enable = "avx512bw" )] |
3110 | #[cfg_attr (test, assert_instr(vpcmp))] |
3111 | pub unsafe fn _mm512_mask_cmpge_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { |
3112 | _mm512_cmpge_epi16_mask(a, b) & k1 |
3113 | } |
3114 | |
3115 | /// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. |
3116 | /// |
3117 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi16_mask&expand=841) |
3118 | #[inline ] |
3119 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3120 | #[cfg_attr (test, assert_instr(vpcmp))] |
3121 | pub unsafe fn _mm256_cmpge_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { |
simd_bitmask::<i16x16, _>(simd_ge(a.as_i16x16(), b.as_i16x16()))
3123 | } |
3124 | |
3125 | /// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3126 | /// |
3127 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi16_mask&expand=842) |
3128 | #[inline ] |
3129 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3130 | #[cfg_attr (test, assert_instr(vpcmp))] |
3131 | pub unsafe fn _mm256_mask_cmpge_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { |
3132 | _mm256_cmpge_epi16_mask(a, b) & k1 |
3133 | } |
3134 | |
3135 | /// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. |
3136 | /// |
3137 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi16_mask&expand=839) |
3138 | #[inline ] |
3139 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3140 | #[cfg_attr (test, assert_instr(vpcmp))] |
3141 | pub unsafe fn _mm_cmpge_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { |
simd_bitmask::<i16x8, _>(simd_ge(a.as_i16x8(), b.as_i16x8()))
3143 | } |
3144 | |
3145 | /// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3146 | /// |
3147 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi16_mask&expand=840) |
3148 | #[inline ] |
3149 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3150 | #[cfg_attr (test, assert_instr(vpcmp))] |
3151 | pub unsafe fn _mm_mask_cmpge_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { |
3152 | _mm_cmpge_epi16_mask(a, b) & k1 |
3153 | } |
3154 | |
3155 | /// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. |
3156 | /// |
3157 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi8_mask&expand=861) |
3158 | #[inline ] |
3159 | #[target_feature (enable = "avx512bw" )] |
3160 | #[cfg_attr (test, assert_instr(vpcmp))] |
3161 | pub unsafe fn _mm512_cmpge_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { |
simd_bitmask::<i8x64, _>(simd_ge(a.as_i8x64(), b.as_i8x64()))
3163 | } |
3164 | |
3165 | /// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3166 | /// |
3167 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi8_mask&expand=862) |
3168 | #[inline ] |
3169 | #[target_feature (enable = "avx512bw" )] |
3170 | #[cfg_attr (test, assert_instr(vpcmp))] |
3171 | pub unsafe fn _mm512_mask_cmpge_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { |
3172 | _mm512_cmpge_epi8_mask(a, b) & k1 |
3173 | } |
3174 | |
3175 | /// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. |
3176 | /// |
3177 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi8_mask&expand=859) |
3178 | #[inline ] |
3179 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3180 | #[cfg_attr (test, assert_instr(vpcmp))] |
3181 | pub unsafe fn _mm256_cmpge_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { |
simd_bitmask::<i8x32, _>(simd_ge(a.as_i8x32(), b.as_i8x32()))
3183 | } |
3184 | |
3185 | /// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3186 | /// |
3187 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi8_mask&expand=860) |
3188 | #[inline ] |
3189 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3190 | #[cfg_attr (test, assert_instr(vpcmp))] |
3191 | pub unsafe fn _mm256_mask_cmpge_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { |
3192 | _mm256_cmpge_epi8_mask(a, b) & k1 |
3193 | } |
3194 | |
3195 | /// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. |
3196 | /// |
3197 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi8_mask&expand=857) |
3198 | #[inline ] |
3199 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3200 | #[cfg_attr (test, assert_instr(vpcmp))] |
3201 | pub unsafe fn _mm_cmpge_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { |
simd_bitmask::<i8x16, _>(simd_ge(a.as_i8x16(), b.as_i8x16()))
3203 | } |
3204 | |
3205 | /// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3206 | /// |
3207 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi8_mask&expand=858) |
3208 | #[inline ] |
3209 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3210 | #[cfg_attr (test, assert_instr(vpcmp))] |
3211 | pub unsafe fn _mm_mask_cmpge_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { |
3212 | _mm_cmpge_epi8_mask(a, b) & k1 |
3213 | } |
3214 | |
3215 | /// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k. |
3216 | /// |
3217 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu16_mask&expand=801) |
3218 | #[inline ] |
3219 | #[target_feature (enable = "avx512bw" )] |
3220 | #[cfg_attr (test, assert_instr(vpcmp))] |
3221 | pub unsafe fn _mm512_cmpeq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { |
simd_bitmask::<u16x32, _>(simd_eq(a.as_u16x32(), b.as_u16x32()))
3223 | } |
3224 | |
3225 | /// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3226 | /// |
3227 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu16_mask&expand=802) |
3228 | #[inline ] |
3229 | #[target_feature (enable = "avx512bw" )] |
3230 | #[cfg_attr (test, assert_instr(vpcmp))] |
3231 | pub unsafe fn _mm512_mask_cmpeq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { |
3232 | _mm512_cmpeq_epu16_mask(a, b) & k1 |
3233 | } |
3234 | |
3235 | /// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k. |
3236 | /// |
3237 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu16_mask&expand=799) |
3238 | #[inline ] |
3239 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3240 | #[cfg_attr (test, assert_instr(vpcmp))] |
3241 | pub unsafe fn _mm256_cmpeq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { |
simd_bitmask::<u16x16, _>(simd_eq(a.as_u16x16(), b.as_u16x16()))
3243 | } |
3244 | |
3245 | /// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3246 | /// |
3247 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu16_mask&expand=800) |
3248 | #[inline ] |
3249 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3250 | #[cfg_attr (test, assert_instr(vpcmp))] |
3251 | pub unsafe fn _mm256_mask_cmpeq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { |
3252 | _mm256_cmpeq_epu16_mask(a, b) & k1 |
3253 | } |
3254 | |
3255 | /// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k. |
3256 | /// |
3257 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu16_mask&expand=797) |
3258 | #[inline ] |
3259 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3260 | #[cfg_attr (test, assert_instr(vpcmp))] |
3261 | pub unsafe fn _mm_cmpeq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { |
simd_bitmask::<u16x8, _>(simd_eq(a.as_u16x8(), b.as_u16x8()))
3263 | } |
3264 | |
3265 | /// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3266 | /// |
3267 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu16_mask&expand=798) |
3268 | #[inline ] |
3269 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3270 | #[cfg_attr (test, assert_instr(vpcmp))] |
3271 | pub unsafe fn _mm_mask_cmpeq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { |
3272 | _mm_cmpeq_epu16_mask(a, b) & k1 |
3273 | } |
3274 | |
3275 | /// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k. |
3276 | /// |
3277 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu8_mask&expand=819) |
3278 | #[inline ] |
3279 | #[target_feature (enable = "avx512bw" )] |
3280 | #[cfg_attr (test, assert_instr(vpcmp))] |
3281 | pub unsafe fn _mm512_cmpeq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { |
simd_bitmask::<u8x64, _>(simd_eq(a.as_u8x64(), b.as_u8x64()))
3283 | } |
3284 | |
3285 | /// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3286 | /// |
3287 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu8_mask&expand=820) |
3288 | #[inline ] |
3289 | #[target_feature (enable = "avx512bw" )] |
3290 | #[cfg_attr (test, assert_instr(vpcmp))] |
3291 | pub unsafe fn _mm512_mask_cmpeq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { |
3292 | _mm512_cmpeq_epu8_mask(a, b) & k1 |
3293 | } |
3294 | |
3295 | /// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k. |
3296 | /// |
3297 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu8_mask&expand=817) |
3298 | #[inline ] |
3299 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3300 | #[cfg_attr (test, assert_instr(vpcmp))] |
3301 | pub unsafe fn _mm256_cmpeq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { |
simd_bitmask::<u8x32, _>(simd_eq(a.as_u8x32(), b.as_u8x32()))
3303 | } |
3304 | |
3305 | /// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3306 | /// |
3307 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu8_mask&expand=818) |
3308 | #[inline ] |
3309 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3310 | #[cfg_attr (test, assert_instr(vpcmp))] |
3311 | pub unsafe fn _mm256_mask_cmpeq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { |
3312 | _mm256_cmpeq_epu8_mask(a, b) & k1 |
3313 | } |
3314 | |
3315 | /// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k. |
3316 | /// |
3317 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu8_mask&expand=815) |
3318 | #[inline ] |
3319 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3320 | #[cfg_attr (test, assert_instr(vpcmp))] |
3321 | pub unsafe fn _mm_cmpeq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { |
simd_bitmask::<u8x16, _>(simd_eq(a.as_u8x16(), b.as_u8x16()))
3323 | } |
3324 | |
3325 | /// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3326 | /// |
3327 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu8_mask&expand=816) |
3328 | #[inline ] |
3329 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3330 | #[cfg_attr (test, assert_instr(vpcmp))] |
3331 | pub unsafe fn _mm_mask_cmpeq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { |
3332 | _mm_cmpeq_epu8_mask(a, b) & k1 |
3333 | } |
3334 | |
3335 | /// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k. |
3336 | /// |
3337 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi16_mask&expand=771) |
3338 | #[inline ] |
3339 | #[target_feature (enable = "avx512bw" )] |
3340 | #[cfg_attr (test, assert_instr(vpcmp))] |
3341 | pub unsafe fn _mm512_cmpeq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { |
simd_bitmask::<i16x32, _>(simd_eq(a.as_i16x32(), b.as_i16x32()))
3343 | } |
3344 | |
3345 | /// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3346 | /// |
3347 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi16_mask&expand=772) |
3348 | #[inline ] |
3349 | #[target_feature (enable = "avx512bw" )] |
3350 | #[cfg_attr (test, assert_instr(vpcmp))] |
3351 | pub unsafe fn _mm512_mask_cmpeq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { |
3352 | _mm512_cmpeq_epi16_mask(a, b) & k1 |
3353 | } |
3354 | |
3355 | /// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k. |
3356 | /// |
3357 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi16_mask&expand=769) |
3358 | #[inline ] |
3359 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3360 | #[cfg_attr (test, assert_instr(vpcmp))] |
3361 | pub unsafe fn _mm256_cmpeq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { |
simd_bitmask::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16()))
3363 | } |
3364 | |
3365 | /// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3366 | /// |
3367 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi16_mask&expand=770) |
3368 | #[inline ] |
3369 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3370 | #[cfg_attr (test, assert_instr(vpcmp))] |
3371 | pub unsafe fn _mm256_mask_cmpeq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { |
3372 | _mm256_cmpeq_epi16_mask(a, b) & k1 |
3373 | } |
3374 | |
3375 | /// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k. |
3376 | /// |
3377 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16_mask&expand=767) |
3378 | #[inline ] |
3379 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3380 | #[cfg_attr (test, assert_instr(vpcmp))] |
3381 | pub unsafe fn _mm_cmpeq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { |
simd_bitmask::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8()))
3383 | } |
3384 | |
3385 | /// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3386 | /// |
3387 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi16_mask&expand=768) |
3388 | #[inline ] |
3389 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3390 | #[cfg_attr (test, assert_instr(vpcmp))] |
3391 | pub unsafe fn _mm_mask_cmpeq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { |
3392 | _mm_cmpeq_epi16_mask(a, b) & k1 |
3393 | } |
3394 | |
3395 | /// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k. |
3396 | /// |
3397 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi8_mask&expand=795) |
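///
/// # Examples
///
/// A minimal sketch, assuming `avx512bw` support is detected at runtime; the
/// 64 per-byte results are returned in a `__mmask64` (a `u64`):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512bw") {
///         unsafe {
///             let a = _mm512_set1_epi8(7);
///             // all 64 byte lanes are equal
///             assert_eq!(_mm512_cmpeq_epi8_mask(a, _mm512_set1_epi8(7)), u64::MAX);
///             // no byte lane of `a` equals 9
///             assert_eq!(_mm512_cmpeq_epi8_mask(a, _mm512_set1_epi8(9)), 0);
///         }
///     }
/// }
/// ```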
3398 | #[inline ] |
3399 | #[target_feature (enable = "avx512bw" )] |
3400 | #[cfg_attr (test, assert_instr(vpcmp))] |
3401 | pub unsafe fn _mm512_cmpeq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { |
simd_bitmask::<i8x64, _>(simd_eq(a.as_i8x64(), b.as_i8x64()))
3403 | } |
3404 | |
3405 | /// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3406 | /// |
3407 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi8_mask&expand=796) |
3408 | #[inline ] |
3409 | #[target_feature (enable = "avx512bw" )] |
3410 | #[cfg_attr (test, assert_instr(vpcmp))] |
3411 | pub unsafe fn _mm512_mask_cmpeq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { |
3412 | _mm512_cmpeq_epi8_mask(a, b) & k1 |
3413 | } |
3414 | |
3415 | /// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k. |
3416 | /// |
3417 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi8_mask&expand=793) |
3418 | #[inline ] |
3419 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3420 | #[cfg_attr (test, assert_instr(vpcmp))] |
3421 | pub unsafe fn _mm256_cmpeq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { |
simd_bitmask::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32()))
3423 | } |
3424 | |
3425 | /// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3426 | /// |
3427 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi8_mask&expand=794) |
3428 | #[inline ] |
3429 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3430 | #[cfg_attr (test, assert_instr(vpcmp))] |
3431 | pub unsafe fn _mm256_mask_cmpeq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { |
3432 | _mm256_cmpeq_epi8_mask(a, b) & k1 |
3433 | } |
3434 | |
3435 | /// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k. |
3436 | /// |
3437 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8_mask&expand=791) |
3438 | #[inline ] |
3439 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3440 | #[cfg_attr (test, assert_instr(vpcmp))] |
3441 | pub unsafe fn _mm_cmpeq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { |
simd_bitmask::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16()))
3443 | } |
3444 | |
3445 | /// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3446 | /// |
3447 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi8_mask&expand=792) |
3448 | #[inline ] |
3449 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3450 | #[cfg_attr (test, assert_instr(vpcmp))] |
3451 | pub unsafe fn _mm_mask_cmpeq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { |
3452 | _mm_cmpeq_epi8_mask(a, b) & k1 |
3453 | } |
3454 | |
3455 | /// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k. |
3456 | /// |
3457 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu16_mask&expand=1106) |
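///
/// # Examples
///
/// A minimal sketch, assuming `avx512bw` support is detected at runtime;
/// counting the set bits of the returned mask gives the number of lanes that
/// differ:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512bw") {
///         unsafe {
///             let a = _mm512_set1_epi16(3);
///             let b = _mm512_set1_epi16(3);
///             // identical inputs: no lane differs, so no mask bit is set
///             let k = _mm512_cmpneq_epu16_mask(a, b);
///             assert_eq!(k.count_ones(), 0);
///         }
///     }
/// }
/// ```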
3458 | #[inline ] |
3459 | #[target_feature (enable = "avx512bw" )] |
3460 | #[cfg_attr (test, assert_instr(vpcmp))] |
3461 | pub unsafe fn _mm512_cmpneq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { |
simd_bitmask::<u16x32, _>(simd_ne(a.as_u16x32(), b.as_u16x32()))
3463 | } |
3464 | |
3465 | /// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3466 | /// |
3467 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu16_mask&expand=1107) |
3468 | #[inline ] |
3469 | #[target_feature (enable = "avx512bw" )] |
3470 | #[cfg_attr (test, assert_instr(vpcmp))] |
3471 | pub unsafe fn _mm512_mask_cmpneq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { |
3472 | _mm512_cmpneq_epu16_mask(a, b) & k1 |
3473 | } |
3474 | |
3475 | /// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k. |
3476 | /// |
3477 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu16_mask&expand=1104) |
3478 | #[inline ] |
3479 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3480 | #[cfg_attr (test, assert_instr(vpcmp))] |
3481 | pub unsafe fn _mm256_cmpneq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { |
simd_bitmask::<u16x16, _>(simd_ne(a.as_u16x16(), b.as_u16x16()))
3483 | } |
3484 | |
3485 | /// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3486 | /// |
3487 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu16_mask&expand=1105) |
3488 | #[inline ] |
3489 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3490 | #[cfg_attr (test, assert_instr(vpcmp))] |
3491 | pub unsafe fn _mm256_mask_cmpneq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { |
3492 | _mm256_cmpneq_epu16_mask(a, b) & k1 |
3493 | } |
3494 | |
3495 | /// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k. |
3496 | /// |
3497 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu16_mask&expand=1102) |
3498 | #[inline ] |
3499 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3500 | #[cfg_attr (test, assert_instr(vpcmp))] |
3501 | pub unsafe fn _mm_cmpneq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { |
simd_bitmask::<u16x8, _>(simd_ne(a.as_u16x8(), b.as_u16x8()))
3503 | } |
3504 | |
3505 | /// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3506 | /// |
3507 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu16_mask&expand=1103) |
3508 | #[inline ] |
3509 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3510 | #[cfg_attr (test, assert_instr(vpcmp))] |
3511 | pub unsafe fn _mm_mask_cmpneq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { |
3512 | _mm_cmpneq_epu16_mask(a, b) & k1 |
3513 | } |
3514 | |
3515 | /// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k. |
3516 | /// |
3517 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu8_mask&expand=1124) |
3518 | #[inline ] |
3519 | #[target_feature (enable = "avx512bw" )] |
3520 | #[cfg_attr (test, assert_instr(vpcmp))] |
3521 | pub unsafe fn _mm512_cmpneq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { |
simd_bitmask::<u8x64, _>(simd_ne(a.as_u8x64(), b.as_u8x64()))
3523 | } |
3524 | |
3525 | /// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3526 | /// |
3527 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu8_mask&expand=1125) |
3528 | #[inline ] |
3529 | #[target_feature (enable = "avx512bw" )] |
3530 | #[cfg_attr (test, assert_instr(vpcmp))] |
3531 | pub unsafe fn _mm512_mask_cmpneq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { |
3532 | _mm512_cmpneq_epu8_mask(a, b) & k1 |
3533 | } |
3534 | |
3535 | /// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k. |
3536 | /// |
3537 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu8_mask&expand=1122) |
3538 | #[inline ] |
3539 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3540 | #[cfg_attr (test, assert_instr(vpcmp))] |
3541 | pub unsafe fn _mm256_cmpneq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { |
simd_bitmask::<u8x32, _>(simd_ne(a.as_u8x32(), b.as_u8x32()))
3543 | } |
3544 | |
3545 | /// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3546 | /// |
3547 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu8_mask&expand=1123) |
3548 | #[inline ] |
3549 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3550 | #[cfg_attr (test, assert_instr(vpcmp))] |
3551 | pub unsafe fn _mm256_mask_cmpneq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { |
3552 | _mm256_cmpneq_epu8_mask(a, b) & k1 |
3553 | } |
3554 | |
3555 | /// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k. |
3556 | /// |
3557 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu8_mask&expand=1120) |
3558 | #[inline ] |
3559 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3560 | #[cfg_attr (test, assert_instr(vpcmp))] |
3561 | pub unsafe fn _mm_cmpneq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { |
simd_bitmask::<u8x16, _>(simd_ne(a.as_u8x16(), b.as_u8x16()))
3563 | } |
3564 | |
3565 | /// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3566 | /// |
3567 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu8_mask&expand=1121) |
3568 | #[inline ] |
3569 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3570 | #[cfg_attr (test, assert_instr(vpcmp))] |
3571 | pub unsafe fn _mm_mask_cmpneq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { |
3572 | _mm_cmpneq_epu8_mask(a, b) & k1 |
3573 | } |
3574 | |
3575 | /// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k. |
3576 | /// |
3577 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi16_mask&expand=1082) |
3578 | #[inline ] |
3579 | #[target_feature (enable = "avx512bw" )] |
3580 | #[cfg_attr (test, assert_instr(vpcmp))] |
3581 | pub unsafe fn _mm512_cmpneq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { |
simd_bitmask::<i16x32, _>(simd_ne(a.as_i16x32(), b.as_i16x32()))
3583 | } |
3584 | |
3585 | /// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3586 | /// |
3587 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi16_mask&expand=1083) |
3588 | #[inline ] |
3589 | #[target_feature (enable = "avx512bw" )] |
3590 | #[cfg_attr (test, assert_instr(vpcmp))] |
3591 | pub unsafe fn _mm512_mask_cmpneq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { |
3592 | _mm512_cmpneq_epi16_mask(a, b) & k1 |
3593 | } |
3594 | |
3595 | /// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k. |
3596 | /// |
3597 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi16_mask&expand=1080) |
3598 | #[inline ] |
3599 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3600 | #[cfg_attr (test, assert_instr(vpcmp))] |
3601 | pub unsafe fn _mm256_cmpneq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { |
simd_bitmask::<i16x16, _>(simd_ne(a.as_i16x16(), b.as_i16x16()))
3603 | } |
3604 | |
3605 | /// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3606 | /// |
3607 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi16_mask&expand=1081) |
3608 | #[inline ] |
3609 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3610 | #[cfg_attr (test, assert_instr(vpcmp))] |
3611 | pub unsafe fn _mm256_mask_cmpneq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { |
3612 | _mm256_cmpneq_epi16_mask(a, b) & k1 |
3613 | } |
3614 | |
3615 | /// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k. |
3616 | /// |
3617 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi16_mask&expand=1078) |
3618 | #[inline ] |
3619 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3620 | #[cfg_attr (test, assert_instr(vpcmp))] |
3621 | pub unsafe fn _mm_cmpneq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { |
simd_bitmask::<i16x8, _>(simd_ne(a.as_i16x8(), b.as_i16x8()))
3623 | } |
3624 | |
3625 | /// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3626 | /// |
3627 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi16_mask&expand=1079) |
3628 | #[inline ] |
3629 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3630 | #[cfg_attr (test, assert_instr(vpcmp))] |
3631 | pub unsafe fn _mm_mask_cmpneq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { |
3632 | _mm_cmpneq_epi16_mask(a, b) & k1 |
3633 | } |
3634 | |
3635 | /// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k. |
3636 | /// |
3637 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi8_mask&expand=1100) |
3638 | #[inline ] |
3639 | #[target_feature (enable = "avx512bw" )] |
3640 | #[cfg_attr (test, assert_instr(vpcmp))] |
3641 | pub unsafe fn _mm512_cmpneq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { |
simd_bitmask::<i8x64, _>(simd_ne(a.as_i8x64(), b.as_i8x64()))
3643 | } |
3644 | |
3645 | /// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3646 | /// |
3647 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi8_mask&expand=1101) |
3648 | #[inline ] |
3649 | #[target_feature (enable = "avx512bw" )] |
3650 | #[cfg_attr (test, assert_instr(vpcmp))] |
3651 | pub unsafe fn _mm512_mask_cmpneq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { |
3652 | _mm512_cmpneq_epi8_mask(a, b) & k1 |
3653 | } |
3654 | |
3655 | /// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k. |
3656 | /// |
3657 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi8_mask&expand=1098) |
3658 | #[inline ] |
3659 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3660 | #[cfg_attr (test, assert_instr(vpcmp))] |
3661 | pub unsafe fn _mm256_cmpneq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { |
simd_bitmask::<i8x32, _>(simd_ne(a.as_i8x32(), b.as_i8x32()))
3663 | } |
3664 | |
3665 | /// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3666 | /// |
3667 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi8_mask&expand=1099) |
3668 | #[inline ] |
3669 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3670 | #[cfg_attr (test, assert_instr(vpcmp))] |
3671 | pub unsafe fn _mm256_mask_cmpneq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { |
3672 | _mm256_cmpneq_epi8_mask(a, b) & k1 |
3673 | } |
3674 | |
3675 | /// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k. |
3676 | /// |
3677 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi8_mask&expand=1096) |
3678 | #[inline ] |
3679 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3680 | #[cfg_attr (test, assert_instr(vpcmp))] |
3681 | pub unsafe fn _mm_cmpneq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { |
simd_bitmask::<i8x16, _>(simd_ne(a.as_i8x16(), b.as_i8x16()))
3683 | } |
3684 | |
3685 | /// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3686 | /// |
3687 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi8_mask&expand=1097) |
3688 | #[inline ] |
3689 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3690 | #[cfg_attr (test, assert_instr(vpcmp))] |
3691 | pub unsafe fn _mm_mask_cmpneq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { |
3692 | _mm_cmpneq_epi8_mask(a, b) & k1 |
3693 | } |
3694 | |
3695 | /// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k. |
3696 | /// |
3697 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu16_mask&expand=715) |
3698 | #[inline ] |
3699 | #[target_feature (enable = "avx512bw" )] |
3700 | #[rustc_legacy_const_generics (2)] |
3701 | #[cfg_attr (test, assert_instr(vpcmp, IMM8 = 0))] |
3702 | pub unsafe fn _mm512_cmp_epu16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 { |
3703 | static_assert_uimm_bits!(IMM8, 3); |
3704 | let a: u16x32 = a.as_u16x32(); |
3705 | let b: u16x32 = b.as_u16x32(); |
3706 | vpcmpuw(a, b, IMM8, 0b11111111_11111111_11111111_11111111) |
3707 | } |
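
// Illustrative sketch, not part of the public API: IMM8 selects one of eight
// predicates (0 eq, 1 lt, 2 le, 3 false, 4 ne, 5 not-lt, 6 not-le, 7 true),
// i.e. Intel's _MM_CMPINT_* encoding. The helper function name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn cmp_epu16_mask_sketch() {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(2);
    // IMM8 = 1: unsigned less-than; all 32 words compare true.
    let lt: __mmask32 = _mm512_cmp_epu16_mask::<1>(a, b);
    assert_eq!(lt, u32::MAX);
    // IMM8 = 4: not-equal, restricted by k1 to the low 8 lanes.
    let ne: __mmask32 = _mm512_mask_cmp_epu16_mask::<4>(0xFF, a, b);
    assert_eq!(ne, 0xFF);
}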
3708 | |
3709 | /// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3710 | /// |
3711 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu16_mask&expand=716) |
3712 | #[inline ] |
3713 | #[target_feature (enable = "avx512bw" )] |
3714 | #[rustc_legacy_const_generics (3)] |
3715 | #[cfg_attr (test, assert_instr(vpcmp, IMM8 = 0))] |
3716 | pub unsafe fn _mm512_mask_cmp_epu16_mask<const IMM8: i32>( |
3717 | k1: __mmask32, |
3718 | a: __m512i, |
3719 | b: __m512i, |
3720 | ) -> __mmask32 { |
3721 | static_assert_uimm_bits!(IMM8, 3); |
3722 | let a: u16x32 = a.as_u16x32(); |
3723 | let b: u16x32 = b.as_u16x32(); |
3724 | vpcmpuw(a, b, IMM8, k1) |
3725 | } |
3726 | |
3727 | /// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. |
3728 | /// |
3729 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu16_mask&expand=713) |
3730 | #[inline ] |
3731 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3732 | #[rustc_legacy_const_generics (2)] |
3733 | #[cfg_attr (test, assert_instr(vpcmp, IMM8 = 0))] |
3734 | pub unsafe fn _mm256_cmp_epu16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 { |
3735 | static_assert_uimm_bits!(IMM8, 3); |
3736 | let a: u16x16 = a.as_u16x16(); |
3737 | let b: u16x16 = b.as_u16x16(); |
3738 | vpcmpuw256(a, b, IMM8, 0b11111111_11111111) |
3739 | } |
3740 | |
3741 | /// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3742 | /// |
3743 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu16_mask&expand=714) |
3744 | #[inline ] |
3745 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3746 | #[rustc_legacy_const_generics (3)] |
3747 | #[cfg_attr (test, assert_instr(vpcmp, IMM8 = 0))] |
3748 | pub unsafe fn _mm256_mask_cmp_epu16_mask<const IMM8: i32>( |
3749 | k1: __mmask16, |
3750 | a: __m256i, |
3751 | b: __m256i, |
3752 | ) -> __mmask16 { |
3753 | static_assert_uimm_bits!(IMM8, 3); |
3754 | let a: u16x16 = a.as_u16x16(); |
3755 | let b: u16x16 = b.as_u16x16(); |
3756 | vpcmpuw256(a, b, IMM8, k1) |
3757 | } |
3758 | |
3759 | /// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. |
3760 | /// |
3761 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu16_mask&expand=711) |
3762 | #[inline ] |
3763 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3764 | #[rustc_legacy_const_generics (2)] |
3765 | #[cfg_attr (test, assert_instr(vpcmp, IMM8 = 0))] |
3766 | pub unsafe fn _mm_cmp_epu16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 { |
3767 | static_assert_uimm_bits!(IMM8, 3); |
3768 | let a: u16x8 = a.as_u16x8(); |
3769 | let b: u16x8 = b.as_u16x8(); |
3770 | vpcmpuw128(a, b, IMM8, 0b11111111) |
3771 | } |
3772 | |
3773 | /// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3774 | /// |
3775 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu16_mask&expand=712) |
3776 | #[inline ] |
3777 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3778 | #[rustc_legacy_const_generics (3)] |
3779 | #[cfg_attr (test, assert_instr(vpcmp, IMM8 = 0))] |
3780 | pub unsafe fn _mm_mask_cmp_epu16_mask<const IMM8: i32>( |
3781 | k1: __mmask8, |
3782 | a: __m128i, |
3783 | b: __m128i, |
3784 | ) -> __mmask8 { |
3785 | static_assert_uimm_bits!(IMM8, 3); |
3786 | let a: u16x8 = a.as_u16x8(); |
3787 | let b: u16x8 = b.as_u16x8(); |
3788 | vpcmpuw128(a, b, IMM8, k1) |
3789 | } |
3790 | |
3791 | /// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. |
3792 | /// |
3793 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu8_mask&expand=733) |
3794 | #[inline ] |
3795 | #[target_feature (enable = "avx512bw" )] |
3796 | #[rustc_legacy_const_generics (2)] |
3797 | #[cfg_attr (test, assert_instr(vpcmp, IMM8 = 0))] |
3798 | pub unsafe fn _mm512_cmp_epu8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 { |
3799 | static_assert_uimm_bits!(IMM8, 3); |
3800 | let a: u8x64 = a.as_u8x64(); |
3801 | let b: u8x64 = b.as_u8x64(); |
3802 | vpcmpub( |
3803 | a, |
3804 | b, |
3805 | IMM8, |
3806 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
3807 | ) |
3808 | } |
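
// Illustrative sketch, not part of the public API: because the comparison is
// unsigned, bytes at or above 0x80 sort above small positive bytes, and the
// resulting 64-bit mask can be consumed with ordinary integer ops such as
// count_ones. The helper function name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn cmp_epu8_mask_sketch() {
    let a = _mm512_set1_epi8(200u8 as i8); // byte 0xC8 = 200 unsigned
    let threshold = _mm512_set1_epi8(100);
    // IMM8 = 6: unsigned not-less-or-equal, i.e. greater-than; 200 > 100 holds
    // for all 64 bytes (a signed compare would see -56 and report false).
    let gt: __mmask64 = _mm512_cmp_epu8_mask::<6>(a, threshold);
    assert_eq!(gt.count_ones(), 64);
}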
3809 | |
3810 | /// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3811 | /// |
3812 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu8_mask&expand=734) |
3813 | #[inline ] |
3814 | #[target_feature (enable = "avx512bw" )] |
3815 | #[rustc_legacy_const_generics (3)] |
3816 | #[cfg_attr (test, assert_instr(vpcmp, IMM8 = 0))] |
3817 | pub unsafe fn _mm512_mask_cmp_epu8_mask<const IMM8: i32>( |
3818 | k1: __mmask64, |
3819 | a: __m512i, |
3820 | b: __m512i, |
3821 | ) -> __mmask64 { |
3822 | static_assert_uimm_bits!(IMM8, 3); |
3823 | let a: u8x64 = a.as_u8x64(); |
3824 | let b: u8x64 = b.as_u8x64(); |
3825 | vpcmpub(a, b, IMM8, k1) |
3826 | } |
3827 | |
3828 | /// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. |
3829 | /// |
3830 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu8_mask&expand=731) |
3831 | #[inline ] |
3832 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3833 | #[rustc_legacy_const_generics (2)] |
3834 | #[cfg_attr (test, assert_instr(vpcmp, IMM8 = 0))] |
3835 | pub unsafe fn _mm256_cmp_epu8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 { |
3836 | static_assert_uimm_bits!(IMM8, 3); |
3837 | let a: u8x32 = a.as_u8x32(); |
3838 | let b: u8x32 = b.as_u8x32(); |
3839 | vpcmpub256(a, b, IMM8, 0b11111111_11111111_11111111_11111111) |
3840 | } |
3841 | |
3842 | /// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3843 | /// |
3844 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu8_mask&expand=732) |
3845 | #[inline ] |
3846 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3847 | #[rustc_legacy_const_generics (3)] |
3848 | #[cfg_attr (test, assert_instr(vpcmp, IMM8 = 0))] |
3849 | pub unsafe fn _mm256_mask_cmp_epu8_mask<const IMM8: i32>( |
3850 | k1: __mmask32, |
3851 | a: __m256i, |
3852 | b: __m256i, |
3853 | ) -> __mmask32 { |
3854 | static_assert_uimm_bits!(IMM8, 3); |
3855 | let a: u8x32 = a.as_u8x32(); |
3856 | let b: u8x32 = b.as_u8x32(); |
3857 | vpcmpub256(a, b, IMM8, k1) |
3858 | } |
3859 | |
3860 | /// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. |
3861 | /// |
3862 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu8_mask&expand=729) |
3863 | #[inline ] |
3864 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3865 | #[rustc_legacy_const_generics (2)] |
3866 | #[cfg_attr (test, assert_instr(vpcmp, IMM8 = 0))] |
3867 | pub unsafe fn _mm_cmp_epu8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 { |
3868 | static_assert_uimm_bits!(IMM8, 3); |
3869 | let a: u8x16 = a.as_u8x16(); |
3870 | let b: u8x16 = b.as_u8x16(); |
3871 | vpcmpub128(a, b, IMM8, 0b11111111_11111111) |
3872 | } |
3873 | |
3874 | /// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3875 | /// |
3876 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu8_mask&expand=730) |
3877 | #[inline ] |
3878 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3879 | #[rustc_legacy_const_generics (3)] |
3880 | #[cfg_attr (test, assert_instr(vpcmp, IMM8 = 0))] |
3881 | pub unsafe fn _mm_mask_cmp_epu8_mask<const IMM8: i32>( |
3882 | k1: __mmask16, |
3883 | a: __m128i, |
3884 | b: __m128i, |
3885 | ) -> __mmask16 { |
3886 | static_assert_uimm_bits!(IMM8, 3); |
3887 | let a: u8x16 = a.as_u8x16(); |
3888 | let b: u8x16 = b.as_u8x16(); |
3889 | vpcmpub128(a, b, IMM8, k1) |
3890 | } |
3891 | |
3892 | /// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. |
3893 | /// |
3894 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi16_mask&expand=691) |
3895 | #[inline ] |
3896 | #[target_feature (enable = "avx512bw" )] |
3897 | #[rustc_legacy_const_generics (2)] |
3898 | #[cfg_attr (test, assert_instr(vpcmp, IMM8 = 0))] |
3899 | pub unsafe fn _mm512_cmp_epi16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 { |
3900 | static_assert_uimm_bits!(IMM8, 3); |
3901 | let a: i16x32 = a.as_i16x32(); |
3902 | let b: i16x32 = b.as_i16x32(); |
3903 | vpcmpw(a, b, IMM8, 0b11111111_11111111_11111111_11111111) |
3904 | } |
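
// Illustrative sketch, not part of the public API: the epi16 forms order the
// words as signed values, so they disagree with the epu16 forms above whenever
// the sign bit is set. The helper function name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn cmp_epi16_mask_sketch() {
    let a = _mm512_set1_epi16(-1);
    let b = _mm512_set1_epi16(1);
    // IMM8 = 1: signed less-than; -1 < 1 in every lane.
    let signed_lt: __mmask32 = _mm512_cmp_epi16_mask::<1>(a, b);
    assert_eq!(signed_lt, u32::MAX);
    // The same bit pattern is 0xFFFF unsigned, which is not below 1.
    let unsigned_lt: __mmask32 = _mm512_cmp_epu16_mask::<1>(a, b);
    assert_eq!(unsigned_lt, 0);
}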
3905 | |
3906 | /// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3907 | /// |
3908 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi16_mask&expand=692) |
3909 | #[inline ] |
3910 | #[target_feature (enable = "avx512bw" )] |
3911 | #[rustc_legacy_const_generics (3)] |
3912 | #[cfg_attr (test, assert_instr(vpcmp, IMM8 = 0))] |
3913 | pub unsafe fn _mm512_mask_cmp_epi16_mask<const IMM8: i32>( |
3914 | k1: __mmask32, |
3915 | a: __m512i, |
3916 | b: __m512i, |
3917 | ) -> __mmask32 { |
3918 | static_assert_uimm_bits!(IMM8, 3); |
3919 | let a: i16x32 = a.as_i16x32(); |
3920 | let b: i16x32 = b.as_i16x32(); |
3921 | vpcmpw(a, b, IMM8, k1) |
3922 | } |
3923 | |
3924 | /// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. |
3925 | /// |
3926 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi16_mask&expand=689) |
3927 | #[inline ] |
3928 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3929 | #[rustc_legacy_const_generics (2)] |
3930 | #[cfg_attr (test, assert_instr(vpcmp, IMM8 = 0))] |
3931 | pub unsafe fn _mm256_cmp_epi16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 { |
3932 | static_assert_uimm_bits!(IMM8, 3); |
3933 | let a: i16x16 = a.as_i16x16(); |
3934 | let b: i16x16 = b.as_i16x16(); |
3935 | vpcmpw256(a, b, IMM8, 0b11111111_11111111) |
3936 | } |
3937 | |
3938 | /// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3939 | /// |
3940 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi16_mask&expand=690) |
3941 | #[inline ] |
3942 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3943 | #[rustc_legacy_const_generics (3)] |
3944 | #[cfg_attr (test, assert_instr(vpcmp, IMM8 = 0))] |
3945 | pub unsafe fn _mm256_mask_cmp_epi16_mask<const IMM8: i32>( |
3946 | k1: __mmask16, |
3947 | a: __m256i, |
3948 | b: __m256i, |
3949 | ) -> __mmask16 { |
3950 | static_assert_uimm_bits!(IMM8, 3); |
3951 | let a: i16x16 = a.as_i16x16(); |
3952 | let b: i16x16 = b.as_i16x16(); |
3953 | vpcmpw256(a, b, IMM8, k1) |
3954 | } |
3955 | |
3956 | /// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. |
3957 | /// |
3958 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi16_mask&expand=687) |
3959 | #[inline ] |
3960 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3961 | #[rustc_legacy_const_generics (2)] |
3962 | #[cfg_attr (test, assert_instr(vpcmp, IMM8 = 0))] |
3963 | pub unsafe fn _mm_cmp_epi16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 { |
3964 | static_assert_uimm_bits!(IMM8, 3); |
3965 | let a: i16x8 = a.as_i16x8(); |
3966 | let b: i16x8 = b.as_i16x8(); |
3967 | vpcmpw128(a, b, IMM8, 0b11111111) |
3968 | } |
3969 | |
3970 | /// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
3971 | /// |
3972 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi16_mask&expand=688) |
3973 | #[inline ] |
3974 | #[target_feature (enable = "avx512bw,avx512vl" )] |
3975 | #[rustc_legacy_const_generics (3)] |
3976 | #[cfg_attr (test, assert_instr(vpcmp, IMM8 = 0))] |
3977 | pub unsafe fn _mm_mask_cmp_epi16_mask<const IMM8: i32>( |
3978 | k1: __mmask8, |
3979 | a: __m128i, |
3980 | b: __m128i, |
3981 | ) -> __mmask8 { |
3982 | static_assert_uimm_bits!(IMM8, 3); |
3983 | let a: i16x8 = a.as_i16x8(); |
3984 | let b: i16x8 = b.as_i16x8(); |
3985 | vpcmpw128(a, b, IMM8, k1) |
3986 | } |
3987 | |
3988 | /// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. |
3989 | /// |
3990 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi8_mask&expand=709) |
3991 | #[inline ] |
3992 | #[target_feature (enable = "avx512bw" )] |
3993 | #[rustc_legacy_const_generics (2)] |
3994 | #[cfg_attr (test, assert_instr(vpcmp, IMM8 = 0))] |
3995 | pub unsafe fn _mm512_cmp_epi8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 { |
3996 | static_assert_uimm_bits!(IMM8, 3); |
3997 | let a: i8x64 = a.as_i8x64(); |
3998 | let b: i8x64 = b.as_i8x64(); |
3999 | vpcmpb( |
4000 | a, |
4001 | b, |
4002 | IMM8, |
4003 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
4004 | ) |
4005 | } |
4006 | |
4007 | /// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
4008 | /// |
4009 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi8_mask&expand=710) |
4010 | #[inline ] |
4011 | #[target_feature (enable = "avx512bw" )] |
4012 | #[rustc_legacy_const_generics (3)] |
4013 | #[cfg_attr (test, assert_instr(vpcmp, IMM8 = 0))] |
4014 | pub unsafe fn _mm512_mask_cmp_epi8_mask<const IMM8: i32>( |
4015 | k1: __mmask64, |
4016 | a: __m512i, |
4017 | b: __m512i, |
4018 | ) -> __mmask64 { |
4019 | static_assert_uimm_bits!(IMM8, 3); |
4020 | let a: i8x64 = a.as_i8x64(); |
4021 | let b: i8x64 = b.as_i8x64(); |
4022 | vpcmpb(a, b, IMM8, k1) |
4023 | } |
4024 | |
4025 | /// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. |
4026 | /// |
4027 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi8_mask&expand=707) |
4028 | #[inline ] |
4029 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4030 | #[rustc_legacy_const_generics (2)] |
4031 | #[cfg_attr (test, assert_instr(vpcmp, IMM8 = 0))] |
4032 | pub unsafe fn _mm256_cmp_epi8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 { |
4033 | static_assert_uimm_bits!(IMM8, 3); |
4034 | let a: i8x32 = a.as_i8x32(); |
4035 | let b: i8x32 = b.as_i8x32(); |
4036 | vpcmpb256(a, b, IMM8, 0b11111111_11111111_11111111_11111111) |
4037 | } |
4038 | |
4039 | /// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
4040 | /// |
4041 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi8_mask&expand=708) |
4042 | #[inline ] |
4043 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4044 | #[rustc_legacy_const_generics (3)] |
4045 | #[cfg_attr (test, assert_instr(vpcmp, IMM8 = 0))] |
4046 | pub unsafe fn _mm256_mask_cmp_epi8_mask<const IMM8: i32>( |
4047 | k1: __mmask32, |
4048 | a: __m256i, |
4049 | b: __m256i, |
4050 | ) -> __mmask32 { |
4051 | static_assert_uimm_bits!(IMM8, 3); |
4052 | let a: i8x32 = a.as_i8x32(); |
4053 | let b: i8x32 = b.as_i8x32(); |
4054 | vpcmpb256(a, b, IMM8, k1) |
4055 | } |
4056 | |
4057 | /// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. |
4058 | /// |
4059 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi8_mask&expand=705) |
4060 | #[inline ] |
4061 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4062 | #[rustc_legacy_const_generics (2)] |
4063 | #[cfg_attr (test, assert_instr(vpcmp, IMM8 = 0))] |
4064 | pub unsafe fn _mm_cmp_epi8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 { |
4065 | static_assert_uimm_bits!(IMM8, 3); |
4066 | let a: i8x16 = a.as_i8x16(); |
4067 | let b: i8x16 = b.as_i8x16(); |
4068 | vpcmpb128(a, b, IMM8, 0b11111111_11111111) |
4069 | } |
4070 | |
4071 | /// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
4072 | /// |
4073 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi8_mask&expand=706) |
4074 | #[inline ] |
4075 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4076 | #[rustc_legacy_const_generics (3)] |
4077 | #[cfg_attr (test, assert_instr(vpcmp, IMM8 = 0))] |
4078 | pub unsafe fn _mm_mask_cmp_epi8_mask<const IMM8: i32>( |
4079 | k1: __mmask16, |
4080 | a: __m128i, |
4081 | b: __m128i, |
4082 | ) -> __mmask16 { |
4083 | static_assert_uimm_bits!(IMM8, 3); |
4084 | let a: i8x16 = a.as_i8x16(); |
4085 | let b: i8x16 = b.as_i8x16(); |
4086 | vpcmpb128(a, b, IMM8, k1) |
4087 | } |
4088 | |
4089 | /// Load 512-bits (composed of 32 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. |
4090 | /// |
4091 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi16&expand=3368) |
4092 | #[inline ] |
4093 | #[target_feature (enable = "avx512bw" )] |
4094 | #[cfg_attr (test, assert_instr(vmovups))] //should be vmovdqu16 |
4095 | pub unsafe fn _mm512_loadu_epi16(mem_addr: *const i16) -> __m512i { |
4096 | ptr::read_unaligned(mem_addr as *const __m512i) |
4097 | } |
4098 | |
4099 | /// Load 256-bits (composed of 16 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. |
4100 | /// |
4101 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi16&expand=3365) |
4102 | #[inline ] |
4103 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4104 | #[cfg_attr (test, assert_instr(vmovups))] //should be vmovdqu16 |
4105 | pub unsafe fn _mm256_loadu_epi16(mem_addr: *const i16) -> __m256i { |
4106 | ptr::read_unaligned(mem_addr as *const __m256i) |
4107 | } |
4108 | |
4109 | /// Load 128-bits (composed of 8 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. |
4110 | /// |
4111 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi16&expand=3362) |
4112 | #[inline ] |
4113 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4114 | #[cfg_attr (test, assert_instr(vmovups))] //should be vmovdqu16 |
4115 | pub unsafe fn _mm_loadu_epi16(mem_addr: *const i16) -> __m128i { |
4116 | ptr::read_unaligned(mem_addr as *const __m128i) |
4117 | } |
4118 | |
4119 | /// Load 512-bits (composed of 64 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. |
4120 | /// |
4121 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi8&expand=3395) |
4122 | #[inline ] |
4123 | #[target_feature (enable = "avx512bw" )] |
4124 | #[cfg_attr (test, assert_instr(vmovups))] //should be vmovdqu8 |
4125 | pub unsafe fn _mm512_loadu_epi8(mem_addr: *const i8) -> __m512i { |
4126 | ptr::read_unaligned(mem_addr as *const __m512i) |
4127 | } |
4128 | |
4129 | /// Load 256-bits (composed of 32 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. |
4130 | /// |
4131 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi8&expand=3392) |
4132 | #[inline ] |
4133 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4134 | #[cfg_attr (test, assert_instr(vmovups))] //should be vmovdqu8 |
4135 | pub unsafe fn _mm256_loadu_epi8(mem_addr: *const i8) -> __m256i { |
4136 | ptr::read_unaligned(mem_addr as *const __m256i) |
4137 | } |
4138 | |
4139 | /// Load 128-bits (composed of 16 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. |
4140 | /// |
4141 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi8&expand=3389) |
4142 | #[inline ] |
4143 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4144 | #[cfg_attr (test, assert_instr(vmovups))] //should be vmovdqu8 |
4145 | pub unsafe fn _mm_loadu_epi8(mem_addr: *const i8) -> __m128i { |
4146 | ptr::read_unaligned(mem_addr as *const __m128i) |
4147 | } |
4148 | |
4149 | /// Store 512-bits (composed of 32 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. |
4150 | /// |
4151 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi16&expand=5622) |
4152 | #[inline ] |
4153 | #[target_feature (enable = "avx512bw" )] |
4154 | #[cfg_attr (test, assert_instr(vmovups))] //should be vmovdqu16 |
4155 | pub unsafe fn _mm512_storeu_epi16(mem_addr: *mut i16, a: __m512i) { |
4156 | ptr::write_unaligned(mem_addr as *mut __m512i, a); |
4157 | } |
4158 | |
4159 | /// Store 256-bits (composed of 16 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. |
4160 | /// |
4161 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi16&expand=5620) |
4162 | #[inline ] |
4163 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4164 | #[cfg_attr (test, assert_instr(vmovups))] //should be vmovdqu16 |
4165 | pub unsafe fn _mm256_storeu_epi16(mem_addr: *mut i16, a: __m256i) { |
4166 | ptr::write_unaligned(mem_addr as *mut __m256i, a); |
4167 | } |
4168 | |
4169 | /// Store 128-bits (composed of 8 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. |
4170 | /// |
4171 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi16&expand=5618) |
4172 | #[inline ] |
4173 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4174 | #[cfg_attr (test, assert_instr(vmovups))] //should be vmovdqu16 |
4175 | pub unsafe fn _mm_storeu_epi16(mem_addr: *mut i16, a: __m128i) { |
4176 | ptr::write_unaligned(mem_addr as *mut __m128i, a); |
4177 | } |
4178 | |
4179 | /// Store 512-bits (composed of 64 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. |
4180 | /// |
4181 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi8&expand=5640) |
4182 | #[inline ] |
4183 | #[target_feature (enable = "avx512bw" )] |
4184 | #[cfg_attr (test, assert_instr(vmovups))] //should be vmovdqu8 |
4185 | pub unsafe fn _mm512_storeu_epi8(mem_addr: *mut i8, a: __m512i) { |
4186 | ptr::write_unaligned(mem_addr as *mut __m512i, a); |
4187 | } |
4188 | |
4189 | /// Store 256-bits (composed of 32 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. |
4190 | /// |
4191 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi8&expand=5638) |
4192 | #[inline ] |
4193 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4194 | #[cfg_attr (test, assert_instr(vmovups))] //should be vmovdqu8 |
4195 | pub unsafe fn _mm256_storeu_epi8(mem_addr: *mut i8, a: __m256i) { |
4196 | ptr::write_unaligned(mem_addr as *mut __m256i, a); |
4197 | } |
4198 | |
4199 | /// Store 128-bits (composed of 16 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. |
4200 | /// |
4201 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi8&expand=5636) |
4202 | #[inline ] |
4203 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4204 | #[cfg_attr (test, assert_instr(vmovups))] //should be vmovdqu8 |
4205 | pub unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) { |
4206 | ptr::write_unaligned(mem_addr as *mut __m128i, a); |
4207 | } |
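
// Illustrative sketch, not part of the public API: the loadu/storeu pairs
// above accept any address, so a deliberately misaligned round trip is legal.
// The helper function name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn loadu_storeu_epi16_sketch() {
    let src = [7i16; 40];
    let mut dst = [0i16; 40];
    // Offsetting by one element leaves the pointer 2-byte aligned only.
    let v = _mm512_loadu_epi16(src.as_ptr().add(1));
    _mm512_storeu_epi16(dst.as_mut_ptr().add(1), v);
    assert_eq!(&dst[1..33], &src[1..33]);
}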
4208 | |
4209 | /// Load packed 16-bit integers from memory into dst using writemask k |
4210 | /// (elements are copied from src when the corresponding mask bit is not set). |
4211 | /// mem_addr does not need to be aligned on any particular boundary. |
4212 | /// |
4213 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi16) |
4214 | #[inline ] |
4215 | #[target_feature (enable = "avx512f,avx512bw" )] |
4216 | pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *const i16) -> __m512i { |
4217 | let mut dst: __m512i = src; |
4218 | asm!( |
4219 | vpl!("vmovdqu16 {dst}{{{k}}}" ), |
4220 | p = in(reg) mem_addr, |
4221 | k = in(kreg) k, |
4222 | dst = inout(zmm_reg) dst, |
4223 | options(pure, readonly, nostack) |
4224 | ); |
4225 | dst |
4226 | } |
4227 | |
4228 | /// Load packed 16-bit integers from memory into dst using zeromask k |
4229 | /// (elements are zeroed out when the corresponding mask bit is not set). |
4230 | /// mem_addr does not need to be aligned on any particular boundary. |
4231 | /// |
4232 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi16) |
4233 | #[inline ] |
4234 | #[target_feature (enable = "avx512f,avx512bw" )] |
4235 | pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i { |
4236 | let mut dst: __m512i; |
4237 | asm!( |
4238 | vpl!("vmovdqu16 {dst}{{{k}}} {{z}}" ), |
4239 | p = in(reg) mem_addr, |
4240 | k = in(kreg) k, |
4241 | dst = out(zmm_reg) dst, |
4242 | options(pure, readonly, nostack) |
4243 | ); |
4244 | dst |
4245 | } |
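
// Illustrative sketch, not part of the public API: with the writemask load,
// lanes whose mask bit is clear keep the corresponding element of src, while
// the zeromask load forces them to zero. The helper function name is
// hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512bw")]
unsafe fn mask_loadu_epi16_sketch() {
    let mem = [5i16; 32];
    let src = _mm512_set1_epi16(-1);
    // Load only the even lanes (mask bit i corresponds to element i).
    let merged = _mm512_mask_loadu_epi16(src, 0x5555_5555, mem.as_ptr());
    let zeroed = _mm512_maskz_loadu_epi16(0x5555_5555, mem.as_ptr());
    let mut out = [0i16; 32];
    _mm512_storeu_epi16(out.as_mut_ptr(), merged);
    assert_eq!((out[0], out[1]), (5, -1));
    _mm512_storeu_epi16(out.as_mut_ptr(), zeroed);
    assert_eq!((out[0], out[1]), (5, 0));
}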
4246 | |
4247 | /// Load packed 8-bit integers from memory into dst using writemask k |
4248 | /// (elements are copied from src when the corresponding mask bit is not set). |
4249 | /// mem_addr does not need to be aligned on any particular boundary. |
4250 | /// |
4251 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi8) |
4252 | #[inline ] |
4253 | #[target_feature (enable = "avx512f,avx512bw" )] |
4254 | pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *const i8) -> __m512i { |
4255 | let mut dst: __m512i = src; |
4256 | asm!( |
4257 | vpl!("vmovdqu8 {dst}{{{k}}}" ), |
4258 | p = in(reg) mem_addr, |
4259 | k = in(kreg) k, |
4260 | dst = inout(zmm_reg) dst, |
4261 | options(pure, readonly, nostack) |
4262 | ); |
4263 | dst |
4264 | } |
4265 | |
4266 | /// Load packed 8-bit integers from memory into dst using zeromask k |
4267 | /// (elements are zeroed out when the corresponding mask bit is not set). |
4268 | /// mem_addr does not need to be aligned on any particular boundary. |
4269 | /// |
4270 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi8) |
4271 | #[inline ] |
4272 | #[target_feature (enable = "avx512f,avx512bw" )] |
4273 | pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i { |
4274 | let mut dst: __m512i; |
4275 | asm!( |
4276 | vpl!("vmovdqu8 {dst}{{{k}}} {{z}}" ), |
4277 | p = in(reg) mem_addr, |
4278 | k = in(kreg) k, |
4279 | dst = out(zmm_reg) dst, |
4280 | options(pure, readonly, nostack) |
4281 | ); |
4282 | dst |
4283 | } |
4284 | |
4285 | /// Load packed 16-bit integers from memory into dst using writemask k |
4286 | /// (elements are copied from src when the corresponding mask bit is not set). |
4287 | /// mem_addr does not need to be aligned on any particular boundary. |
4288 | /// |
4289 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi16) |
4290 | #[inline ] |
4291 | #[target_feature (enable = "avx512f,avx512bw,avx512vl,avx" )] |
4292 | pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *const i16) -> __m256i { |
4293 | let mut dst: __m256i = src; |
4294 | asm!( |
4295 | vpl!("vmovdqu16 {dst}{{{k}}}" ), |
4296 | p = in(reg) mem_addr, |
4297 | k = in(kreg) k, |
4298 | dst = inout(ymm_reg) dst, |
4299 | options(pure, readonly, nostack) |
4300 | ); |
4301 | dst |
4302 | } |
4303 | |
4304 | /// Load packed 16-bit integers from memory into dst using zeromask k |
4305 | /// (elements are zeroed out when the corresponding mask bit is not set). |
4306 | /// mem_addr does not need to be aligned on any particular boundary. |
4307 | /// |
4308 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi16) |
4309 | #[inline ] |
4310 | #[target_feature (enable = "avx512f,avx512bw,avx512vl,avx" )] |
4311 | pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i { |
4312 | let mut dst: __m256i; |
4313 | asm!( |
4314 | vpl!("vmovdqu16 {dst}{{{k}}} {{z}}" ), |
4315 | p = in(reg) mem_addr, |
4316 | k = in(kreg) k, |
4317 | dst = out(ymm_reg) dst, |
4318 | options(pure, readonly, nostack) |
4319 | ); |
4320 | dst |
4321 | } |
4322 | |
4323 | /// Load packed 8-bit integers from memory into dst using writemask k |
4324 | /// (elements are copied from src when the corresponding mask bit is not set). |
4325 | /// mem_addr does not need to be aligned on any particular boundary. |
4326 | /// |
4327 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi8) |
4328 | #[inline ] |
4329 | #[target_feature (enable = "avx512f,avx512bw,avx512vl,avx" )] |
4330 | pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *const i8) -> __m256i { |
4331 | let mut dst: __m256i = src; |
4332 | asm!( |
4333 | vpl!("vmovdqu8 {dst}{{{k}}}" ), |
4334 | p = in(reg) mem_addr, |
4335 | k = in(kreg) k, |
4336 | dst = inout(ymm_reg) dst, |
4337 | options(pure, readonly, nostack) |
4338 | ); |
4339 | dst |
4340 | } |
4341 | |
4342 | /// Load packed 8-bit integers from memory into dst using zeromask k |
4343 | /// (elements are zeroed out when the corresponding mask bit is not set). |
4344 | /// mem_addr does not need to be aligned on any particular boundary. |
4345 | /// |
4346 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi8) |
4347 | #[inline ] |
4348 | #[target_feature (enable = "avx512f,avx512bw,avx512vl,avx" )] |
4349 | pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i { |
4350 | let mut dst: __m256i; |
4351 | asm!( |
4352 | vpl!("vmovdqu8 {dst}{{{k}}} {{z}}" ), |
4353 | p = in(reg) mem_addr, |
4354 | k = in(kreg) k, |
4355 | dst = out(ymm_reg) dst, |
4356 | options(pure, readonly, nostack) |
4357 | ); |
4358 | dst |
4359 | } |
4360 | |
4361 | /// Load packed 16-bit integers from memory into dst using writemask k |
4362 | /// (elements are copied from src when the corresponding mask bit is not set). |
4363 | /// mem_addr does not need to be aligned on any particular boundary. |
4364 | /// |
4365 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi16) |
4366 | #[inline ] |
4367 | #[target_feature (enable = "avx512f,avx512bw,avx512vl,avx,sse" )] |
4368 | pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i16) -> __m128i { |
4369 | let mut dst: __m128i = src; |
4370 | asm!( |
4371 | vpl!("vmovdqu16 {dst}{{{k}}}" ), |
4372 | p = in(reg) mem_addr, |
4373 | k = in(kreg) k, |
4374 | dst = inout(xmm_reg) dst, |
4375 | options(pure, readonly, nostack) |
4376 | ); |
4377 | dst |
4378 | } |
4379 | |
4380 | /// Load packed 16-bit integers from memory into dst using zeromask k |
4381 | /// (elements are zeroed out when the corresponding mask bit is not set). |
4382 | /// mem_addr does not need to be aligned on any particular boundary. |
4383 | /// |
4384 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi16) |
4385 | #[inline ] |
4386 | #[target_feature (enable = "avx512f,avx512bw,avx512vl,avx,sse" )] |
4387 | pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i { |
4388 | let mut dst: __m128i; |
4389 | asm!( |
4390 | vpl!("vmovdqu16 {dst}{{{k}}} {{z}}" ), |
4391 | p = in(reg) mem_addr, |
4392 | k = in(kreg) k, |
4393 | dst = out(xmm_reg) dst, |
4394 | options(pure, readonly, nostack) |
4395 | ); |
4396 | dst |
4397 | } |
4398 | |
4399 | /// Load packed 8-bit integers from memory into dst using writemask k |
4400 | /// (elements are copied from src when the corresponding mask bit is not set). |
4401 | /// mem_addr does not need to be aligned on any particular boundary. |
4402 | /// |
4403 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi8) |
4404 | #[inline ] |
4405 | #[target_feature (enable = "avx512f,avx512bw,avx512vl,avx,sse" )] |
4406 | pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i8) -> __m128i { |
4407 | let mut dst: __m128i = src; |
4408 | asm!( |
4409 | vpl!("vmovdqu8 {dst}{{{k}}}" ), |
4410 | p = in(reg) mem_addr, |
4411 | k = in(kreg) k, |
4412 | dst = inout(xmm_reg) dst, |
4413 | options(pure, readonly, nostack) |
4414 | ); |
4415 | dst |
4416 | } |
4417 | |
4418 | /// Load packed 8-bit integers from memory into dst using zeromask k |
4419 | /// (elements are zeroed out when the corresponding mask bit is not set). |
4420 | /// mem_addr does not need to be aligned on any particular boundary. |
4421 | /// |
4422 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi8) |
4423 | #[inline ] |
4424 | #[target_feature (enable = "avx512f,avx512bw,avx512vl,avx,sse" )] |
4425 | pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i { |
4426 | let mut dst: __m128i; |
4427 | asm!( |
4428 | vpl!("vmovdqu8 {dst}{{{k}}} {{z}}" ), |
4429 | p = in(reg) mem_addr, |
4430 | k = in(kreg) k, |
4431 | dst = out(xmm_reg) dst, |
4432 | options(pure, readonly, nostack) |
4433 | ); |
4434 | dst |
4435 | } |
4436 | |
4437 | /// Store packed 16-bit integers from a into memory using writemask k. |
4438 | /// mem_addr does not need to be aligned on any particular boundary. |
4439 | /// |
4440 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi16) |
4441 | #[inline ] |
4442 | #[target_feature (enable = "avx512f,avx512bw" )] |
4443 | pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: __m512i) { |
4444 | asm!( |
4445 | vps!("vmovdqu16" , "{{{mask}}}, {a}" ), |
4446 | p = in(reg) mem_addr, |
4447 | mask = in(kreg) mask, |
4448 | a = in(zmm_reg) a, |
4449 | options(nostack) |
4450 | ); |
4451 | } |
4452 | |
4453 | /// Store packed 8-bit integers from a into memory using writemask k. |
4454 | /// mem_addr does not need to be aligned on any particular boundary. |
4455 | /// |
4456 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi8) |
4457 | #[inline ] |
4458 | #[target_feature (enable = "avx512f,avx512bw" )] |
4459 | pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m512i) { |
4460 | asm!( |
4461 | vps!("vmovdqu8" , "{{{mask}}}, {a}" ), |
4462 | p = in(reg) mem_addr, |
4463 | mask = in(kreg) mask, |
4464 | a = in(zmm_reg) a, |
4465 | options(nostack) |
4466 | ); |
4467 | } |
4468 | |
4469 | /// Store packed 16-bit integers from a into memory using writemask k. |
4470 | /// mem_addr does not need to be aligned on any particular boundary. |
4471 | /// |
4472 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi16) |
4473 | #[inline ] |
4474 | #[target_feature (enable = "avx512f,avx512bw,avx512vl,avx" )] |
4475 | pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: __m256i) { |
4476 | asm!( |
4477 | vps!("vmovdqu16" , "{{{mask}}}, {a}" ), |
4478 | p = in(reg) mem_addr, |
4479 | mask = in(kreg) mask, |
4480 | a = in(ymm_reg) a, |
4481 | options(nostack) |
4482 | ); |
4483 | } |
4484 | |
4485 | /// Store packed 8-bit integers from a into memory using writemask k. |
4486 | /// mem_addr does not need to be aligned on any particular boundary. |
4487 | /// |
4488 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi8) |
4489 | #[inline ] |
4490 | #[target_feature (enable = "avx512f,avx512bw,avx512vl,avx" )] |
4491 | pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m256i) { |
4492 | asm!( |
4493 | vps!("vmovdqu8" , "{{{mask}}}, {a}" ), |
4494 | p = in(reg) mem_addr, |
4495 | mask = in(kreg) mask, |
4496 | a = in(ymm_reg) a, |
4497 | options(nostack) |
4498 | ); |
4499 | } |
4500 | |
4501 | /// Store packed 16-bit integers from a into memory using writemask k. |
4502 | /// mem_addr does not need to be aligned on any particular boundary. |
4503 | /// |
4504 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi16) |
4505 | #[inline ] |
4506 | #[target_feature (enable = "avx512f,avx512bw,avx512vl,avx,sse" )] |
4507 | pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m128i) { |
4508 | asm!( |
4509 | vps!("vmovdqu16" , "{{{mask}}}, {a}" ), |
4510 | p = in(reg) mem_addr, |
4511 | mask = in(kreg) mask, |
4512 | a = in(xmm_reg) a, |
4513 | options(nostack) |
4514 | ); |
4515 | } |
4516 | |
4517 | /// Store packed 8-bit integers from a into memory using writemask k. |
4518 | /// mem_addr does not need to be aligned on any particular boundary. |
4519 | /// |
4520 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi8) |
4521 | #[inline ] |
4522 | #[target_feature (enable = "avx512f,avx512bw,avx512vl,avx,sse" )] |
4523 | pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128i) { |
4524 | asm!( |
4525 | vps!("vmovdqu8" , "{{{mask}}}, {a}" ), |
4526 | p = in(reg) mem_addr, |
4527 | mask = in(kreg) mask, |
4528 | a = in(xmm_reg) a, |
4529 | options(nostack) |
4530 | ); |
4531 | } |
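
// Illustrative sketch, not part of the public API: a masked store writes only
// the elements whose mask bit is set, which is the usual way to handle a
// partial tail without a scalar loop. The helper function name is
// hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn mask_storeu_epi8_sketch() {
    let v = _mm_set1_epi8(9);
    let mut buf = [0i8; 16];
    // Only the low 5 bytes are written; the rest of buf is untouched.
    _mm_mask_storeu_epi8(buf.as_mut_ptr(), 0b1_1111, v);
    assert_eq!(&buf[..5], &[9i8; 5][..]);
    assert_eq!(buf[5], 0);
}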
4532 | |
4533 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst. |
4534 | /// |
4535 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_madd_epi16&expand=3511) |
4536 | #[inline ] |
4537 | #[target_feature (enable = "avx512bw" )] |
4538 | #[cfg_attr (test, assert_instr(vpmaddwd))] |
4539 | pub unsafe fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i { |
4540 | transmute(vpmaddwd(a.as_i16x32(), b.as_i16x32())) |
4541 | } |
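
// Illustrative sketch, not part of the public API: each 32-bit lane of the
// result is a[2i]*b[2i] + a[2i+1]*b[2i+1], the building block of widening dot
// products. _mm512_storeu_epi32 from the avx512f module is used to inspect
// the result; the helper function name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn madd_epi16_sketch() {
    let a = _mm512_set1_epi16(3);
    let b = _mm512_set1_epi16(4);
    // Every adjacent pair contributes 3*4 + 3*4 = 24.
    let sums = _mm512_madd_epi16(a, b);
    let mut out = [0i32; 16];
    _mm512_storeu_epi32(out.as_mut_ptr(), sums);
    assert_eq!(out, [24i32; 16]);
}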
4542 | |
4543 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
4544 | /// |
4545 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_madd_epi16&expand=3512) |
4546 | #[inline ] |
4547 | #[target_feature (enable = "avx512bw" )] |
4548 | #[cfg_attr (test, assert_instr(vpmaddwd))] |
4549 | pub unsafe fn _mm512_mask_madd_epi16( |
4550 | src: __m512i, |
4551 | k: __mmask16, |
4552 | a: __m512i, |
4553 | b: __m512i, |
4554 | ) -> __m512i { |
4555 | let madd: i32x16 = _mm512_madd_epi16(a, b).as_i32x16(); |
4556 | transmute(simd_select_bitmask(k, madd, src.as_i32x16())) |
4557 | } |
4558 | |
4559 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
4560 | /// |
4561 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_madd_epi16&expand=3513) |
4562 | #[inline ] |
4563 | #[target_feature (enable = "avx512bw" )] |
4564 | #[cfg_attr (test, assert_instr(vpmaddwd))] |
4565 | pub unsafe fn _mm512_maskz_madd_epi16(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { |
4566 | let madd: i32x16 = _mm512_madd_epi16(a, b).as_i32x16(); |
4567 | let zero: i32x16 = _mm512_setzero_si512().as_i32x16(); |
4568 | transmute(simd_select_bitmask(k, madd, zero)) |
4569 | } |
4570 | |
4571 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
4572 | /// |
4573 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_madd_epi16&expand=3509) |
4574 | #[inline ] |
4575 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4576 | #[cfg_attr (test, assert_instr(vpmaddwd))] |
4577 | pub unsafe fn _mm256_mask_madd_epi16(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { |
4578 | let madd: i32x8 = _mm256_madd_epi16(a, b).as_i32x8(); |
4579 | transmute(simd_select_bitmask(k, madd, src.as_i32x8())) |
4580 | } |
4581 | |
4582 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
4583 | /// |
4584 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_madd_epi16&expand=3510) |
4585 | #[inline ] |
4586 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4587 | #[cfg_attr (test, assert_instr(vpmaddwd))] |
4588 | pub unsafe fn _mm256_maskz_madd_epi16(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { |
4589 | let madd: i32x8 = _mm256_madd_epi16(a, b).as_i32x8(); |
4590 | let zero: i32x8 = _mm256_setzero_si256().as_i32x8(); |
4591 | transmute(simd_select_bitmask(k, madd, zero)) |
4592 | } |
4593 | |
4594 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
4595 | /// |
4596 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_madd_epi16&expand=3506) |
4597 | #[inline ] |
4598 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4599 | #[cfg_attr (test, assert_instr(vpmaddwd))] |
4600 | pub unsafe fn _mm_mask_madd_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
4601 | let madd: i32x4 = _mm_madd_epi16(a, b).as_i32x4(); |
4602 | transmute(simd_select_bitmask(k, madd, src.as_i32x4())) |
4603 | } |
4604 | |
4605 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
4606 | /// |
4607 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_madd_epi16&expand=3507) |
4608 | #[inline ] |
4609 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4610 | #[cfg_attr (test, assert_instr(vpmaddwd))] |
4611 | pub unsafe fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
4612 | let madd: i32x4 = _mm_madd_epi16(a, b).as_i32x4(); |
4613 | let zero: i32x4 = _mm_setzero_si128().as_i32x4(); |
4614 | transmute(simd_select_bitmask(k, madd, zero)) |
4615 | } |
4616 | |
4617 | /// Vertically multiply each unsigned 8-bit integer from a with the corresponding signed 8-bit integer from b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst. |
4618 | /// |
4619 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maddubs_epi16&expand=3539) |
4620 | #[inline ] |
4621 | #[target_feature (enable = "avx512bw" )] |
4622 | #[cfg_attr (test, assert_instr(vpmaddubsw))] |
4623 | pub unsafe fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i { |
4624 | transmute(vpmaddubsw(a.as_i8x64(), b.as_i8x64())) |
4625 | } |
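
// Illustrative sketch, not part of the public API: a is read as unsigned
// bytes and b as signed bytes, and each pairwise sum saturates to the i16
// range. The helper function name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn maddubs_epi16_sketch() {
    let a = _mm512_set1_epi8(-1); // byte 0xFF, i.e. 255 unsigned
    let b = _mm512_set1_epi8(127);
    // 255 * 127 = 32385 per product; 32385 + 32385 = 64770 saturates to 32767.
    let sums = _mm512_maddubs_epi16(a, b);
    let mut out = [0i16; 32];
    _mm512_storeu_epi16(out.as_mut_ptr(), sums);
    assert_eq!(out, [i16::MAX; 32]);
}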
4626 | |
4627 | /// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
4628 | /// |
4629 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_maddubs_epi16&expand=3540) |
4630 | #[inline ] |
4631 | #[target_feature (enable = "avx512bw" )] |
4632 | #[cfg_attr (test, assert_instr(vpmaddubsw))] |
4633 | pub unsafe fn _mm512_mask_maddubs_epi16( |
4634 | src: __m512i, |
4635 | k: __mmask32, |
4636 | a: __m512i, |
4637 | b: __m512i, |
4638 | ) -> __m512i { |
4639 | let madd: i16x32 = _mm512_maddubs_epi16(a, b).as_i16x32(); |
4640 | transmute(simd_select_bitmask(k, madd, src.as_i16x32())) |
4641 | } |
4642 | |
4643 | /// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
4644 | /// |
4645 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_maddubs_epi16&expand=3541) |
4646 | #[inline ] |
4647 | #[target_feature (enable = "avx512bw" )] |
4648 | #[cfg_attr (test, assert_instr(vpmaddubsw))] |
4649 | pub unsafe fn _mm512_maskz_maddubs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { |
4650 | let madd: i16x32 = _mm512_maddubs_epi16(a, b).as_i16x32(); |
4651 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
4652 | transmute(simd_select_bitmask(k, madd, zero)) |
4653 | } |
4654 | |
4655 | /// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
4656 | /// |
4657 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_maddubs_epi16&expand=3537) |
4658 | #[inline ] |
4659 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4660 | #[cfg_attr (test, assert_instr(vpmaddubsw))] |
4661 | pub unsafe fn _mm256_mask_maddubs_epi16( |
4662 | src: __m256i, |
4663 | k: __mmask16, |
4664 | a: __m256i, |
4665 | b: __m256i, |
4666 | ) -> __m256i { |
4667 | let madd: i16x16 = _mm256_maddubs_epi16(a, b).as_i16x16(); |
4668 | transmute(simd_select_bitmask(k, madd, src.as_i16x16())) |
4669 | } |
4670 | |
4671 | /// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
4672 | /// |
4673 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_maddubs_epi16&expand=3538) |
4674 | #[inline ] |
4675 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4676 | #[cfg_attr (test, assert_instr(vpmaddubsw))] |
4677 | pub unsafe fn _mm256_maskz_maddubs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { |
4678 | let madd: i16x16 = _mm256_maddubs_epi16(a, b).as_i16x16(); |
4679 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
4680 | transmute(simd_select_bitmask(k, madd, zero)) |
4681 | } |
4682 | |
4683 | /// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
4684 | /// |
4685 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_maddubs_epi16&expand=3534) |
4686 | #[inline ] |
4687 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4688 | #[cfg_attr (test, assert_instr(vpmaddubsw))] |
4689 | pub unsafe fn _mm_mask_maddubs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
4690 | let madd: i16x8 = _mm_maddubs_epi16(a, b).as_i16x8(); |
4691 | transmute(simd_select_bitmask(k, madd, src.as_i16x8())) |
4692 | } |
4693 | |
4694 | /// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
4695 | /// |
4696 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_maddubs_epi16&expand=3535) |
4697 | #[inline ] |
4698 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4699 | #[cfg_attr (test, assert_instr(vpmaddubsw))] |
4700 | pub unsafe fn _mm_maskz_maddubs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
4701 | let madd: i16x8 = _mm_maddubs_epi16(a, b).as_i16x8(); |
4702 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
4703 | transmute(simd_select_bitmask(k, madd, zero)) |
4704 | } |
4705 | |
4706 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst. |
4707 | /// |
4708 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi32&expand=4091) |
4709 | #[inline ] |
4710 | #[target_feature (enable = "avx512bw" )] |
4711 | #[cfg_attr (test, assert_instr(vpackssdw))] |
4712 | pub unsafe fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i { |
4713 | transmute(vpackssdw(a.as_i32x16(), b.as_i32x16())) |
4714 | } |
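
// Illustrative sketch, not part of the public API: out-of-range values
// saturate to the i16 limits, and the 512-bit pack interleaves its inputs per
// 128-bit lane (4 words from a, then 4 words from b). The helper function
// name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn packs_epi32_sketch() {
    let a = _mm512_set1_epi32(100_000); // above i16::MAX
    let b = _mm512_set1_epi32(-100_000); // below i16::MIN
    let packed = _mm512_packs_epi32(a, b);
    let mut out = [0i16; 32];
    _mm512_storeu_epi16(out.as_mut_ptr(), packed);
    let expected: [i16; 8] = [32767, 32767, 32767, 32767, -32768, -32768, -32768, -32768];
    assert_eq!(&out[..8], &expected[..]);
}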
4715 | |
4716 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
4717 | /// |
4718 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi32&expand=4089) |
4719 | #[inline ] |
4720 | #[target_feature (enable = "avx512bw" )] |
4721 | #[cfg_attr (test, assert_instr(vpackssdw))] |
4722 | pub unsafe fn _mm512_mask_packs_epi32( |
4723 | src: __m512i, |
4724 | k: __mmask32, |
4725 | a: __m512i, |
4726 | b: __m512i, |
4727 | ) -> __m512i { |
4728 | let pack: i16x32 = _mm512_packs_epi32(a, b).as_i16x32(); |
4729 | transmute(simd_select_bitmask(k, pack, src.as_i16x32())) |
4730 | } |
4731 | |
4732 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
4733 | /// |
4734 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi32&expand=4090) |
4735 | #[inline ] |
4736 | #[target_feature (enable = "avx512bw" )] |
4737 | #[cfg_attr (test, assert_instr(vpackssdw))] |
4738 | pub unsafe fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { |
4739 | let pack: i16x32 = _mm512_packs_epi32(a, b).as_i16x32(); |
4740 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
transmute(simd_select_bitmask(k, pack, zero))
4742 | } |
4743 | |
4744 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
4745 | /// |
4746 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi32&expand=4086) |
4747 | #[inline ] |
4748 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4749 | #[cfg_attr (test, assert_instr(vpackssdw))] |
4750 | pub unsafe fn _mm256_mask_packs_epi32( |
4751 | src: __m256i, |
4752 | k: __mmask16, |
4753 | a: __m256i, |
4754 | b: __m256i, |
4755 | ) -> __m256i { |
4756 | let pack: i16x16 = _mm256_packs_epi32(a, b).as_i16x16(); |
transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
4758 | } |
4759 | |
4760 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
4761 | /// |
4762 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packs_epi32&expand=4087) |
4763 | #[inline ] |
4764 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4765 | #[cfg_attr (test, assert_instr(vpackssdw))] |
4766 | pub unsafe fn _mm256_maskz_packs_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { |
4767 | let pack: i16x16 = _mm256_packs_epi32(a, b).as_i16x16(); |
4768 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
transmute(simd_select_bitmask(k, pack, zero))
4770 | } |
4771 | |
4772 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
4773 | /// |
4774 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi32&expand=4083) |
4775 | #[inline ] |
4776 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4777 | #[cfg_attr (test, assert_instr(vpackssdw))] |
4778 | pub unsafe fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
4779 | let pack: i16x8 = _mm_packs_epi32(a, b).as_i16x8(); |
transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
4781 | } |
4782 | |
4783 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
4784 | /// |
4785 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi32&expand=4084) |
4786 | #[inline ] |
4787 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4788 | #[cfg_attr (test, assert_instr(vpackssdw))] |
4789 | pub unsafe fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
4790 | let pack: i16x8 = _mm_packs_epi32(a, b).as_i16x8(); |
4791 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
transmute(simd_select_bitmask(k, pack, zero))
4793 | } |
4794 | |
4795 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst. |
4796 | /// |
4797 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi16&expand=4082) |
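///
/// A minimal sketch of the saturating behavior (illustrative values):
///
/// ```ignore
/// let a = _mm512_set1_epi16(300);  // above i8::MAX, saturates to 127
/// let b = _mm512_set1_epi16(-300); // below i8::MIN, saturates to -128
/// let r = _mm512_packs_epi16(a, b);
/// // Within each 128-bit lane: eight bytes of 127 from `a`, then eight bytes of -128 from `b`.
/// ```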
4798 | #[inline ] |
4799 | #[target_feature (enable = "avx512bw" )] |
4800 | #[cfg_attr (test, assert_instr(vpacksswb))] |
4801 | pub unsafe fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i { |
transmute(vpacksswb(a.as_i16x32(), b.as_i16x32()))
4803 | } |
4804 | |
4805 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
4806 | /// |
4807 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi16&expand=4080) |
4808 | #[inline ] |
4809 | #[target_feature (enable = "avx512bw" )] |
4810 | #[cfg_attr (test, assert_instr(vpacksswb))] |
4811 | pub unsafe fn _mm512_mask_packs_epi16( |
4812 | src: __m512i, |
4813 | k: __mmask64, |
4814 | a: __m512i, |
4815 | b: __m512i, |
4816 | ) -> __m512i { |
4817 | let pack: i8x64 = _mm512_packs_epi16(a, b).as_i8x64(); |
transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
4819 | } |
4820 | |
4821 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
4822 | /// |
4823 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi16&expand=4081) |
4824 | #[inline ] |
4825 | #[target_feature (enable = "avx512bw" )] |
4826 | #[cfg_attr (test, assert_instr(vpacksswb))] |
4827 | pub unsafe fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { |
4828 | let pack: i8x64 = _mm512_packs_epi16(a, b).as_i8x64(); |
4829 | let zero: i8x64 = _mm512_setzero_si512().as_i8x64(); |
transmute(simd_select_bitmask(k, pack, zero))
4831 | } |
4832 | |
4833 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
4834 | /// |
4835 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi16&expand=4077) |
4836 | #[inline ] |
4837 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4838 | #[cfg_attr (test, assert_instr(vpacksswb))] |
4839 | pub unsafe fn _mm256_mask_packs_epi16( |
4840 | src: __m256i, |
4841 | k: __mmask32, |
4842 | a: __m256i, |
4843 | b: __m256i, |
4844 | ) -> __m256i { |
4845 | let pack: i8x32 = _mm256_packs_epi16(a, b).as_i8x32(); |
transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
4847 | } |
4848 | |
4849 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
4850 | /// |
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packs_epi16&expand=4078)
4852 | #[inline ] |
4853 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4854 | #[cfg_attr (test, assert_instr(vpacksswb))] |
4855 | pub unsafe fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { |
4856 | let pack: i8x32 = _mm256_packs_epi16(a, b).as_i8x32(); |
4857 | let zero: i8x32 = _mm256_setzero_si256().as_i8x32(); |
transmute(simd_select_bitmask(k, pack, zero))
4859 | } |
4860 | |
4861 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
4862 | /// |
4863 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi16&expand=4074) |
4864 | #[inline ] |
4865 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4866 | #[cfg_attr (test, assert_instr(vpacksswb))] |
4867 | pub unsafe fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
4868 | let pack: i8x16 = _mm_packs_epi16(a, b).as_i8x16(); |
transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
4870 | } |
4871 | |
4872 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
4873 | /// |
4874 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi16&expand=4075) |
4875 | #[inline ] |
4876 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4877 | #[cfg_attr (test, assert_instr(vpacksswb))] |
4878 | pub unsafe fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
4879 | let pack: i8x16 = _mm_packs_epi16(a, b).as_i8x16(); |
4880 | let zero: i8x16 = _mm_setzero_si128().as_i8x16(); |
transmute(simd_select_bitmask(k, pack, zero))
4882 | } |
4883 | |
4884 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst. |
4885 | /// |
4886 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi32&expand=4130) |
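///
/// A minimal sketch of the unsigned saturation (illustrative values):
///
/// ```ignore
/// let a = _mm512_set1_epi32(-5);     // negative inputs clamp to 0
/// let b = _mm512_set1_epi32(70_000); // above u16::MAX, clamps to 65535
/// let r = _mm512_packus_epi32(a, b);
/// // Within each 128-bit lane: four 16-bit values of 0 from `a`, then four of 0xFFFF from `b`.
/// ```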
4887 | #[inline ] |
4888 | #[target_feature (enable = "avx512bw" )] |
4889 | #[cfg_attr (test, assert_instr(vpackusdw))] |
4890 | pub unsafe fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i { |
transmute(vpackusdw(a.as_i32x16(), b.as_i32x16()))
4892 | } |
4893 | |
4894 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
4895 | /// |
4896 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi32&expand=4128) |
4897 | #[inline ] |
4898 | #[target_feature (enable = "avx512bw" )] |
4899 | #[cfg_attr (test, assert_instr(vpackusdw))] |
4900 | pub unsafe fn _mm512_mask_packus_epi32( |
4901 | src: __m512i, |
4902 | k: __mmask32, |
4903 | a: __m512i, |
4904 | b: __m512i, |
4905 | ) -> __m512i { |
4906 | let pack: i16x32 = _mm512_packus_epi32(a, b).as_i16x32(); |
transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
4908 | } |
4909 | |
4910 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
4911 | /// |
4912 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi32&expand=4129) |
4913 | #[inline ] |
4914 | #[target_feature (enable = "avx512bw" )] |
4915 | #[cfg_attr (test, assert_instr(vpackusdw))] |
4916 | pub unsafe fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { |
4917 | let pack: i16x32 = _mm512_packus_epi32(a, b).as_i16x32(); |
4918 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
transmute(simd_select_bitmask(k, pack, zero))
4920 | } |
4921 | |
4922 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
4923 | /// |
4924 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi32&expand=4125) |
4925 | #[inline ] |
4926 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4927 | #[cfg_attr (test, assert_instr(vpackusdw))] |
4928 | pub unsafe fn _mm256_mask_packus_epi32( |
4929 | src: __m256i, |
4930 | k: __mmask16, |
4931 | a: __m256i, |
4932 | b: __m256i, |
4933 | ) -> __m256i { |
4934 | let pack: i16x16 = _mm256_packus_epi32(a, b).as_i16x16(); |
transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
4936 | } |
4937 | |
4938 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
4939 | /// |
4940 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi32&expand=4126) |
4941 | #[inline ] |
4942 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4943 | #[cfg_attr (test, assert_instr(vpackusdw))] |
4944 | pub unsafe fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { |
4945 | let pack: i16x16 = _mm256_packus_epi32(a, b).as_i16x16(); |
4946 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
transmute(simd_select_bitmask(k, pack, zero))
4948 | } |
4949 | |
4950 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
4951 | /// |
4952 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi32&expand=4122) |
4953 | #[inline ] |
4954 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4955 | #[cfg_attr (test, assert_instr(vpackusdw))] |
4956 | pub unsafe fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
4957 | let pack: i16x8 = _mm_packus_epi32(a, b).as_i16x8(); |
transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
4959 | } |
4960 | |
4961 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
4962 | /// |
4963 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi32&expand=4123) |
4964 | #[inline ] |
4965 | #[target_feature (enable = "avx512bw,avx512vl" )] |
4966 | #[cfg_attr (test, assert_instr(vpackusdw))] |
4967 | pub unsafe fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
4968 | let pack: i16x8 = _mm_packus_epi32(a, b).as_i16x8(); |
4969 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
transmute(simd_select_bitmask(k, pack, zero))
4971 | } |
4972 | |
4973 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst. |
4974 | /// |
4975 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi16&expand=4121) |
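///
/// A minimal sketch of the unsigned saturation (illustrative values):
///
/// ```ignore
/// let a = _mm512_set1_epi16(-5);  // negative inputs clamp to 0
/// let b = _mm512_set1_epi16(300); // above u8::MAX, clamps to 255
/// let r = _mm512_packus_epi16(a, b);
/// // Within each 128-bit lane: eight bytes of 0 from `a`, then eight bytes of 255 from `b`.
/// ```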
4976 | #[inline ] |
4977 | #[target_feature (enable = "avx512bw" )] |
4978 | #[cfg_attr (test, assert_instr(vpackuswb))] |
4979 | pub unsafe fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i { |
transmute(vpackuswb(a.as_i16x32(), b.as_i16x32()))
4981 | } |
4982 | |
4983 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
4984 | /// |
4985 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi16&expand=4119) |
4986 | #[inline ] |
4987 | #[target_feature (enable = "avx512bw" )] |
4988 | #[cfg_attr (test, assert_instr(vpackuswb))] |
4989 | pub unsafe fn _mm512_mask_packus_epi16( |
4990 | src: __m512i, |
4991 | k: __mmask64, |
4992 | a: __m512i, |
4993 | b: __m512i, |
4994 | ) -> __m512i { |
4995 | let pack: i8x64 = _mm512_packus_epi16(a, b).as_i8x64(); |
transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
4997 | } |
4998 | |
4999 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5000 | /// |
5001 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi16&expand=4120) |
5002 | #[inline ] |
5003 | #[target_feature (enable = "avx512bw" )] |
5004 | #[cfg_attr (test, assert_instr(vpackuswb))] |
5005 | pub unsafe fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { |
5006 | let pack: i8x64 = _mm512_packus_epi16(a, b).as_i8x64(); |
5007 | let zero: i8x64 = _mm512_setzero_si512().as_i8x64(); |
transmute(simd_select_bitmask(k, pack, zero))
5009 | } |
5010 | |
5011 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5012 | /// |
5013 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi16&expand=4116) |
5014 | #[inline ] |
5015 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5016 | #[cfg_attr (test, assert_instr(vpackuswb))] |
5017 | pub unsafe fn _mm256_mask_packus_epi16( |
5018 | src: __m256i, |
5019 | k: __mmask32, |
5020 | a: __m256i, |
5021 | b: __m256i, |
5022 | ) -> __m256i { |
5023 | let pack: i8x32 = _mm256_packus_epi16(a, b).as_i8x32(); |
transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
5025 | } |
5026 | |
5027 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5028 | /// |
5029 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi16&expand=4117) |
5030 | #[inline ] |
5031 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5032 | #[cfg_attr (test, assert_instr(vpackuswb))] |
5033 | pub unsafe fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { |
5034 | let pack: i8x32 = _mm256_packus_epi16(a, b).as_i8x32(); |
5035 | let zero: i8x32 = _mm256_setzero_si256().as_i8x32(); |
transmute(simd_select_bitmask(k, pack, zero))
5037 | } |
5038 | |
5039 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5040 | /// |
5041 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi16&expand=4113) |
5042 | #[inline ] |
5043 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5044 | #[cfg_attr (test, assert_instr(vpackuswb))] |
5045 | pub unsafe fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
5046 | let pack: i8x16 = _mm_packus_epi16(a, b).as_i8x16(); |
transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
5048 | } |
5049 | |
5050 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5051 | /// |
5052 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi16&expand=4114) |
5053 | #[inline ] |
5054 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5055 | #[cfg_attr (test, assert_instr(vpackuswb))] |
5056 | pub unsafe fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
5057 | let pack: i8x16 = _mm_packus_epi16(a, b).as_i8x16(); |
5058 | let zero: i8x16 = _mm_setzero_si128().as_i8x16(); |
transmute(simd_select_bitmask(k, pack, zero))
5060 | } |
5061 | |
5062 | /// Average packed unsigned 16-bit integers in a and b, and store the results in dst. |
5063 | /// |
5064 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu16&expand=388) |
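///
/// The average is computed as `(a + b + 1) >> 1` per lane; a minimal sketch (illustrative values):
///
/// ```ignore
/// let a = _mm512_set1_epi16(1);
/// let b = _mm512_set1_epi16(2);
/// let r = _mm512_avg_epu16(a, b); // every lane is (1 + 2 + 1) >> 1 = 2
/// ```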
5065 | #[inline ] |
5066 | #[target_feature (enable = "avx512bw" )] |
5067 | #[cfg_attr (test, assert_instr(vpavgw))] |
5068 | pub unsafe fn _mm512_avg_epu16(a: __m512i, b: __m512i) -> __m512i { |
transmute(vpavgw(a.as_u16x32(), b.as_u16x32()))
5070 | } |
5071 | |
5072 | /// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5073 | /// |
5074 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu16&expand=389) |
5075 | #[inline ] |
5076 | #[target_feature (enable = "avx512bw" )] |
5077 | #[cfg_attr (test, assert_instr(vpavgw))] |
5078 | pub unsafe fn _mm512_mask_avg_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { |
5079 | let avg: u16x32 = _mm512_avg_epu16(a, b).as_u16x32(); |
transmute(simd_select_bitmask(k, avg, src.as_u16x32()))
5081 | } |
5082 | |
5083 | /// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5084 | /// |
5085 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu16&expand=390) |
5086 | #[inline ] |
5087 | #[target_feature (enable = "avx512bw" )] |
5088 | #[cfg_attr (test, assert_instr(vpavgw))] |
5089 | pub unsafe fn _mm512_maskz_avg_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { |
5090 | let avg: u16x32 = _mm512_avg_epu16(a, b).as_u16x32(); |
5091 | let zero: u16x32 = _mm512_setzero_si512().as_u16x32(); |
transmute(simd_select_bitmask(k, avg, zero))
5093 | } |
5094 | |
5095 | /// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5096 | /// |
5097 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu16&expand=386) |
5098 | #[inline ] |
5099 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5100 | #[cfg_attr (test, assert_instr(vpavgw))] |
5101 | pub unsafe fn _mm256_mask_avg_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { |
5102 | let avg: u16x16 = _mm256_avg_epu16(a, b).as_u16x16(); |
transmute(simd_select_bitmask(k, avg, src.as_u16x16()))
5104 | } |
5105 | |
5106 | /// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5107 | /// |
5108 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu16&expand=387) |
5109 | #[inline ] |
5110 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5111 | #[cfg_attr (test, assert_instr(vpavgw))] |
5112 | pub unsafe fn _mm256_maskz_avg_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { |
5113 | let avg: u16x16 = _mm256_avg_epu16(a, b).as_u16x16(); |
5114 | let zero: u16x16 = _mm256_setzero_si256().as_u16x16(); |
transmute(simd_select_bitmask(k, avg, zero))
5116 | } |
5117 | |
5118 | /// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5119 | /// |
5120 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu16&expand=383) |
5121 | #[inline ] |
5122 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5123 | #[cfg_attr (test, assert_instr(vpavgw))] |
5124 | pub unsafe fn _mm_mask_avg_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
5125 | let avg: u16x8 = _mm_avg_epu16(a, b).as_u16x8(); |
transmute(simd_select_bitmask(k, avg, src.as_u16x8()))
5127 | } |
5128 | |
5129 | /// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5130 | /// |
5131 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu16&expand=384) |
5132 | #[inline ] |
5133 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5134 | #[cfg_attr (test, assert_instr(vpavgw))] |
5135 | pub unsafe fn _mm_maskz_avg_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
5136 | let avg: u16x8 = _mm_avg_epu16(a, b).as_u16x8(); |
5137 | let zero: u16x8 = _mm_setzero_si128().as_u16x8(); |
transmute(simd_select_bitmask(k, avg, zero))
5139 | } |
5140 | |
5141 | /// Average packed unsigned 8-bit integers in a and b, and store the results in dst. |
5142 | /// |
5143 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu8&expand=397) |
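///
/// The 9-bit intermediate sum cannot overflow; a minimal sketch (illustrative values):
///
/// ```ignore
/// let a = _mm512_set1_epi8(-1); // 0xFF, i.e. 255 as an unsigned byte
/// let b = _mm512_set1_epi8(0);
/// let r = _mm512_avg_epu8(a, b); // every byte is (255 + 0 + 1) >> 1 = 128
/// ```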
5144 | #[inline ] |
5145 | #[target_feature (enable = "avx512bw" )] |
5146 | #[cfg_attr (test, assert_instr(vpavgb))] |
5147 | pub unsafe fn _mm512_avg_epu8(a: __m512i, b: __m512i) -> __m512i { |
transmute(vpavgb(a.as_u8x64(), b.as_u8x64()))
5149 | } |
5150 | |
5151 | /// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5152 | /// |
5153 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu8&expand=398) |
5154 | #[inline ] |
5155 | #[target_feature (enable = "avx512bw" )] |
5156 | #[cfg_attr (test, assert_instr(vpavgb))] |
5157 | pub unsafe fn _mm512_mask_avg_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { |
5158 | let avg: u8x64 = _mm512_avg_epu8(a, b).as_u8x64(); |
transmute(simd_select_bitmask(k, avg, src.as_u8x64()))
5160 | } |
5161 | |
5162 | /// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5163 | /// |
5164 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu8&expand=399) |
5165 | #[inline ] |
5166 | #[target_feature (enable = "avx512bw" )] |
5167 | #[cfg_attr (test, assert_instr(vpavgb))] |
5168 | pub unsafe fn _mm512_maskz_avg_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { |
5169 | let avg: u8x64 = _mm512_avg_epu8(a, b).as_u8x64(); |
5170 | let zero: u8x64 = _mm512_setzero_si512().as_u8x64(); |
transmute(simd_select_bitmask(k, avg, zero))
5172 | } |
5173 | |
5174 | /// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5175 | /// |
5176 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu8&expand=395) |
5177 | #[inline ] |
5178 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5179 | #[cfg_attr (test, assert_instr(vpavgb))] |
5180 | pub unsafe fn _mm256_mask_avg_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { |
5181 | let avg: u8x32 = _mm256_avg_epu8(a, b).as_u8x32(); |
transmute(simd_select_bitmask(k, avg, src.as_u8x32()))
5183 | } |
5184 | |
5185 | /// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5186 | /// |
5187 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu8&expand=396) |
5188 | #[inline ] |
5189 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5190 | #[cfg_attr (test, assert_instr(vpavgb))] |
5191 | pub unsafe fn _mm256_maskz_avg_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { |
5192 | let avg: u8x32 = _mm256_avg_epu8(a, b).as_u8x32(); |
5193 | let zero: u8x32 = _mm256_setzero_si256().as_u8x32(); |
transmute(simd_select_bitmask(k, avg, zero))
5195 | } |
5196 | |
5197 | /// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5198 | /// |
5199 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu8&expand=392) |
5200 | #[inline ] |
5201 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5202 | #[cfg_attr (test, assert_instr(vpavgb))] |
5203 | pub unsafe fn _mm_mask_avg_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
5204 | let avg: u8x16 = _mm_avg_epu8(a, b).as_u8x16(); |
transmute(simd_select_bitmask(k, avg, src.as_u8x16()))
5206 | } |
5207 | |
5208 | /// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5209 | /// |
5210 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu8&expand=393) |
5211 | #[inline ] |
5212 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5213 | #[cfg_attr (test, assert_instr(vpavgb))] |
5214 | pub unsafe fn _mm_maskz_avg_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
5215 | let avg: u8x16 = _mm_avg_epu8(a, b).as_u8x16(); |
5216 | let zero: u8x16 = _mm_setzero_si128().as_u8x16(); |
transmute(simd_select_bitmask(k, avg, zero))
5218 | } |
5219 | |
5220 | /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst. |
5221 | /// |
5222 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi16&expand=5271) |
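///
/// The shift amount is taken from the low 64 bits of `count`; counts of 16 or more zero every lane. A minimal sketch (illustrative values):
///
/// ```ignore
/// let a = _mm512_set1_epi16(1);
/// let count = _mm_set_epi64x(0, 3);   // shift by 3
/// let r = _mm512_sll_epi16(a, count); // every lane becomes 1 << 3 = 8
/// ```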
5223 | #[inline ] |
5224 | #[target_feature (enable = "avx512bw" )] |
5225 | #[cfg_attr (test, assert_instr(vpsllw))] |
5226 | pub unsafe fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i { |
transmute(vpsllw(a.as_i16x32(), count.as_i16x8()))
5228 | } |
5229 | |
5230 | /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5231 | /// |
5232 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi16&expand=5269) |
5233 | #[inline ] |
5234 | #[target_feature (enable = "avx512bw" )] |
5235 | #[cfg_attr (test, assert_instr(vpsllw))] |
5236 | pub unsafe fn _mm512_mask_sll_epi16( |
5237 | src: __m512i, |
5238 | k: __mmask32, |
5239 | a: __m512i, |
5240 | count: __m128i, |
5241 | ) -> __m512i { |
5242 | let shf: i16x32 = _mm512_sll_epi16(a, count).as_i16x32(); |
transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
5244 | } |
5245 | |
5246 | /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5247 | /// |
5248 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi16&expand=5270) |
5249 | #[inline ] |
5250 | #[target_feature (enable = "avx512bw" )] |
5251 | #[cfg_attr (test, assert_instr(vpsllw))] |
5252 | pub unsafe fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { |
5253 | let shf: i16x32 = _mm512_sll_epi16(a, count).as_i16x32(); |
5254 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
transmute(simd_select_bitmask(k, shf, zero))
5256 | } |
5257 | |
5258 | /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5259 | /// |
5260 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi16&expand=5266) |
5261 | #[inline ] |
5262 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5263 | #[cfg_attr (test, assert_instr(vpsllw))] |
5264 | pub unsafe fn _mm256_mask_sll_epi16( |
5265 | src: __m256i, |
5266 | k: __mmask16, |
5267 | a: __m256i, |
5268 | count: __m128i, |
5269 | ) -> __m256i { |
5270 | let shf: i16x16 = _mm256_sll_epi16(a, count).as_i16x16(); |
transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
5272 | } |
5273 | |
5274 | /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5275 | /// |
5276 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi16&expand=5267) |
5277 | #[inline ] |
5278 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5279 | #[cfg_attr (test, assert_instr(vpsllw))] |
5280 | pub unsafe fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { |
5281 | let shf: i16x16 = _mm256_sll_epi16(a, count).as_i16x16(); |
5282 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
transmute(simd_select_bitmask(k, shf, zero))
5284 | } |
5285 | |
5286 | /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5287 | /// |
5288 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi16&expand=5263) |
5289 | #[inline ] |
5290 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5291 | #[cfg_attr (test, assert_instr(vpsllw))] |
5292 | pub unsafe fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { |
5293 | let shf: i16x8 = _mm_sll_epi16(a, count).as_i16x8(); |
transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
5295 | } |
5296 | |
5297 | /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5298 | /// |
5299 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi16&expand=5264) |
5300 | #[inline ] |
5301 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5302 | #[cfg_attr (test, assert_instr(vpsllw))] |
5303 | pub unsafe fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { |
5304 | let shf: i16x8 = _mm_sll_epi16(a, count).as_i16x8(); |
5305 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
transmute(simd_select_bitmask(k, shf, zero))
5307 | } |
5308 | |
5309 | /// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst. |
5310 | /// |
5311 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi16&expand=5301) |
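///
/// A minimal sketch of the const-generic form (illustrative values):
///
/// ```ignore
/// let a = _mm512_set1_epi16(1);
/// let r = _mm512_slli_epi16::<3>(a);  // every lane becomes 1 << 3 = 8
/// let z = _mm512_slli_epi16::<16>(a); // counts of 16 or more produce all zeros
/// ```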
5312 | #[inline ] |
5313 | #[target_feature (enable = "avx512bw" )] |
5314 | #[cfg_attr (test, assert_instr(vpsllw, IMM8 = 5))] |
5315 | #[rustc_legacy_const_generics (1)] |
5316 | pub unsafe fn _mm512_slli_epi16<const IMM8: u32>(a: __m512i) -> __m512i { |
5317 | static_assert_uimm_bits!(IMM8, 8); |
5318 | if IMM8 >= 16 { |
5319 | _mm512_setzero_si512() |
5320 | } else { |
transmute(simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
5322 | } |
5323 | } |
5324 | |
5325 | /// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5326 | /// |
5327 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi16&expand=5299) |
5328 | #[inline ] |
5329 | #[target_feature (enable = "avx512bw" )] |
5330 | #[cfg_attr (test, assert_instr(vpsllw, IMM8 = 5))] |
5331 | #[rustc_legacy_const_generics (3)] |
5332 | pub unsafe fn _mm512_mask_slli_epi16<const IMM8: u32>( |
5333 | src: __m512i, |
5334 | k: __mmask32, |
5335 | a: __m512i, |
5336 | ) -> __m512i { |
5337 | static_assert_uimm_bits!(IMM8, 8); |
5338 | let shf: u16x32 = if IMM8 >= 16 { |
5339 | u16x32::splat(0) |
5340 | } else { |
simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16))
5342 | }; |
transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
5344 | } |
5345 | |
5346 | /// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5347 | /// |
5348 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi16&expand=5300) |
5349 | #[inline ] |
5350 | #[target_feature (enable = "avx512bw" )] |
5351 | #[cfg_attr (test, assert_instr(vpsllw, IMM8 = 5))] |
5352 | #[rustc_legacy_const_generics (2)] |
5353 | pub unsafe fn _mm512_maskz_slli_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i { |
5354 | static_assert_uimm_bits!(IMM8, 8); |
5355 | if IMM8 >= 16 { |
5356 | _mm512_setzero_si512() |
5357 | } else { |
let shf: u16x32 = simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16));
5359 | let zero: u16x32 = u16x32::splat(0); |
transmute(simd_select_bitmask(k, shf, zero))
5361 | } |
5362 | } |
5363 | |
5364 | /// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5365 | /// |
5366 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi16&expand=5296) |
5367 | #[inline ] |
5368 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5369 | #[cfg_attr (test, assert_instr(vpsllw, IMM8 = 5))] |
5370 | #[rustc_legacy_const_generics (3)] |
5371 | pub unsafe fn _mm256_mask_slli_epi16<const IMM8: u32>( |
5372 | src: __m256i, |
5373 | k: __mmask16, |
5374 | a: __m256i, |
5375 | ) -> __m256i { |
5376 | static_assert_uimm_bits!(IMM8, 8); |
5377 | let shf: u16x16 = if IMM8 >= 16 { |
5378 | u16x16::splat(0) |
5379 | } else { |
simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16))
5381 | }; |
transmute(simd_select_bitmask(k, shf, src.as_u16x16()))
5383 | } |
5384 | |
5385 | /// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5386 | /// |
5387 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi16&expand=5297) |
5388 | #[inline ] |
5389 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5390 | #[cfg_attr (test, assert_instr(vpsllw, IMM8 = 5))] |
5391 | #[rustc_legacy_const_generics (2)] |
5392 | pub unsafe fn _mm256_maskz_slli_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i { |
5393 | static_assert_uimm_bits!(IMM8, 8); |
5394 | if IMM8 >= 16 { |
5395 | _mm256_setzero_si256() |
5396 | } else { |
let shf: u16x16 = simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16));
5398 | let zero: u16x16 = u16x16::splat(0); |
transmute(simd_select_bitmask(k, shf, zero))
5400 | } |
5401 | } |
5402 | |
5403 | /// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5404 | /// |
5405 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi16&expand=5293) |
5406 | #[inline ] |
5407 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5408 | #[cfg_attr (test, assert_instr(vpsllw, IMM8 = 5))] |
5409 | #[rustc_legacy_const_generics (3)] |
5410 | pub unsafe fn _mm_mask_slli_epi16<const IMM8: u32>( |
5411 | src: __m128i, |
5412 | k: __mmask8, |
5413 | a: __m128i, |
5414 | ) -> __m128i { |
5415 | static_assert_uimm_bits!(IMM8, 8); |
5416 | let shf: u16x8 = if IMM8 >= 16 { |
5417 | u16x8::splat(0) |
5418 | } else { |
simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16))
5420 | }; |
transmute(simd_select_bitmask(k, shf, src.as_u16x8()))
5422 | } |
5423 | |
5424 | /// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5425 | /// |
5426 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi16&expand=5294) |
5427 | #[inline ] |
5428 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5429 | #[cfg_attr (test, assert_instr(vpsllw, IMM8 = 5))] |
5430 | #[rustc_legacy_const_generics (2)] |
5431 | pub unsafe fn _mm_maskz_slli_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i { |
5432 | static_assert_uimm_bits!(IMM8, 8); |
5433 | if IMM8 >= 16 { |
5434 | _mm_setzero_si128() |
5435 | } else { |
let shf: u16x8 = simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16));
5437 | let zero: u16x8 = u16x8::splat(0); |
transmute(simd_select_bitmask(k, shf, zero))
5439 | } |
5440 | } |
5441 | |
5442 | /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. |
5443 | /// |
5444 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi16&expand=5333) |
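///
/// Each lane is shifted by the count in the corresponding lane of `count`; per-lane counts of 16 or more zero that lane. A minimal sketch (illustrative values):
///
/// ```ignore
/// let a = _mm512_set1_epi16(1);
/// let count = _mm512_set1_epi16(2);
/// let r = _mm512_sllv_epi16(a, count); // every lane becomes 1 << 2 = 4
/// ```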
5445 | #[inline ] |
5446 | #[target_feature (enable = "avx512bw" )] |
5447 | #[cfg_attr (test, assert_instr(vpsllvw))] |
5448 | pub unsafe fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i { |
transmute(vpsllvw(a.as_i16x32(), count.as_i16x32()))
5450 | } |
5451 | |
5452 | /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5453 | /// |
5454 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi16&expand=5331) |
5455 | #[inline ] |
5456 | #[target_feature (enable = "avx512bw" )] |
5457 | #[cfg_attr (test, assert_instr(vpsllvw))] |
5458 | pub unsafe fn _mm512_mask_sllv_epi16( |
5459 | src: __m512i, |
5460 | k: __mmask32, |
5461 | a: __m512i, |
5462 | count: __m512i, |
5463 | ) -> __m512i { |
5464 | let shf: i16x32 = _mm512_sllv_epi16(a, count).as_i16x32(); |
transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
5466 | } |
5467 | |
5468 | /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5469 | /// |
5470 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi16&expand=5332) |
5471 | #[inline ] |
5472 | #[target_feature (enable = "avx512bw" )] |
5473 | #[cfg_attr (test, assert_instr(vpsllvw))] |
5474 | pub unsafe fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i { |
5475 | let shf: i16x32 = _mm512_sllv_epi16(a, count).as_i16x32(); |
5476 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
transmute(simd_select_bitmask(k, shf, zero))
5478 | } |
5479 | |
5480 | /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. |
5481 | /// |
5482 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi16&expand=5330) |
5483 | #[inline ] |
5484 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5485 | #[cfg_attr (test, assert_instr(vpsllvw))] |
5486 | pub unsafe fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i { |
transmute(vpsllvw256(a.as_i16x16(), count.as_i16x16()))
5488 | } |
5489 | |
5490 | /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5491 | /// |
5492 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi16&expand=5328) |
5493 | #[inline ] |
5494 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5495 | #[cfg_attr (test, assert_instr(vpsllvw))] |
5496 | pub unsafe fn _mm256_mask_sllv_epi16( |
5497 | src: __m256i, |
5498 | k: __mmask16, |
5499 | a: __m256i, |
5500 | count: __m256i, |
5501 | ) -> __m256i { |
5502 | let shf: i16x16 = _mm256_sllv_epi16(a, count).as_i16x16(); |
transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
5504 | } |
5505 | |
5506 | /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5507 | /// |
5508 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi16&expand=5329) |
5509 | #[inline ] |
5510 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5511 | #[cfg_attr (test, assert_instr(vpsllvw))] |
5512 | pub unsafe fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i { |
5513 | let shf: i16x16 = _mm256_sllv_epi16(a, count).as_i16x16(); |
5514 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
transmute(simd_select_bitmask(k, shf, zero))
5516 | } |
5517 | |
5518 | /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. |
5519 | /// |
5520 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi16&expand=5327) |
5521 | #[inline ] |
5522 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5523 | #[cfg_attr (test, assert_instr(vpsllvw))] |
5524 | pub unsafe fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i { |
transmute(vpsllvw128(a.as_i16x8(), count.as_i16x8()))
5526 | } |
5527 | |
5528 | /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5529 | /// |
5530 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi16&expand=5325) |
5531 | #[inline ] |
5532 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5533 | #[cfg_attr (test, assert_instr(vpsllvw))] |
5534 | pub unsafe fn _mm_mask_sllv_epi16( |
5535 | src: __m128i, |
5536 | k: __mmask8, |
5537 | a: __m128i, |
5538 | count: __m128i, |
5539 | ) -> __m128i { |
5540 | let shf: i16x8 = _mm_sllv_epi16(a, count).as_i16x8(); |
transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
5542 | } |
5543 | |
5544 | /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5545 | /// |
5546 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi16&expand=5326) |
5547 | #[inline ] |
5548 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5549 | #[cfg_attr (test, assert_instr(vpsllvw))] |
5550 | pub unsafe fn _mm_maskz_sllv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { |
5551 | let shf: i16x8 = _mm_sllv_epi16(a, count).as_i16x8(); |
5552 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
transmute(simd_select_bitmask(k, shf, zero))
5554 | } |
5555 | |
5556 | /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst. |
5557 | /// |
5558 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi16&expand=5483) |
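///
/// The shift amount is taken from the low 64 bits of `count`; counts of 16 or more zero every lane. A minimal sketch (illustrative values):
///
/// ```ignore
/// let a = _mm512_set1_epi16(16);
/// let count = _mm_set_epi64x(0, 2);   // shift by 2
/// let r = _mm512_srl_epi16(a, count); // every lane becomes 16 >> 2 = 4
/// ```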
5559 | #[inline ] |
5560 | #[target_feature (enable = "avx512bw" )] |
5561 | #[cfg_attr (test, assert_instr(vpsrlw))] |
5562 | pub unsafe fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i { |
transmute(vpsrlw(a.as_i16x32(), count.as_i16x8()))
5564 | } |
5565 | |
5566 | /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5567 | /// |
5568 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi16&expand=5481) |
5569 | #[inline ] |
5570 | #[target_feature (enable = "avx512bw" )] |
5571 | #[cfg_attr (test, assert_instr(vpsrlw))] |
5572 | pub unsafe fn _mm512_mask_srl_epi16( |
5573 | src: __m512i, |
5574 | k: __mmask32, |
5575 | a: __m512i, |
5576 | count: __m128i, |
5577 | ) -> __m512i { |
5578 | let shf: i16x32 = _mm512_srl_epi16(a, count).as_i16x32(); |
transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
5580 | } |
5581 | |
5582 | /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5583 | /// |
5584 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi16&expand=5482) |
5585 | #[inline ] |
5586 | #[target_feature (enable = "avx512bw" )] |
5587 | #[cfg_attr (test, assert_instr(vpsrlw))] |
5588 | pub unsafe fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { |
5589 | let shf: i16x32 = _mm512_srl_epi16(a, count).as_i16x32(); |
5590 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
transmute(simd_select_bitmask(k, shf, zero))
5592 | } |
5593 | |
5594 | /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5595 | /// |
5596 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi16&expand=5478) |
5597 | #[inline ] |
5598 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5599 | #[cfg_attr (test, assert_instr(vpsrlw))] |
5600 | pub unsafe fn _mm256_mask_srl_epi16( |
5601 | src: __m256i, |
5602 | k: __mmask16, |
5603 | a: __m256i, |
5604 | count: __m128i, |
5605 | ) -> __m256i { |
5606 | let shf: i16x16 = _mm256_srl_epi16(a, count).as_i16x16(); |
transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
5608 | } |
5609 | |
5610 | /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5611 | /// |
5612 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi16&expand=5479) |
5613 | #[inline ] |
5614 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5615 | #[cfg_attr (test, assert_instr(vpsrlw))] |
5616 | pub unsafe fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { |
5617 | let shf: i16x16 = _mm256_srl_epi16(a, count).as_i16x16(); |
5618 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
transmute(simd_select_bitmask(k, shf, zero))
5620 | } |
5621 | |
5622 | /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5623 | /// |
5624 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi16&expand=5475) |
5625 | #[inline ] |
5626 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5627 | #[cfg_attr (test, assert_instr(vpsrlw))] |
5628 | pub unsafe fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { |
5629 | let shf: i16x8 = _mm_srl_epi16(a, count).as_i16x8(); |
transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
5631 | } |
5632 | |
5633 | /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5634 | /// |
5635 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi16&expand=5476) |
5636 | #[inline ] |
5637 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5638 | #[cfg_attr (test, assert_instr(vpsrlw))] |
5639 | pub unsafe fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { |
5640 | let shf: i16x8 = _mm_srl_epi16(a, count).as_i16x8(); |
5641 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
transmute(simd_select_bitmask(k, shf, zero))
5643 | } |
5644 | |
5645 | /// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst. |
5646 | /// |
5647 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi16&expand=5513) |
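///
/// A minimal sketch of the const-generic form (illustrative values):
///
/// ```ignore
/// let a = _mm512_set1_epi16(8);
/// let r = _mm512_srli_epi16::<3>(a);  // logical shift: every lane becomes 8 >> 3 = 1
/// let z = _mm512_srli_epi16::<16>(a); // counts of 16 or more produce all zeros
/// ```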
5648 | #[inline ] |
5649 | #[target_feature (enable = "avx512bw" )] |
5650 | #[cfg_attr (test, assert_instr(vpsrlw, IMM8 = 5))] |
5651 | #[rustc_legacy_const_generics (1)] |
5652 | pub unsafe fn _mm512_srli_epi16<const IMM8: u32>(a: __m512i) -> __m512i { |
5653 | static_assert_uimm_bits!(IMM8, 8); |
5654 | if IMM8 >= 16 { |
5655 | _mm512_setzero_si512() |
5656 | } else { |
transmute(simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
5658 | } |
5659 | } |
5660 | |
5661 | /// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5662 | /// |
5663 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi16&expand=5511) |
5664 | #[inline ] |
5665 | #[target_feature (enable = "avx512bw" )] |
5666 | #[cfg_attr (test, assert_instr(vpsrlw, IMM8 = 5))] |
5667 | #[rustc_legacy_const_generics (3)] |
5668 | pub unsafe fn _mm512_mask_srli_epi16<const IMM8: u32>( |
5669 | src: __m512i, |
5670 | k: __mmask32, |
5671 | a: __m512i, |
5672 | ) -> __m512i { |
5673 | static_assert_uimm_bits!(IMM8, 8); |
5674 | let shf: u16x32 = if IMM8 >= 16 { |
5675 | u16x32::splat(0) |
5676 | } else { |
simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16))
5678 | }; |
transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
5680 | } |
5681 | |
5682 | /// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5683 | /// |
5684 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi16&expand=5512) |
5685 | #[inline ] |
5686 | #[target_feature (enable = "avx512bw" )] |
5687 | #[cfg_attr (test, assert_instr(vpsrlw, IMM8 = 5))] |
5688 | #[rustc_legacy_const_generics (2)] |
5689 | pub unsafe fn _mm512_maskz_srli_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i { |
5690 | static_assert_uimm_bits!(IMM8, 8); |
    // Note: IMM8 should be an unsigned immediate (u32), as in the other srli
    // intrinsics; the reference signature declaring it signed appears to be incorrect.
5692 | if IMM8 >= 16 { |
5693 | _mm512_setzero_si512() |
5694 | } else { |
        let shf: u16x32 = simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16));
        let zero: u16x32 = u16x32::splat(0);
        transmute(simd_select_bitmask(k, shf, zero))
5698 | } |
5699 | } |
5700 | |
5701 | /// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5702 | /// |
5703 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi16&expand=5508) |
5704 | #[inline ] |
5705 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5706 | #[cfg_attr (test, assert_instr(vpsrlw, IMM8 = 5))] |
5707 | #[rustc_legacy_const_generics (3)] |
5708 | pub unsafe fn _mm256_mask_srli_epi16<const IMM8: i32>( |
5709 | src: __m256i, |
5710 | k: __mmask16, |
5711 | a: __m256i, |
5712 | ) -> __m256i { |
5713 | static_assert_uimm_bits!(IMM8, 8); |
5714 | let shf: __m256i = _mm256_srli_epi16::<IMM8>(a); |
    transmute(simd_select_bitmask(k, shf.as_i16x16(), src.as_i16x16()))
5716 | } |
5717 | |
5718 | /// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5719 | /// |
5720 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi16&expand=5509) |
5721 | #[inline ] |
5722 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5723 | #[cfg_attr (test, assert_instr(vpsrlw, IMM8 = 5))] |
5724 | #[rustc_legacy_const_generics (2)] |
5725 | pub unsafe fn _mm256_maskz_srli_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i { |
5726 | static_assert_uimm_bits!(IMM8, 8); |
5727 | let shf: __m256i = _mm256_srli_epi16::<IMM8>(a); |
5728 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
    transmute(simd_select_bitmask(k, shf.as_i16x16(), zero))
5730 | } |
5731 | |
5732 | /// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5733 | /// |
5734 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi16&expand=5505) |
5735 | #[inline ] |
5736 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5737 | #[cfg_attr (test, assert_instr(vpsrlw, IMM8 = 5))] |
5738 | #[rustc_legacy_const_generics (3)] |
5739 | pub unsafe fn _mm_mask_srli_epi16<const IMM8: i32>( |
5740 | src: __m128i, |
5741 | k: __mmask8, |
5742 | a: __m128i, |
5743 | ) -> __m128i { |
5744 | static_assert_uimm_bits!(IMM8, 8); |
5745 | let shf: __m128i = _mm_srli_epi16::<IMM8>(a); |
    transmute(simd_select_bitmask(k, shf.as_i16x8(), src.as_i16x8()))
5747 | } |
5748 | |
5749 | /// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5750 | /// |
5751 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi16&expand=5506) |
5752 | #[inline ] |
5753 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5754 | #[cfg_attr (test, assert_instr(vpsrlw, IMM8 = 5))] |
5755 | #[rustc_legacy_const_generics (2)] |
5756 | pub unsafe fn _mm_maskz_srli_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i { |
5757 | static_assert_uimm_bits!(IMM8, 8); |
5758 | let shf: __m128i = _mm_srli_epi16::<IMM8>(a); |
5759 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
    transmute(simd_select_bitmask(k, shf.as_i16x8(), zero))
5761 | } |
5762 | |
5763 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. |
5764 | /// |
5765 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi16&expand=5545) |
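///
/// A sketch of the per-element shift counts (illustrative values only), assuming
/// `avx512bw` is available:
///
/// ```ignore
/// let a = _mm512_set1_epi16(8);
/// // arguments to _mm512_set_epi16 run from lane 31 down to lane 0, so
/// // even-numbered lanes get a count of 1 and odd-numbered lanes a count of 3
/// let count = _mm512_set_epi16(
///     3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1,
///     3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1,
/// );
/// let r = _mm512_srlv_epi16(a, count);
/// // even-numbered lanes hold 4 (8 >> 1), odd-numbered lanes hold 1 (8 >> 3)
/// ```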
5766 | #[inline ] |
5767 | #[target_feature (enable = "avx512bw" )] |
5768 | #[cfg_attr (test, assert_instr(vpsrlvw))] |
5769 | pub unsafe fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i { |
    transmute(vpsrlvw(a.as_i16x32(), count.as_i16x32()))
5771 | } |
5772 | |
5773 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5774 | /// |
5775 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi16&expand=5543) |
5776 | #[inline ] |
5777 | #[target_feature (enable = "avx512bw" )] |
5778 | #[cfg_attr (test, assert_instr(vpsrlvw))] |
5779 | pub unsafe fn _mm512_mask_srlv_epi16( |
5780 | src: __m512i, |
5781 | k: __mmask32, |
5782 | a: __m512i, |
5783 | count: __m512i, |
5784 | ) -> __m512i { |
5785 | let shf: i16x32 = _mm512_srlv_epi16(a, count).as_i16x32(); |
    transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
5787 | } |
5788 | |
5789 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5790 | /// |
5791 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi16&expand=5544) |
5792 | #[inline ] |
5793 | #[target_feature (enable = "avx512bw" )] |
5794 | #[cfg_attr (test, assert_instr(vpsrlvw))] |
5795 | pub unsafe fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i { |
5796 | let shf: i16x32 = _mm512_srlv_epi16(a, count).as_i16x32(); |
5797 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
    transmute(simd_select_bitmask(k, shf, zero))
5799 | } |
5800 | |
5801 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. |
5802 | /// |
5803 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi16&expand=5542) |
5804 | #[inline ] |
5805 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5806 | #[cfg_attr (test, assert_instr(vpsrlvw))] |
5807 | pub unsafe fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i { |
    transmute(vpsrlvw256(a.as_i16x16(), count.as_i16x16()))
5809 | } |
5810 | |
5811 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5812 | /// |
5813 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi16&expand=5540) |
5814 | #[inline ] |
5815 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5816 | #[cfg_attr (test, assert_instr(vpsrlvw))] |
5817 | pub unsafe fn _mm256_mask_srlv_epi16( |
5818 | src: __m256i, |
5819 | k: __mmask16, |
5820 | a: __m256i, |
5821 | count: __m256i, |
5822 | ) -> __m256i { |
5823 | let shf: i16x16 = _mm256_srlv_epi16(a, count).as_i16x16(); |
    transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
5825 | } |
5826 | |
5827 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5828 | /// |
5829 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi16&expand=5541) |
5830 | #[inline ] |
5831 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5832 | #[cfg_attr (test, assert_instr(vpsrlvw))] |
5833 | pub unsafe fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i { |
5834 | let shf: i16x16 = _mm256_srlv_epi16(a, count).as_i16x16(); |
5835 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
    transmute(simd_select_bitmask(k, shf, zero))
5837 | } |
5838 | |
5839 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. |
5840 | /// |
5841 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi16&expand=5539) |
5842 | #[inline ] |
5843 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5844 | #[cfg_attr (test, assert_instr(vpsrlvw))] |
5845 | pub unsafe fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i { |
    transmute(vpsrlvw128(a.as_i16x8(), count.as_i16x8()))
5847 | } |
5848 | |
5849 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5850 | /// |
5851 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi16&expand=5537) |
5852 | #[inline ] |
5853 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5854 | #[cfg_attr (test, assert_instr(vpsrlvw))] |
5855 | pub unsafe fn _mm_mask_srlv_epi16( |
5856 | src: __m128i, |
5857 | k: __mmask8, |
5858 | a: __m128i, |
5859 | count: __m128i, |
5860 | ) -> __m128i { |
5861 | let shf: i16x8 = _mm_srlv_epi16(a, count).as_i16x8(); |
    transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
5863 | } |
5864 | |
5865 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5866 | /// |
5867 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi16&expand=5538) |
5868 | #[inline ] |
5869 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5870 | #[cfg_attr (test, assert_instr(vpsrlvw))] |
5871 | pub unsafe fn _mm_maskz_srlv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { |
5872 | let shf: i16x8 = _mm_srlv_epi16(a, count).as_i16x8(); |
5873 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
    transmute(simd_select_bitmask(k, shf, zero))
5875 | } |
5876 | |
5877 | /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst. |
5878 | /// |
5879 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi16&expand=5398) |
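///
/// A sketch contrasting the arithmetic shift with the logical variant
/// (illustrative values only), assuming `avx512bw` is available:
///
/// ```ignore
/// let a = _mm512_set1_epi16(-32);
/// // the shift amount is taken from the low 64 bits of `count`
/// let count = _mm_set1_epi64x(2);
/// let r = _mm512_sra_epi16(a, count);
/// // every lane holds -8: the sign bit is replicated into the vacated bits,
/// // whereas _mm512_srl_epi16 would have shifted in zeros
/// ```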
5880 | #[inline ] |
5881 | #[target_feature (enable = "avx512bw" )] |
5882 | #[cfg_attr (test, assert_instr(vpsraw))] |
5883 | pub unsafe fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i { |
    transmute(vpsraw(a.as_i16x32(), count.as_i16x8()))
5885 | } |
5886 | |
5887 | /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5888 | /// |
5889 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi16&expand=5396) |
5890 | #[inline ] |
5891 | #[target_feature (enable = "avx512bw" )] |
5892 | #[cfg_attr (test, assert_instr(vpsraw))] |
5893 | pub unsafe fn _mm512_mask_sra_epi16( |
5894 | src: __m512i, |
5895 | k: __mmask32, |
5896 | a: __m512i, |
5897 | count: __m128i, |
5898 | ) -> __m512i { |
5899 | let shf: i16x32 = _mm512_sra_epi16(a, count).as_i16x32(); |
    transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
5901 | } |
5902 | |
5903 | /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5904 | /// |
5905 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi16&expand=5397) |
5906 | #[inline ] |
5907 | #[target_feature (enable = "avx512bw" )] |
5908 | #[cfg_attr (test, assert_instr(vpsraw))] |
5909 | pub unsafe fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { |
5910 | let shf: i16x32 = _mm512_sra_epi16(a, count).as_i16x32(); |
5911 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
    transmute(simd_select_bitmask(k, shf, zero))
5913 | } |
5914 | |
5915 | /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5916 | /// |
5917 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi16&expand=5393) |
5918 | #[inline ] |
5919 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5920 | #[cfg_attr (test, assert_instr(vpsraw))] |
5921 | pub unsafe fn _mm256_mask_sra_epi16( |
5922 | src: __m256i, |
5923 | k: __mmask16, |
5924 | a: __m256i, |
5925 | count: __m128i, |
5926 | ) -> __m256i { |
5927 | let shf: i16x16 = _mm256_sra_epi16(a, count).as_i16x16(); |
    transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
5929 | } |
5930 | |
5931 | /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5932 | /// |
5933 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi16&expand=5394) |
5934 | #[inline ] |
5935 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5936 | #[cfg_attr (test, assert_instr(vpsraw))] |
5937 | pub unsafe fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { |
5938 | let shf: i16x16 = _mm256_sra_epi16(a, count).as_i16x16(); |
5939 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
    transmute(simd_select_bitmask(k, shf, zero))
5941 | } |
5942 | |
5943 | /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5944 | /// |
5945 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi16&expand=5390) |
5946 | #[inline ] |
5947 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5948 | #[cfg_attr (test, assert_instr(vpsraw))] |
5949 | pub unsafe fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { |
5950 | let shf: i16x8 = _mm_sra_epi16(a, count).as_i16x8(); |
    transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
5952 | } |
5953 | |
5954 | /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5955 | /// |
5956 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi16&expand=5391) |
5957 | #[inline ] |
5958 | #[target_feature (enable = "avx512bw,avx512vl" )] |
5959 | #[cfg_attr (test, assert_instr(vpsraw))] |
5960 | pub unsafe fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { |
5961 | let shf: i16x8 = _mm_sra_epi16(a, count).as_i16x8(); |
5962 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
    transmute(simd_select_bitmask(k, shf, zero))
5964 | } |
5965 | |
5966 | /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. |
5967 | /// |
5968 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi16&expand=5427) |
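///
/// A minimal sketch (illustrative values only), assuming `avx512bw` is available:
///
/// ```ignore
/// let a = _mm512_set1_epi16(-16);
/// let r = _mm512_srai_epi16::<2>(a);
/// // every lane holds -4; the same shift with _mm512_srli_epi16 would yield a
/// // large positive value because zeros would be shifted in instead of sign bits
/// ```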
5969 | #[inline ] |
5970 | #[target_feature (enable = "avx512bw" )] |
5971 | #[cfg_attr (test, assert_instr(vpsraw, IMM8 = 1))] |
5972 | #[rustc_legacy_const_generics (1)] |
5973 | pub unsafe fn _mm512_srai_epi16<const IMM8: u32>(a: __m512i) -> __m512i { |
5974 | static_assert_uimm_bits!(IMM8, 8); |
    transmute(simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16)))
5976 | } |
5977 | |
5978 | /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
5979 | /// |
5980 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi16&expand=5425) |
5981 | #[inline ] |
5982 | #[target_feature (enable = "avx512bw" )] |
5983 | #[cfg_attr (test, assert_instr(vpsraw, IMM8 = 1))] |
5984 | #[rustc_legacy_const_generics (3)] |
5985 | pub unsafe fn _mm512_mask_srai_epi16<const IMM8: u32>( |
5986 | src: __m512i, |
5987 | k: __mmask32, |
5988 | a: __m512i, |
5989 | ) -> __m512i { |
5990 | static_assert_uimm_bits!(IMM8, 8); |
    let shf: i16x32 = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
    transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
5993 | } |
5994 | |
5995 | /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
5996 | /// |
5997 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi16&expand=5426) |
5998 | #[inline ] |
5999 | #[target_feature (enable = "avx512bw" )] |
6000 | #[cfg_attr (test, assert_instr(vpsraw, IMM8 = 1))] |
6001 | #[rustc_legacy_const_generics (2)] |
6002 | pub unsafe fn _mm512_maskz_srai_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i { |
6003 | static_assert_uimm_bits!(IMM8, 8); |
    let shf: i16x32 = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
    let zero: i16x32 = i16x32::splat(0);
    transmute(simd_select_bitmask(k, shf, zero))
6007 | } |
6008 | |
6009 | /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
6010 | /// |
6011 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi16&expand=5422) |
6012 | #[inline ] |
6013 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6014 | #[cfg_attr (test, assert_instr(vpsraw, IMM8 = 1))] |
6015 | #[rustc_legacy_const_generics (3)] |
6016 | pub unsafe fn _mm256_mask_srai_epi16<const IMM8: u32>( |
6017 | src: __m256i, |
6018 | k: __mmask16, |
6019 | a: __m256i, |
6020 | ) -> __m256i { |
6021 | static_assert_uimm_bits!(IMM8, 8); |
    let r: i16x16 = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
    transmute(simd_select_bitmask(k, r, src.as_i16x16()))
6024 | } |
6025 | |
6026 | /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
6027 | /// |
6028 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi16&expand=5423) |
6029 | #[inline ] |
6030 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6031 | #[cfg_attr (test, assert_instr(vpsraw, IMM8 = 1))] |
6032 | #[rustc_legacy_const_generics (2)] |
6033 | pub unsafe fn _mm256_maskz_srai_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i { |
6034 | static_assert_uimm_bits!(IMM8, 8); |
    let r: i16x16 = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
    let zero: i16x16 = i16x16::splat(0);
    transmute(simd_select_bitmask(k, r, zero))
6038 | } |
6039 | |
6040 | /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
6041 | /// |
6042 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi16&expand=5419) |
6043 | #[inline ] |
6044 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6045 | #[cfg_attr (test, assert_instr(vpsraw, IMM8 = 1))] |
6046 | #[rustc_legacy_const_generics (3)] |
6047 | pub unsafe fn _mm_mask_srai_epi16<const IMM8: u32>( |
6048 | src: __m128i, |
6049 | k: __mmask8, |
6050 | a: __m128i, |
6051 | ) -> __m128i { |
6052 | static_assert_uimm_bits!(IMM8, 8); |
    let r: i16x8 = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
    transmute(simd_select_bitmask(k, r, src.as_i16x8()))
6055 | } |
6056 | |
6057 | /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
6058 | /// |
6059 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi16&expand=5420) |
6060 | #[inline ] |
6061 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6062 | #[cfg_attr (test, assert_instr(vpsraw, IMM8 = 1))] |
6063 | #[rustc_legacy_const_generics (2)] |
6064 | pub unsafe fn _mm_maskz_srai_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i { |
6065 | static_assert_uimm_bits!(IMM8, 8); |
    let r: i16x8 = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
    let zero: i16x8 = i16x8::splat(0);
    transmute(simd_select_bitmask(k, r, zero))
6069 | } |
6070 | |
6071 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. |
6072 | /// |
6073 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi16&expand=5456) |
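///
/// A minimal sketch (illustrative values only), assuming `avx512bw` is available;
/// each lane is shifted by the count held in the corresponding lane of `count`:
///
/// ```ignore
/// let a = _mm512_set1_epi16(-64);
/// let count = _mm512_set1_epi16(3);
/// let r = _mm512_srav_epi16(a, count);
/// // every lane holds -8 (-64 >> 3 with sign extension)
/// ```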
6074 | #[inline ] |
6075 | #[target_feature (enable = "avx512bw" )] |
6076 | #[cfg_attr (test, assert_instr(vpsravw))] |
6077 | pub unsafe fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i { |
    transmute(vpsravw(a.as_i16x32(), count.as_i16x32()))
6079 | } |
6080 | |
6081 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
6082 | /// |
6083 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi16&expand=5454) |
6084 | #[inline ] |
6085 | #[target_feature (enable = "avx512bw" )] |
6086 | #[cfg_attr (test, assert_instr(vpsravw))] |
6087 | pub unsafe fn _mm512_mask_srav_epi16( |
6088 | src: __m512i, |
6089 | k: __mmask32, |
6090 | a: __m512i, |
6091 | count: __m512i, |
6092 | ) -> __m512i { |
6093 | let shf: i16x32 = _mm512_srav_epi16(a, count).as_i16x32(); |
    transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
6095 | } |
6096 | |
6097 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
6098 | /// |
6099 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi16&expand=5455) |
6100 | #[inline ] |
6101 | #[target_feature (enable = "avx512bw" )] |
6102 | #[cfg_attr (test, assert_instr(vpsravw))] |
6103 | pub unsafe fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i { |
6104 | let shf: i16x32 = _mm512_srav_epi16(a, count).as_i16x32(); |
6105 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
    transmute(simd_select_bitmask(k, shf, zero))
6107 | } |
6108 | |
6109 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. |
6110 | /// |
6111 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi16&expand=5453) |
6112 | #[inline ] |
6113 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6114 | #[cfg_attr (test, assert_instr(vpsravw))] |
6115 | pub unsafe fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i { |
    transmute(vpsravw256(a.as_i16x16(), count.as_i16x16()))
6117 | } |
6118 | |
6119 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
6120 | /// |
6121 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi16&expand=5451) |
6122 | #[inline ] |
6123 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6124 | #[cfg_attr (test, assert_instr(vpsravw))] |
6125 | pub unsafe fn _mm256_mask_srav_epi16( |
6126 | src: __m256i, |
6127 | k: __mmask16, |
6128 | a: __m256i, |
6129 | count: __m256i, |
6130 | ) -> __m256i { |
6131 | let shf: i16x16 = _mm256_srav_epi16(a, count).as_i16x16(); |
    transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
6133 | } |
6134 | |
6135 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
6136 | /// |
6137 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi16&expand=5452) |
6138 | #[inline ] |
6139 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6140 | #[cfg_attr (test, assert_instr(vpsravw))] |
6141 | pub unsafe fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i { |
6142 | let shf: i16x16 = _mm256_srav_epi16(a, count).as_i16x16(); |
6143 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
    transmute(simd_select_bitmask(k, shf, zero))
6145 | } |
6146 | |
6147 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. |
6148 | /// |
6149 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi16&expand=5450) |
6150 | #[inline ] |
6151 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6152 | #[cfg_attr (test, assert_instr(vpsravw))] |
6153 | pub unsafe fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i { |
    transmute(vpsravw128(a.as_i16x8(), count.as_i16x8()))
6155 | } |
6156 | |
6157 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
6158 | /// |
6159 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi16&expand=5448) |
6160 | #[inline ] |
6161 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6162 | #[cfg_attr (test, assert_instr(vpsravw))] |
6163 | pub unsafe fn _mm_mask_srav_epi16( |
6164 | src: __m128i, |
6165 | k: __mmask8, |
6166 | a: __m128i, |
6167 | count: __m128i, |
6168 | ) -> __m128i { |
6169 | let shf: i16x8 = _mm_srav_epi16(a, count).as_i16x8(); |
    transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
6171 | } |
6172 | |
6173 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
6174 | /// |
6175 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi16&expand=5449) |
6176 | #[inline ] |
6177 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6178 | #[cfg_attr (test, assert_instr(vpsravw))] |
6179 | pub unsafe fn _mm_maskz_srav_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { |
6180 | let shf: i16x8 = _mm_srav_epi16(a, count).as_i16x8(); |
6181 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
    transmute(simd_select_bitmask(k, shf, zero))
6183 | } |
6184 | |
6185 | /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. |
6186 | /// |
6187 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi16&expand=4226) |
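///
/// A sketch of how the index selects between the two sources (illustrative
/// values only), assuming `avx512bw` is available:
///
/// ```ignore
/// let a = _mm512_set1_epi16(1);
/// let b = _mm512_set1_epi16(2);
/// // the low 5 index bits pick a lane; bit 5 (value 32) picks `b` instead of `a`
/// let idx = _mm512_set_epi16(
///     32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0,
///     32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0,
/// );
/// let r = _mm512_permutex2var_epi16(a, idx, b);
/// // lanes alternate between 1 (taken from `a`) and 2 (taken from `b`)
/// ```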
6188 | #[inline ] |
6189 | #[target_feature (enable = "avx512bw" )] |
6190 | #[cfg_attr (test, assert_instr(vperm))] //vpermi2w or vpermt2w |
6191 | pub unsafe fn _mm512_permutex2var_epi16(a: __m512i, idx: __m512i, b: __m512i) -> __m512i { |
    transmute(vpermi2w(a.as_i16x32(), idx.as_i16x32(), b.as_i16x32()))
6193 | } |
6194 | |
6195 | /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
6196 | /// |
6197 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi16&expand=4223) |
6198 | #[inline ] |
6199 | #[target_feature (enable = "avx512bw" )] |
6200 | #[cfg_attr (test, assert_instr(vpermt2w))] |
6201 | pub unsafe fn _mm512_mask_permutex2var_epi16( |
6202 | a: __m512i, |
6203 | k: __mmask32, |
6204 | idx: __m512i, |
6205 | b: __m512i, |
6206 | ) -> __m512i { |
6207 | let permute: i16x32 = _mm512_permutex2var_epi16(a, idx, b).as_i16x32(); |
    transmute(simd_select_bitmask(k, permute, a.as_i16x32()))
6209 | } |
6210 | |
6211 | /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
6212 | /// |
6213 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi16&expand=4225) |
6214 | #[inline ] |
6215 | #[target_feature (enable = "avx512bw" )] |
6216 | #[cfg_attr (test, assert_instr(vperm))] //vpermi2w or vpermt2w |
6217 | pub unsafe fn _mm512_maskz_permutex2var_epi16( |
6218 | k: __mmask32, |
6219 | a: __m512i, |
6220 | idx: __m512i, |
6221 | b: __m512i, |
6222 | ) -> __m512i { |
6223 | let permute: i16x32 = _mm512_permutex2var_epi16(a, idx, b).as_i16x32(); |
6224 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
    transmute(simd_select_bitmask(k, permute, zero))
6226 | } |
6227 | |
6228 | /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). |
6229 | /// |
6230 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi16&expand=4224) |
6231 | #[inline ] |
6232 | #[target_feature (enable = "avx512bw" )] |
6233 | #[cfg_attr (test, assert_instr(vpermi2w))] |
6234 | pub unsafe fn _mm512_mask2_permutex2var_epi16( |
6235 | a: __m512i, |
6236 | idx: __m512i, |
6237 | k: __mmask32, |
6238 | b: __m512i, |
6239 | ) -> __m512i { |
6240 | let permute: i16x32 = _mm512_permutex2var_epi16(a, idx, b).as_i16x32(); |
    transmute(simd_select_bitmask(k, permute, idx.as_i16x32()))
6242 | } |
6243 | |
6244 | /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. |
6245 | /// |
6246 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi16&expand=4222) |
6247 | #[inline ] |
6248 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6249 | #[cfg_attr (test, assert_instr(vperm))] //vpermi2w or vpermt2w |
6250 | pub unsafe fn _mm256_permutex2var_epi16(a: __m256i, idx: __m256i, b: __m256i) -> __m256i { |
    transmute(vpermi2w256(a.as_i16x16(), idx.as_i16x16(), b.as_i16x16()))
6252 | } |
6253 | |
6254 | /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
6255 | /// |
6256 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi16&expand=4219) |
6257 | #[inline ] |
6258 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6259 | #[cfg_attr (test, assert_instr(vpermt2w))] |
6260 | pub unsafe fn _mm256_mask_permutex2var_epi16( |
6261 | a: __m256i, |
6262 | k: __mmask16, |
6263 | idx: __m256i, |
6264 | b: __m256i, |
6265 | ) -> __m256i { |
6266 | let permute: i16x16 = _mm256_permutex2var_epi16(a, idx, b).as_i16x16(); |
    transmute(simd_select_bitmask(k, permute, a.as_i16x16()))
6268 | } |
6269 | |
6270 | /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
6271 | /// |
6272 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi16&expand=4221) |
6273 | #[inline ] |
6274 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6275 | #[cfg_attr (test, assert_instr(vperm))] //vpermi2w or vpermt2w |
6276 | pub unsafe fn _mm256_maskz_permutex2var_epi16( |
6277 | k: __mmask16, |
6278 | a: __m256i, |
6279 | idx: __m256i, |
6280 | b: __m256i, |
6281 | ) -> __m256i { |
6282 | let permute: i16x16 = _mm256_permutex2var_epi16(a, idx, b).as_i16x16(); |
6283 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
    transmute(simd_select_bitmask(k, permute, zero))
6285 | } |
6286 | |
6287 | /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). |
6288 | /// |
6289 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi16&expand=4220) |
6290 | #[inline ] |
6291 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6292 | #[cfg_attr (test, assert_instr(vpermi2w))] |
6293 | pub unsafe fn _mm256_mask2_permutex2var_epi16( |
6294 | a: __m256i, |
6295 | idx: __m256i, |
6296 | k: __mmask16, |
6297 | b: __m256i, |
6298 | ) -> __m256i { |
6299 | let permute: i16x16 = _mm256_permutex2var_epi16(a, idx, b).as_i16x16(); |
    transmute(simd_select_bitmask(k, permute, idx.as_i16x16()))
6301 | } |
6302 | |
6303 | /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. |
6304 | /// |
6305 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi16&expand=4218) |
6306 | #[inline ] |
6307 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6308 | #[cfg_attr (test, assert_instr(vperm))] //vpermi2w or vpermt2w |
6309 | pub unsafe fn _mm_permutex2var_epi16(a: __m128i, idx: __m128i, b: __m128i) -> __m128i { |
    transmute(vpermi2w128(a.as_i16x8(), idx.as_i16x8(), b.as_i16x8()))
6311 | } |
6312 | |
6313 | /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). |
6314 | /// |
6315 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi16&expand=4215) |
6316 | #[inline ] |
6317 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6318 | #[cfg_attr (test, assert_instr(vpermt2w))] |
6319 | pub unsafe fn _mm_mask_permutex2var_epi16( |
6320 | a: __m128i, |
6321 | k: __mmask8, |
6322 | idx: __m128i, |
6323 | b: __m128i, |
6324 | ) -> __m128i { |
6325 | let permute: i16x8 = _mm_permutex2var_epi16(a, idx, b).as_i16x8(); |
    transmute(simd_select_bitmask(k, permute, a.as_i16x8()))
6327 | } |
6328 | |
6329 | /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
6330 | /// |
6331 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi16&expand=4217) |
6332 | #[inline ] |
6333 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6334 | #[cfg_attr (test, assert_instr(vperm))] //vpermi2w or vpermt2w |
6335 | pub unsafe fn _mm_maskz_permutex2var_epi16( |
6336 | k: __mmask8, |
6337 | a: __m128i, |
6338 | idx: __m128i, |
6339 | b: __m128i, |
6340 | ) -> __m128i { |
6341 | let permute: i16x8 = _mm_permutex2var_epi16(a, idx, b).as_i16x8(); |
6342 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
    transmute(simd_select_bitmask(k, permute, zero))
6344 | } |
6345 | |
6346 | /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). |
6347 | /// |
6348 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi16&expand=4216) |
6349 | #[inline ] |
6350 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6351 | #[cfg_attr (test, assert_instr(vpermi2w))] |
6352 | pub unsafe fn _mm_mask2_permutex2var_epi16( |
6353 | a: __m128i, |
6354 | idx: __m128i, |
6355 | k: __mmask8, |
6356 | b: __m128i, |
6357 | ) -> __m128i { |
6358 | let permute: i16x8 = _mm_permutex2var_epi16(a, idx, b).as_i16x8(); |
    transmute(simd_select_bitmask(k, permute, idx.as_i16x8()))
6360 | } |
6361 | |
6362 | /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. |
6363 | /// |
6364 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi16&expand=4295) |
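///
/// A sketch broadcasting a single lane through the index vector (illustrative
/// values only), assuming `avx512bw` is available:
///
/// ```ignore
/// // lane i of `a` holds the value i (arguments run from lane 31 down to lane 0)
/// let a = _mm512_set_epi16(
///     31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
///     15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
/// );
/// let idx = _mm512_set1_epi16(3);
/// let r = _mm512_permutexvar_epi16(idx, a);
/// // every lane of `r` holds 3, the contents of lane 3 of `a`
/// ```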
6365 | #[inline ] |
6366 | #[target_feature (enable = "avx512bw" )] |
6367 | #[cfg_attr (test, assert_instr(vpermw))] |
6368 | pub unsafe fn _mm512_permutexvar_epi16(idx: __m512i, a: __m512i) -> __m512i { |
    transmute(vpermw(a.as_i16x32(), idx.as_i16x32()))
6370 | } |
6371 | |
6372 | /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
6373 | /// |
6374 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi16&expand=4293) |
6375 | #[inline ] |
6376 | #[target_feature (enable = "avx512bw" )] |
6377 | #[cfg_attr (test, assert_instr(vpermw))] |
6378 | pub unsafe fn _mm512_mask_permutexvar_epi16( |
6379 | src: __m512i, |
6380 | k: __mmask32, |
6381 | idx: __m512i, |
6382 | a: __m512i, |
6383 | ) -> __m512i { |
6384 | let permute: i16x32 = _mm512_permutexvar_epi16(idx, a).as_i16x32(); |
    transmute(simd_select_bitmask(k, permute, src.as_i16x32()))
6386 | } |
6387 | |
6388 | /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
6389 | /// |
6390 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi16&expand=4294) |
6391 | #[inline ] |
6392 | #[target_feature (enable = "avx512bw" )] |
6393 | #[cfg_attr (test, assert_instr(vpermw))] |
6394 | pub unsafe fn _mm512_maskz_permutexvar_epi16(k: __mmask32, idx: __m512i, a: __m512i) -> __m512i { |
6395 | let permute: i16x32 = _mm512_permutexvar_epi16(idx, a).as_i16x32(); |
6396 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
    transmute(simd_select_bitmask(k, permute, zero))
6398 | } |
6399 | |
6400 | /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. |
6401 | /// |
6402 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi16&expand=4292) |
6403 | #[inline ] |
6404 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6405 | #[cfg_attr (test, assert_instr(vpermw))] |
6406 | pub unsafe fn _mm256_permutexvar_epi16(idx: __m256i, a: __m256i) -> __m256i { |
    transmute(vpermw256(a.as_i16x16(), idx.as_i16x16()))
6408 | } |
6409 | |
6410 | /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
6411 | /// |
6412 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi16&expand=4290) |
6413 | #[inline ] |
6414 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6415 | #[cfg_attr (test, assert_instr(vpermw))] |
6416 | pub unsafe fn _mm256_mask_permutexvar_epi16( |
6417 | src: __m256i, |
6418 | k: __mmask16, |
6419 | idx: __m256i, |
6420 | a: __m256i, |
6421 | ) -> __m256i { |
6422 | let permute: i16x16 = _mm256_permutexvar_epi16(idx, a).as_i16x16(); |
    transmute(simd_select_bitmask(k, permute, src.as_i16x16()))
6424 | } |
6425 | |
6426 | /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
6427 | /// |
6428 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi16&expand=4291) |
6429 | #[inline ] |
6430 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6431 | #[cfg_attr (test, assert_instr(vpermw))] |
6432 | pub unsafe fn _mm256_maskz_permutexvar_epi16(k: __mmask16, idx: __m256i, a: __m256i) -> __m256i { |
6433 | let permute: i16x16 = _mm256_permutexvar_epi16(idx, a).as_i16x16(); |
6434 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
    transmute(simd_select_bitmask(k, permute, zero))
6436 | } |
6437 | |
6438 | /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. |
6439 | /// |
6440 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutexvar_epi16&expand=4289) |
6441 | #[inline ] |
6442 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6443 | #[cfg_attr (test, assert_instr(vpermw))] |
6444 | pub unsafe fn _mm_permutexvar_epi16(idx: __m128i, a: __m128i) -> __m128i { |
    transmute(vpermw128(a.as_i16x8(), idx.as_i16x8()))
6446 | } |
6447 | |
6448 | /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
6449 | /// |
6450 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutexvar_epi16&expand=4287) |
6451 | #[inline ] |
6452 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6453 | #[cfg_attr (test, assert_instr(vpermw))] |
6454 | pub unsafe fn _mm_mask_permutexvar_epi16( |
6455 | src: __m128i, |
6456 | k: __mmask8, |
6457 | idx: __m128i, |
6458 | a: __m128i, |
6459 | ) -> __m128i { |
6460 | let permute: i16x8 = _mm_permutexvar_epi16(idx, a).as_i16x8(); |
    transmute(simd_select_bitmask(k, permute, src.as_i16x8()))
6462 | } |
6463 | |
6464 | /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
6465 | /// |
6466 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutexvar_epi16&expand=4288) |
6467 | #[inline ] |
6468 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6469 | #[cfg_attr (test, assert_instr(vpermw))] |
6470 | pub unsafe fn _mm_maskz_permutexvar_epi16(k: __mmask8, idx: __m128i, a: __m128i) -> __m128i { |
6471 | let permute: i16x8 = _mm_permutexvar_epi16(idx, a).as_i16x8(); |
6472 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
    transmute(simd_select_bitmask(k, permute, zero))
6474 | } |
6475 | |
6476 | /// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst. |
6477 | /// |
6478 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi16&expand=430) |
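///
/// A minimal sketch (illustrative values only), assuming `avx512bw` is available;
/// a set bit in `k` selects the lane from `b`, a cleared bit the lane from `a`:
///
/// ```ignore
/// let a = _mm512_set1_epi16(10);
/// let b = _mm512_set1_epi16(20);
/// let r = _mm512_mask_blend_epi16(0b01010101_01010101_01010101_01010101, a, b);
/// // even-numbered lanes hold 20 (from `b`), odd-numbered lanes hold 10 (from `a`)
/// ```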
6479 | #[inline ] |
6480 | #[target_feature (enable = "avx512bw" )] |
6481 | #[cfg_attr (test, assert_instr(vmovdqu16))] //should be vpblendmw |
6482 | pub unsafe fn _mm512_mask_blend_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { |
    transmute(simd_select_bitmask(k, b.as_i16x32(), a.as_i16x32()))
6484 | } |
6485 | |
6486 | /// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst. |
6487 | /// |
6488 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi16&expand=429) |
6489 | #[inline ] |
6490 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6491 | #[cfg_attr (test, assert_instr(vmovdqu16))] //should be vpblendmw |
6492 | pub unsafe fn _mm256_mask_blend_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { |
    transmute(simd_select_bitmask(k, b.as_i16x16(), a.as_i16x16()))
6494 | } |
6495 | |
6496 | /// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst. |
6497 | /// |
6498 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi16&expand=427) |
6499 | #[inline ] |
6500 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6501 | #[cfg_attr (test, assert_instr(vmovdqu16))] //should be vpblendmw |
6502 | pub unsafe fn _mm_mask_blend_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
    transmute(simd_select_bitmask(k, b.as_i16x8(), a.as_i16x8()))
6504 | } |
6505 | |
6506 | /// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst. |
6507 | /// |
6508 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi8&expand=441) |
6509 | #[inline ] |
6510 | #[target_feature (enable = "avx512bw" )] |
6511 | #[cfg_attr (test, assert_instr(vmovdqu8))] //should be vpblendmb |
6512 | pub unsafe fn _mm512_mask_blend_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { |
    transmute(simd_select_bitmask(k, b.as_i8x64(), a.as_i8x64()))
6514 | } |
6515 | |
6516 | /// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst. |
6517 | /// |
6518 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi8&expand=440) |
6519 | #[inline ] |
6520 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6521 | #[cfg_attr (test, assert_instr(vmovdqu8))] //should be vpblendmb |
6522 | pub unsafe fn _mm256_mask_blend_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { |
    transmute(simd_select_bitmask(k, b.as_i8x32(), a.as_i8x32()))
6524 | } |
6525 | |
6526 | /// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst. |
6527 | /// |
6528 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi8&expand=439) |
6529 | #[inline ] |
6530 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6531 | #[cfg_attr (test, assert_instr(vmovdqu8))] //should be vpblendmb |
6532 | pub unsafe fn _mm_mask_blend_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
    transmute(simd_select_bitmask(k, b.as_i8x16(), a.as_i8x16()))
6534 | } |
6535 | |
6536 | /// Broadcast the low packed 16-bit integer from a to all elements of dst. |
6537 | /// |
6538 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastw_epi16&expand=587) |
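///
/// A minimal sketch (illustrative values only), assuming `avx512bw` is available:
///
/// ```ignore
/// // element 0 of `a` is 42 (arguments to _mm_set_epi16 run from lane 7 down to lane 0)
/// let a = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 42);
/// let r = _mm512_broadcastw_epi16(a);
/// // all 32 16-bit lanes of `r` hold 42
/// ```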
6539 | #[inline ] |
6540 | #[target_feature (enable = "avx512bw" )] |
6541 | #[cfg_attr (test, assert_instr(vpbroadcastw))] |
6542 | pub unsafe fn _mm512_broadcastw_epi16(a: __m128i) -> __m512i { |
6543 | let a: i16x32 = _mm512_castsi128_si512(a).as_i16x32(); |
6544 | let ret: i16x32 = simd_shuffle!( |
6545 | a, |
6546 | a, |
6547 | [ |
6548 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
6549 | 0, 0, 0, |
6550 | ], |
6551 | ); |
    transmute(ret)
6553 | } |
6554 | |
6555 | /// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
6556 | /// |
6557 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastw_epi16&expand=588) |
6558 | #[inline ] |
6559 | #[target_feature (enable = "avx512bw" )] |
6560 | #[cfg_attr (test, assert_instr(vpbroadcastw))] |
6561 | pub unsafe fn _mm512_mask_broadcastw_epi16(src: __m512i, k: __mmask32, a: __m128i) -> __m512i { |
6562 | let broadcast: i16x32 = _mm512_broadcastw_epi16(a).as_i16x32(); |
    transmute(simd_select_bitmask(k, broadcast, src.as_i16x32()))
6564 | } |
6565 | |
6566 | /// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
6567 | /// |
6568 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastw_epi16&expand=589) |
6569 | #[inline ] |
6570 | #[target_feature (enable = "avx512bw" )] |
6571 | #[cfg_attr (test, assert_instr(vpbroadcastw))] |
6572 | pub unsafe fn _mm512_maskz_broadcastw_epi16(k: __mmask32, a: __m128i) -> __m512i { |
6573 | let broadcast: i16x32 = _mm512_broadcastw_epi16(a).as_i16x32(); |
6574 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
    transmute(simd_select_bitmask(k, broadcast, zero))
6576 | } |
6577 | |
6578 | /// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
6579 | /// |
6580 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastw_epi16&expand=585) |
6581 | #[inline ] |
6582 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6583 | #[cfg_attr (test, assert_instr(vpbroadcastw))] |
6584 | pub unsafe fn _mm256_mask_broadcastw_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i { |
6585 | let broadcast: i16x16 = _mm256_broadcastw_epi16(a).as_i16x16(); |
    transmute(simd_select_bitmask(k, broadcast, src.as_i16x16()))
6587 | } |
6588 | |
6589 | /// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
6590 | /// |
6591 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastw_epi16&expand=586) |
6592 | #[inline ] |
6593 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6594 | #[cfg_attr (test, assert_instr(vpbroadcastw))] |
6595 | pub unsafe fn _mm256_maskz_broadcastw_epi16(k: __mmask16, a: __m128i) -> __m256i { |
6596 | let broadcast: i16x16 = _mm256_broadcastw_epi16(a).as_i16x16(); |
6597 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
    transmute(simd_select_bitmask(k, broadcast, zero))
6599 | } |
6600 | |
6601 | /// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
6602 | /// |
6603 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastw_epi16&expand=582) |
6604 | #[inline ] |
6605 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6606 | #[cfg_attr (test, assert_instr(vpbroadcastw))] |
6607 | pub unsafe fn _mm_mask_broadcastw_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { |
6608 | let broadcast: i16x8 = _mm_broadcastw_epi16(a).as_i16x8(); |
    transmute(simd_select_bitmask(k, broadcast, src.as_i16x8()))
6610 | } |
6611 | |
6612 | /// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
6613 | /// |
6614 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastw_epi16&expand=583) |
6615 | #[inline ] |
6616 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6617 | #[cfg_attr (test, assert_instr(vpbroadcastw))] |
6618 | pub unsafe fn _mm_maskz_broadcastw_epi16(k: __mmask8, a: __m128i) -> __m128i { |
6619 | let broadcast: i16x8 = _mm_broadcastw_epi16(a).as_i16x8(); |
6620 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
    transmute(simd_select_bitmask(k, broadcast, zero))
6622 | } |
6623 | |
6624 | /// Broadcast the low packed 8-bit integer from a to all elements of dst. |
6625 | /// |
6626 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastb_epi8&expand=536) |
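///
/// A minimal sketch (illustrative values only), assuming `avx512bw` is available:
///
/// ```ignore
/// let a = _mm_set1_epi8(9);
/// let r = _mm512_broadcastb_epi8(a);
/// // all 64 byte lanes of `r` hold 9
/// ```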
6627 | #[inline ] |
6628 | #[target_feature (enable = "avx512bw" )] |
6629 | #[cfg_attr (test, assert_instr(vpbroadcastb))] |
6630 | pub unsafe fn _mm512_broadcastb_epi8(a: __m128i) -> __m512i { |
6631 | let a: i8x64 = _mm512_castsi128_si512(a).as_i8x64(); |
6632 | let ret: i8x64 = simd_shuffle!( |
6633 | a, |
6634 | a, |
6635 | [ |
6636 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
6637 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
6638 | 0, 0, 0, 0, 0, 0, |
6639 | ], |
6640 | ); |
    transmute(ret)
6642 | } |
6643 | |
6644 | /// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
6645 | /// |
6646 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastb_epi8&expand=537) |
6647 | #[inline ] |
6648 | #[target_feature (enable = "avx512bw" )] |
6649 | #[cfg_attr (test, assert_instr(vpbroadcastb))] |
6650 | pub unsafe fn _mm512_mask_broadcastb_epi8(src: __m512i, k: __mmask64, a: __m128i) -> __m512i { |
6651 | let broadcast: i8x64 = _mm512_broadcastb_epi8(a).as_i8x64(); |
    transmute(simd_select_bitmask(k, broadcast, src.as_i8x64()))
6653 | } |
6654 | |
6655 | /// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
6656 | /// |
6657 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastb_epi8&expand=538) |
6658 | #[inline ] |
6659 | #[target_feature (enable = "avx512bw" )] |
6660 | #[cfg_attr (test, assert_instr(vpbroadcastb))] |
6661 | pub unsafe fn _mm512_maskz_broadcastb_epi8(k: __mmask64, a: __m128i) -> __m512i { |
6662 | let broadcast: i8x64 = _mm512_broadcastb_epi8(a).as_i8x64(); |
6663 | let zero: i8x64 = _mm512_setzero_si512().as_i8x64(); |
    transmute(simd_select_bitmask(k, broadcast, zero))
6665 | } |
6666 | |
6667 | /// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
6668 | /// |
6669 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastb_epi8&expand=534) |
6670 | #[inline ] |
6671 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6672 | #[cfg_attr (test, assert_instr(vpbroadcastb))] |
6673 | pub unsafe fn _mm256_mask_broadcastb_epi8(src: __m256i, k: __mmask32, a: __m128i) -> __m256i { |
6674 | let broadcast: i8x32 = _mm256_broadcastb_epi8(a).as_i8x32(); |
    transmute(simd_select_bitmask(k, broadcast, src.as_i8x32()))
6676 | } |
6677 | |
6678 | /// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
6679 | /// |
6680 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastb_epi8&expand=535) |
6681 | #[inline ] |
6682 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6683 | #[cfg_attr (test, assert_instr(vpbroadcastb))] |
6684 | pub unsafe fn _mm256_maskz_broadcastb_epi8(k: __mmask32, a: __m128i) -> __m256i { |
6685 | let broadcast: i8x32 = _mm256_broadcastb_epi8(a).as_i8x32(); |
6686 | let zero: i8x32 = _mm256_setzero_si256().as_i8x32(); |
    transmute(simd_select_bitmask(k, broadcast, zero))
6688 | } |
6689 | |
6690 | /// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
6691 | /// |
6692 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastb_epi8&expand=531) |
6693 | #[inline ] |
6694 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6695 | #[cfg_attr (test, assert_instr(vpbroadcastb))] |
6696 | pub unsafe fn _mm_mask_broadcastb_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { |
6697 | let broadcast: i8x16 = _mm_broadcastb_epi8(a).as_i8x16(); |
    transmute(simd_select_bitmask(k, broadcast, src.as_i8x16()))
6699 | } |
6700 | |
6701 | /// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
6702 | /// |
6703 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastb_epi8&expand=532) |
6704 | #[inline ] |
6705 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6706 | #[cfg_attr (test, assert_instr(vpbroadcastb))] |
6707 | pub unsafe fn _mm_maskz_broadcastb_epi8(k: __mmask16, a: __m128i) -> __m128i { |
6708 | let broadcast: i8x16 = _mm_broadcastb_epi8(a).as_i8x16(); |
6709 | let zero: i8x16 = _mm_setzero_si128().as_i8x16(); |
    transmute(simd_select_bitmask(k, broadcast, zero))
6711 | } |
6712 | |
6713 | /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst. |
6714 | /// |
6715 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi16&expand=6012) |
6716 | #[inline ] |
6717 | #[target_feature (enable = "avx512bw" )] |
6718 | #[cfg_attr (test, assert_instr(vpunpckhwd))] |
6719 | pub unsafe fn _mm512_unpackhi_epi16(a: __m512i, b: __m512i) -> __m512i { |
6720 | let a: i16x32 = a.as_i16x32(); |
6721 | let b: i16x32 = b.as_i16x32(); |
6722 | #[rustfmt::skip] |
6723 | let r: i16x32 = simd_shuffle!( |
6724 | a, |
6725 | b, |
6726 | [ |
6727 | 4, 32 + 4, 5, 32 + 5, |
6728 | 6, 32 + 6, 7, 32 + 7, |
6729 | 12, 32 + 12, 13, 32 + 13, |
6730 | 14, 32 + 14, 15, 32 + 15, |
6731 | 20, 32 + 20, 21, 32 + 21, |
6732 | 22, 32 + 22, 23, 32 + 23, |
6733 | 28, 32 + 28, 29, 32 + 29, |
6734 | 30, 32 + 30, 31, 32 + 31, |
6735 | ], |
6736 | ); |
    transmute(r)
6738 | } |
6739 | |
6740 | /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
6741 | /// |
6742 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi16&expand=6010) |
6743 | #[inline ] |
6744 | #[target_feature (enable = "avx512bw" )] |
6745 | #[cfg_attr (test, assert_instr(vpunpckhwd))] |
6746 | pub unsafe fn _mm512_mask_unpackhi_epi16( |
6747 | src: __m512i, |
6748 | k: __mmask32, |
6749 | a: __m512i, |
6750 | b: __m512i, |
6751 | ) -> __m512i { |
6752 | let unpackhi: i16x32 = _mm512_unpackhi_epi16(a, b).as_i16x32(); |
    transmute(simd_select_bitmask(k, unpackhi, src.as_i16x32()))
6754 | } |
6755 | |
6756 | /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
6757 | /// |
6758 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi16&expand=6011) |
6759 | #[inline ] |
6760 | #[target_feature (enable = "avx512bw" )] |
6761 | #[cfg_attr (test, assert_instr(vpunpckhwd))] |
6762 | pub unsafe fn _mm512_maskz_unpackhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { |
6763 | let unpackhi: i16x32 = _mm512_unpackhi_epi16(a, b).as_i16x32(); |
6764 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
    transmute(simd_select_bitmask(k, unpackhi, zero))
6766 | } |
6767 | |
6768 | /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
6769 | /// |
6770 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi16&expand=6007) |
6771 | #[inline ] |
6772 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6773 | #[cfg_attr (test, assert_instr(vpunpckhwd))] |
6774 | pub unsafe fn _mm256_mask_unpackhi_epi16( |
6775 | src: __m256i, |
6776 | k: __mmask16, |
6777 | a: __m256i, |
6778 | b: __m256i, |
6779 | ) -> __m256i { |
6780 | let unpackhi: i16x16 = _mm256_unpackhi_epi16(a, b).as_i16x16(); |
    transmute(simd_select_bitmask(k, unpackhi, src.as_i16x16()))
6782 | } |
6783 | |
6784 | /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
6785 | /// |
6786 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi16&expand=6008) |
6787 | #[inline ] |
6788 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6789 | #[cfg_attr (test, assert_instr(vpunpckhwd))] |
6790 | pub unsafe fn _mm256_maskz_unpackhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { |
6791 | let unpackhi: i16x16 = _mm256_unpackhi_epi16(a, b).as_i16x16(); |
6792 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
    transmute(simd_select_bitmask(k, unpackhi, zero))
6794 | } |
6795 | |
6796 | /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
6797 | /// |
6798 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi16&expand=6004) |
6799 | #[inline ] |
6800 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6801 | #[cfg_attr (test, assert_instr(vpunpckhwd))] |
6802 | pub unsafe fn _mm_mask_unpackhi_epi16( |
6803 | src: __m128i, |
6804 | k: __mmask8, |
6805 | a: __m128i, |
6806 | b: __m128i, |
6807 | ) -> __m128i { |
6808 | let unpackhi: i16x8 = _mm_unpackhi_epi16(a, b).as_i16x8(); |
    transmute(simd_select_bitmask(k, unpackhi, src.as_i16x8()))
6810 | } |
6811 | |
6812 | /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
6813 | /// |
6814 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi16&expand=6005) |
6815 | #[inline ] |
6816 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6817 | #[cfg_attr (test, assert_instr(vpunpckhwd))] |
6818 | pub unsafe fn _mm_maskz_unpackhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
6819 | let unpackhi: i16x8 = _mm_unpackhi_epi16(a, b).as_i16x8(); |
6820 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
    transmute(simd_select_bitmask(k, unpackhi, zero))
6822 | } |
6823 | |
6824 | /// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst. |
6825 | /// |
6826 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi8&expand=6039) |
6827 | #[inline ] |
6828 | #[target_feature (enable = "avx512bw" )] |
6829 | #[cfg_attr (test, assert_instr(vpunpckhbw))] |
6830 | pub unsafe fn _mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i { |
6831 | let a = a.as_i8x64(); |
6832 | let b = b.as_i8x64(); |
6833 | #[rustfmt::skip] |
6834 | let r: i8x64 = simd_shuffle!( |
6835 | a, |
6836 | b, |
6837 | [ |
6838 | 8, 64+8, 9, 64+9, |
6839 | 10, 64+10, 11, 64+11, |
6840 | 12, 64+12, 13, 64+13, |
6841 | 14, 64+14, 15, 64+15, |
6842 | 24, 64+24, 25, 64+25, |
6843 | 26, 64+26, 27, 64+27, |
6844 | 28, 64+28, 29, 64+29, |
6845 | 30, 64+30, 31, 64+31, |
6846 | 40, 64+40, 41, 64+41, |
6847 | 42, 64+42, 43, 64+43, |
6848 | 44, 64+44, 45, 64+45, |
6849 | 46, 64+46, 47, 64+47, |
6850 | 56, 64+56, 57, 64+57, |
6851 | 58, 64+58, 59, 64+59, |
6852 | 60, 64+60, 61, 64+61, |
6853 | 62, 64+62, 63, 64+63, |
6854 | ], |
6855 | ); |
6856 | transmute(r) |
6857 | } |
6858 | |
6859 | /// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
6860 | /// |
6861 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi8&expand=6037) |
6862 | #[inline ] |
6863 | #[target_feature (enable = "avx512bw" )] |
6864 | #[cfg_attr (test, assert_instr(vpunpckhbw))] |
6865 | pub unsafe fn _mm512_mask_unpackhi_epi8( |
6866 | src: __m512i, |
6867 | k: __mmask64, |
6868 | a: __m512i, |
6869 | b: __m512i, |
6870 | ) -> __m512i { |
6871 | let unpackhi: i8x64 = _mm512_unpackhi_epi8(a, b).as_i8x64(); |
    transmute(simd_select_bitmask(k, unpackhi, src.as_i8x64()))
6873 | } |
6874 | |
6875 | /// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
6876 | /// |
6877 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi8&expand=6038) |
6878 | #[inline ] |
6879 | #[target_feature (enable = "avx512bw" )] |
6880 | #[cfg_attr (test, assert_instr(vpunpckhbw))] |
6881 | pub unsafe fn _mm512_maskz_unpackhi_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { |
6882 | let unpackhi: i8x64 = _mm512_unpackhi_epi8(a, b).as_i8x64(); |
6883 | let zero: i8x64 = _mm512_setzero_si512().as_i8x64(); |
    transmute(simd_select_bitmask(k, unpackhi, zero))
6885 | } |
6886 | |
6887 | /// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
6888 | /// |
6889 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi8&expand=6034) |
6890 | #[inline ] |
6891 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6892 | #[cfg_attr (test, assert_instr(vpunpckhbw))] |
6893 | pub unsafe fn _mm256_mask_unpackhi_epi8( |
6894 | src: __m256i, |
6895 | k: __mmask32, |
6896 | a: __m256i, |
6897 | b: __m256i, |
6898 | ) -> __m256i { |
6899 | let unpackhi: i8x32 = _mm256_unpackhi_epi8(a, b).as_i8x32(); |
    transmute(simd_select_bitmask(k, unpackhi, src.as_i8x32()))
6901 | } |
6902 | |
6903 | /// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
6904 | /// |
6905 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi8&expand=6035) |
6906 | #[inline ] |
6907 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6908 | #[cfg_attr (test, assert_instr(vpunpckhbw))] |
6909 | pub unsafe fn _mm256_maskz_unpackhi_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { |
6910 | let unpackhi: i8x32 = _mm256_unpackhi_epi8(a, b).as_i8x32(); |
6911 | let zero: i8x32 = _mm256_setzero_si256().as_i8x32(); |
    transmute(simd_select_bitmask(k, unpackhi, zero))
6913 | } |
6914 | |
6915 | /// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
6916 | /// |
6917 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi8&expand=6031) |
6918 | #[inline ] |
6919 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6920 | #[cfg_attr (test, assert_instr(vpunpckhbw))] |
6921 | pub unsafe fn _mm_mask_unpackhi_epi8( |
6922 | src: __m128i, |
6923 | k: __mmask16, |
6924 | a: __m128i, |
6925 | b: __m128i, |
6926 | ) -> __m128i { |
6927 | let unpackhi: i8x16 = _mm_unpackhi_epi8(a, b).as_i8x16(); |
    transmute(simd_select_bitmask(k, unpackhi, src.as_i8x16()))
6929 | } |
6930 | |
6931 | /// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
6932 | /// |
6933 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi8&expand=6032) |
6934 | #[inline ] |
6935 | #[target_feature (enable = "avx512bw,avx512vl" )] |
6936 | #[cfg_attr (test, assert_instr(vpunpckhbw))] |
6937 | pub unsafe fn _mm_maskz_unpackhi_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
6938 | let unpackhi: i8x16 = _mm_unpackhi_epi8(a, b).as_i8x16(); |
6939 | let zero: i8x16 = _mm_setzero_si128().as_i8x16(); |
    transmute(simd_select_bitmask(k, unpackhi, zero))
6941 | } |
6942 | |
6943 | /// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst. |
6944 | /// |
6945 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi16&expand=6069) |
6946 | #[inline ] |
6947 | #[target_feature (enable = "avx512bw" )] |
6948 | #[cfg_attr (test, assert_instr(vpunpcklwd))] |
6949 | pub unsafe fn _mm512_unpacklo_epi16(a: __m512i, b: __m512i) -> __m512i { |
6950 | let a: i16x32 = a.as_i16x32(); |
6951 | let b: i16x32 = b.as_i16x32(); |
6952 | #[rustfmt::skip] |
6953 | let r: i16x32 = simd_shuffle!( |
6954 | a, |
6955 | b, |
6956 | [ |
6957 | 0, 32+0, 1, 32+1, |
6958 | 2, 32+2, 3, 32+3, |
6959 | 8, 32+8, 9, 32+9, |
6960 | 10, 32+10, 11, 32+11, |
6961 | 16, 32+16, 17, 32+17, |
6962 | 18, 32+18, 19, 32+19, |
6963 | 24, 32+24, 25, 32+25, |
6964 | 26, 32+26, 27, 32+27 |
6965 | ], |
6966 | ); |
    transmute(r)
6968 | } |
6969 | |
6970 | /// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
6971 | /// |
6972 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi16&expand=6067) |
6973 | #[inline ] |
6974 | #[target_feature (enable = "avx512bw" )] |
6975 | #[cfg_attr (test, assert_instr(vpunpcklwd))] |
6976 | pub unsafe fn _mm512_mask_unpacklo_epi16( |
6977 | src: __m512i, |
6978 | k: __mmask32, |
6979 | a: __m512i, |
6980 | b: __m512i, |
6981 | ) -> __m512i { |
6982 | let unpacklo: i16x32 = _mm512_unpacklo_epi16(a, b).as_i16x32(); |
    transmute(simd_select_bitmask(k, unpacklo, src.as_i16x32()))
6984 | } |
6985 | |
6986 | /// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
6987 | /// |
6988 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi16&expand=6068) |
6989 | #[inline ] |
6990 | #[target_feature (enable = "avx512bw" )] |
6991 | #[cfg_attr (test, assert_instr(vpunpcklwd))] |
6992 | pub unsafe fn _mm512_maskz_unpacklo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { |
6993 | let unpacklo: i16x32 = _mm512_unpacklo_epi16(a, b).as_i16x32(); |
6994 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
    transmute(simd_select_bitmask(k, unpacklo, zero))
6996 | } |
6997 | |
6998 | /// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
6999 | /// |
7000 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi16&expand=6064) |
7001 | #[inline ] |
7002 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7003 | #[cfg_attr (test, assert_instr(vpunpcklwd))] |
7004 | pub unsafe fn _mm256_mask_unpacklo_epi16( |
7005 | src: __m256i, |
7006 | k: __mmask16, |
7007 | a: __m256i, |
7008 | b: __m256i, |
7009 | ) -> __m256i { |
7010 | let unpacklo: i16x16 = _mm256_unpacklo_epi16(a, b).as_i16x16(); |
    transmute(simd_select_bitmask(k, unpacklo, src.as_i16x16()))
7012 | } |
7013 | |
7014 | /// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
7015 | /// |
7016 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi16&expand=6065) |
7017 | #[inline ] |
7018 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7019 | #[cfg_attr (test, assert_instr(vpunpcklwd))] |
7020 | pub unsafe fn _mm256_maskz_unpacklo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { |
7021 | let unpacklo: i16x16 = _mm256_unpacklo_epi16(a, b).as_i16x16(); |
7022 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
    transmute(simd_select_bitmask(k, unpacklo, zero))
7024 | } |
7025 | |
7026 | /// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7027 | /// |
7028 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi16&expand=6061) |
7029 | #[inline ] |
7030 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7031 | #[cfg_attr (test, assert_instr(vpunpcklwd))] |
7032 | pub unsafe fn _mm_mask_unpacklo_epi16( |
7033 | src: __m128i, |
7034 | k: __mmask8, |
7035 | a: __m128i, |
7036 | b: __m128i, |
7037 | ) -> __m128i { |
7038 | let unpacklo: i16x8 = _mm_unpacklo_epi16(a, b).as_i16x8(); |
    transmute(simd_select_bitmask(k, unpacklo, src.as_i16x8()))
7040 | } |
7041 | |
7042 | /// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
7043 | /// |
7044 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi16&expand=6062) |
7045 | #[inline ] |
7046 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7047 | #[cfg_attr (test, assert_instr(vpunpcklwd))] |
7048 | pub unsafe fn _mm_maskz_unpacklo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
7049 | let unpacklo: i16x8 = _mm_unpacklo_epi16(a, b).as_i16x8(); |
7050 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
    transmute(simd_select_bitmask(k, unpacklo, zero))
7052 | } |
7053 | |
7054 | /// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst. |
7055 | /// |
7056 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi8&expand=6096) |
7057 | #[inline ] |
7058 | #[target_feature (enable = "avx512bw" )] |
7059 | #[cfg_attr (test, assert_instr(vpunpcklbw))] |
7060 | pub unsafe fn _mm512_unpacklo_epi8(a: __m512i, b: __m512i) -> __m512i { |
7061 | let a = a.as_i8x64(); |
7062 | let b = b.as_i8x64(); |
7063 | #[rustfmt::skip] |
7064 | let r: i8x64 = simd_shuffle!( |
7065 | a, |
7066 | b, |
7067 | [ |
7068 | 0, 64+0, 1, 64+1, |
7069 | 2, 64+2, 3, 64+3, |
7070 | 4, 64+4, 5, 64+5, |
7071 | 6, 64+6, 7, 64+7, |
7072 | 16, 64+16, 17, 64+17, |
7073 | 18, 64+18, 19, 64+19, |
7074 | 20, 64+20, 21, 64+21, |
7075 | 22, 64+22, 23, 64+23, |
7076 | 32, 64+32, 33, 64+33, |
7077 | 34, 64+34, 35, 64+35, |
7078 | 36, 64+36, 37, 64+37, |
7079 | 38, 64+38, 39, 64+39, |
7080 | 48, 64+48, 49, 64+49, |
7081 | 50, 64+50, 51, 64+51, |
7082 | 52, 64+52, 53, 64+53, |
7083 | 54, 64+54, 55, 64+55, |
7084 | ], |
7085 | ); |
7086 | transmute(r) |
7087 | } |
7088 | |
7089 | /// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7090 | /// |
7091 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi8&expand=6094) |
7092 | #[inline ] |
7093 | #[target_feature (enable = "avx512bw" )] |
7094 | #[cfg_attr (test, assert_instr(vpunpcklbw))] |
7095 | pub unsafe fn _mm512_mask_unpacklo_epi8( |
7096 | src: __m512i, |
7097 | k: __mmask64, |
7098 | a: __m512i, |
7099 | b: __m512i, |
7100 | ) -> __m512i { |
7101 | let unpacklo: i8x64 = _mm512_unpacklo_epi8(a, b).as_i8x64(); |
    transmute(simd_select_bitmask(k, unpacklo, src.as_i8x64()))
7103 | } |
7104 | |
7105 | /// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
7106 | /// |
7107 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi8&expand=6095) |
7108 | #[inline ] |
7109 | #[target_feature (enable = "avx512bw" )] |
7110 | #[cfg_attr (test, assert_instr(vpunpcklbw))] |
7111 | pub unsafe fn _mm512_maskz_unpacklo_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { |
7112 | let unpacklo: i8x64 = _mm512_unpacklo_epi8(a, b).as_i8x64(); |
7113 | let zero: i8x64 = _mm512_setzero_si512().as_i8x64(); |
    transmute(simd_select_bitmask(k, unpacklo, zero))
7115 | } |
7116 | |
7117 | /// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7118 | /// |
7119 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi8&expand=6091) |
7120 | #[inline ] |
7121 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7122 | #[cfg_attr (test, assert_instr(vpunpcklbw))] |
7123 | pub unsafe fn _mm256_mask_unpacklo_epi8( |
7124 | src: __m256i, |
7125 | k: __mmask32, |
7126 | a: __m256i, |
7127 | b: __m256i, |
7128 | ) -> __m256i { |
7129 | let unpacklo: i8x32 = _mm256_unpacklo_epi8(a, b).as_i8x32(); |
    transmute(simd_select_bitmask(k, unpacklo, src.as_i8x32()))
7131 | } |
7132 | |
7133 | /// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
7134 | /// |
7135 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi8&expand=6092) |
7136 | #[inline ] |
7137 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7138 | #[cfg_attr (test, assert_instr(vpunpcklbw))] |
7139 | pub unsafe fn _mm256_maskz_unpacklo_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { |
7140 | let unpacklo: i8x32 = _mm256_unpacklo_epi8(a, b).as_i8x32(); |
7141 | let zero: i8x32 = _mm256_setzero_si256().as_i8x32(); |
    transmute(simd_select_bitmask(k, unpacklo, zero))
7143 | } |
7144 | |
7145 | /// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7146 | /// |
7147 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi8&expand=6088) |
7148 | #[inline ] |
7149 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7150 | #[cfg_attr (test, assert_instr(vpunpcklbw))] |
7151 | pub unsafe fn _mm_mask_unpacklo_epi8( |
7152 | src: __m128i, |
7153 | k: __mmask16, |
7154 | a: __m128i, |
7155 | b: __m128i, |
7156 | ) -> __m128i { |
7157 | let unpacklo: i8x16 = _mm_unpacklo_epi8(a, b).as_i8x16(); |
    transmute(simd_select_bitmask(k, unpacklo, src.as_i8x16()))
7159 | } |
7160 | |
7161 | /// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
7162 | /// |
7163 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi8&expand=6089) |
7164 | #[inline ] |
7165 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7166 | #[cfg_attr (test, assert_instr(vpunpcklbw))] |
7167 | pub unsafe fn _mm_maskz_unpacklo_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
7168 | let unpacklo: i8x16 = _mm_unpacklo_epi8(a, b).as_i8x16(); |
7169 | let zero: i8x16 = _mm_setzero_si128().as_i8x16(); |
    transmute(simd_select_bitmask(k, unpacklo, zero))
7171 | } |
7172 | |
7173 | /// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7174 | /// |
7175 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi16&expand=3795) |
7176 | #[inline ] |
7177 | #[target_feature (enable = "avx512bw" )] |
7178 | #[cfg_attr (test, assert_instr(vmovdqu16))] |
7179 | pub unsafe fn _mm512_mask_mov_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i { |
7180 | let mov: i16x32 = a.as_i16x32(); |
    transmute(simd_select_bitmask(k, mov, src.as_i16x32()))
7182 | } |
7183 | |
7184 | /// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
7185 | /// |
7186 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi16&expand=3796) |
7187 | #[inline ] |
7188 | #[target_feature (enable = "avx512bw" )] |
7189 | #[cfg_attr (test, assert_instr(vmovdqu16))] |
7190 | pub unsafe fn _mm512_maskz_mov_epi16(k: __mmask32, a: __m512i) -> __m512i { |
7191 | let mov: i16x32 = a.as_i16x32(); |
7192 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
    transmute(simd_select_bitmask(k, mov, zero))
7194 | } |
7195 | |
7196 | /// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7197 | /// |
7198 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi16&expand=3793) |
7199 | #[inline ] |
7200 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7201 | #[cfg_attr (test, assert_instr(vmovdqu16))] |
7202 | pub unsafe fn _mm256_mask_mov_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i { |
7203 | let mov: i16x16 = a.as_i16x16(); |
    transmute(simd_select_bitmask(k, mov, src.as_i16x16()))
7205 | } |
7206 | |
7207 | /// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
7208 | /// |
7209 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi16&expand=3794) |
7210 | #[inline ] |
7211 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7212 | #[cfg_attr (test, assert_instr(vmovdqu16))] |
7213 | pub unsafe fn _mm256_maskz_mov_epi16(k: __mmask16, a: __m256i) -> __m256i { |
7214 | let mov: i16x16 = a.as_i16x16(); |
7215 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
    transmute(simd_select_bitmask(k, mov, zero))
7217 | } |
7218 | |
7219 | /// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7220 | /// |
7221 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi16&expand=3791) |
7222 | #[inline ] |
7223 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7224 | #[cfg_attr (test, assert_instr(vmovdqu16))] |
7225 | pub unsafe fn _mm_mask_mov_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { |
7226 | let mov: i16x8 = a.as_i16x8(); |
    transmute(simd_select_bitmask(k, mov, src.as_i16x8()))
7228 | } |
7229 | |
7230 | /// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
7231 | /// |
7232 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi16&expand=3792) |
7233 | #[inline ] |
7234 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7235 | #[cfg_attr (test, assert_instr(vmovdqu16))] |
7236 | pub unsafe fn _mm_maskz_mov_epi16(k: __mmask8, a: __m128i) -> __m128i { |
7237 | let mov: i16x8 = a.as_i16x8(); |
7238 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
    transmute(simd_select_bitmask(k, mov, zero))
7240 | } |
7241 | |
7242 | /// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7243 | /// |
7244 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi8&expand=3813) |
7245 | #[inline ] |
7246 | #[target_feature (enable = "avx512bw" )] |
7247 | #[cfg_attr (test, assert_instr(vmovdqu8))] |
7248 | pub unsafe fn _mm512_mask_mov_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i { |
7249 | let mov: i8x64 = a.as_i8x64(); |
    transmute(simd_select_bitmask(k, mov, src.as_i8x64()))
7251 | } |
7252 | |
7253 | /// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
7254 | /// |
7255 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi8&expand=3814) |
7256 | #[inline ] |
7257 | #[target_feature (enable = "avx512bw" )] |
7258 | #[cfg_attr (test, assert_instr(vmovdqu8))] |
7259 | pub unsafe fn _mm512_maskz_mov_epi8(k: __mmask64, a: __m512i) -> __m512i { |
7260 | let mov: i8x64 = a.as_i8x64(); |
7261 | let zero: i8x64 = _mm512_setzero_si512().as_i8x64(); |
    transmute(simd_select_bitmask(k, mov, zero))
7263 | } |
7264 | |
7265 | /// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7266 | /// |
7267 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi8&expand=3811) |
7268 | #[inline ] |
7269 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7270 | #[cfg_attr (test, assert_instr(vmovdqu8))] |
7271 | pub unsafe fn _mm256_mask_mov_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i { |
7272 | let mov: i8x32 = a.as_i8x32(); |
    transmute(simd_select_bitmask(k, mov, src.as_i8x32()))
7274 | } |
7275 | |
7276 | /// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
7277 | /// |
7278 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi8&expand=3812) |
7279 | #[inline ] |
7280 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7281 | #[cfg_attr (test, assert_instr(vmovdqu8))] |
7282 | pub unsafe fn _mm256_maskz_mov_epi8(k: __mmask32, a: __m256i) -> __m256i { |
7283 | let mov: i8x32 = a.as_i8x32(); |
7284 | let zero: i8x32 = _mm256_setzero_si256().as_i8x32(); |
    transmute(simd_select_bitmask(k, mov, zero))
7286 | } |
7287 | |
7288 | /// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7289 | /// |
7290 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi8&expand=3809) |
7291 | #[inline ] |
7292 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7293 | #[cfg_attr (test, assert_instr(vmovdqu8))] |
7294 | pub unsafe fn _mm_mask_mov_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { |
7295 | let mov: i8x16 = a.as_i8x16(); |
    transmute(simd_select_bitmask(k, mov, src.as_i8x16()))
7297 | } |
7298 | |
7299 | /// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
7300 | /// |
7301 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi8&expand=3810) |
7302 | #[inline ] |
7303 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7304 | #[cfg_attr (test, assert_instr(vmovdqu8))] |
7305 | pub unsafe fn _mm_maskz_mov_epi8(k: __mmask16, a: __m128i) -> __m128i { |
7306 | let mov: i8x16 = a.as_i8x16(); |
7307 | let zero: i8x16 = _mm_setzero_si128().as_i8x16(); |
    transmute(simd_select_bitmask(k, mov, zero))
7309 | } |
7310 | |
7311 | /// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7312 | /// |
7313 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi16&expand=4942) |
7314 | #[inline ] |
7315 | #[target_feature (enable = "avx512bw" )] |
7316 | #[cfg_attr (test, assert_instr(vpbroadcastw))] |
7317 | pub unsafe fn _mm512_mask_set1_epi16(src: __m512i, k: __mmask32, a: i16) -> __m512i { |
7318 | let r: i16x32 = _mm512_set1_epi16(a).as_i16x32(); |
    transmute(simd_select_bitmask(k, r, src.as_i16x32()))
7320 | } |
7321 | |
/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7323 | /// |
7324 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi16&expand=4943) |
7325 | #[inline ] |
7326 | #[target_feature (enable = "avx512bw" )] |
7327 | #[cfg_attr (test, assert_instr(vpbroadcastw))] |
7328 | pub unsafe fn _mm512_maskz_set1_epi16(k: __mmask32, a: i16) -> __m512i { |
7329 | let r: i16x32 = _mm512_set1_epi16(a).as_i16x32(); |
7330 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
    transmute(simd_select_bitmask(k, r, zero))
7332 | } |
7333 | |
7334 | /// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7335 | /// |
7336 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi16&expand=4939) |
7337 | #[inline ] |
7338 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7339 | #[cfg_attr (test, assert_instr(vpbroadcastw))] |
7340 | pub unsafe fn _mm256_mask_set1_epi16(src: __m256i, k: __mmask16, a: i16) -> __m256i { |
7341 | let r: i16x16 = _mm256_set1_epi16(a).as_i16x16(); |
    transmute(simd_select_bitmask(k, r, src.as_i16x16()))
7343 | } |
7344 | |
/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7346 | /// |
7347 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi16&expand=4940) |
7348 | #[inline ] |
7349 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7350 | #[cfg_attr (test, assert_instr(vpbroadcastw))] |
7351 | pub unsafe fn _mm256_maskz_set1_epi16(k: __mmask16, a: i16) -> __m256i { |
7352 | let r: i16x16 = _mm256_set1_epi16(a).as_i16x16(); |
7353 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
    transmute(simd_select_bitmask(k, r, zero))
7355 | } |
7356 | |
7357 | /// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7358 | /// |
7359 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi16&expand=4936) |
7360 | #[inline ] |
7361 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7362 | #[cfg_attr (test, assert_instr(vpbroadcastw))] |
7363 | pub unsafe fn _mm_mask_set1_epi16(src: __m128i, k: __mmask8, a: i16) -> __m128i { |
7364 | let r: i16x8 = _mm_set1_epi16(a).as_i16x8(); |
    transmute(simd_select_bitmask(k, r, src.as_i16x8()))
7366 | } |
7367 | |
/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7369 | /// |
7370 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi16&expand=4937) |
7371 | #[inline ] |
7372 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7373 | #[cfg_attr (test, assert_instr(vpbroadcastw))] |
7374 | pub unsafe fn _mm_maskz_set1_epi16(k: __mmask8, a: i16) -> __m128i { |
7375 | let r: i16x8 = _mm_set1_epi16(a).as_i16x8(); |
7376 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
    transmute(simd_select_bitmask(k, r, zero))
7378 | } |
7379 | |
7380 | /// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7381 | /// |
7382 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi8&expand=4970) |
7383 | #[inline ] |
7384 | #[target_feature (enable = "avx512bw" )] |
7385 | #[cfg_attr (test, assert_instr(vpbroadcast))] |
7386 | pub unsafe fn _mm512_mask_set1_epi8(src: __m512i, k: __mmask64, a: i8) -> __m512i { |
7387 | let r: i8x64 = _mm512_set1_epi8(a).as_i8x64(); |
    transmute(simd_select_bitmask(k, r, src.as_i8x64()))
7389 | } |
7390 | |
7391 | /// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
7392 | /// |
7393 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi8&expand=4971) |
7394 | #[inline ] |
7395 | #[target_feature (enable = "avx512bw" )] |
7396 | #[cfg_attr (test, assert_instr(vpbroadcast))] |
7397 | pub unsafe fn _mm512_maskz_set1_epi8(k: __mmask64, a: i8) -> __m512i { |
7398 | let r: i8x64 = _mm512_set1_epi8(a).as_i8x64(); |
7399 | let zero: i8x64 = _mm512_setzero_si512().as_i8x64(); |
    transmute(simd_select_bitmask(k, r, zero))
7401 | } |
7402 | |
7403 | /// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7404 | /// |
7405 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi8&expand=4967) |
7406 | #[inline ] |
7407 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7408 | #[cfg_attr (test, assert_instr(vpbroadcast))] |
7409 | pub unsafe fn _mm256_mask_set1_epi8(src: __m256i, k: __mmask32, a: i8) -> __m256i { |
7410 | let r: i8x32 = _mm256_set1_epi8(a).as_i8x32(); |
    transmute(simd_select_bitmask(k, r, src.as_i8x32()))
7412 | } |
7413 | |
7414 | /// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
7415 | /// |
7416 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi8&expand=4968) |
7417 | #[inline ] |
7418 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7419 | #[cfg_attr (test, assert_instr(vpbroadcast))] |
7420 | pub unsafe fn _mm256_maskz_set1_epi8(k: __mmask32, a: i8) -> __m256i { |
7421 | let r: i8x32 = _mm256_set1_epi8(a).as_i8x32(); |
7422 | let zero: i8x32 = _mm256_setzero_si256().as_i8x32(); |
    transmute(simd_select_bitmask(k, r, zero))
7424 | } |
7425 | |
7426 | /// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7427 | /// |
7428 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi8&expand=4964) |
7429 | #[inline ] |
7430 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7431 | #[cfg_attr (test, assert_instr(vpbroadcast))] |
7432 | pub unsafe fn _mm_mask_set1_epi8(src: __m128i, k: __mmask16, a: i8) -> __m128i { |
7433 | let r: i8x16 = _mm_set1_epi8(a).as_i8x16(); |
    transmute(simd_select_bitmask(k, r, src.as_i8x16()))
7435 | } |
7436 | |
7437 | /// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
7438 | /// |
7439 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi8&expand=4965) |
7440 | #[inline ] |
7441 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7442 | #[cfg_attr (test, assert_instr(vpbroadcast))] |
7443 | pub unsafe fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i { |
7444 | let r: i8x16 = _mm_set1_epi8(a).as_i8x16(); |
7445 | let zero: i8x16 = _mm_setzero_si128().as_i8x16(); |
    transmute(simd_select_bitmask(k, r, zero))
7447 | } |
7448 | |
7449 | /// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst. |
7450 | /// |
7451 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflelo_epi16&expand=5221) |
7452 | #[inline ] |
7453 | #[target_feature (enable = "avx512bw" )] |
7454 | #[cfg_attr (test, assert_instr(vpshuflw, IMM8 = 0))] |
7455 | #[rustc_legacy_const_generics (1)] |
7456 | pub unsafe fn _mm512_shufflelo_epi16<const IMM8: i32>(a: __m512i) -> __m512i { |
7457 | static_assert_uimm_bits!(IMM8, 8); |
7458 | let a = a.as_i16x32(); |
7459 | let r: i16x32 = simd_shuffle!( |
7460 | a, |
7461 | a, |
7462 | [ |
7463 | IMM8 as u32 & 0b11, |
7464 | (IMM8 as u32 >> 2) & 0b11, |
7465 | (IMM8 as u32 >> 4) & 0b11, |
7466 | (IMM8 as u32 >> 6) & 0b11, |
7467 | 4, |
7468 | 5, |
7469 | 6, |
7470 | 7, |
7471 | (IMM8 as u32 & 0b11) + 8, |
7472 | ((IMM8 as u32 >> 2) & 0b11) + 8, |
7473 | ((IMM8 as u32 >> 4) & 0b11) + 8, |
7474 | ((IMM8 as u32 >> 6) & 0b11) + 8, |
7475 | 12, |
7476 | 13, |
7477 | 14, |
7478 | 15, |
7479 | (IMM8 as u32 & 0b11) + 16, |
7480 | ((IMM8 as u32 >> 2) & 0b11) + 16, |
7481 | ((IMM8 as u32 >> 4) & 0b11) + 16, |
7482 | ((IMM8 as u32 >> 6) & 0b11) + 16, |
7483 | 20, |
7484 | 21, |
7485 | 22, |
7486 | 23, |
7487 | (IMM8 as u32 & 0b11) + 24, |
7488 | ((IMM8 as u32 >> 2) & 0b11) + 24, |
7489 | ((IMM8 as u32 >> 4) & 0b11) + 24, |
7490 | ((IMM8 as u32 >> 6) & 0b11) + 24, |
7491 | 28, |
7492 | 29, |
7493 | 30, |
7494 | 31, |
7495 | ], |
7496 | ); |
7497 | transmute(r) |
7498 | } |
7499 | |
7500 | /// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7501 | /// |
7502 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflelo_epi16&expand=5219) |
7503 | #[inline ] |
7504 | #[target_feature (enable = "avx512bw" )] |
7505 | #[cfg_attr (test, assert_instr(vpshuflw, IMM8 = 0))] |
7506 | #[rustc_legacy_const_generics (3)] |
7507 | pub unsafe fn _mm512_mask_shufflelo_epi16<const IMM8: i32>( |
7508 | src: __m512i, |
7509 | k: __mmask32, |
7510 | a: __m512i, |
7511 | ) -> __m512i { |
7512 | static_assert_uimm_bits!(IMM8, 8); |
7513 | let r: __m512i = _mm512_shufflelo_epi16::<IMM8>(a); |
    transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
7515 | } |
7516 | |
7517 | /// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
7518 | /// |
7519 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflelo_epi16&expand=5220) |
7520 | #[inline ] |
7521 | #[target_feature (enable = "avx512bw" )] |
7522 | #[cfg_attr (test, assert_instr(vpshuflw, IMM8 = 0))] |
7523 | #[rustc_legacy_const_generics (2)] |
7524 | pub unsafe fn _mm512_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i { |
7525 | static_assert_uimm_bits!(IMM8, 8); |
7526 | let r: __m512i = _mm512_shufflelo_epi16::<IMM8>(a); |
7527 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
    transmute(simd_select_bitmask(k, r.as_i16x32(), zero))
7529 | } |
7530 | |
7531 | /// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7532 | /// |
7533 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflelo_epi16&expand=5216) |
7534 | #[inline ] |
7535 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7536 | #[cfg_attr (test, assert_instr(vpshuflw, IMM8 = 5))] |
7537 | #[rustc_legacy_const_generics (3)] |
7538 | pub unsafe fn _mm256_mask_shufflelo_epi16<const IMM8: i32>( |
7539 | src: __m256i, |
7540 | k: __mmask16, |
7541 | a: __m256i, |
7542 | ) -> __m256i { |
7543 | static_assert_uimm_bits!(IMM8, 8); |
7544 | let shuffle: __m256i = _mm256_shufflelo_epi16::<IMM8>(a); |
    transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
7546 | } |
7547 | |
/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7549 | /// |
7550 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflelo_epi16&expand=5217) |
7551 | #[inline ] |
7552 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7553 | #[cfg_attr (test, assert_instr(vpshuflw, IMM8 = 5))] |
7554 | #[rustc_legacy_const_generics (2)] |
7555 | pub unsafe fn _mm256_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i { |
7556 | static_assert_uimm_bits!(IMM8, 8); |
7557 | let shuffle: __m256i = _mm256_shufflelo_epi16::<IMM8>(a); |
7558 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
    transmute(simd_select_bitmask(k, shuffle.as_i16x16(), zero))
7560 | } |
7561 | |
7562 | /// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7563 | /// |
7564 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflelo_epi16&expand=5213) |
7565 | #[inline ] |
7566 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7567 | #[cfg_attr (test, assert_instr(vpshuflw, IMM8 = 5))] |
7568 | #[rustc_legacy_const_generics (3)] |
7569 | pub unsafe fn _mm_mask_shufflelo_epi16<const IMM8: i32>( |
7570 | src: __m128i, |
7571 | k: __mmask8, |
7572 | a: __m128i, |
7573 | ) -> __m128i { |
7574 | static_assert_uimm_bits!(IMM8, 8); |
7575 | let shuffle: __m128i = _mm_shufflelo_epi16::<IMM8>(a); |
    transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
7577 | } |
7578 | |
/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7580 | /// |
7581 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflelo_epi16&expand=5214) |
7582 | #[inline ] |
7583 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7584 | #[cfg_attr (test, assert_instr(vpshuflw, IMM8 = 5))] |
7585 | #[rustc_legacy_const_generics (2)] |
7586 | pub unsafe fn _mm_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i { |
7587 | static_assert_uimm_bits!(IMM8, 8); |
7588 | let shuffle: __m128i = _mm_shufflelo_epi16::<IMM8>(a); |
7589 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
    transmute(simd_select_bitmask(k, shuffle.as_i16x8(), zero))
7591 | } |
7592 | |
7593 | /// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst. |
7594 | /// |
7595 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflehi_epi16&expand=5212) |
7596 | #[inline ] |
7597 | #[target_feature (enable = "avx512bw" )] |
7598 | #[cfg_attr (test, assert_instr(vpshufhw, IMM8 = 0))] |
7599 | #[rustc_legacy_const_generics (1)] |
7600 | pub unsafe fn _mm512_shufflehi_epi16<const IMM8: i32>(a: __m512i) -> __m512i { |
7601 | static_assert_uimm_bits!(IMM8, 8); |
7602 | let a = a.as_i16x32(); |
7603 | let r: i16x32 = simd_shuffle!( |
7604 | a, |
7605 | a, |
7606 | [ |
7607 | 0, |
7608 | 1, |
7609 | 2, |
7610 | 3, |
7611 | (IMM8 as u32 & 0b11) + 4, |
7612 | ((IMM8 as u32 >> 2) & 0b11) + 4, |
7613 | ((IMM8 as u32 >> 4) & 0b11) + 4, |
7614 | ((IMM8 as u32 >> 6) & 0b11) + 4, |
7615 | 8, |
7616 | 9, |
7617 | 10, |
7618 | 11, |
7619 | (IMM8 as u32 & 0b11) + 12, |
7620 | ((IMM8 as u32 >> 2) & 0b11) + 12, |
7621 | ((IMM8 as u32 >> 4) & 0b11) + 12, |
7622 | ((IMM8 as u32 >> 6) & 0b11) + 12, |
7623 | 16, |
7624 | 17, |
7625 | 18, |
7626 | 19, |
7627 | (IMM8 as u32 & 0b11) + 20, |
7628 | ((IMM8 as u32 >> 2) & 0b11) + 20, |
7629 | ((IMM8 as u32 >> 4) & 0b11) + 20, |
7630 | ((IMM8 as u32 >> 6) & 0b11) + 20, |
7631 | 24, |
7632 | 25, |
7633 | 26, |
7634 | 27, |
7635 | (IMM8 as u32 & 0b11) + 28, |
7636 | ((IMM8 as u32 >> 2) & 0b11) + 28, |
7637 | ((IMM8 as u32 >> 4) & 0b11) + 28, |
7638 | ((IMM8 as u32 >> 6) & 0b11) + 28, |
7639 | ], |
7640 | ); |
7641 | transmute(r) |
7642 | } |
7643 | |
7644 | /// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7645 | /// |
7646 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflehi_epi16&expand=5210) |
7647 | #[inline ] |
7648 | #[target_feature (enable = "avx512bw" )] |
7649 | #[cfg_attr (test, assert_instr(vpshufhw, IMM8 = 0))] |
7650 | #[rustc_legacy_const_generics (3)] |
7651 | pub unsafe fn _mm512_mask_shufflehi_epi16<const IMM8: i32>( |
7652 | src: __m512i, |
7653 | k: __mmask32, |
7654 | a: __m512i, |
7655 | ) -> __m512i { |
7656 | static_assert_uimm_bits!(IMM8, 8); |
7657 | let r: __m512i = _mm512_shufflehi_epi16::<IMM8>(a); |
transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
7659 | } |
7660 | |
7661 | /// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
7662 | /// |
7663 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflehi_epi16&expand=5211) |
7664 | #[inline ] |
7665 | #[target_feature (enable = "avx512bw" )] |
7666 | #[cfg_attr (test, assert_instr(vpshufhw, IMM8 = 0))] |
7667 | #[rustc_legacy_const_generics (2)] |
7668 | pub unsafe fn _mm512_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i { |
7669 | static_assert_uimm_bits!(IMM8, 8); |
7670 | let r: __m512i = _mm512_shufflehi_epi16::<IMM8>(a); |
7671 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
transmute(simd_select_bitmask(k, r.as_i16x32(), zero))
7673 | } |
7674 | |
7675 | /// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7676 | /// |
7677 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflehi_epi16&expand=5207) |
7678 | #[inline ] |
7679 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7680 | #[cfg_attr (test, assert_instr(vpshufhw, IMM8 = 5))] |
7681 | #[rustc_legacy_const_generics (3)] |
7682 | pub unsafe fn _mm256_mask_shufflehi_epi16<const IMM8: i32>( |
7683 | src: __m256i, |
7684 | k: __mmask16, |
7685 | a: __m256i, |
7686 | ) -> __m256i { |
7687 | static_assert_uimm_bits!(IMM8, 8); |
7688 | let shuffle: __m256i = _mm256_shufflehi_epi16::<IMM8>(a); |
transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
7690 | } |
7691 | |
7692 | /// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
7693 | /// |
7694 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflehi_epi16&expand=5208) |
7695 | #[inline ] |
7696 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7697 | #[cfg_attr (test, assert_instr(vpshufhw, IMM8 = 5))] |
7698 | #[rustc_legacy_const_generics (2)] |
7699 | pub unsafe fn _mm256_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i { |
7700 | static_assert_uimm_bits!(IMM8, 8); |
7701 | let shuffle: __m256i = _mm256_shufflehi_epi16::<IMM8>(a); |
7702 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
transmute(simd_select_bitmask(k, shuffle.as_i16x16(), zero))
7704 | } |
7705 | |
7706 | /// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7707 | /// |
7708 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflehi_epi16&expand=5204) |
7709 | #[inline ] |
7710 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7711 | #[cfg_attr (test, assert_instr(vpshufhw, IMM8 = 5))] |
7712 | #[rustc_legacy_const_generics (3)] |
7713 | pub unsafe fn _mm_mask_shufflehi_epi16<const IMM8: i32>( |
7714 | src: __m128i, |
7715 | k: __mmask8, |
7716 | a: __m128i, |
7717 | ) -> __m128i { |
7718 | static_assert_uimm_bits!(IMM8, 8); |
7719 | let shuffle: __m128i = _mm_shufflehi_epi16::<IMM8>(a); |
transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
7721 | } |
7722 | |
7723 | /// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
7724 | /// |
7725 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflehi_epi16&expand=5205) |
7726 | #[inline ] |
7727 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7728 | #[cfg_attr (test, assert_instr(vpshufhw, IMM8 = 5))] |
7729 | #[rustc_legacy_const_generics (2)] |
7730 | pub unsafe fn _mm_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i { |
7731 | static_assert_uimm_bits!(IMM8, 8); |
7732 | let shuffle: __m128i = _mm_shufflehi_epi16::<IMM8>(a); |
7733 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
transmute(simd_select_bitmask(k, shuffle.as_i16x8(), zero))
7735 | } |
7736 | |
7737 | /// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst. |
7738 | /// |
7739 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi8&expand=5159) |
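///
/// A minimal usage sketch (an illustrative addition, not from Intel's documentation), assuming
/// the `avx512bw` target feature has been verified at runtime:
///
/// ```ignore
/// // `b` acts as a per-byte index table within each 128-bit lane: bits 3:0 of an index byte
/// // select a byte of `a` from the same lane, and a set bit 7 forces the output byte to zero.
/// let a = _mm512_set1_epi8(0x42);
/// let idx = _mm512_set1_epi8(0x80u8 as i8);
/// let r = _mm512_shuffle_epi8(a, idx);
/// // Every index byte has bit 7 set, so every byte of `r` is zero.
/// ```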
7740 | #[inline ] |
7741 | #[target_feature (enable = "avx512bw" )] |
7742 | #[cfg_attr (test, assert_instr(vpshufb))] |
7743 | pub unsafe fn _mm512_shuffle_epi8(a: __m512i, b: __m512i) -> __m512i { |
transmute(vpshufb(a.as_i8x64(), b.as_i8x64()))
7745 | } |
7746 | |
7747 | /// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7748 | /// |
7749 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi8&expand=5157) |
7750 | #[inline ] |
7751 | #[target_feature (enable = "avx512bw" )] |
7752 | #[cfg_attr (test, assert_instr(vpshufb))] |
7753 | pub unsafe fn _mm512_mask_shuffle_epi8( |
7754 | src: __m512i, |
7755 | k: __mmask64, |
7756 | a: __m512i, |
7757 | b: __m512i, |
7758 | ) -> __m512i { |
7759 | let shuffle: i8x64 = _mm512_shuffle_epi8(a, b).as_i8x64(); |
transmute(simd_select_bitmask(k, shuffle, src.as_i8x64()))
7761 | } |
7762 | |
7763 | /// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
7764 | /// |
7765 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi8&expand=5158) |
7766 | #[inline ] |
7767 | #[target_feature (enable = "avx512bw" )] |
7768 | #[cfg_attr (test, assert_instr(vpshufb))] |
7769 | pub unsafe fn _mm512_maskz_shuffle_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { |
7770 | let shuffle: i8x64 = _mm512_shuffle_epi8(a, b).as_i8x64(); |
7771 | let zero: i8x64 = _mm512_setzero_si512().as_i8x64(); |
transmute(simd_select_bitmask(k, shuffle, zero))
7773 | } |
7774 | |
7775 | /// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7776 | /// |
7777 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi8&expand=5154) |
7778 | #[inline ] |
7779 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7780 | #[cfg_attr (test, assert_instr(vpshufb))] |
7781 | pub unsafe fn _mm256_mask_shuffle_epi8( |
7782 | src: __m256i, |
7783 | k: __mmask32, |
7784 | a: __m256i, |
7785 | b: __m256i, |
7786 | ) -> __m256i { |
7787 | let shuffle: i8x32 = _mm256_shuffle_epi8(a, b).as_i8x32(); |
transmute(simd_select_bitmask(k, shuffle, src.as_i8x32()))
7789 | } |
7790 | |
7791 | /// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
7792 | /// |
7793 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi8&expand=5155) |
7794 | #[inline ] |
7795 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7796 | #[cfg_attr (test, assert_instr(vpshufb))] |
7797 | pub unsafe fn _mm256_maskz_shuffle_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { |
7798 | let shuffle: i8x32 = _mm256_shuffle_epi8(a, b).as_i8x32(); |
7799 | let zero: i8x32 = _mm256_setzero_si256().as_i8x32(); |
transmute(simd_select_bitmask(k, shuffle, zero))
7801 | } |
7802 | |
7803 | /// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7804 | /// |
7805 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi8&expand=5151) |
7806 | #[inline ] |
7807 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7808 | #[cfg_attr (test, assert_instr(vpshufb))] |
7809 | pub unsafe fn _mm_mask_shuffle_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
7810 | let shuffle: i8x16 = _mm_shuffle_epi8(a, b).as_i8x16(); |
transmute(simd_select_bitmask(k, shuffle, src.as_i8x16()))
7812 | } |
7813 | |
7814 | /// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
7815 | /// |
7816 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi8&expand=5152) |
7817 | #[inline ] |
7818 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7819 | #[cfg_attr (test, assert_instr(vpshufb))] |
7820 | pub unsafe fn _mm_maskz_shuffle_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
7821 | let shuffle: i8x16 = _mm_shuffle_epi8(a, b).as_i8x16(); |
7822 | let zero: i8x16 = _mm_setzero_si128().as_i8x16(); |
transmute(simd_select_bitmask(k, shuffle, zero))
7824 | } |
7825 | |
7826 | /// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero. |
7827 | /// |
7828 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi16_mask&expand=5884) |
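///
/// A minimal usage sketch (an illustrative addition, not from Intel's documentation), assuming
/// the `avx512bw` target feature has been verified at runtime:
///
/// ```ignore
/// // One mask bit is produced per 16-bit lane: set when `a & b` is non-zero in that lane,
/// // which makes this handy for asking "which lanes have this flag bit set?".
/// let data = _mm512_set1_epi16(0b0111);
/// let flag = _mm512_set1_epi16(0b0100);
/// let k = _mm512_test_epi16_mask(data, flag);
/// // Every lane shares bit 2, so all 32 mask bits are set (k == u32::MAX).
/// ```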
7829 | #[inline ] |
7830 | #[target_feature (enable = "avx512bw" )] |
7831 | #[cfg_attr (test, assert_instr(vptestmw))] |
7832 | pub unsafe fn _mm512_test_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { |
7833 | let and: __m512i = _mm512_and_si512(a, b); |
7834 | let zero: __m512i = _mm512_setzero_si512(); |
_mm512_cmpneq_epi16_mask(and, zero)
7836 | } |
7837 | |
7838 | /// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero. |
7839 | /// |
7840 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi16_mask&expand=5883) |
7841 | #[inline ] |
7842 | #[target_feature (enable = "avx512bw" )] |
7843 | #[cfg_attr (test, assert_instr(vptestmw))] |
7844 | pub unsafe fn _mm512_mask_test_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { |
7845 | let and: __m512i = _mm512_and_si512(a, b); |
7846 | let zero: __m512i = _mm512_setzero_si512(); |
_mm512_mask_cmpneq_epi16_mask(k, and, zero)
7848 | } |
7849 | |
7850 | /// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero. |
7851 | /// |
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi16_mask&expand=5882)
7853 | #[inline ] |
7854 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7855 | #[cfg_attr (test, assert_instr(vptestmw))] |
7856 | pub unsafe fn _mm256_test_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { |
7857 | let and: __m256i = _mm256_and_si256(a, b); |
7858 | let zero: __m256i = _mm256_setzero_si256(); |
_mm256_cmpneq_epi16_mask(and, zero)
7860 | } |
7861 | |
7862 | /// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero. |
7863 | /// |
7864 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi16_mask&expand=5881) |
7865 | #[inline ] |
7866 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7867 | #[cfg_attr (test, assert_instr(vptestmw))] |
7868 | pub unsafe fn _mm256_mask_test_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { |
7869 | let and: __m256i = _mm256_and_si256(a, b); |
7870 | let zero: __m256i = _mm256_setzero_si256(); |
_mm256_mask_cmpneq_epi16_mask(k, and, zero)
7872 | } |
7873 | |
7874 | /// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero. |
7875 | /// |
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi16_mask&expand=5880)
7877 | #[inline ] |
7878 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7879 | #[cfg_attr (test, assert_instr(vptestmw))] |
7880 | pub unsafe fn _mm_test_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { |
7881 | let and: __m128i = _mm_and_si128(a, b); |
7882 | let zero: __m128i = _mm_setzero_si128(); |
_mm_cmpneq_epi16_mask(and, zero)
7884 | } |
7885 | |
7886 | /// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero. |
7887 | /// |
7888 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi16_mask&expand=5879) |
7889 | #[inline ] |
7890 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7891 | #[cfg_attr (test, assert_instr(vptestmw))] |
7892 | pub unsafe fn _mm_mask_test_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { |
7893 | let and: __m128i = _mm_and_si128(a, b); |
7894 | let zero: __m128i = _mm_setzero_si128(); |
_mm_mask_cmpneq_epi16_mask(k, and, zero)
7896 | } |
7897 | |
7898 | /// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero. |
7899 | /// |
7900 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi8_mask&expand=5902) |
7901 | #[inline ] |
7902 | #[target_feature (enable = "avx512bw" )] |
7903 | #[cfg_attr (test, assert_instr(vptestmb))] |
7904 | pub unsafe fn _mm512_test_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { |
7905 | let and: __m512i = _mm512_and_si512(a, b); |
7906 | let zero: __m512i = _mm512_setzero_si512(); |
_mm512_cmpneq_epi8_mask(and, zero)
7908 | } |
7909 | |
7910 | /// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero. |
7911 | /// |
7912 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi8_mask&expand=5901) |
7913 | #[inline ] |
7914 | #[target_feature (enable = "avx512bw" )] |
7915 | #[cfg_attr (test, assert_instr(vptestmb))] |
7916 | pub unsafe fn _mm512_mask_test_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { |
7917 | let and: __m512i = _mm512_and_si512(a, b); |
7918 | let zero: __m512i = _mm512_setzero_si512(); |
_mm512_mask_cmpneq_epi8_mask(k, and, zero)
7920 | } |
7921 | |
7922 | /// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero. |
7923 | /// |
7924 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi8_mask&expand=5900) |
7925 | #[inline ] |
7926 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7927 | #[cfg_attr (test, assert_instr(vptestmb))] |
7928 | pub unsafe fn _mm256_test_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { |
7929 | let and: __m256i = _mm256_and_si256(a, b); |
7930 | let zero: __m256i = _mm256_setzero_si256(); |
_mm256_cmpneq_epi8_mask(and, zero)
7932 | } |
7933 | |
7934 | /// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero. |
7935 | /// |
7936 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi8_mask&expand=5899) |
7937 | #[inline ] |
7938 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7939 | #[cfg_attr (test, assert_instr(vptestmb))] |
7940 | pub unsafe fn _mm256_mask_test_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { |
7941 | let and: __m256i = _mm256_and_si256(a, b); |
7942 | let zero: __m256i = _mm256_setzero_si256(); |
_mm256_mask_cmpneq_epi8_mask(k, and, zero)
7944 | } |
7945 | |
7946 | /// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero. |
7947 | /// |
7948 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi8_mask&expand=5898) |
7949 | #[inline ] |
7950 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7951 | #[cfg_attr (test, assert_instr(vptestmb))] |
7952 | pub unsafe fn _mm_test_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { |
7953 | let and: __m128i = _mm_and_si128(a, b); |
7954 | let zero: __m128i = _mm_setzero_si128(); |
_mm_cmpneq_epi8_mask(and, zero)
7956 | } |
7957 | |
7958 | /// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero. |
7959 | /// |
7960 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi8_mask&expand=5897) |
7961 | #[inline ] |
7962 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7963 | #[cfg_attr (test, assert_instr(vptestmb))] |
7964 | pub unsafe fn _mm_mask_test_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { |
7965 | let and: __m128i = _mm_and_si128(a, b); |
7966 | let zero: __m128i = _mm_setzero_si128(); |
_mm_mask_cmpneq_epi8_mask(k, and, zero)
7968 | } |
7969 | |
7970 | /// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero. |
7971 | /// |
7972 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi16_mask&expand=5915) |
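///
/// A minimal usage sketch (an illustrative addition, not from Intel's documentation), assuming
/// the `avx512bw` target feature has been verified at runtime:
///
/// ```ignore
/// // The complement of `_mm512_test_epi16_mask`: a bit is set only when `a & b` is zero
/// // in the corresponding 16-bit lane.
/// let a = _mm512_set1_epi16(0b0011);
/// let b = _mm512_set1_epi16(0b1100);
/// let k = _mm512_testn_epi16_mask(a, b);
/// // The operands share no set bits, so all 32 mask bits are set.
/// ```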
7973 | #[inline ] |
7974 | #[target_feature (enable = "avx512bw" )] |
7975 | #[cfg_attr (test, assert_instr(vptestnmw))] |
7976 | pub unsafe fn _mm512_testn_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { |
7977 | let and: __m512i = _mm512_and_si512(a, b); |
7978 | let zero: __m512i = _mm512_setzero_si512(); |
_mm512_cmpeq_epi16_mask(and, zero)
7980 | } |
7981 | |
7982 | /// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero. |
7983 | /// |
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi16_mask&expand=5914)
7985 | #[inline ] |
7986 | #[target_feature (enable = "avx512bw" )] |
7987 | #[cfg_attr (test, assert_instr(vptestnmw))] |
7988 | pub unsafe fn _mm512_mask_testn_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { |
7989 | let and: __m512i = _mm512_and_si512(a, b); |
7990 | let zero: __m512i = _mm512_setzero_si512(); |
_mm512_mask_cmpeq_epi16_mask(k, and, zero)
7992 | } |
7993 | |
7994 | /// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero. |
7995 | /// |
7996 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi16_mask&expand=5913) |
7997 | #[inline ] |
7998 | #[target_feature (enable = "avx512bw,avx512vl" )] |
7999 | #[cfg_attr (test, assert_instr(vptestnmw))] |
8000 | pub unsafe fn _mm256_testn_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { |
8001 | let and: __m256i = _mm256_and_si256(a, b); |
8002 | let zero: __m256i = _mm256_setzero_si256(); |
_mm256_cmpeq_epi16_mask(and, zero)
8004 | } |
8005 | |
8006 | /// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero. |
8007 | /// |
8008 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi16_mask&expand=5912) |
8009 | #[inline ] |
8010 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8011 | #[cfg_attr (test, assert_instr(vptestnmw))] |
8012 | pub unsafe fn _mm256_mask_testn_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { |
8013 | let and: __m256i = _mm256_and_si256(a, b); |
8014 | let zero: __m256i = _mm256_setzero_si256(); |
_mm256_mask_cmpeq_epi16_mask(k, and, zero)
8016 | } |
8017 | |
8018 | /// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero. |
8019 | /// |
8020 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi16_mask&expand=5911) |
8021 | #[inline ] |
8022 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8023 | #[cfg_attr (test, assert_instr(vptestnmw))] |
8024 | pub unsafe fn _mm_testn_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { |
8025 | let and: __m128i = _mm_and_si128(a, b); |
8026 | let zero: __m128i = _mm_setzero_si128(); |
_mm_cmpeq_epi16_mask(and, zero)
8028 | } |
8029 | |
8030 | /// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero. |
8031 | /// |
8032 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi16_mask&expand=5910) |
8033 | #[inline ] |
8034 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8035 | #[cfg_attr (test, assert_instr(vptestnmw))] |
8036 | pub unsafe fn _mm_mask_testn_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { |
8037 | let and: __m128i = _mm_and_si128(a, b); |
8038 | let zero: __m128i = _mm_setzero_si128(); |
_mm_mask_cmpeq_epi16_mask(k, and, zero)
8040 | } |
8041 | |
8042 | /// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero. |
8043 | /// |
8044 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi8_mask&expand=5933) |
8045 | #[inline ] |
8046 | #[target_feature (enable = "avx512bw" )] |
8047 | #[cfg_attr (test, assert_instr(vptestnmb))] |
8048 | pub unsafe fn _mm512_testn_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { |
8049 | let and: __m512i = _mm512_and_si512(a, b); |
8050 | let zero: __m512i = _mm512_setzero_si512(); |
_mm512_cmpeq_epi8_mask(and, zero)
8052 | } |
8053 | |
8054 | /// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero. |
8055 | /// |
8056 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi8_mask&expand=5932) |
8057 | #[inline ] |
8058 | #[target_feature (enable = "avx512bw" )] |
8059 | #[cfg_attr (test, assert_instr(vptestnmb))] |
8060 | pub unsafe fn _mm512_mask_testn_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { |
8061 | let and: __m512i = _mm512_and_si512(a, b); |
8062 | let zero: __m512i = _mm512_setzero_si512(); |
_mm512_mask_cmpeq_epi8_mask(k, and, zero)
8064 | } |
8065 | |
8066 | /// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero. |
8067 | /// |
8068 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi8_mask&expand=5931) |
8069 | #[inline ] |
8070 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8071 | #[cfg_attr (test, assert_instr(vptestnmb))] |
8072 | pub unsafe fn _mm256_testn_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { |
8073 | let and: __m256i = _mm256_and_si256(a, b); |
8074 | let zero: __m256i = _mm256_setzero_si256(); |
_mm256_cmpeq_epi8_mask(and, zero)
8076 | } |
8077 | |
8078 | /// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero. |
8079 | /// |
8080 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi8_mask&expand=5930) |
8081 | #[inline ] |
8082 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8083 | #[cfg_attr (test, assert_instr(vptestnmb))] |
8084 | pub unsafe fn _mm256_mask_testn_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { |
8085 | let and: __m256i = _mm256_and_si256(a, b); |
8086 | let zero: __m256i = _mm256_setzero_si256(); |
_mm256_mask_cmpeq_epi8_mask(k, and, zero)
8088 | } |
8089 | |
8090 | /// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero. |
8091 | /// |
8092 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi8_mask&expand=5929) |
8093 | #[inline ] |
8094 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8095 | #[cfg_attr (test, assert_instr(vptestnmb))] |
8096 | pub unsafe fn _mm_testn_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { |
8097 | let and: __m128i = _mm_and_si128(a, b); |
8098 | let zero: __m128i = _mm_setzero_si128(); |
_mm_cmpeq_epi8_mask(and, zero)
8100 | } |
8101 | |
8102 | /// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero. |
8103 | /// |
8104 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi8_mask&expand=5928) |
8105 | #[inline ] |
8106 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8107 | #[cfg_attr (test, assert_instr(vptestnmb))] |
8108 | pub unsafe fn _mm_mask_testn_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { |
8109 | let and: __m128i = _mm_and_si128(a, b); |
8110 | let zero: __m128i = _mm_setzero_si128(); |
_mm_mask_cmpeq_epi8_mask(k, and, zero)
8112 | } |
8113 | |
8114 | /// Store 64-bit mask from a into memory. |
8115 | /// |
8116 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask64&expand=5578) |
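///
/// A minimal round-trip sketch (an illustrative addition, not from Intel's documentation),
/// assuming the `avx512bw` target feature has been verified at runtime:
///
/// ```ignore
/// let k: __mmask64 = 0xAAAA_AAAA_5555_5555;
/// let mut slot: u64 = 0;
/// _store_mask64(&mut slot, k);
/// // Reloading through `_load_mask64` recovers the original mask.
/// assert_eq!(_load_mask64(&slot), k);
/// ```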
8117 | #[inline ] |
8118 | #[target_feature (enable = "avx512bw" )] |
8119 | #[cfg_attr (test, assert_instr(mov))] //should be kmovq |
8120 | pub unsafe fn _store_mask64(mem_addr: *mut u64, a: __mmask64) { |
ptr::write(mem_addr as *mut __mmask64, a);
8122 | } |
8123 | |
8124 | /// Store 32-bit mask from a into memory. |
8125 | /// |
8126 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask32&expand=5577) |
8127 | #[inline ] |
8128 | #[target_feature (enable = "avx512bw" )] |
8129 | #[cfg_attr (test, assert_instr(mov))] //should be kmovd |
8130 | pub unsafe fn _store_mask32(mem_addr: *mut u32, a: __mmask32) { |
ptr::write(mem_addr as *mut __mmask32, a);
8132 | } |
8133 | |
8134 | /// Load 64-bit mask from memory into k. |
8135 | /// |
8136 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask64&expand=3318) |
8137 | #[inline ] |
8138 | #[target_feature (enable = "avx512bw" )] |
8139 | #[cfg_attr (test, assert_instr(mov))] //should be kmovq |
8140 | pub unsafe fn _load_mask64(mem_addr: *const u64) -> __mmask64 { |
ptr::read(mem_addr as *const __mmask64)
8142 | } |
8143 | |
8144 | /// Load 32-bit mask from memory into k. |
8145 | /// |
8146 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask32&expand=3317) |
8147 | #[inline ] |
8148 | #[target_feature (enable = "avx512bw" )] |
8149 | #[cfg_attr (test, assert_instr(mov))] //should be kmovd |
8150 | pub unsafe fn _load_mask32(mem_addr: *const u32) -> __mmask32 { |
ptr::read(mem_addr as *const __mmask32)
8152 | } |
8153 | |
8154 | /// Compute the absolute differences of packed unsigned 8-bit integers in a and b, then horizontally sum each consecutive 8 differences to produce eight unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in dst. |
8155 | /// |
8156 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sad_epu8&expand=4855) |
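///
/// A minimal usage sketch (an illustrative addition, not from Intel's documentation), assuming
/// the `avx512bw` target feature has been verified at runtime:
///
/// ```ignore
/// // Each 64-bit element of the result holds the sum of the eight absolute byte differences
/// // taken over the corresponding 8-byte group of `a` and `b`.
/// let a = _mm512_set1_epi8(10);
/// let b = _mm512_set1_epi8(7);
/// let r = _mm512_sad_epu8(a, b);
/// // Every byte difference is |10 - 7| = 3, so each 64-bit element of `r` is 8 * 3 = 24.
/// ```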
8157 | #[inline ] |
8158 | #[target_feature (enable = "avx512bw" )] |
8159 | #[cfg_attr (test, assert_instr(vpsadbw))] |
8160 | pub unsafe fn _mm512_sad_epu8(a: __m512i, b: __m512i) -> __m512i { |
transmute(vpsadbw(a.as_u8x64(), b.as_u8x64()))
8162 | } |
8163 | |
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
8165 | /// |
8166 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dbsad_epu8&expand=2114) |
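///
/// A rough usage sketch (an illustrative addition, not from Intel's documentation; the exact
/// quadruplet selection is defined by Intel's pseudocode), assuming the `avx512bw` target
/// feature has been verified at runtime:
///
/// ```ignore
/// // IMM8 selects which quadruplets of `b` (within each 128-bit lane) the SADs are taken
/// // against; see Intel's pseudocode for the exact selection.
/// let a = _mm512_set1_epi8(5);
/// let b = _mm512_set1_epi8(1);
/// let r = _mm512_dbsad_epu8::<0>(a, b);
/// // With uniform inputs every SAD is 4 * |5 - 1| = 16, whichever quadruplets IMM8 selects.
/// ```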
8167 | #[inline ] |
8168 | #[target_feature (enable = "avx512bw" )] |
8169 | #[rustc_legacy_const_generics (2)] |
8170 | #[cfg_attr (test, assert_instr(vdbpsadbw, IMM8 = 0))] |
8171 | pub unsafe fn _mm512_dbsad_epu8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i { |
8172 | static_assert_uimm_bits!(IMM8, 8); |
8173 | let a: u8x64 = a.as_u8x64(); |
8174 | let b: u8x64 = b.as_u8x64(); |
8175 | let r: u16x32 = vdbpsadbw(a, b, IMM8); |
transmute(r)
8177 | } |
8178 | |
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
8180 | /// |
8181 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dbsad_epu8&expand=2115) |
8182 | #[inline ] |
8183 | #[target_feature (enable = "avx512bw" )] |
8184 | #[rustc_legacy_const_generics (4)] |
8185 | #[cfg_attr (test, assert_instr(vdbpsadbw, IMM8 = 0))] |
8186 | pub unsafe fn _mm512_mask_dbsad_epu8<const IMM8: i32>( |
8187 | src: __m512i, |
8188 | k: __mmask32, |
8189 | a: __m512i, |
8190 | b: __m512i, |
8191 | ) -> __m512i { |
8192 | static_assert_uimm_bits!(IMM8, 8); |
8193 | let a: u8x64 = a.as_u8x64(); |
8194 | let b: u8x64 = b.as_u8x64(); |
8195 | let r: u16x32 = vdbpsadbw(a, b, IMM8); |
transmute(simd_select_bitmask(k, r, src.as_u16x32()))
8197 | } |
8198 | |
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
8200 | /// |
8201 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dbsad_epu8&expand=2116) |
8202 | #[inline ] |
8203 | #[target_feature (enable = "avx512bw" )] |
8204 | #[rustc_legacy_const_generics (3)] |
8205 | #[cfg_attr (test, assert_instr(vdbpsadbw, IMM8 = 0))] |
8206 | pub unsafe fn _mm512_maskz_dbsad_epu8<const IMM8: i32>( |
8207 | k: __mmask32, |
8208 | a: __m512i, |
8209 | b: __m512i, |
8210 | ) -> __m512i { |
8211 | static_assert_uimm_bits!(IMM8, 8); |
8212 | let a: u8x64 = a.as_u8x64(); |
8213 | let b: u8x64 = b.as_u8x64(); |
8214 | let r: u16x32 = vdbpsadbw(a, b, IMM8); |
transmute(simd_select_bitmask(
    k,
    r,
    _mm512_setzero_si512().as_u16x32(),
))
8220 | } |
8221 | |
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
8223 | /// |
8224 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dbsad_epu8&expand=2111) |
8225 | #[inline ] |
8226 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8227 | #[rustc_legacy_const_generics (2)] |
8228 | #[cfg_attr (test, assert_instr(vdbpsadbw, IMM8 = 0))] |
8229 | pub unsafe fn _mm256_dbsad_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { |
8230 | static_assert_uimm_bits!(IMM8, 8); |
8231 | let a: u8x32 = a.as_u8x32(); |
8232 | let b: u8x32 = b.as_u8x32(); |
8233 | let r: u16x16 = vdbpsadbw256(a, b, IMM8); |
transmute(r)
8235 | } |
8236 | |
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
8238 | /// |
8239 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dbsad_epu8&expand=2112) |
8240 | #[inline ] |
8241 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8242 | #[rustc_legacy_const_generics (4)] |
8243 | #[cfg_attr (test, assert_instr(vdbpsadbw, IMM8 = 0))] |
8244 | pub unsafe fn _mm256_mask_dbsad_epu8<const IMM8: i32>( |
8245 | src: __m256i, |
8246 | k: __mmask16, |
8247 | a: __m256i, |
8248 | b: __m256i, |
8249 | ) -> __m256i { |
8250 | static_assert_uimm_bits!(IMM8, 8); |
8251 | let a: u8x32 = a.as_u8x32(); |
8252 | let b: u8x32 = b.as_u8x32(); |
8253 | let r: u16x16 = vdbpsadbw256(a, b, IMM8); |
transmute(simd_select_bitmask(k, r, src.as_u16x16()))
8255 | } |
8256 | |
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
8258 | /// |
8259 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dbsad_epu8&expand=2113) |
8260 | #[inline ] |
8261 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8262 | #[rustc_legacy_const_generics (3)] |
8263 | #[cfg_attr (test, assert_instr(vdbpsadbw, IMM8 = 0))] |
8264 | pub unsafe fn _mm256_maskz_dbsad_epu8<const IMM8: i32>( |
8265 | k: __mmask16, |
8266 | a: __m256i, |
8267 | b: __m256i, |
8268 | ) -> __m256i { |
8269 | static_assert_uimm_bits!(IMM8, 8); |
8270 | let a: u8x32 = a.as_u8x32(); |
8271 | let b: u8x32 = b.as_u8x32(); |
8272 | let r: u16x16 = vdbpsadbw256(a, b, IMM8); |
transmute(simd_select_bitmask(
    k,
    r,
    _mm256_setzero_si256().as_u16x16(),
))
8278 | } |
8279 | |
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
8281 | /// |
8282 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dbsad_epu8&expand=2108) |
8283 | #[inline ] |
8284 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8285 | #[rustc_legacy_const_generics (2)] |
8286 | #[cfg_attr (test, assert_instr(vdbpsadbw, IMM8 = 0))] |
8287 | pub unsafe fn _mm_dbsad_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { |
8288 | static_assert_uimm_bits!(IMM8, 8); |
8289 | let a: u8x16 = a.as_u8x16(); |
8290 | let b: u8x16 = b.as_u8x16(); |
8291 | let r: u16x8 = vdbpsadbw128(a, b, IMM8); |
transmute(r)
8293 | } |
8294 | |
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
8296 | /// |
8297 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dbsad_epu8&expand=2109) |
8298 | #[inline ] |
8299 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8300 | #[rustc_legacy_const_generics (4)] |
8301 | #[cfg_attr (test, assert_instr(vdbpsadbw, IMM8 = 0))] |
8302 | pub unsafe fn _mm_mask_dbsad_epu8<const IMM8: i32>( |
8303 | src: __m128i, |
8304 | k: __mmask8, |
8305 | a: __m128i, |
8306 | b: __m128i, |
8307 | ) -> __m128i { |
8308 | static_assert_uimm_bits!(IMM8, 8); |
8309 | let a: u8x16 = a.as_u8x16(); |
8310 | let b: u8x16 = b.as_u8x16(); |
8311 | let r: u16x8 = vdbpsadbw128(a, b, IMM8); |
transmute(simd_select_bitmask(k, r, src.as_u16x8()))
8313 | } |
8314 | |
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
8316 | /// |
8317 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dbsad_epu8&expand=2110) |
8318 | #[inline ] |
8319 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8320 | #[rustc_legacy_const_generics (3)] |
8321 | #[cfg_attr (test, assert_instr(vdbpsadbw, IMM8 = 0))] |
8322 | pub unsafe fn _mm_maskz_dbsad_epu8<const IMM8: i32>( |
8323 | k: __mmask8, |
8324 | a: __m128i, |
8325 | b: __m128i, |
8326 | ) -> __m128i { |
8327 | static_assert_uimm_bits!(IMM8, 8); |
8328 | let a: u8x16 = a.as_u8x16(); |
8329 | let b: u8x16 = b.as_u8x16(); |
8330 | let r: u16x8 = vdbpsadbw128(a, b, IMM8); |
transmute(simd_select_bitmask(k, r, _mm_setzero_si128().as_u16x8()))
8332 | } |
8333 | |
8334 | /// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a. |
8335 | /// |
8336 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi16_mask&expand=3873) |
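///
/// A minimal usage sketch (an illustrative addition, not from Intel's documentation), assuming
/// the `avx512bw` target feature has been verified at runtime:
///
/// ```ignore
/// // Collects the sign bit of every 16-bit lane, e.g. to build a mask of the negative lanes.
/// let a = _mm512_set1_epi16(-5);
/// let k = _mm512_movepi16_mask(a);
/// // All 32 lanes are negative, so every bit of the mask is set.
/// ```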
8337 | #[inline ] |
8338 | #[target_feature (enable = "avx512bw" )] |
8339 | #[cfg_attr (test, assert_instr(vpmovw2m))] |
8340 | pub unsafe fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 { |
8341 | let filter: __m512i = _mm512_set1_epi16(1 << 15); |
let a: __m512i = _mm512_and_si512(a, filter);
_mm512_cmpeq_epi16_mask(a, filter)
8344 | } |
8345 | |
8346 | /// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a. |
8347 | /// |
8348 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi16_mask&expand=3872) |
8349 | #[inline ] |
8350 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8351 | #[cfg_attr (test, assert_instr(vpmovw2m))] |
8352 | pub unsafe fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 { |
8353 | let filter: __m256i = _mm256_set1_epi16(1 << 15); |
let a: __m256i = _mm256_and_si256(a, filter);
_mm256_cmpeq_epi16_mask(a, filter)
8356 | } |
8357 | |
8358 | /// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a. |
8359 | /// |
8360 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi16_mask&expand=3871) |
8361 | #[inline ] |
8362 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8363 | #[cfg_attr (test, assert_instr(vpmovw2m))] |
8364 | pub unsafe fn _mm_movepi16_mask(a: __m128i) -> __mmask8 { |
8365 | let filter: __m128i = _mm_set1_epi16(1 << 15); |
let a: __m128i = _mm_and_si128(a, filter);
_mm_cmpeq_epi16_mask(a, filter)
8368 | } |
8369 | |
8370 | /// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a. |
8371 | /// |
8372 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi8_mask&expand=3883) |
8373 | #[inline ] |
8374 | #[target_feature (enable = "avx512bw" )] |
8375 | #[cfg_attr (test, assert_instr(vpmovb2m))] |
8376 | pub unsafe fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 { |
8377 | let filter: __m512i = _mm512_set1_epi8(1 << 7); |
let a: __m512i = _mm512_and_si512(a, filter);
_mm512_cmpeq_epi8_mask(a, filter)
8380 | } |
8381 | |
8382 | /// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a. |
8383 | /// |
8384 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi8_mask&expand=3882) |
8385 | #[inline ] |
8386 | #[target_feature (enable = "avx512bw,avx512vl" )] |
#[cfg_attr (test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes fewer cycles than
// using vpmovb2m plus converting the mask register to a standard register.
8389 | pub unsafe fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 { |
8390 | let filter: __m256i = _mm256_set1_epi8(1 << 7); |
let a: __m256i = _mm256_and_si256(a, filter);
_mm256_cmpeq_epi8_mask(a, filter)
8393 | } |
8394 | |
8395 | /// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a. |
8396 | /// |
8397 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi8_mask&expand=3881) |
8398 | #[inline ] |
8399 | #[target_feature (enable = "avx512bw,avx512vl" )] |
#[cfg_attr (test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes fewer cycles than
// using vpmovb2m plus converting the mask register to a standard register.
8402 | pub unsafe fn _mm_movepi8_mask(a: __m128i) -> __mmask16 { |
8403 | let filter: __m128i = _mm_set1_epi8(1 << 7); |
let a: __m128i = _mm_and_si128(a, filter);
_mm_cmpeq_epi8_mask(a, filter)
8406 | } |
8407 | |
8408 | /// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. |
8409 | /// |
8410 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi16&expand=3886) |
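///
/// A minimal usage sketch (an illustrative addition, not from Intel's documentation), assuming
/// the `avx512bw` target feature has been verified at runtime:
///
/// ```ignore
/// // Expands a mask into a vector of all-ones / all-zeros 16-bit lanes, which can then be
/// // used with ordinary (non-masked) vector operations such as a bitwise AND.
/// let v = _mm512_movm_epi16(0b1);
/// // Lane 0 of `v` is -1 (all bits set); the remaining 31 lanes are 0.
/// ```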
8411 | #[inline ] |
8412 | #[target_feature (enable = "avx512bw" )] |
8413 | #[cfg_attr (test, assert_instr(vpmovm2w))] |
8414 | pub unsafe fn _mm512_movm_epi16(k: __mmask32) -> __m512i { |
let one: i16x32 = _mm512_set1_epi16(
8416 | 1 << 15 |
8417 | | 1 << 14 |
8418 | | 1 << 13 |
8419 | | 1 << 12 |
8420 | | 1 << 11 |
8421 | | 1 << 10 |
8422 | | 1 << 9 |
8423 | | 1 << 8 |
8424 | | 1 << 7 |
8425 | | 1 << 6 |
8426 | | 1 << 5 |
8427 | | 1 << 4 |
8428 | | 1 << 3 |
8429 | | 1 << 2 |
8430 | | 1 << 1 |
8431 | | 1 << 0, |
8432 | ) |
8433 | .as_i16x32(); |
8434 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
transmute(simd_select_bitmask(k, one, zero))
8436 | } |
8437 | |
8438 | /// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. |
8439 | /// |
8440 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi16&expand=3885) |
8441 | #[inline ] |
8442 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8443 | #[cfg_attr (test, assert_instr(vpmovm2w))] |
8444 | pub unsafe fn _mm256_movm_epi16(k: __mmask16) -> __m256i { |
let one: i16x16 = _mm256_set1_epi16(
8446 | 1 << 15 |
8447 | | 1 << 14 |
8448 | | 1 << 13 |
8449 | | 1 << 12 |
8450 | | 1 << 11 |
8451 | | 1 << 10 |
8452 | | 1 << 9 |
8453 | | 1 << 8 |
8454 | | 1 << 7 |
8455 | | 1 << 6 |
8456 | | 1 << 5 |
8457 | | 1 << 4 |
8458 | | 1 << 3 |
8459 | | 1 << 2 |
8460 | | 1 << 1 |
8461 | | 1 << 0, |
8462 | ) |
8463 | .as_i16x16(); |
8464 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
transmute(simd_select_bitmask(k, one, zero))
8466 | } |
8467 | |
8468 | /// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. |
8469 | /// |
8470 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi16&expand=3884) |
8471 | #[inline ] |
8472 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8473 | #[cfg_attr (test, assert_instr(vpmovm2w))] |
8474 | pub unsafe fn _mm_movm_epi16(k: __mmask8) -> __m128i { |
let one: i16x8 = _mm_set1_epi16(
8476 | 1 << 15 |
8477 | | 1 << 14 |
8478 | | 1 << 13 |
8479 | | 1 << 12 |
8480 | | 1 << 11 |
8481 | | 1 << 10 |
8482 | | 1 << 9 |
8483 | | 1 << 8 |
8484 | | 1 << 7 |
8485 | | 1 << 6 |
8486 | | 1 << 5 |
8487 | | 1 << 4 |
8488 | | 1 << 3 |
8489 | | 1 << 2 |
8490 | | 1 << 1 |
8491 | | 1 << 0, |
8492 | ) |
8493 | .as_i16x8(); |
8494 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
transmute(simd_select_bitmask(k, one, zero))
8496 | } |
8497 | |
8498 | /// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. |
8499 | /// |
8500 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi8&expand=3895) |
8501 | #[inline ] |
8502 | #[target_feature (enable = "avx512bw" )] |
8503 | #[cfg_attr (test, assert_instr(vpmovm2b))] |
8504 | pub unsafe fn _mm512_movm_epi8(k: __mmask64) -> __m512i { |
8505 | let one: i8x64 = |
_mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
8507 | .as_i8x64(); |
8508 | let zero: i8x64 = _mm512_setzero_si512().as_i8x64(); |
transmute(simd_select_bitmask(k, one, zero))
8510 | } |
8511 | |
8512 | /// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. |
8513 | /// |
8514 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi8&expand=3894) |
8515 | #[inline ] |
8516 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8517 | #[cfg_attr (test, assert_instr(vpmovm2b))] |
8518 | pub unsafe fn _mm256_movm_epi8(k: __mmask32) -> __m256i { |
8519 | let one: i8x32 = |
_mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
8521 | .as_i8x32(); |
8522 | let zero: i8x32 = _mm256_setzero_si256().as_i8x32(); |
transmute(simd_select_bitmask(k, one, zero))
8524 | } |
8525 | |
8526 | /// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. |
8527 | /// |
8528 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi8&expand=3893) |
8529 | #[inline ] |
8530 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8531 | #[cfg_attr (test, assert_instr(vpmovm2b))] |
8532 | pub unsafe fn _mm_movm_epi8(k: __mmask16) -> __m128i { |
let one: i8x16 = _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
8534 | .as_i8x16(); |
8535 | let zero: i8x16 = _mm_setzero_si128().as_i8x16(); |
transmute(simd_select_bitmask(k, one, zero))
8537 | } |
8538 | |
8539 | /// Add 32-bit masks in a and b, and store the result in k. |
8540 | /// |
8541 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask32&expand=3207) |
8542 | #[inline ] |
8543 | #[target_feature (enable = "avx512bw" )] |
8544 | pub unsafe fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { |
8545 | a + b |
8546 | } |
8547 | |
8548 | /// Add 64-bit masks in a and b, and store the result in k. |
8549 | /// |
8550 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask64&expand=3208) |
8551 | #[inline ] |
8552 | #[target_feature (enable = "avx512bw" )] |
8553 | pub unsafe fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { |
8554 | a + b |
8555 | } |
8556 | |
8557 | /// Compute the bitwise AND of 32-bit masks a and b, and store the result in k. |
8558 | /// |
8559 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask32&expand=3213) |
8560 | #[inline ] |
8561 | #[target_feature (enable = "avx512bw" )] |
8562 | pub unsafe fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { |
8563 | a & b |
8564 | } |
8565 | |
8566 | /// Compute the bitwise AND of 64-bit masks a and b, and store the result in k. |
8567 | /// |
8568 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask64&expand=3214) |
8569 | #[inline ] |
8570 | #[target_feature (enable = "avx512bw" )] |
8571 | pub unsafe fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { |
8572 | a & b |
8573 | } |
8574 | |
8575 | /// Compute the bitwise NOT of 32-bit mask a, and store the result in k. |
8576 | /// |
8577 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask32&expand=3234) |
8578 | #[inline ] |
8579 | #[target_feature (enable = "avx512bw" )] |
8580 | pub unsafe fn _knot_mask32(a: __mmask32) -> __mmask32 { |
8581 | a ^ 0b11111111_11111111_11111111_11111111 |
8582 | } |
8583 | |
8584 | /// Compute the bitwise NOT of 64-bit mask a, and store the result in k. |
8585 | /// |
8586 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask64&expand=3235) |
8587 | #[inline ] |
8588 | #[target_feature (enable = "avx512bw" )] |
8589 | pub unsafe fn _knot_mask64(a: __mmask64) -> __mmask64 { |
8590 | a ^ 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 |
8591 | } |
8592 | |
8593 | /// Compute the bitwise NOT of 32-bit masks a and then AND with b, and store the result in k. |
8594 | /// |
8595 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask32&expand=3219) |
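///
/// A minimal illustrative sketch (the operand values are arbitrary; `avx512bw` is assumed):
///
/// ```ignore
/// // NOT(a) & b keeps only the bits of b that are clear in a.
/// let a: __mmask32 = 0b1100;
/// let b: __mmask32 = 0b1010;
/// assert_eq!(_kandn_mask32(a, b), 0b0010);
/// ```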
8596 | #[inline ] |
8597 | #[target_feature (enable = "avx512bw" )] |
8598 | pub unsafe fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { |
8599 | _knot_mask32(a) & b |
8600 | } |
8601 | |
8602 | /// Compute the bitwise NOT of 64-bit masks a and then AND with b, and store the result in k. |
8603 | /// |
8604 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask64&expand=3220) |
8605 | #[inline ] |
8606 | #[target_feature (enable = "avx512bw" )] |
8607 | pub unsafe fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { |
8608 | _knot_mask64(a) & b |
8609 | } |
8610 | |
8611 | /// Compute the bitwise OR of 32-bit masks a and b, and store the result in k. |
8612 | /// |
8613 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask32&expand=3240) |
8614 | #[inline ] |
8615 | #[target_feature (enable = "avx512bw" )] |
8616 | pub unsafe fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { |
8617 | a | b |
8618 | } |
8619 | |
8620 | /// Compute the bitwise OR of 64-bit masks a and b, and store the result in k. |
8621 | /// |
8622 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask64&expand=3241) |
8623 | #[inline ] |
8624 | #[target_feature (enable = "avx512bw" )] |
8625 | pub unsafe fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { |
8626 | a | b |
8627 | } |
8628 | |
8629 | /// Compute the bitwise XOR of 32-bit masks a and b, and store the result in k. |
8630 | /// |
8631 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask32&expand=3292) |
8632 | #[inline ] |
8633 | #[target_feature (enable = "avx512bw" )] |
8634 | pub unsafe fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { |
8635 | a ^ b |
8636 | } |
8637 | |
8638 | /// Compute the bitwise XOR of 64-bit masks a and b, and store the result in k. |
8639 | /// |
8640 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask64&expand=3293) |
8641 | #[inline ] |
8642 | #[target_feature (enable = "avx512bw" )] |
8643 | pub unsafe fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { |
8644 | a ^ b |
8645 | } |
8646 | |
8647 | /// Compute the bitwise XNOR of 32-bit masks a and b, and store the result in k. |
8648 | /// |
8649 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask32&expand=3286) |
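///
/// A minimal illustrative sketch (the operand values are arbitrary; `avx512bw` is assumed):
///
/// ```ignore
/// // XNOR sets a bit wherever a and b agree.
/// let a: __mmask32 = 0b1100;
/// let b: __mmask32 = 0b1010;
/// assert_eq!(_kxnor_mask32(a, b), !0b0110);
/// ```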
8650 | #[inline ] |
8651 | #[target_feature (enable = "avx512bw" )] |
8652 | pub unsafe fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { |
8653 | _knot_mask32(a ^ b) |
8654 | } |
8655 | |
8656 | /// Compute the bitwise XNOR of 64-bit masks a and b, and store the result in k. |
8657 | /// |
8658 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask64&expand=3287) |
8659 | #[inline ] |
8660 | #[target_feature (enable = "avx512bw" )] |
8661 | pub unsafe fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { |
8662 | _knot_mask64(a ^ b) |
8663 | } |
8664 | |
8665 | /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. |
8666 | /// |
8667 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi8&expand=1407) |
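///
/// A minimal illustrative sketch of the truncating behaviour (the input value is arbitrary;
/// `avx512bw` is assumed):
///
/// ```ignore
/// // 384 = 0x0180 does not fit in an i8; truncation keeps only the low byte, 0x80 (-128).
/// let a = _mm512_set1_epi16(384);
/// let r = _mm512_cvtepi16_epi8(a);
/// // Every byte of r is -128.
/// ```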
8668 | #[inline ] |
8669 | #[target_feature (enable = "avx512bw" )] |
8670 | #[cfg_attr (test, assert_instr(vpmovwb))] |
8671 | pub unsafe fn _mm512_cvtepi16_epi8(a: __m512i) -> __m256i { |
8672 | let a: i16x32 = a.as_i16x32(); |
transmute::<i8x32, _>(simd_cast(a))
8674 | } |
8675 | |
8676 | /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8677 | /// |
8678 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi8&expand=1408) |
8679 | #[inline ] |
8680 | #[target_feature (enable = "avx512bw" )] |
8681 | #[cfg_attr (test, assert_instr(vpmovwb))] |
8682 | pub unsafe fn _mm512_mask_cvtepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i { |
8683 | let convert: i8x32 = _mm512_cvtepi16_epi8(a).as_i8x32(); |
transmute(simd_select_bitmask(k, convert, src.as_i8x32()))
8685 | } |
8686 | |
8687 | /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8688 | /// |
8689 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi8&expand=1409) |
8690 | #[inline ] |
8691 | #[target_feature (enable = "avx512bw" )] |
8692 | #[cfg_attr (test, assert_instr(vpmovwb))] |
8693 | pub unsafe fn _mm512_maskz_cvtepi16_epi8(k: __mmask32, a: __m512i) -> __m256i { |
8694 | let convert: i8x32 = _mm512_cvtepi16_epi8(a).as_i8x32(); |
transmute(simd_select_bitmask(
k,
convert,
_mm256_setzero_si256().as_i8x32(),
))
8700 | } |
8701 | |
8702 | /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. |
8703 | /// |
8704 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi8&expand=1404) |
8705 | #[inline ] |
8706 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8707 | #[cfg_attr (test, assert_instr(vpmovwb))] |
8708 | pub unsafe fn _mm256_cvtepi16_epi8(a: __m256i) -> __m128i { |
8709 | let a: i16x16 = a.as_i16x16(); |
transmute::<i8x16, _>(simd_cast(a))
8711 | } |
8712 | |
8713 | /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8714 | /// |
8715 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi8&expand=1405) |
8716 | #[inline ] |
8717 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8718 | #[cfg_attr (test, assert_instr(vpmovwb))] |
8719 | pub unsafe fn _mm256_mask_cvtepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i { |
8720 | let convert: i8x16 = _mm256_cvtepi16_epi8(a).as_i8x16(); |
transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
8722 | } |
8723 | |
8724 | /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8725 | /// |
8726 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi8&expand=1406) |
8727 | #[inline ] |
8728 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8729 | #[cfg_attr (test, assert_instr(vpmovwb))] |
8730 | pub unsafe fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i { |
8731 | let convert: i8x16 = _mm256_cvtepi16_epi8(a).as_i8x16(); |
transmute(simd_select_bitmask(
k,
convert,
_mm_setzero_si128().as_i8x16(),
))
8737 | } |
8738 | |
8739 | /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. |
8740 | /// |
8741 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi8&expand=1401) |
8742 | #[inline ] |
8743 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8744 | #[cfg_attr (test, assert_instr(vpmovwb))] |
8745 | pub unsafe fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i { |
8746 | let a: i16x8 = a.as_i16x8(); |
8747 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
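// Pad the eight input words with zeros (index 8 selects the first element of `zero`)
// so that a single `simd_cast` yields the eight truncated bytes in the low half of the
// result and zeros in the high half, as the intrinsic requires.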
8748 | let v256: i16x16 = simd_shuffle!(a, zero, [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]); |
transmute::<i8x16, _>(simd_cast(v256))
8750 | } |
8751 | |
8752 | /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8753 | /// |
8754 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi8&expand=1402) |
8755 | #[inline ] |
8756 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8757 | #[cfg_attr (test, assert_instr(vpmovwb))] |
8758 | pub unsafe fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { |
8759 | let convert: i8x16 = _mm_cvtepi16_epi8(a).as_i8x16(); |
8760 | let k: __mmask16 = 0b11111111_11111111 & k as __mmask16; |
transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
8762 | } |
8763 | |
8764 | /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8765 | /// |
8766 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi8&expand=1403) |
8767 | #[inline ] |
8768 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8769 | #[cfg_attr (test, assert_instr(vpmovwb))] |
8770 | pub unsafe fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i { |
8771 | let convert: i8x16 = _mm_cvtepi16_epi8(a).as_i8x16(); |
8772 | let k: __mmask16 = 0b11111111_11111111 & k as __mmask16; |
8773 | let zero: i8x16 = _mm_setzero_si128().as_i8x16(); |
transmute(simd_select_bitmask(k, convert, zero))
8775 | } |
8776 | |
8777 | /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. |
8778 | /// |
8779 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi16_epi8&expand=1807) |
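///
/// A minimal illustrative sketch contrasting saturation with truncation (the input value is
/// arbitrary; `avx512bw` is assumed):
///
/// ```ignore
/// // 384 does not fit in an i8; saturation clamps it to i8::MAX instead of truncating.
/// let a = _mm512_set1_epi16(384);
/// let r = _mm512_cvtsepi16_epi8(a);
/// // Every byte of r is 127.
/// ```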
8780 | #[inline ] |
8781 | #[target_feature (enable = "avx512bw" )] |
8782 | #[cfg_attr (test, assert_instr(vpmovswb))] |
8783 | pub unsafe fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i { |
transmute(vpmovswb(
a.as_i16x32(),
_mm256_setzero_si256().as_i8x32(),
0b11111111_11111111_11111111_11111111,
))
8789 | } |
8790 | |
8791 | /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8792 | /// |
8793 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_epi8&expand=1808) |
8794 | #[inline ] |
8795 | #[target_feature (enable = "avx512bw" )] |
8796 | #[cfg_attr (test, assert_instr(vpmovswb))] |
8797 | pub unsafe fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i { |
transmute(vpmovswb(a.as_i16x32(), src.as_i8x32(), k))
8799 | } |
8800 | |
8801 | /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8802 | /// |
8803 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi16_epi8&expand=1809) |
8804 | #[inline ] |
8805 | #[target_feature (enable = "avx512bw" )] |
8806 | #[cfg_attr (test, assert_instr(vpmovswb))] |
8807 | pub unsafe fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i { |
transmute(vpmovswb(
a.as_i16x32(),
_mm256_setzero_si256().as_i8x32(),
k,
))
8813 | } |
8814 | |
8815 | /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. |
8816 | /// |
8817 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi16_epi8&expand=1804) |
8818 | #[inline ] |
8819 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8820 | #[cfg_attr (test, assert_instr(vpmovswb))] |
8821 | pub unsafe fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i { |
transmute(vpmovswb256(
a.as_i16x16(),
_mm_setzero_si128().as_i8x16(),
0b11111111_11111111,
))
8827 | } |
8828 | |
8829 | /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8830 | /// |
8831 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_epi8&expand=1805) |
8832 | #[inline ] |
8833 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8834 | #[cfg_attr (test, assert_instr(vpmovswb))] |
8835 | pub unsafe fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i { |
transmute(vpmovswb256(a.as_i16x16(), src.as_i8x16(), k))
8837 | } |
8838 | |
8839 | /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8840 | /// |
8841 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi16_epi8&expand=1806) |
8842 | #[inline ] |
8843 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8844 | #[cfg_attr (test, assert_instr(vpmovswb))] |
8845 | pub unsafe fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i { |
transmute(vpmovswb256(
a.as_i16x16(),
_mm_setzero_si128().as_i8x16(),
k,
))
8851 | } |
8852 | |
8853 | /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. |
8854 | /// |
8855 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi16_epi8&expand=1801) |
8856 | #[inline ] |
8857 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8858 | #[cfg_attr (test, assert_instr(vpmovswb))] |
8859 | pub unsafe fn _mm_cvtsepi16_epi8(a: __m128i) -> __m128i { |
transmute(vpmovswb128(
a.as_i16x8(),
_mm_setzero_si128().as_i8x16(),
0b11111111,
))
8865 | } |
8866 | |
8867 | /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8868 | /// |
8869 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_epi8&expand=1802) |
8870 | #[inline ] |
8871 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8872 | #[cfg_attr (test, assert_instr(vpmovswb))] |
8873 | pub unsafe fn _mm_mask_cvtsepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { |
transmute(vpmovswb128(a.as_i16x8(), src.as_i8x16(), k))
8875 | } |
8876 | |
8877 | /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8878 | /// |
8879 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi16_epi8&expand=1803) |
8880 | #[inline ] |
8881 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8882 | #[cfg_attr (test, assert_instr(vpmovswb))] |
8883 | pub unsafe fn _mm_maskz_cvtsepi16_epi8(k: __mmask8, a: __m128i) -> __m128i { |
transmute(vpmovswb128(a.as_i16x8(), _mm_setzero_si128().as_i8x16(), k))
8885 | } |
8886 | |
8887 | /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. |
8888 | /// |
8889 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi16_epi8&expand=2042) |
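///
/// A minimal illustrative sketch (the input value is arbitrary; `avx512bw` is assumed):
///
/// ```ignore
/// // 384 does not fit in a u8; unsigned saturation clamps it to u8::MAX.
/// let a = _mm512_set1_epi16(384);
/// let r = _mm512_cvtusepi16_epi8(a);
/// // Every byte of r is 0xFF.
/// ```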
8890 | #[inline ] |
8891 | #[target_feature (enable = "avx512bw" )] |
8892 | #[cfg_attr (test, assert_instr(vpmovuswb))] |
8893 | pub unsafe fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i { |
transmute(vpmovuswb(
a.as_u16x32(),
_mm256_setzero_si256().as_u8x32(),
0b11111111_11111111_11111111_11111111,
))
8899 | } |
8900 | |
8901 | /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8902 | /// |
8903 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_epi8&expand=2043) |
8904 | #[inline ] |
8905 | #[target_feature (enable = "avx512bw" )] |
8906 | #[cfg_attr (test, assert_instr(vpmovuswb))] |
8907 | pub unsafe fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i { |
transmute(vpmovuswb(a.as_u16x32(), src.as_u8x32(), k))
8909 | } |
8910 | |
8911 | /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8912 | /// |
8913 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi16_epi8&expand=2044) |
8914 | #[inline ] |
8915 | #[target_feature (enable = "avx512bw" )] |
8916 | #[cfg_attr (test, assert_instr(vpmovuswb))] |
8917 | pub unsafe fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i { |
transmute(vpmovuswb(
a.as_u16x32(),
_mm256_setzero_si256().as_u8x32(),
k,
))
8923 | } |
8924 | |
8925 | /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. |
8926 | /// |
8927 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi16_epi8&expand=2039) |
8928 | #[inline ] |
8929 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8930 | #[cfg_attr (test, assert_instr(vpmovuswb))] |
8931 | pub unsafe fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i { |
transmute(vpmovuswb256(
a.as_u16x16(),
_mm_setzero_si128().as_u8x16(),
0b11111111_11111111,
))
8937 | } |
8938 | |
8939 | /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8940 | /// |
8941 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_epi8&expand=2040) |
8942 | #[inline ] |
8943 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8944 | #[cfg_attr (test, assert_instr(vpmovuswb))] |
8945 | pub unsafe fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i { |
transmute(vpmovuswb256(a.as_u16x16(), src.as_u8x16(), k))
8947 | } |
8948 | |
8949 | /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8950 | /// |
8951 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi16_epi8&expand=2041) |
8952 | #[inline ] |
8953 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8954 | #[cfg_attr (test, assert_instr(vpmovuswb))] |
8955 | pub unsafe fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i { |
transmute(vpmovuswb256(
a.as_u16x16(),
_mm_setzero_si128().as_u8x16(),
k,
))
8961 | } |
8962 | |
8963 | /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. |
8964 | /// |
8965 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi16_epi8&expand=2036) |
8966 | #[inline ] |
8967 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8968 | #[cfg_attr (test, assert_instr(vpmovuswb))] |
8969 | pub unsafe fn _mm_cvtusepi16_epi8(a: __m128i) -> __m128i { |
transmute(vpmovuswb128(
a.as_u16x8(),
_mm_setzero_si128().as_u8x16(),
0b11111111,
))
8975 | } |
8976 | |
8977 | /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8978 | /// |
8979 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_epi8&expand=2037) |
8980 | #[inline ] |
8981 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8982 | #[cfg_attr (test, assert_instr(vpmovuswb))] |
8983 | pub unsafe fn _mm_mask_cvtusepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { |
transmute(vpmovuswb128(a.as_u16x8(), src.as_u8x16(), k))
8985 | } |
8986 | |
8987 | /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8988 | /// |
8989 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi16_epi8&expand=2038) |
8990 | #[inline ] |
8991 | #[target_feature (enable = "avx512bw,avx512vl" )] |
8992 | #[cfg_attr (test, assert_instr(vpmovuswb))] |
8993 | pub unsafe fn _mm_maskz_cvtusepi16_epi8(k: __mmask8, a: __m128i) -> __m128i { |
transmute(vpmovuswb128(
a.as_u16x8(),
_mm_setzero_si128().as_u8x16(),
k,
))
8999 | } |
9000 | |
9001 | /// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst. |
9002 | /// |
9003 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi16&expand=1526) |
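///
/// A minimal illustrative sketch (the input value is arbitrary; `avx512bw` is assumed):
///
/// ```ignore
/// let a = _mm256_set1_epi8(-2);
/// let r = _mm512_cvtepi8_epi16(a);
/// // Every 16-bit element of r is -2: the sign bit is replicated into the high byte.
/// ```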
9004 | #[inline ] |
9005 | #[target_feature (enable = "avx512bw" )] |
9006 | #[cfg_attr (test, assert_instr(vpmovsxbw))] |
9007 | pub unsafe fn _mm512_cvtepi8_epi16(a: __m256i) -> __m512i { |
9008 | let a: i8x32 = a.as_i8x32(); |
transmute::<i16x32, _>(simd_cast(a))
9010 | } |
9011 | |
9012 | /// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
9013 | /// |
9014 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi16&expand=1527) |
9015 | #[inline ] |
9016 | #[target_feature (enable = "avx512bw" )] |
9017 | #[cfg_attr (test, assert_instr(vpmovsxbw))] |
9018 | pub unsafe fn _mm512_mask_cvtepi8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i { |
9019 | let convert: i16x32 = _mm512_cvtepi8_epi16(a).as_i16x32(); |
transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
9021 | } |
9022 | |
9023 | /// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
9024 | /// |
9025 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi16&expand=1528) |
9026 | #[inline ] |
9027 | #[target_feature (enable = "avx512bw" )] |
9028 | #[cfg_attr (test, assert_instr(vpmovsxbw))] |
9029 | pub unsafe fn _mm512_maskz_cvtepi8_epi16(k: __mmask32, a: __m256i) -> __m512i { |
9030 | let convert: i16x32 = _mm512_cvtepi8_epi16(a).as_i16x32(); |
transmute(simd_select_bitmask(
k,
convert,
_mm512_setzero_si512().as_i16x32(),
))
9036 | } |
9037 | |
9038 | /// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
9039 | /// |
9040 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi16&expand=1524) |
9041 | #[inline ] |
9042 | #[target_feature (enable = "avx512bw,avx512vl" )] |
9043 | #[cfg_attr (test, assert_instr(vpmovsxbw))] |
9044 | pub unsafe fn _mm256_mask_cvtepi8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i { |
9045 | let convert: i16x16 = _mm256_cvtepi8_epi16(a).as_i16x16(); |
transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
9047 | } |
9048 | |
9049 | /// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
9050 | /// |
9051 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi16&expand=1525) |
9052 | #[inline ] |
9053 | #[target_feature (enable = "avx512bw,avx512vl" )] |
9054 | #[cfg_attr (test, assert_instr(vpmovsxbw))] |
9055 | pub unsafe fn _mm256_maskz_cvtepi8_epi16(k: __mmask16, a: __m128i) -> __m256i { |
9056 | let convert: i16x16 = _mm256_cvtepi8_epi16(a).as_i16x16(); |
transmute(simd_select_bitmask(
k,
convert,
_mm256_setzero_si256().as_i16x16(),
))
9062 | } |
9063 | |
9064 | /// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
9065 | /// |
9066 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi16&expand=1521) |
9067 | #[inline ] |
9068 | #[target_feature (enable = "avx512bw,avx512vl" )] |
9069 | #[cfg_attr (test, assert_instr(vpmovsxbw))] |
9070 | pub unsafe fn _mm_mask_cvtepi8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { |
9071 | let convert: i16x8 = _mm_cvtepi8_epi16(a).as_i16x8(); |
transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
9073 | } |
9074 | |
9075 | /// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
9076 | /// |
9077 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi16&expand=1522) |
9078 | #[inline ] |
9079 | #[target_feature (enable = "avx512bw,avx512vl" )] |
9080 | #[cfg_attr (test, assert_instr(vpmovsxbw))] |
9081 | pub unsafe fn _mm_maskz_cvtepi8_epi16(k: __mmask8, a: __m128i) -> __m128i { |
9082 | let convert: i16x8 = _mm_cvtepi8_epi16(a).as_i16x8(); |
transmute(simd_select_bitmask(
k,
convert,
_mm_setzero_si128().as_i16x8(),
))
9088 | } |
9089 | |
9090 | /// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst. |
9091 | /// |
9092 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi16&expand=1612) |
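///
/// A minimal illustrative sketch (the input value is arbitrary; `avx512bw` is assumed):
///
/// ```ignore
/// let a = _mm256_set1_epi8(-2); // 0xFE, i.e. 254 when read as an unsigned byte
/// let r = _mm512_cvtepu8_epi16(a);
/// // Every 16-bit element of r is 254: the high byte is zero-filled.
/// ```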
9093 | #[inline ] |
9094 | #[target_feature (enable = "avx512bw" )] |
9095 | #[cfg_attr (test, assert_instr(vpmovzxbw))] |
9096 | pub unsafe fn _mm512_cvtepu8_epi16(a: __m256i) -> __m512i { |
9097 | let a: u8x32 = a.as_u8x32(); |
transmute::<i16x32, _>(simd_cast(a))
9099 | } |
9100 | |
9101 | /// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
9102 | /// |
9103 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi16&expand=1613) |
9104 | #[inline ] |
9105 | #[target_feature (enable = "avx512bw" )] |
9106 | #[cfg_attr (test, assert_instr(vpmovzxbw))] |
9107 | pub unsafe fn _mm512_mask_cvtepu8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i { |
9108 | let convert: i16x32 = _mm512_cvtepu8_epi16(a).as_i16x32(); |
transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
9110 | } |
9111 | |
9112 | /// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
9113 | /// |
9114 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi16&expand=1614) |
9115 | #[inline ] |
9116 | #[target_feature (enable = "avx512bw" )] |
9117 | #[cfg_attr (test, assert_instr(vpmovzxbw))] |
9118 | pub unsafe fn _mm512_maskz_cvtepu8_epi16(k: __mmask32, a: __m256i) -> __m512i { |
9119 | let convert: i16x32 = _mm512_cvtepu8_epi16(a).as_i16x32(); |
transmute(simd_select_bitmask(
k,
convert,
_mm512_setzero_si512().as_i16x32(),
))
9125 | } |
9126 | |
9127 | /// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
9128 | /// |
9129 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi16&expand=1610) |
9130 | #[inline ] |
9131 | #[target_feature (enable = "avx512bw,avx512vl" )] |
9132 | #[cfg_attr (test, assert_instr(vpmovzxbw))] |
9133 | pub unsafe fn _mm256_mask_cvtepu8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i { |
9134 | let convert: i16x16 = _mm256_cvtepu8_epi16(a).as_i16x16(); |
transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
9136 | } |
9137 | |
9138 | /// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
9139 | /// |
9140 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi16&expand=1611) |
9141 | #[inline ] |
9142 | #[target_feature (enable = "avx512bw,avx512vl" )] |
9143 | #[cfg_attr (test, assert_instr(vpmovzxbw))] |
9144 | pub unsafe fn _mm256_maskz_cvtepu8_epi16(k: __mmask16, a: __m128i) -> __m256i { |
9145 | let convert: i16x16 = _mm256_cvtepu8_epi16(a).as_i16x16(); |
transmute(simd_select_bitmask(
k,
convert,
_mm256_setzero_si256().as_i16x16(),
))
9151 | } |
9152 | |
9153 | /// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
9154 | /// |
9155 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi16&expand=1607) |
9156 | #[inline ] |
9157 | #[target_feature (enable = "avx512bw,avx512vl" )] |
9158 | #[cfg_attr (test, assert_instr(vpmovzxbw))] |
9159 | pub unsafe fn _mm_mask_cvtepu8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { |
9160 | let convert: i16x8 = _mm_cvtepu8_epi16(a).as_i16x8(); |
transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
9162 | } |
9163 | |
9164 | /// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
9165 | /// |
9166 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi16&expand=1608) |
9167 | #[inline ] |
9168 | #[target_feature (enable = "avx512bw,avx512vl" )] |
9169 | #[cfg_attr (test, assert_instr(vpmovzxbw))] |
9170 | pub unsafe fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i { |
9171 | let convert: i16x8 = _mm_cvtepu8_epi16(a).as_i16x8(); |
transmute(simd_select_bitmask(
k,
convert,
_mm_setzero_si128().as_i16x8(),
))
9177 | } |
9178 | |
9179 | /// Shift 128-bit lanes in a left by imm8 bytes while shifting in zeros, and store the results in dst. |
9180 | /// |
9181 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bslli_epi128&expand=591) |
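///
/// A minimal illustrative sketch (the shift count and input are arbitrary; `avx512bw` is assumed):
///
/// ```ignore
/// // Within each 128-bit lane, bytes move up by IMM8 positions and zeros shift in at the bottom.
/// let a = _mm512_set1_epi8(1);
/// let r = _mm512_bslli_epi128::<3>(a);
/// // In every 16-byte lane of r, bytes 0..=2 are 0 and bytes 3..=15 are 1.
/// ```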
9182 | #[inline ] |
9183 | #[target_feature (enable = "avx512bw" )] |
9184 | #[cfg_attr (test, assert_instr(vpslldq, IMM8 = 3))] |
9185 | #[rustc_legacy_const_generics (1)] |
9186 | pub unsafe fn _mm512_bslli_epi128<const IMM8: i32>(a: __m512i) -> __m512i { |
9187 | static_assert_uimm_bits!(IMM8, 8); |
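// `mask` computes, for output byte `i`, the shuffle index of byte `i - shift` of `a`
// within the same 128-bit lane, or 0 when the source would cross the lane boundary.
// The shuffle operands below are (zero, a), so indices 0..=63 read zeros and
// index 64 + n reads byte n of `a`.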
9188 | const fn mask(shift: i32, i: u32) -> u32 { |
9189 | let shift = shift as u32 & 0xff; |
9190 | if shift > 15 || i % 16 < shift { |
9191 | 0 |
9192 | } else { |
9193 | 64 + (i - shift) |
9194 | } |
9195 | } |
9196 | let a = a.as_i8x64(); |
9197 | let zero = _mm512_setzero_si512().as_i8x64(); |
9198 | let r: i8x64 = simd_shuffle!( |
9199 | zero, |
9200 | a, |
9201 | [ |
9202 | mask(IMM8, 0), |
9203 | mask(IMM8, 1), |
9204 | mask(IMM8, 2), |
9205 | mask(IMM8, 3), |
9206 | mask(IMM8, 4), |
9207 | mask(IMM8, 5), |
9208 | mask(IMM8, 6), |
9209 | mask(IMM8, 7), |
9210 | mask(IMM8, 8), |
9211 | mask(IMM8, 9), |
9212 | mask(IMM8, 10), |
9213 | mask(IMM8, 11), |
9214 | mask(IMM8, 12), |
9215 | mask(IMM8, 13), |
9216 | mask(IMM8, 14), |
9217 | mask(IMM8, 15), |
9218 | mask(IMM8, 16), |
9219 | mask(IMM8, 17), |
9220 | mask(IMM8, 18), |
9221 | mask(IMM8, 19), |
9222 | mask(IMM8, 20), |
9223 | mask(IMM8, 21), |
9224 | mask(IMM8, 22), |
9225 | mask(IMM8, 23), |
9226 | mask(IMM8, 24), |
9227 | mask(IMM8, 25), |
9228 | mask(IMM8, 26), |
9229 | mask(IMM8, 27), |
9230 | mask(IMM8, 28), |
9231 | mask(IMM8, 29), |
9232 | mask(IMM8, 30), |
9233 | mask(IMM8, 31), |
9234 | mask(IMM8, 32), |
9235 | mask(IMM8, 33), |
9236 | mask(IMM8, 34), |
9237 | mask(IMM8, 35), |
9238 | mask(IMM8, 36), |
9239 | mask(IMM8, 37), |
9240 | mask(IMM8, 38), |
9241 | mask(IMM8, 39), |
9242 | mask(IMM8, 40), |
9243 | mask(IMM8, 41), |
9244 | mask(IMM8, 42), |
9245 | mask(IMM8, 43), |
9246 | mask(IMM8, 44), |
9247 | mask(IMM8, 45), |
9248 | mask(IMM8, 46), |
9249 | mask(IMM8, 47), |
9250 | mask(IMM8, 48), |
9251 | mask(IMM8, 49), |
9252 | mask(IMM8, 50), |
9253 | mask(IMM8, 51), |
9254 | mask(IMM8, 52), |
9255 | mask(IMM8, 53), |
9256 | mask(IMM8, 54), |
9257 | mask(IMM8, 55), |
9258 | mask(IMM8, 56), |
9259 | mask(IMM8, 57), |
9260 | mask(IMM8, 58), |
9261 | mask(IMM8, 59), |
9262 | mask(IMM8, 60), |
9263 | mask(IMM8, 61), |
9264 | mask(IMM8, 62), |
9265 | mask(IMM8, 63), |
9266 | ], |
9267 | ); |
9268 | transmute(r) |
9269 | } |
9270 | |
9271 | /// Shift 128-bit lanes in a right by imm8 bytes while shifting in zeros, and store the results in dst. |
9272 | /// |
9273 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bsrli_epi128&expand=594) |
9274 | #[inline ] |
9275 | #[target_feature (enable = "avx512bw" )] |
9276 | #[cfg_attr (test, assert_instr(vpsrldq, IMM8 = 3))] |
9277 | #[rustc_legacy_const_generics (1)] |
9278 | pub unsafe fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i { |
9279 | static_assert_uimm_bits!(IMM8, 8); |
9280 | let a = a.as_i8x64(); |
9281 | let zero = _mm512_setzero_si512().as_i8x64(); |
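// Each arm shifts the bytes of every 128-bit lane down by IMM8 positions. The shuffle
// operands are (a, zero), so indices 64 and above read zeros, which shift in at the top
// of each lane.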
9282 | let r: i8x64 = match IMM8 % 16 { |
9283 | 0 => simd_shuffle!( |
9284 | a, |
9285 | zero, |
9286 | [ |
9287 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, |
9288 | 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, |
9289 | 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, |
9290 | ], |
9291 | ), |
9292 | 1 => simd_shuffle!( |
9293 | a, |
9294 | zero, |
9295 | [ |
9296 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 17, 18, 19, 20, 21, 22, 23, |
9297 | 24, 25, 26, 27, 28, 29, 30, 31, 80, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, |
9298 | 45, 46, 47, 96, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, |
9299 | ], |
9300 | ), |
9301 | 2 => simd_shuffle!( |
9302 | a, |
9303 | zero, |
9304 | [ |
9305 | 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 18, 19, 20, 21, 22, 23, 24, |
9306 | 25, 26, 27, 28, 29, 30, 31, 80, 81, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, |
9307 | 46, 47, 96, 97, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, |
9308 | ], |
9309 | ), |
9310 | 3 => simd_shuffle!( |
9311 | a, |
9312 | zero, |
9313 | [ |
9314 | 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 19, 20, 21, 22, 23, 24, |
9315 | 25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, |
9316 | 46, 47, 96, 97, 98, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, |
9317 | 114, |
9318 | ], |
9319 | ), |
9320 | 4 => simd_shuffle!( |
9321 | a, |
9322 | zero, |
9323 | [ |
9324 | 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 20, 21, 22, 23, 24, 25, |
9325 | 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, |
9326 | 47, 96, 97, 98, 99, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, |
9327 | 115, |
9328 | ], |
9329 | ), |
9330 | 5 => simd_shuffle!( |
9331 | a, |
9332 | zero, |
9333 | [ |
9334 | 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 21, 22, 23, 24, 25, 26, |
9335 | 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, |
9336 | 96, 97, 98, 99, 100, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, |
9337 | 115, 116, |
9338 | ], |
9339 | ), |
9340 | 6 => simd_shuffle!( |
9341 | a, |
9342 | zero, |
9343 | [ |
9344 | 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 22, 23, 24, 25, 26, 27, |
9345 | 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96, |
9346 | 97, 98, 99, 100, 101, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, |
9347 | 116, 117, |
9348 | ], |
9349 | ), |
9350 | 7 => simd_shuffle!( |
9351 | a, |
9352 | zero, |
9353 | [ |
9354 | 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 23, 24, 25, 26, 27, |
9355 | 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96, |
9356 | 97, 98, 99, 100, 101, 102, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, |
9357 | 116, 117, 118, |
9358 | ], |
9359 | ), |
9360 | 8 => simd_shuffle!( |
9361 | a, |
9362 | zero, |
9363 | [ |
9364 | 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 24, 25, 26, 27, 28, |
9365 | 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 40, 41, 42, 43, 44, 45, 46, 47, 96, 97, |
9366 | 98, 99, 100, 101, 102, 103, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, |
9367 | 116, 117, 118, 119, |
9368 | ], |
9369 | ), |
9370 | 9 => simd_shuffle!( |
9371 | a, |
9372 | zero, |
9373 | [ |
9374 | 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 25, 26, 27, 28, 29, |
9375 | 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 41, 42, 43, 44, 45, 46, 47, 96, 97, 98, |
9376 | 99, 100, 101, 102, 103, 104, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, |
9377 | 117, 118, 119, 120, |
9378 | ], |
9379 | ), |
9380 | 10 => simd_shuffle!( |
9381 | a, |
9382 | zero, |
9383 | [ |
9384 | 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 26, 27, 28, 29, 30, |
9385 | 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 42, 43, 44, 45, 46, 47, 96, 97, 98, 99, |
9386 | 100, 101, 102, 103, 104, 105, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117, |
9387 | 118, 119, 120, 121, |
9388 | ], |
9389 | ), |
9390 | 11 => simd_shuffle!( |
9391 | a, |
9392 | zero, |
9393 | [ |
9394 | 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 27, 28, 29, 30, 31, |
9395 | 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 43, 44, 45, 46, 47, 96, 97, 98, 99, |
9396 | 100, 101, 102, 103, 104, 105, 106, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, |
9397 | 117, 118, 119, 120, 121, 122, |
9398 | ], |
9399 | ), |
9400 | 12 => simd_shuffle!( |
9401 | a, |
9402 | zero, |
9403 | [ |
9404 | 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 28, 29, 30, 31, 80, |
9405 | 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 44, 45, 46, 47, 96, 97, 98, 99, 100, |
9406 | 101, 102, 103, 104, 105, 106, 107, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117, |
9407 | 118, 119, 120, 121, 122, 123, |
9408 | ], |
9409 | ), |
9410 | 13 => simd_shuffle!( |
9411 | a, |
9412 | zero, |
9413 | [ |
9414 | 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 29, 30, 31, 80, 81, |
9415 | 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 45, 46, 47, 96, 97, 98, 99, 100, 101, |
9416 | 102, 103, 104, 105, 106, 107, 108, 61, 62, 63, 112, 113, 114, 115, 116, 117, 118, |
9417 | 119, 120, 121, 122, 123, 124, |
9418 | ], |
9419 | ), |
9420 | 14 => simd_shuffle!( |
9421 | a, |
9422 | zero, |
9423 | [ |
9424 | 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 30, 31, 80, 81, 82, |
9425 | 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 46, 47, 96, 97, 98, 99, 100, 101, 102, |
9426 | 103, 104, 105, 106, 107, 108, 109, 62, 63, 112, 113, 114, 115, 116, 117, 118, 119, |
9427 | 120, 121, 122, 123, 124, 125, |
9428 | ], |
9429 | ), |
9430 | 15 => simd_shuffle!( |
9431 | a, |
9432 | zero, |
9433 | [ |
9434 | 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 31, 80, 81, 82, 83, |
9435 | 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 47, 96, 97, 98, 99, 100, 101, 102, 103, |
9436 | 104, 105, 106, 107, 108, 109, 110, 63, 112, 113, 114, 115, 116, 117, 118, 119, 120, |
9437 | 121, 122, 123, 124, 125, 126, |
9438 | ], |
9439 | ), |
9440 | _ => zero, |
9441 | }; |
9442 | transmute(r) |
9443 | } |
9444 | |
9445 | /// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst. |
9446 | /// |
9447 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi8&expand=263) |
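///
/// A minimal illustrative sketch (the shift count and inputs are arbitrary; `avx512bw` is assumed):
///
/// ```ignore
/// let a = _mm512_set1_epi8(7);
/// let b = _mm512_set1_epi8(3);
/// let r = _mm512_alignr_epi8::<1>(a, b);
/// // In every 16-byte lane of r, bytes 0..=14 come from b (3) and byte 15 comes from a (7).
/// ```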
9448 | #[inline ] |
9449 | #[target_feature (enable = "avx512bw" )] |
9450 | #[cfg_attr (test, assert_instr(vpalignr, IMM8 = 1))] |
9451 | #[rustc_legacy_const_generics (2)] |
pub unsafe fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
static_assert_uimm_bits!(IMM8, 8);
9453 | // If palignr is shifting the pair of vectors more than the size of two |
9454 | // lanes, emit zero. |
9455 | if IMM8 > 32 { |
9456 | return _mm512_set1_epi8(0); |
9457 | } |
9458 | // If palignr is shifting the pair of input vectors more than one lane, |
9459 | // but less than two lanes, convert to shifting in zeroes. |
9460 | let (a, b) = if IMM8 > 16 { |
9461 | (_mm512_set1_epi8(0), a) |
9462 | } else { |
9463 | (a, b) |
9464 | }; |
9465 | let a = a.as_i8x64(); |
9466 | let b = b.as_i8x64(); |
9467 | |
9468 | let r: i8x64 = match IMM8 % 16 { |
9469 | 0 => simd_shuffle!( |
9470 | b, |
9471 | a, |
9472 | [ |
9473 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, |
9474 | 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, |
9475 | 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, |
9476 | ], |
9477 | ), |
9478 | 1 => simd_shuffle!( |
9479 | b, |
9480 | a, |
9481 | [ |
9482 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 17, 18, 19, 20, 21, 22, 23, |
9483 | 24, 25, 26, 27, 28, 29, 30, 31, 80, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, |
9484 | 45, 46, 47, 96, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, |
9485 | ], |
9486 | ), |
9487 | 2 => simd_shuffle!( |
9488 | b, |
9489 | a, |
9490 | [ |
9491 | 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 18, 19, 20, 21, 22, 23, 24, |
9492 | 25, 26, 27, 28, 29, 30, 31, 80, 81, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, |
9493 | 46, 47, 96, 97, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, |
9494 | ], |
9495 | ), |
9496 | 3 => simd_shuffle!( |
9497 | b, |
9498 | a, |
9499 | [ |
9500 | 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 19, 20, 21, 22, 23, 24, |
9501 | 25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, |
9502 | 46, 47, 96, 97, 98, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, |
9503 | 114, |
9504 | ], |
9505 | ), |
9506 | 4 => simd_shuffle!( |
9507 | b, |
9508 | a, |
9509 | [ |
9510 | 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 20, 21, 22, 23, 24, 25, |
9511 | 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, |
9512 | 47, 96, 97, 98, 99, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, |
9513 | 115, |
9514 | ], |
9515 | ), |
9516 | 5 => simd_shuffle!( |
9517 | b, |
9518 | a, |
9519 | [ |
9520 | 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 21, 22, 23, 24, 25, 26, |
9521 | 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, |
9522 | 96, 97, 98, 99, 100, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, |
9523 | 115, 116, |
9524 | ], |
9525 | ), |
9526 | 6 => simd_shuffle!( |
9527 | b, |
9528 | a, |
9529 | [ |
9530 | 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 22, 23, 24, 25, 26, 27, |
9531 | 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96, |
9532 | 97, 98, 99, 100, 101, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, |
9533 | 116, 117, |
9534 | ], |
9535 | ), |
9536 | 7 => simd_shuffle!( |
9537 | b, |
9538 | a, |
9539 | [ |
9540 | 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 23, 24, 25, 26, 27, |
9541 | 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96, |
9542 | 97, 98, 99, 100, 101, 102, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, |
9543 | 116, 117, 118, |
9544 | ], |
9545 | ), |
9546 | 8 => simd_shuffle!( |
9547 | b, |
9548 | a, |
9549 | [ |
9550 | 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 24, 25, 26, 27, 28, |
9551 | 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 40, 41, 42, 43, 44, 45, 46, 47, 96, 97, |
9552 | 98, 99, 100, 101, 102, 103, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, |
9553 | 116, 117, 118, 119, |
9554 | ], |
9555 | ), |
9556 | 9 => simd_shuffle!( |
9557 | b, |
9558 | a, |
9559 | [ |
9560 | 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 25, 26, 27, 28, 29, |
9561 | 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 41, 42, 43, 44, 45, 46, 47, 96, 97, 98, |
9562 | 99, 100, 101, 102, 103, 104, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, |
9563 | 117, 118, 119, 120, |
9564 | ], |
9565 | ), |
9566 | 10 => simd_shuffle!( |
9567 | b, |
9568 | a, |
9569 | [ |
9570 | 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 26, 27, 28, 29, 30, |
9571 | 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 42, 43, 44, 45, 46, 47, 96, 97, 98, 99, |
9572 | 100, 101, 102, 103, 104, 105, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117, |
9573 | 118, 119, 120, 121, |
9574 | ], |
9575 | ), |
9576 | 11 => simd_shuffle!( |
9577 | b, |
9578 | a, |
9579 | [ |
9580 | 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 27, 28, 29, 30, 31, |
9581 | 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 43, 44, 45, 46, 47, 96, 97, 98, 99, |
9582 | 100, 101, 102, 103, 104, 105, 106, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, |
9583 | 117, 118, 119, 120, 121, 122, |
9584 | ], |
9585 | ), |
9586 | 12 => simd_shuffle!( |
9587 | b, |
9588 | a, |
9589 | [ |
9590 | 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 28, 29, 30, 31, 80, |
9591 | 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 44, 45, 46, 47, 96, 97, 98, 99, 100, |
9592 | 101, 102, 103, 104, 105, 106, 107, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117, |
9593 | 118, 119, 120, 121, 122, 123, |
9594 | ], |
9595 | ), |
9596 | 13 => simd_shuffle!( |
9597 | b, |
9598 | a, |
9599 | [ |
9600 | 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 29, 30, 31, 80, 81, |
9601 | 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 45, 46, 47, 96, 97, 98, 99, 100, 101, |
9602 | 102, 103, 104, 105, 106, 107, 108, 61, 62, 63, 112, 113, 114, 115, 116, 117, 118, |
9603 | 119, 120, 121, 122, 123, 124, |
9604 | ], |
9605 | ), |
9606 | 14 => simd_shuffle!( |
9607 | b, |
9608 | a, |
9609 | [ |
9610 | 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 30, 31, 80, 81, 82, |
9611 | 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 46, 47, 96, 97, 98, 99, 100, 101, 102, |
9612 | 103, 104, 105, 106, 107, 108, 109, 62, 63, 112, 113, 114, 115, 116, 117, 118, 119, |
9613 | 120, 121, 122, 123, 124, 125, |
9614 | ], |
9615 | ), |
9616 | 15 => simd_shuffle!( |
9617 | b, |
9618 | a, |
9619 | [ |
9620 | 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 31, 80, 81, 82, 83, |
9621 | 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 47, 96, 97, 98, 99, 100, 101, 102, 103, |
9622 | 104, 105, 106, 107, 108, 109, 110, 63, 112, 113, 114, 115, 116, 117, 118, 119, 120, |
9623 | 121, 122, 123, 124, 125, 126, |
9624 | ], |
9625 | ), |
9626 | _ => b, |
9627 | }; |
9628 | transmute(r) |
9629 | } |
9630 | |
9631 | /// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
9632 | /// |
9633 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi8&expand=264) |
9634 | #[inline ] |
9635 | #[target_feature (enable = "avx512bw" )] |
9636 | #[cfg_attr (test, assert_instr(vpalignr, IMM8 = 1))] |
9637 | #[rustc_legacy_const_generics (4)] |
9638 | pub unsafe fn _mm512_mask_alignr_epi8<const IMM8: i32>( |
9639 | src: __m512i, |
9640 | k: __mmask64, |
9641 | a: __m512i, |
9642 | b: __m512i, |
9643 | ) -> __m512i { |
9644 | static_assert_uimm_bits!(IMM8, 8); |
9645 | let r: __m512i = _mm512_alignr_epi8::<IMM8>(a, b); |
transmute(simd_select_bitmask(k, r.as_i8x64(), src.as_i8x64()))
9647 | } |
9648 | |
9649 | /// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
9650 | /// |
9651 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi8&expand=265) |
9652 | #[inline ] |
9653 | #[target_feature (enable = "avx512bw" )] |
9654 | #[cfg_attr (test, assert_instr(vpalignr, IMM8 = 1))] |
9655 | #[rustc_legacy_const_generics (3)] |
9656 | pub unsafe fn _mm512_maskz_alignr_epi8<const IMM8: i32>( |
9657 | k: __mmask64, |
9658 | a: __m512i, |
9659 | b: __m512i, |
9660 | ) -> __m512i { |
9661 | static_assert_uimm_bits!(IMM8, 8); |
9662 | let r: __m512i = _mm512_alignr_epi8::<IMM8>(a, b); |
9663 | let zero: i8x64 = _mm512_setzero_si512().as_i8x64(); |
transmute(simd_select_bitmask(k, r.as_i8x64(), zero))
9665 | } |
9666 | |
9667 | /// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
9668 | /// |
9669 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi8&expand=261) |
9670 | #[inline ] |
9671 | #[target_feature (enable = "avx512bw,avx512vl" )] |
9672 | #[rustc_legacy_const_generics (4)] |
9673 | #[cfg_attr (test, assert_instr(vpalignr, IMM8 = 5))] |
9674 | pub unsafe fn _mm256_mask_alignr_epi8<const IMM8: i32>( |
9675 | src: __m256i, |
9676 | k: __mmask32, |
9677 | a: __m256i, |
9678 | b: __m256i, |
9679 | ) -> __m256i { |
9680 | static_assert_uimm_bits!(IMM8, 8); |
9681 | let r: __m256i = _mm256_alignr_epi8::<IMM8>(a, b); |
transmute(simd_select_bitmask(k, r.as_i8x32(), src.as_i8x32()))
9683 | } |
9684 | |
9685 | /// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
9686 | /// |
9687 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi8&expand=262) |
9688 | #[inline ] |
9689 | #[target_feature (enable = "avx512bw,avx512vl" )] |
9690 | #[rustc_legacy_const_generics (3)] |
9691 | #[cfg_attr (test, assert_instr(vpalignr, IMM8 = 5))] |
9692 | pub unsafe fn _mm256_maskz_alignr_epi8<const IMM8: i32>( |
9693 | k: __mmask32, |
9694 | a: __m256i, |
9695 | b: __m256i, |
9696 | ) -> __m256i { |
9697 | static_assert_uimm_bits!(IMM8, 8); |
9698 | let r: __m256i = _mm256_alignr_epi8::<IMM8>(a, b); |
transmute(simd_select_bitmask(
k,
r.as_i8x32(),
_mm256_setzero_si256().as_i8x32(),
))
9704 | } |
9705 | |
9706 | /// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
9707 | /// |
9708 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi8&expand=258) |
9709 | #[inline ] |
9710 | #[target_feature (enable = "avx512bw,avx512vl" )] |
9711 | #[rustc_legacy_const_generics (4)] |
9712 | #[cfg_attr (test, assert_instr(vpalignr, IMM8 = 5))] |
9713 | pub unsafe fn _mm_mask_alignr_epi8<const IMM8: i32>( |
9714 | src: __m128i, |
9715 | k: __mmask16, |
9716 | a: __m128i, |
9717 | b: __m128i, |
9718 | ) -> __m128i { |
9719 | static_assert_uimm_bits!(IMM8, 8); |
9720 | let r: __m128i = _mm_alignr_epi8::<IMM8>(a, b); |
transmute(simd_select_bitmask(k, r.as_i8x16(), src.as_i8x16()))
9722 | } |
9723 | |
9724 | /// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
9725 | /// |
9726 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi8&expand=259) |
9727 | #[inline ] |
9728 | #[target_feature (enable = "avx512bw,avx512vl" )] |
9729 | #[rustc_legacy_const_generics (3)] |
9730 | #[cfg_attr (test, assert_instr(vpalignr, IMM8 = 5))] |
9731 | pub unsafe fn _mm_maskz_alignr_epi8<const IMM8: i32>( |
9732 | k: __mmask16, |
9733 | a: __m128i, |
9734 | b: __m128i, |
9735 | ) -> __m128i { |
9736 | static_assert_uimm_bits!(IMM8, 8); |
9737 | let r: __m128i = _mm_alignr_epi8::<IMM8>(a, b); |
9738 | let zero: i8x16 = _mm_setzero_si128().as_i8x16(); |
9739 | transmute(simd_select_bitmask(k, r.as_i8x16(), zero)) |
9740 | } |
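
// Illustrative sketch, not part of the original source: a scalar model of the
// per-128-bit-lane `alignr` step the doc comments above describe. The helper name
// `alignr_lane_model` is hypothetical and exists only to show how the 32-byte
// concatenation and the byte shift interact for a single lane.
#[allow(dead_code)]
fn alignr_lane_model(a: [u8; 16], b: [u8; 16], imm8: usize) -> [u8; 16] {
    // Concatenate: b supplies the low 16 bytes of the temporary, a the high 16 bytes.
    let mut tmp = [0u8; 32];
    tmp[..16].copy_from_slice(&b);
    tmp[16..].copy_from_slice(&a);
    // Shift right by imm8 bytes and keep the low 16; positions past the end of the
    // temporary read as zero, mirroring the behaviour for large shift counts.
    let mut dst = [0u8; 16];
    for i in 0..16 {
        dst[i] = if imm8 + i < 32 { tmp[imm8 + i] } else { 0 };
    }
    dst
}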
9741 | |
9742 | /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
9743 | /// |
9744 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_storeu_epi8&expand=1812) |
9745 | #[inline ] |
9746 | #[target_feature (enable = "avx512bw" )] |
9747 | #[cfg_attr (test, assert_instr(vpmovswb))] |
9748 | pub unsafe fn _mm512_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) { |
9749 | vpmovswbmem(mem_addr, a.as_i16x32(), k); |
9750 | } |
9751 | |
9752 | /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
9753 | /// |
9754 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_storeu_epi8&expand=1811) |
9755 | #[inline ] |
9756 | #[target_feature (enable = "avx512bw,avx512vl" )] |
9757 | #[cfg_attr (test, assert_instr(vpmovswb))] |
9758 | pub unsafe fn _mm256_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) { |
9759 | vpmovswbmem256(mem_addr, a.as_i16x16(), k); |
9760 | } |
9761 | |
9762 | /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
9763 | /// |
9764 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_storeu_epi8&expand=1810) |
9765 | #[inline ] |
9766 | #[target_feature (enable = "avx512bw,avx512vl" )] |
9767 | #[cfg_attr (test, assert_instr(vpmovswb))] |
9768 | pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) { |
9769 | vpmovswbmem128(mem_addr, a.as_i16x8(), k); |
9770 | } |
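
// Illustrative sketch, not part of the original source: a scalar model of the masked,
// signed-saturating word-to-byte stores above. The helper name `cvtsepi16_store_model`
// is hypothetical; the same model applies to the 128-, 256- and 512-bit variants with
// correspondingly narrower masks and slices.
#[allow(dead_code)]
unsafe fn cvtsepi16_store_model(mem_addr: *mut i8, k: u32, a: &[i16]) {
    for (i, &w) in a.iter().enumerate() {
        // Only lanes whose mask bit is set are written; the rest of the destination
        // memory is left untouched.
        if (k >> i) & 1 == 1 {
            // Signed saturation clamps each 16-bit value into the i8 range.
            let b = if w > i8::MAX as i16 {
                i8::MAX
            } else if w < i8::MIN as i16 {
                i8::MIN
            } else {
                w as i8
            };
            mem_addr.add(i).write_unaligned(b);
        }
    }
}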
9771 | |
9772 | /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
9773 | /// |
9774 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_storeu_epi8&expand=1412) |
9775 | #[inline ] |
9776 | #[target_feature (enable = "avx512bw" )] |
9777 | #[cfg_attr (test, assert_instr(vpmovwb))] |
9778 | pub unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) { |
9779 | vpmovwbmem(mem_addr, a.as_i16x32(), k); |
9780 | } |
9781 | |
9782 | /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
9783 | /// |
9784 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_storeu_epi8&expand=1411) |
9785 | #[inline ] |
9786 | #[target_feature (enable = "avx512bw,avx512vl" )] |
9787 | #[cfg_attr (test, assert_instr(vpmovwb))] |
9788 | pub unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) { |
9789 | vpmovwbmem256(mem_addr, a.as_i16x16(), k); |
9790 | } |
9791 | |
9792 | /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
9793 | /// |
9794 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_storeu_epi8&expand=1410) |
9795 | #[inline ] |
9796 | #[target_feature (enable = "avx512bw,avx512vl" )] |
9797 | #[cfg_attr (test, assert_instr(vpmovwb))] |
9798 | pub unsafe fn _mm_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) { |
9799 | vpmovwbmem128(mem_addr, a.as_i16x8(), k); |
9800 | } |
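
// Illustrative sketch, not part of the original source: the truncating stores above keep
// only the low byte of each selected 16-bit lane instead of saturating. The helper name
// `cvtepi16_store_model` is hypothetical.
#[allow(dead_code)]
unsafe fn cvtepi16_store_model(mem_addr: *mut i8, k: u32, a: &[i16]) {
    for (i, &w) in a.iter().enumerate() {
        if (k >> i) & 1 == 1 {
            // Truncation: discard bits 15:8, keep bits 7:0.
            mem_addr.add(i).write_unaligned(w as i8);
        }
    }
}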
9801 | |
9802 | /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
9803 | /// |
9804 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_storeu_epi8&expand=2047) |
9805 | #[inline ] |
9806 | #[target_feature (enable = "avx512bw" )] |
9807 | #[cfg_attr (test, assert_instr(vpmovuswb))] |
9808 | pub unsafe fn _mm512_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) { |
9809 | vpmovuswbmem(mem_addr, a.as_i16x32(), k); |
9810 | } |
9811 | |
9812 | /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
9813 | /// |
9814 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_storeu_epi8&expand=2046) |
9815 | #[inline ] |
9816 | #[target_feature (enable = "avx512bw,avx512vl" )] |
9817 | #[cfg_attr (test, assert_instr(vpmovuswb))] |
9818 | pub unsafe fn _mm256_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) { |
9819 | vpmovuswbmem256(mem_addr, a.as_i16x16(), k); |
9820 | } |
9821 | |
9822 | /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
9823 | /// |
9824 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_storeu_epi8&expand=2045) |
9825 | #[inline ] |
9826 | #[target_feature (enable = "avx512bw,avx512vl" )] |
9827 | #[cfg_attr (test, assert_instr(vpmovuswb))] |
9828 | pub unsafe fn _mm_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) { |
9829 | vpmovuswbmem128(mem_addr, a.as_i16x8(), k); |
9830 | } |
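
// Illustrative sketch, not part of the original source: the unsigned-saturating stores
// above treat each lane as a u16 and clamp it to the u8 range before the masked store.
// The helper name `cvtusepi16_store_model` is hypothetical.
#[allow(dead_code)]
unsafe fn cvtusepi16_store_model(mem_addr: *mut i8, k: u32, a: &[u16]) {
    for (i, &w) in a.iter().enumerate() {
        if (k >> i) & 1 == 1 {
            // Unsigned saturation: anything above 255 becomes 255.
            let b = if w > u8::MAX as u16 { u8::MAX } else { w as u8 };
            mem_addr.add(i).write_unaligned(b as i8);
        }
    }
}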
9831 | |
9832 | #[allow (improper_ctypes)] |
9833 | extern "C" { |
9834 | #[link_name = "llvm.x86.avx512.mask.paddus.w.512" ] |
9835 | fn vpaddusw(a: u16x32, b: u16x32, src: u16x32, mask: u32) -> u16x32; |
9836 | #[link_name = "llvm.x86.avx512.mask.paddus.w.256" ] |
9837 | fn vpaddusw256(a: u16x16, b: u16x16, src: u16x16, mask: u16) -> u16x16; |
9838 | #[link_name = "llvm.x86.avx512.mask.paddus.w.128" ] |
9839 | fn vpaddusw128(a: u16x8, b: u16x8, src: u16x8, mask: u8) -> u16x8; |
9840 | |
9841 | #[link_name = "llvm.x86.avx512.mask.paddus.b.512" ] |
9842 | fn vpaddusb(a: u8x64, b: u8x64, src: u8x64, mask: u64) -> u8x64; |
9843 | #[link_name = "llvm.x86.avx512.mask.paddus.b.256" ] |
9844 | fn vpaddusb256(a: u8x32, b: u8x32, src: u8x32, mask: u32) -> u8x32; |
9845 | #[link_name = "llvm.x86.avx512.mask.paddus.b.128" ] |
9846 | fn vpaddusb128(a: u8x16, b: u8x16, src: u8x16, mask: u16) -> u8x16; |
9847 | |
9848 | #[link_name = "llvm.x86.avx512.mask.padds.w.512" ] |
9849 | fn vpaddsw(a: i16x32, b: i16x32, src: i16x32, mask: u32) -> i16x32; |
9850 | #[link_name = "llvm.x86.avx512.mask.padds.w.256" ] |
9851 | fn vpaddsw256(a: i16x16, b: i16x16, src: i16x16, mask: u16) -> i16x16; |
9852 | #[link_name = "llvm.x86.avx512.mask.padds.w.128" ] |
9853 | fn vpaddsw128(a: i16x8, b: i16x8, src: i16x8, mask: u8) -> i16x8; |
9854 | |
9855 | #[link_name = "llvm.x86.avx512.mask.padds.b.512" ] |
9856 | fn vpaddsb(a: i8x64, b: i8x64, src: i8x64, mask: u64) -> i8x64; |
9857 | #[link_name = "llvm.x86.avx512.mask.padds.b.256" ] |
9858 | fn vpaddsb256(a: i8x32, b: i8x32, src: i8x32, mask: u32) -> i8x32; |
9859 | #[link_name = "llvm.x86.avx512.mask.padds.b.128" ] |
9860 | fn vpaddsb128(a: i8x16, b: i8x16, src: i8x16, mask: u16) -> i8x16; |
9861 | |
9862 | #[link_name = "llvm.x86.avx512.mask.psubus.w.512" ] |
9863 | fn vpsubusw(a: u16x32, b: u16x32, src: u16x32, mask: u32) -> u16x32; |
9864 | #[link_name = "llvm.x86.avx512.mask.psubus.w.256" ] |
9865 | fn vpsubusw256(a: u16x16, b: u16x16, src: u16x16, mask: u16) -> u16x16; |
9866 | #[link_name = "llvm.x86.avx512.mask.psubus.w.128" ] |
9867 | fn vpsubusw128(a: u16x8, b: u16x8, src: u16x8, mask: u8) -> u16x8; |
9868 | |
9869 | #[link_name = "llvm.x86.avx512.mask.psubus.b.512" ] |
9870 | fn vpsubusb(a: u8x64, b: u8x64, src: u8x64, mask: u64) -> u8x64; |
9871 | #[link_name = "llvm.x86.avx512.mask.psubus.b.256" ] |
9872 | fn vpsubusb256(a: u8x32, b: u8x32, src: u8x32, mask: u32) -> u8x32; |
9873 | #[link_name = "llvm.x86.avx512.mask.psubus.b.128" ] |
9874 | fn vpsubusb128(a: u8x16, b: u8x16, src: u8x16, mask: u16) -> u8x16; |
9875 | |
9876 | #[link_name = "llvm.x86.avx512.mask.psubs.w.512" ] |
9877 | fn vpsubsw(a: i16x32, b: i16x32, src: i16x32, mask: u32) -> i16x32; |
9878 | #[link_name = "llvm.x86.avx512.mask.psubs.w.256" ] |
9879 | fn vpsubsw256(a: i16x16, b: i16x16, src: i16x16, mask: u16) -> i16x16; |
9880 | #[link_name = "llvm.x86.avx512.mask.psubs.w.128" ] |
9881 | fn vpsubsw128(a: i16x8, b: i16x8, src: i16x8, mask: u8) -> i16x8; |
9882 | |
9883 | #[link_name = "llvm.x86.avx512.mask.psubs.b.512" ] |
9884 | fn vpsubsb(a: i8x64, b: i8x64, src: i8x64, mask: u64) -> i8x64; |
9885 | #[link_name = "llvm.x86.avx512.mask.psubs.b.256" ] |
9886 | fn vpsubsb256(a: i8x32, b: i8x32, src: i8x32, mask: u32) -> i8x32; |
9887 | #[link_name = "llvm.x86.avx512.mask.psubs.b.128" ] |
9888 | fn vpsubsb128(a: i8x16, b: i8x16, src: i8x16, mask: u16) -> i8x16; |
9889 | |
9890 | #[link_name = "llvm.x86.avx512.pmulhu.w.512" ] |
9891 | fn vpmulhuw(a: u16x32, b: u16x32) -> u16x32; |
9892 | #[link_name = "llvm.x86.avx512.pmulh.w.512" ] |
9893 | fn vpmulhw(a: i16x32, b: i16x32) -> i16x32; |
9894 | #[link_name = "llvm.x86.avx512.pmul.hr.sw.512" ] |
9895 | fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32; |
9896 | |
9897 | #[link_name = "llvm.x86.avx512.mask.ucmp.w.512" ] |
9898 | fn vpcmpuw(a: u16x32, b: u16x32, op: i32, mask: u32) -> u32; |
9899 | #[link_name = "llvm.x86.avx512.mask.ucmp.w.256" ] |
9900 | fn vpcmpuw256(a: u16x16, b: u16x16, op: i32, mask: u16) -> u16; |
9901 | #[link_name = "llvm.x86.avx512.mask.ucmp.w.128" ] |
9902 | fn vpcmpuw128(a: u16x8, b: u16x8, op: i32, mask: u8) -> u8; |
9903 | |
9904 | #[link_name = "llvm.x86.avx512.mask.ucmp.b.512" ] |
9905 | fn vpcmpub(a: u8x64, b: u8x64, op: i32, mask: u64) -> u64; |
9906 | #[link_name = "llvm.x86.avx512.mask.ucmp.b.256" ] |
9907 | fn vpcmpub256(a: u8x32, b: u8x32, op: i32, mask: u32) -> u32; |
9908 | #[link_name = "llvm.x86.avx512.mask.ucmp.b.128" ] |
9909 | fn vpcmpub128(a: u8x16, b: u8x16, op: i32, mask: u16) -> u16; |
9910 | |
9911 | #[link_name = "llvm.x86.avx512.mask.cmp.w.512" ] |
9912 | fn vpcmpw(a: i16x32, b: i16x32, op: i32, mask: u32) -> u32; |
9913 | #[link_name = "llvm.x86.avx512.mask.cmp.w.256" ] |
9914 | fn vpcmpw256(a: i16x16, b: i16x16, op: i32, mask: u16) -> u16; |
9915 | #[link_name = "llvm.x86.avx512.mask.cmp.w.128" ] |
9916 | fn vpcmpw128(a: i16x8, b: i16x8, op: i32, mask: u8) -> u8; |
9917 | |
9918 | #[link_name = "llvm.x86.avx512.mask.cmp.b.512" ] |
9919 | fn vpcmpb(a: i8x64, b: i8x64, op: i32, mask: u64) -> u64; |
9920 | #[link_name = "llvm.x86.avx512.mask.cmp.b.256" ] |
9921 | fn vpcmpb256(a: i8x32, b: i8x32, op: i32, mask: u32) -> u32; |
9922 | #[link_name = "llvm.x86.avx512.mask.cmp.b.128" ] |
9923 | fn vpcmpb128(a: i8x16, b: i8x16, op: i32, mask: u16) -> u16; |
9924 | |
9925 | #[link_name = "llvm.x86.avx512.mask.pmaxu.w.512" ] |
9926 | fn vpmaxuw(a: u16x32, b: u16x32) -> u16x32; |
9927 | #[link_name = "llvm.x86.avx512.mask.pmaxu.b.512" ] |
9928 | fn vpmaxub(a: u8x64, b: u8x64) -> u8x64; |
9929 | #[link_name = "llvm.x86.avx512.mask.pmaxs.w.512" ] |
9930 | fn vpmaxsw(a: i16x32, b: i16x32) -> i16x32; |
9931 | #[link_name = "llvm.x86.avx512.mask.pmaxs.b.512" ] |
9932 | fn vpmaxsb(a: i8x64, b: i8x64) -> i8x64; |
9933 | |
9934 | #[link_name = "llvm.x86.avx512.mask.pminu.w.512" ] |
9935 | fn vpminuw(a: u16x32, b: u16x32) -> u16x32; |
9936 | #[link_name = "llvm.x86.avx512.mask.pminu.b.512" ] |
9937 | fn vpminub(a: u8x64, b: u8x64) -> u8x64; |
9938 | #[link_name = "llvm.x86.avx512.mask.pmins.w.512" ] |
9939 | fn vpminsw(a: i16x32, b: i16x32) -> i16x32; |
9940 | #[link_name = "llvm.x86.avx512.mask.pmins.b.512" ] |
9941 | fn vpminsb(a: i8x64, b: i8x64) -> i8x64; |
9942 | |
9943 | #[link_name = "llvm.x86.avx512.pmaddw.d.512" ] |
9944 | fn vpmaddwd(a: i16x32, b: i16x32) -> i32x16; |
9945 | #[link_name = "llvm.x86.avx512.pmaddubs.w.512" ] |
9946 | fn vpmaddubsw(a: i8x64, b: i8x64) -> i16x32; |
9947 | |
9948 | #[link_name = "llvm.x86.avx512.packssdw.512" ] |
9949 | fn vpackssdw(a: i32x16, b: i32x16) -> i16x32; |
9950 | #[link_name = "llvm.x86.avx512.packsswb.512" ] |
9951 | fn vpacksswb(a: i16x32, b: i16x32) -> i8x64; |
9952 | #[link_name = "llvm.x86.avx512.packusdw.512" ] |
9953 | fn vpackusdw(a: i32x16, b: i32x16) -> u16x32; |
9954 | #[link_name = "llvm.x86.avx512.packuswb.512" ] |
9955 | fn vpackuswb(a: i16x32, b: i16x32) -> u8x64; |
9956 | |
9957 | #[link_name = "llvm.x86.avx512.pavg.w.512" ] |
9958 | fn vpavgw(a: u16x32, b: u16x32) -> u16x32; |
9959 | #[link_name = "llvm.x86.avx512.pavg.b.512" ] |
9960 | fn vpavgb(a: u8x64, b: u8x64) -> u8x64; |
9961 | |
9962 | #[link_name = "llvm.x86.avx512.psll.w.512" ] |
9963 | fn vpsllw(a: i16x32, count: i16x8) -> i16x32; |
9964 | |
9965 | #[link_name = "llvm.x86.avx512.psllv.w.512" ] |
9966 | fn vpsllvw(a: i16x32, b: i16x32) -> i16x32; |
9967 | #[link_name = "llvm.x86.avx512.psllv.w.256" ] |
9968 | fn vpsllvw256(a: i16x16, b: i16x16) -> i16x16; |
9969 | #[link_name = "llvm.x86.avx512.psllv.w.128" ] |
9970 | fn vpsllvw128(a: i16x8, b: i16x8) -> i16x8; |
9971 | |
9972 | #[link_name = "llvm.x86.avx512.psrl.w.512" ] |
9973 | fn vpsrlw(a: i16x32, count: i16x8) -> i16x32; |
9974 | |
9975 | #[link_name = "llvm.x86.avx512.psrlv.w.512" ] |
9976 | fn vpsrlvw(a: i16x32, b: i16x32) -> i16x32; |
9977 | #[link_name = "llvm.x86.avx512.psrlv.w.256" ] |
9978 | fn vpsrlvw256(a: i16x16, b: i16x16) -> i16x16; |
9979 | #[link_name = "llvm.x86.avx512.psrlv.w.128" ] |
9980 | fn vpsrlvw128(a: i16x8, b: i16x8) -> i16x8; |
9981 | |
9982 | #[link_name = "llvm.x86.avx512.psra.w.512" ] |
9983 | fn vpsraw(a: i16x32, count: i16x8) -> i16x32; |
9984 | |
9985 | #[link_name = "llvm.x86.avx512.psrav.w.512" ] |
9986 | fn vpsravw(a: i16x32, count: i16x32) -> i16x32; |
9987 | #[link_name = "llvm.x86.avx512.psrav.w.256" ] |
9988 | fn vpsravw256(a: i16x16, count: i16x16) -> i16x16; |
9989 | #[link_name = "llvm.x86.avx512.psrav.w.128" ] |
9990 | fn vpsravw128(a: i16x8, count: i16x8) -> i16x8; |
9991 | |
9992 | #[link_name = "llvm.x86.avx512.vpermi2var.hi.512" ] |
9993 | fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32; |
9994 | #[link_name = "llvm.x86.avx512.vpermi2var.hi.256" ] |
9995 | fn vpermi2w256(a: i16x16, idx: i16x16, b: i16x16) -> i16x16; |
9996 | #[link_name = "llvm.x86.avx512.vpermi2var.hi.128" ] |
9997 | fn vpermi2w128(a: i16x8, idx: i16x8, b: i16x8) -> i16x8; |
9998 | |
9999 | #[link_name = "llvm.x86.avx512.permvar.hi.512" ] |
10000 | fn vpermw(a: i16x32, idx: i16x32) -> i16x32; |
10001 | #[link_name = "llvm.x86.avx512.permvar.hi.256" ] |
10002 | fn vpermw256(a: i16x16, idx: i16x16) -> i16x16; |
10003 | #[link_name = "llvm.x86.avx512.permvar.hi.128" ] |
10004 | fn vpermw128(a: i16x8, idx: i16x8) -> i16x8; |
10005 | |
10006 | #[link_name = "llvm.x86.avx512.pshuf.b.512" ] |
10007 | fn vpshufb(a: i8x64, b: i8x64) -> i8x64; |
10008 | |
10009 | #[link_name = "llvm.x86.avx512.psad.bw.512" ] |
10010 | fn vpsadbw(a: u8x64, b: u8x64) -> u64x8; |
10011 | |
10012 | #[link_name = "llvm.x86.avx512.dbpsadbw.512" ] |
10013 | fn vdbpsadbw(a: u8x64, b: u8x64, imm8: i32) -> u16x32; |
10014 | #[link_name = "llvm.x86.avx512.dbpsadbw.256" ] |
10015 | fn vdbpsadbw256(a: u8x32, b: u8x32, imm8: i32) -> u16x16; |
10016 | #[link_name = "llvm.x86.avx512.dbpsadbw.128" ] |
10017 | fn vdbpsadbw128(a: u8x16, b: u8x16, imm8: i32) -> u16x8; |
10018 | |
10019 | #[link_name = "llvm.x86.avx512.mask.pmovs.wb.512" ] |
10020 | fn vpmovswb(a: i16x32, src: i8x32, mask: u32) -> i8x32; |
10021 | #[link_name = "llvm.x86.avx512.mask.pmovs.wb.256" ] |
10022 | fn vpmovswb256(a: i16x16, src: i8x16, mask: u16) -> i8x16; |
10023 | #[link_name = "llvm.x86.avx512.mask.pmovs.wb.128" ] |
10024 | fn vpmovswb128(a: i16x8, src: i8x16, mask: u8) -> i8x16; |
10025 | |
10026 | #[link_name = "llvm.x86.avx512.mask.pmovus.wb.512" ] |
10027 | fn vpmovuswb(a: u16x32, src: u8x32, mask: u32) -> u8x32; |
10028 | #[link_name = "llvm.x86.avx512.mask.pmovus.wb.256" ] |
10029 | fn vpmovuswb256(a: u16x16, src: u8x16, mask: u16) -> u8x16; |
10030 | #[link_name = "llvm.x86.avx512.mask.pmovus.wb.128" ] |
10031 | fn vpmovuswb128(a: u16x8, src: u8x16, mask: u8) -> u8x16; |
10032 | |
10033 | #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.512" ] |
10034 | fn vpmovswbmem(mem_addr: *mut i8, a: i16x32, mask: u32); |
10035 | #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.256" ] |
10036 | fn vpmovswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16); |
10037 | #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.128" ] |
10038 | fn vpmovswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8); |
10039 | |
10040 | #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.512" ] |
10041 | fn vpmovwbmem(mem_addr: *mut i8, a: i16x32, mask: u32); |
10042 | #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.256" ] |
10043 | fn vpmovwbmem256(mem_addr: *mut i8, a: i16x16, mask: u16); |
10044 | #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.128" ] |
10045 | fn vpmovwbmem128(mem_addr: *mut i8, a: i16x8, mask: u8); |
10046 | |
10047 | #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.512" ] |
10048 | fn vpmovuswbmem(mem_addr: *mut i8, a: i16x32, mask: u32); |
10049 | #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.256" ] |
10050 | fn vpmovuswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16); |
10051 | #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.128" ] |
10052 | fn vpmovuswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8); |
10053 | } |
10054 | |
10055 | #[cfg (test)] |
10056 | mod tests { |
10057 | |
10058 | use stdarch_test::simd_test; |
10059 | |
10060 | use crate::core_arch::x86::*; |
10061 | use crate::hint::black_box; |
10062 | use crate::mem::{self}; |
10063 | |
10064 | #[simd_test(enable = "avx512bw" )] |
10065 | unsafe fn test_mm512_abs_epi16() { |
10066 | let a = _mm512_set1_epi16(-1); |
10067 | let r = _mm512_abs_epi16(a); |
10068 | let e = _mm512_set1_epi16(1); |
10069 | assert_eq_m512i(r, e); |
10070 | } |
10071 | |
10072 | #[simd_test(enable = "avx512bw" )] |
10073 | unsafe fn test_mm512_mask_abs_epi16() { |
10074 | let a = _mm512_set1_epi16(-1); |
10075 | let r = _mm512_mask_abs_epi16(a, 0, a); |
10076 | assert_eq_m512i(r, a); |
10077 | let r = _mm512_mask_abs_epi16(a, 0b00000000_11111111_00000000_11111111, a); |
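// Note: mask bit i selects element i, and `_mm512_set_epi16` lists element 31 first,
// so the low set bits of the mask correspond to the trailing arguments of `e` below.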
10078 | #[rustfmt::skip] |
10079 | let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, |
10080 | -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1); |
10081 | assert_eq_m512i(r, e); |
10082 | } |
10083 | |
10084 | #[simd_test(enable = "avx512bw" )] |
10085 | unsafe fn test_mm512_maskz_abs_epi16() { |
10086 | let a = _mm512_set1_epi16(-1); |
10087 | let r = _mm512_maskz_abs_epi16(0, a); |
10088 | assert_eq_m512i(r, _mm512_setzero_si512()); |
10089 | let r = _mm512_maskz_abs_epi16(0b00000000_11111111_00000000_11111111, a); |
10090 | #[rustfmt::skip] |
10091 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, |
10092 | 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1); |
10093 | assert_eq_m512i(r, e); |
10094 | } |
10095 | |
10096 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10097 | unsafe fn test_mm256_mask_abs_epi16() { |
10098 | let a = _mm256_set1_epi16(-1); |
10099 | let r = _mm256_mask_abs_epi16(a, 0, a); |
10100 | assert_eq_m256i(r, a); |
10101 | let r = _mm256_mask_abs_epi16(a, 0b00000000_11111111, a); |
10102 | let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1); |
10103 | assert_eq_m256i(r, e); |
10104 | } |
10105 | |
10106 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10107 | unsafe fn test_mm256_maskz_abs_epi16() { |
10108 | let a = _mm256_set1_epi16(-1); |
10109 | let r = _mm256_maskz_abs_epi16(0, a); |
10110 | assert_eq_m256i(r, _mm256_setzero_si256()); |
10111 | let r = _mm256_maskz_abs_epi16(0b00000000_11111111, a); |
10112 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1); |
10113 | assert_eq_m256i(r, e); |
10114 | } |
10115 | |
10116 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10117 | unsafe fn test_mm_mask_abs_epi16() { |
10118 | let a = _mm_set1_epi16(-1); |
10119 | let r = _mm_mask_abs_epi16(a, 0, a); |
10120 | assert_eq_m128i(r, a); |
10121 | let r = _mm_mask_abs_epi16(a, 0b00001111, a); |
10122 | let e = _mm_set_epi16(-1, -1, -1, -1, 1, 1, 1, 1); |
10123 | assert_eq_m128i(r, e); |
10124 | } |
10125 | |
10126 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10127 | unsafe fn test_mm_maskz_abs_epi16() { |
10128 | let a = _mm_set1_epi16(-1); |
10129 | let r = _mm_maskz_abs_epi16(0, a); |
10130 | assert_eq_m128i(r, _mm_setzero_si128()); |
10131 | let r = _mm_maskz_abs_epi16(0b00001111, a); |
10132 | let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1); |
10133 | assert_eq_m128i(r, e); |
10134 | } |
10135 | |
10136 | #[simd_test(enable = "avx512bw" )] |
10137 | unsafe fn test_mm512_abs_epi8() { |
10138 | let a = _mm512_set1_epi8(-1); |
10139 | let r = _mm512_abs_epi8(a); |
10140 | let e = _mm512_set1_epi8(1); |
10141 | assert_eq_m512i(r, e); |
10142 | } |
10143 | |
10144 | #[simd_test(enable = "avx512bw" )] |
10145 | unsafe fn test_mm512_mask_abs_epi8() { |
10146 | let a = _mm512_set1_epi8(-1); |
10147 | let r = _mm512_mask_abs_epi8(a, 0, a); |
10148 | assert_eq_m512i(r, a); |
10149 | let r = _mm512_mask_abs_epi8( |
10150 | a, |
10151 | 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, |
10152 | a, |
10153 | ); |
10154 | #[rustfmt::skip] |
10155 | let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, |
10156 | -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, |
10157 | -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, |
10158 | -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1); |
10159 | assert_eq_m512i(r, e); |
10160 | } |
10161 | |
10162 | #[simd_test(enable = "avx512bw" )] |
10163 | unsafe fn test_mm512_maskz_abs_epi8() { |
10164 | let a = _mm512_set1_epi8(-1); |
10165 | let r = _mm512_maskz_abs_epi8(0, a); |
10166 | assert_eq_m512i(r, _mm512_setzero_si512()); |
10167 | let r = _mm512_maskz_abs_epi8( |
10168 | 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, |
10169 | a, |
10170 | ); |
10171 | #[rustfmt::skip] |
10172 | let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, |
10173 | 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, |
10174 | 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, |
10175 | 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1); |
10176 | assert_eq_m512i(r, e); |
10177 | } |
10178 | |
10179 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10180 | unsafe fn test_mm256_mask_abs_epi8() { |
10181 | let a = _mm256_set1_epi8(-1); |
10182 | let r = _mm256_mask_abs_epi8(a, 0, a); |
10183 | assert_eq_m256i(r, a); |
10184 | let r = _mm256_mask_abs_epi8(a, 0b00000000_11111111_00000000_11111111, a); |
10185 | #[rustfmt::skip] |
10186 | let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, |
10187 | -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1); |
10188 | assert_eq_m256i(r, e); |
10189 | } |
10190 | |
10191 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10192 | unsafe fn test_mm256_maskz_abs_epi8() { |
10193 | let a = _mm256_set1_epi8(-1); |
10194 | let r = _mm256_maskz_abs_epi8(0, a); |
10195 | assert_eq_m256i(r, _mm256_setzero_si256()); |
10196 | let r = _mm256_maskz_abs_epi8(0b00000000_11111111_00000000_11111111, a); |
10197 | #[rustfmt::skip] |
10198 | let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, |
10199 | 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1); |
10200 | assert_eq_m256i(r, e); |
10201 | } |
10202 | |
10203 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10204 | unsafe fn test_mm_mask_abs_epi8() { |
10205 | let a = _mm_set1_epi8(-1); |
10206 | let r = _mm_mask_abs_epi8(a, 0, a); |
10207 | assert_eq_m128i(r, a); |
10208 | let r = _mm_mask_abs_epi8(a, 0b00000000_11111111, a); |
10209 | let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1); |
10210 | assert_eq_m128i(r, e); |
10211 | } |
10212 | |
10213 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10214 | unsafe fn test_mm_maskz_abs_epi8() { |
10215 | let a = _mm_set1_epi8(-1); |
10216 | let r = _mm_maskz_abs_epi8(0, a); |
10217 | assert_eq_m128i(r, _mm_setzero_si128()); |
10218 | let r = _mm_maskz_abs_epi8(0b00000000_11111111, a); |
10219 | #[rustfmt::skip] |
10220 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1); |
10221 | assert_eq_m128i(r, e); |
10222 | } |
10223 | |
10224 | #[simd_test(enable = "avx512bw" )] |
10225 | unsafe fn test_mm512_add_epi16() { |
10226 | let a = _mm512_set1_epi16(1); |
10227 | let b = _mm512_set1_epi16(2); |
10228 | let r = _mm512_add_epi16(a, b); |
10229 | let e = _mm512_set1_epi16(3); |
10230 | assert_eq_m512i(r, e); |
10231 | } |
10232 | |
10233 | #[simd_test(enable = "avx512bw" )] |
10234 | unsafe fn test_mm512_mask_add_epi16() { |
10235 | let a = _mm512_set1_epi16(1); |
10236 | let b = _mm512_set1_epi16(2); |
10237 | let r = _mm512_mask_add_epi16(a, 0, a, b); |
10238 | assert_eq_m512i(r, a); |
10239 | let r = _mm512_mask_add_epi16(a, 0b00000000_11111111_00000000_11111111, a, b); |
10240 | #[rustfmt::skip] |
10241 | let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, |
10242 | 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3); |
10243 | assert_eq_m512i(r, e); |
10244 | } |
10245 | |
10246 | #[simd_test(enable = "avx512bw" )] |
10247 | unsafe fn test_mm512_maskz_add_epi16() { |
10248 | let a = _mm512_set1_epi16(1); |
10249 | let b = _mm512_set1_epi16(2); |
10250 | let r = _mm512_maskz_add_epi16(0, a, b); |
10251 | assert_eq_m512i(r, _mm512_setzero_si512()); |
10252 | let r = _mm512_maskz_add_epi16(0b00000000_11111111_00000000_11111111, a, b); |
10253 | #[rustfmt::skip] |
10254 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, |
10255 | 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3); |
10256 | assert_eq_m512i(r, e); |
10257 | } |
10258 | |
10259 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10260 | unsafe fn test_mm256_mask_add_epi16() { |
10261 | let a = _mm256_set1_epi16(1); |
10262 | let b = _mm256_set1_epi16(2); |
10263 | let r = _mm256_mask_add_epi16(a, 0, a, b); |
10264 | assert_eq_m256i(r, a); |
10265 | let r = _mm256_mask_add_epi16(a, 0b00000000_11111111, a, b); |
10266 | let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3); |
10267 | assert_eq_m256i(r, e); |
10268 | } |
10269 | |
10270 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10271 | unsafe fn test_mm256_maskz_add_epi16() { |
10272 | let a = _mm256_set1_epi16(1); |
10273 | let b = _mm256_set1_epi16(2); |
10274 | let r = _mm256_maskz_add_epi16(0, a, b); |
10275 | assert_eq_m256i(r, _mm256_setzero_si256()); |
10276 | let r = _mm256_maskz_add_epi16(0b00000000_11111111, a, b); |
10277 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3); |
10278 | assert_eq_m256i(r, e); |
10279 | } |
10280 | |
10281 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10282 | unsafe fn test_mm_mask_add_epi16() { |
10283 | let a = _mm_set1_epi16(1); |
10284 | let b = _mm_set1_epi16(2); |
10285 | let r = _mm_mask_add_epi16(a, 0, a, b); |
10286 | assert_eq_m128i(r, a); |
10287 | let r = _mm_mask_add_epi16(a, 0b00001111, a, b); |
10288 | let e = _mm_set_epi16(1, 1, 1, 1, 3, 3, 3, 3); |
10289 | assert_eq_m128i(r, e); |
10290 | } |
10291 | |
10292 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10293 | unsafe fn test_mm_maskz_add_epi16() { |
10294 | let a = _mm_set1_epi16(1); |
10295 | let b = _mm_set1_epi16(2); |
10296 | let r = _mm_maskz_add_epi16(0, a, b); |
10297 | assert_eq_m128i(r, _mm_setzero_si128()); |
10298 | let r = _mm_maskz_add_epi16(0b00001111, a, b); |
10299 | let e = _mm_set_epi16(0, 0, 0, 0, 3, 3, 3, 3); |
10300 | assert_eq_m128i(r, e); |
10301 | } |
10302 | |
10303 | #[simd_test(enable = "avx512bw" )] |
10304 | unsafe fn test_mm512_add_epi8() { |
10305 | let a = _mm512_set1_epi8(1); |
10306 | let b = _mm512_set1_epi8(2); |
10307 | let r = _mm512_add_epi8(a, b); |
10308 | let e = _mm512_set1_epi8(3); |
10309 | assert_eq_m512i(r, e); |
10310 | } |
10311 | |
10312 | #[simd_test(enable = "avx512bw" )] |
10313 | unsafe fn test_mm512_mask_add_epi8() { |
10314 | let a = _mm512_set1_epi8(1); |
10315 | let b = _mm512_set1_epi8(2); |
10316 | let r = _mm512_mask_add_epi8(a, 0, a, b); |
10317 | assert_eq_m512i(r, a); |
10318 | let r = _mm512_mask_add_epi8( |
10319 | a, |
10320 | 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, |
10321 | a, |
10322 | b, |
10323 | ); |
10324 | #[rustfmt::skip] |
10325 | let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, |
10326 | 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, |
10327 | 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, |
10328 | 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3); |
10329 | assert_eq_m512i(r, e); |
10330 | } |
10331 | |
10332 | #[simd_test(enable = "avx512bw" )] |
10333 | unsafe fn test_mm512_maskz_add_epi8() { |
10334 | let a = _mm512_set1_epi8(1); |
10335 | let b = _mm512_set1_epi8(2); |
10336 | let r = _mm512_maskz_add_epi8(0, a, b); |
10337 | assert_eq_m512i(r, _mm512_setzero_si512()); |
10338 | let r = _mm512_maskz_add_epi8( |
10339 | 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, |
10340 | a, |
10341 | b, |
10342 | ); |
10343 | #[rustfmt::skip] |
10344 | let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, |
10345 | 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, |
10346 | 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, |
10347 | 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3); |
10348 | assert_eq_m512i(r, e); |
10349 | } |
10350 | |
10351 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10352 | unsafe fn test_mm256_mask_add_epi8() { |
10353 | let a = _mm256_set1_epi8(1); |
10354 | let b = _mm256_set1_epi8(2); |
10355 | let r = _mm256_mask_add_epi8(a, 0, a, b); |
10356 | assert_eq_m256i(r, a); |
10357 | let r = _mm256_mask_add_epi8(a, 0b00000000_11111111_00000000_11111111, a, b); |
10358 | #[rustfmt::skip] |
10359 | let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, |
10360 | 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3); |
10361 | assert_eq_m256i(r, e); |
10362 | } |
10363 | |
10364 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10365 | unsafe fn test_mm256_maskz_add_epi8() { |
10366 | let a = _mm256_set1_epi8(1); |
10367 | let b = _mm256_set1_epi8(2); |
10368 | let r = _mm256_maskz_add_epi8(0, a, b); |
10369 | assert_eq_m256i(r, _mm256_setzero_si256()); |
10370 | let r = _mm256_maskz_add_epi8(0b00000000_11111111_00000000_11111111, a, b); |
10371 | #[rustfmt::skip] |
10372 | let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, |
10373 | 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3); |
10374 | assert_eq_m256i(r, e); |
10375 | } |
10376 | |
10377 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10378 | unsafe fn test_mm_mask_add_epi8() { |
10379 | let a = _mm_set1_epi8(1); |
10380 | let b = _mm_set1_epi8(2); |
10381 | let r = _mm_mask_add_epi8(a, 0, a, b); |
10382 | assert_eq_m128i(r, a); |
10383 | let r = _mm_mask_add_epi8(a, 0b00000000_11111111, a, b); |
10384 | let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3); |
10385 | assert_eq_m128i(r, e); |
10386 | } |
10387 | |
10388 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10389 | unsafe fn test_mm_maskz_add_epi8() { |
10390 | let a = _mm_set1_epi8(1); |
10391 | let b = _mm_set1_epi8(2); |
10392 | let r = _mm_maskz_add_epi8(0, a, b); |
10393 | assert_eq_m128i(r, _mm_setzero_si128()); |
10394 | let r = _mm_maskz_add_epi8(0b00000000_11111111, a, b); |
10395 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3); |
10396 | assert_eq_m128i(r, e); |
10397 | } |
10398 | |
10399 | #[simd_test(enable = "avx512bw" )] |
10400 | unsafe fn test_mm512_adds_epu16() { |
10401 | let a = _mm512_set1_epi16(1); |
10402 | let b = _mm512_set1_epi16(u16::MAX as i16); |
10403 | let r = _mm512_adds_epu16(a, b); |
10404 | let e = _mm512_set1_epi16(u16::MAX as i16); |
10405 | assert_eq_m512i(r, e); |
10406 | } |
10407 | |
10408 | #[simd_test(enable = "avx512bw" )] |
10409 | unsafe fn test_mm512_mask_adds_epu16() { |
10410 | let a = _mm512_set1_epi16(1); |
10411 | let b = _mm512_set1_epi16(u16::MAX as i16); |
10412 | let r = _mm512_mask_adds_epu16(a, 0, a, b); |
10413 | assert_eq_m512i(r, a); |
10414 | let r = _mm512_mask_adds_epu16(a, 0b00000000_00000000_00000000_00001111, a, b); |
10415 | #[rustfmt::skip] |
10416 | let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
10417 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16); |
10418 | assert_eq_m512i(r, e); |
10419 | } |
10420 | |
10421 | #[simd_test(enable = "avx512bw" )] |
10422 | unsafe fn test_mm512_maskz_adds_epu16() { |
10423 | let a = _mm512_set1_epi16(1); |
10424 | let b = _mm512_set1_epi16(u16::MAX as i16); |
10425 | let r = _mm512_maskz_adds_epu16(0, a, b); |
10426 | assert_eq_m512i(r, _mm512_setzero_si512()); |
10427 | let r = _mm512_maskz_adds_epu16(0b00000000_00000000_00000000_00001111, a, b); |
10428 | #[rustfmt::skip] |
10429 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
10430 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16); |
10431 | assert_eq_m512i(r, e); |
10432 | } |
10433 | |
10434 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10435 | unsafe fn test_mm256_mask_adds_epu16() { |
10436 | let a = _mm256_set1_epi16(1); |
10437 | let b = _mm256_set1_epi16(u16::MAX as i16); |
10438 | let r = _mm256_mask_adds_epu16(a, 0, a, b); |
10439 | assert_eq_m256i(r, a); |
10440 | let r = _mm256_mask_adds_epu16(a, 0b00000000_00001111, a, b); |
10441 | #[rustfmt::skip] |
10442 | let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16); |
10443 | assert_eq_m256i(r, e); |
10444 | } |
10445 | |
10446 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10447 | unsafe fn test_mm256_maskz_adds_epu16() { |
10448 | let a = _mm256_set1_epi16(1); |
10449 | let b = _mm256_set1_epi16(u16::MAX as i16); |
10450 | let r = _mm256_maskz_adds_epu16(0, a, b); |
10451 | assert_eq_m256i(r, _mm256_setzero_si256()); |
10452 | let r = _mm256_maskz_adds_epu16(0b00000000_00001111, a, b); |
10453 | #[rustfmt::skip] |
10454 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16); |
10455 | assert_eq_m256i(r, e); |
10456 | } |
10457 | |
10458 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10459 | unsafe fn test_mm_mask_adds_epu16() { |
10460 | let a = _mm_set1_epi16(1); |
10461 | let b = _mm_set1_epi16(u16::MAX as i16); |
10462 | let r = _mm_mask_adds_epu16(a, 0, a, b); |
10463 | assert_eq_m128i(r, a); |
10464 | let r = _mm_mask_adds_epu16(a, 0b00001111, a, b); |
10465 | #[rustfmt::skip] |
10466 | let e = _mm_set_epi16(1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16); |
10467 | assert_eq_m128i(r, e); |
10468 | } |
10469 | |
10470 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10471 | unsafe fn test_mm_maskz_adds_epu16() { |
10472 | let a = _mm_set1_epi16(1); |
10473 | let b = _mm_set1_epi16(u16::MAX as i16); |
10474 | let r = _mm_maskz_adds_epu16(0, a, b); |
10475 | assert_eq_m128i(r, _mm_setzero_si128()); |
10476 | let r = _mm_maskz_adds_epu16(0b00001111, a, b); |
10477 | #[rustfmt::skip] |
10478 | let e = _mm_set_epi16(0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16); |
10479 | assert_eq_m128i(r, e); |
10480 | } |
10481 | |
10482 | #[simd_test(enable = "avx512bw" )] |
10483 | unsafe fn test_mm512_adds_epu8() { |
10484 | let a = _mm512_set1_epi8(1); |
10485 | let b = _mm512_set1_epi8(u8::MAX as i8); |
10486 | let r = _mm512_adds_epu8(a, b); |
10487 | let e = _mm512_set1_epi8(u8::MAX as i8); |
10488 | assert_eq_m512i(r, e); |
10489 | } |
10490 | |
10491 | #[simd_test(enable = "avx512bw" )] |
10492 | unsafe fn test_mm512_mask_adds_epu8() { |
10493 | let a = _mm512_set1_epi8(1); |
10494 | let b = _mm512_set1_epi8(u8::MAX as i8); |
10495 | let r = _mm512_mask_adds_epu8(a, 0, a, b); |
10496 | assert_eq_m512i(r, a); |
10497 | let r = _mm512_mask_adds_epu8( |
10498 | a, |
10499 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, |
10500 | a, |
10501 | b, |
10502 | ); |
10503 | #[rustfmt::skip] |
10504 | let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
10505 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
10506 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
10507 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8); |
10508 | assert_eq_m512i(r, e); |
10509 | } |
10510 | |
10511 | #[simd_test(enable = "avx512bw" )] |
10512 | unsafe fn test_mm512_maskz_adds_epu8() { |
10513 | let a = _mm512_set1_epi8(1); |
10514 | let b = _mm512_set1_epi8(u8::MAX as i8); |
10515 | let r = _mm512_maskz_adds_epu8(0, a, b); |
10516 | assert_eq_m512i(r, _mm512_setzero_si512()); |
10517 | let r = _mm512_maskz_adds_epu8( |
10518 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, |
10519 | a, |
10520 | b, |
10521 | ); |
10522 | #[rustfmt::skip] |
10523 | let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
10524 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
10525 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
10526 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8); |
10527 | assert_eq_m512i(r, e); |
10528 | } |
10529 | |
10530 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10531 | unsafe fn test_mm256_mask_adds_epu8() { |
10532 | let a = _mm256_set1_epi8(1); |
10533 | let b = _mm256_set1_epi8(u8::MAX as i8); |
10534 | let r = _mm256_mask_adds_epu8(a, 0, a, b); |
10535 | assert_eq_m256i(r, a); |
10536 | let r = _mm256_mask_adds_epu8(a, 0b00000000_00000000_00000000_00001111, a, b); |
10537 | #[rustfmt::skip] |
10538 | let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
10539 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8); |
10540 | assert_eq_m256i(r, e); |
10541 | } |
10542 | |
10543 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10544 | unsafe fn test_mm256_maskz_adds_epu8() { |
10545 | let a = _mm256_set1_epi8(1); |
10546 | let b = _mm256_set1_epi8(u8::MAX as i8); |
10547 | let r = _mm256_maskz_adds_epu8(0, a, b); |
10548 | assert_eq_m256i(r, _mm256_setzero_si256()); |
10549 | let r = _mm256_maskz_adds_epu8(0b00000000_00000000_00000000_00001111, a, b); |
10550 | #[rustfmt::skip] |
10551 | let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
10552 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8); |
10553 | assert_eq_m256i(r, e); |
10554 | } |
10555 | |
10556 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10557 | unsafe fn test_mm_mask_adds_epu8() { |
10558 | let a = _mm_set1_epi8(1); |
10559 | let b = _mm_set1_epi8(u8::MAX as i8); |
10560 | let r = _mm_mask_adds_epu8(a, 0, a, b); |
10561 | assert_eq_m128i(r, a); |
10562 | let r = _mm_mask_adds_epu8(a, 0b00000000_00001111, a, b); |
10563 | #[rustfmt::skip] |
10564 | let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8); |
10565 | assert_eq_m128i(r, e); |
10566 | } |
10567 | |
10568 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10569 | unsafe fn test_mm_maskz_adds_epu8() { |
10570 | let a = _mm_set1_epi8(1); |
10571 | let b = _mm_set1_epi8(u8::MAX as i8); |
10572 | let r = _mm_maskz_adds_epu8(0, a, b); |
10573 | assert_eq_m128i(r, _mm_setzero_si128()); |
10574 | let r = _mm_maskz_adds_epu8(0b00000000_00001111, a, b); |
10575 | #[rustfmt::skip] |
10576 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8); |
10577 | assert_eq_m128i(r, e); |
10578 | } |
10579 | |
10580 | #[simd_test(enable = "avx512bw" )] |
10581 | unsafe fn test_mm512_adds_epi16() { |
10582 | let a = _mm512_set1_epi16(1); |
10583 | let b = _mm512_set1_epi16(i16::MAX); |
10584 | let r = _mm512_adds_epi16(a, b); |
10585 | let e = _mm512_set1_epi16(i16::MAX); |
10586 | assert_eq_m512i(r, e); |
10587 | } |
10588 | |
10589 | #[simd_test(enable = "avx512bw" )] |
10590 | unsafe fn test_mm512_mask_adds_epi16() { |
10591 | let a = _mm512_set1_epi16(1); |
10592 | let b = _mm512_set1_epi16(i16::MAX); |
10593 | let r = _mm512_mask_adds_epi16(a, 0, a, b); |
10594 | assert_eq_m512i(r, a); |
10595 | let r = _mm512_mask_adds_epi16(a, 0b00000000_00000000_00000000_00001111, a, b); |
10596 | #[rustfmt::skip] |
10597 | let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
10598 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX); |
10599 | assert_eq_m512i(r, e); |
10600 | } |
10601 | |
10602 | #[simd_test(enable = "avx512bw" )] |
10603 | unsafe fn test_mm512_maskz_adds_epi16() { |
10604 | let a = _mm512_set1_epi16(1); |
10605 | let b = _mm512_set1_epi16(i16::MAX); |
10606 | let r = _mm512_maskz_adds_epi16(0, a, b); |
10607 | assert_eq_m512i(r, _mm512_setzero_si512()); |
10608 | let r = _mm512_maskz_adds_epi16(0b00000000_00000000_00000000_00001111, a, b); |
10609 | #[rustfmt::skip] |
10610 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
10611 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX); |
10612 | assert_eq_m512i(r, e); |
10613 | } |
10614 | |
10615 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10616 | unsafe fn test_mm256_mask_adds_epi16() { |
10617 | let a = _mm256_set1_epi16(1); |
10618 | let b = _mm256_set1_epi16(i16::MAX); |
10619 | let r = _mm256_mask_adds_epi16(a, 0, a, b); |
10620 | assert_eq_m256i(r, a); |
10621 | let r = _mm256_mask_adds_epi16(a, 0b00000000_00001111, a, b); |
10622 | #[rustfmt::skip] |
10623 | let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX); |
10624 | assert_eq_m256i(r, e); |
10625 | } |
10626 | |
10627 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10628 | unsafe fn test_mm256_maskz_adds_epi16() { |
10629 | let a = _mm256_set1_epi16(1); |
10630 | let b = _mm256_set1_epi16(i16::MAX); |
10631 | let r = _mm256_maskz_adds_epi16(0, a, b); |
10632 | assert_eq_m256i(r, _mm256_setzero_si256()); |
10633 | let r = _mm256_maskz_adds_epi16(0b00000000_00001111, a, b); |
10634 | #[rustfmt::skip] |
10635 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX); |
10636 | assert_eq_m256i(r, e); |
10637 | } |
10638 | |
10639 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10640 | unsafe fn test_mm_mask_adds_epi16() { |
10641 | let a = _mm_set1_epi16(1); |
10642 | let b = _mm_set1_epi16(i16::MAX); |
10643 | let r = _mm_mask_adds_epi16(a, 0, a, b); |
10644 | assert_eq_m128i(r, a); |
10645 | let r = _mm_mask_adds_epi16(a, 0b00001111, a, b); |
10646 | let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX); |
10647 | assert_eq_m128i(r, e); |
10648 | } |
10649 | |
10650 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10651 | unsafe fn test_mm_maskz_adds_epi16() { |
10652 | let a = _mm_set1_epi16(1); |
10653 | let b = _mm_set1_epi16(i16::MAX); |
10654 | let r = _mm_maskz_adds_epi16(0, a, b); |
10655 | assert_eq_m128i(r, _mm_setzero_si128()); |
10656 | let r = _mm_maskz_adds_epi16(0b00001111, a, b); |
10657 | let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX); |
10658 | assert_eq_m128i(r, e); |
10659 | } |
10660 | |
10661 | #[simd_test(enable = "avx512bw" )] |
10662 | unsafe fn test_mm512_adds_epi8() { |
10663 | let a = _mm512_set1_epi8(1); |
10664 | let b = _mm512_set1_epi8(i8::MAX); |
10665 | let r = _mm512_adds_epi8(a, b); |
10666 | let e = _mm512_set1_epi8(i8::MAX); |
10667 | assert_eq_m512i(r, e); |
10668 | } |
10669 | |
10670 | #[simd_test(enable = "avx512bw" )] |
10671 | unsafe fn test_mm512_mask_adds_epi8() { |
10672 | let a = _mm512_set1_epi8(1); |
10673 | let b = _mm512_set1_epi8(i8::MAX); |
10674 | let r = _mm512_mask_adds_epi8(a, 0, a, b); |
10675 | assert_eq_m512i(r, a); |
10676 | let r = _mm512_mask_adds_epi8( |
10677 | a, |
10678 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, |
10679 | a, |
10680 | b, |
10681 | ); |
10682 | #[rustfmt::skip] |
10683 | let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
10684 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
10685 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
10686 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX); |
10687 | assert_eq_m512i(r, e); |
10688 | } |
10689 | |
10690 | #[simd_test(enable = "avx512bw" )] |
10691 | unsafe fn test_mm512_maskz_adds_epi8() { |
10692 | let a = _mm512_set1_epi8(1); |
10693 | let b = _mm512_set1_epi8(i8::MAX); |
10694 | let r = _mm512_maskz_adds_epi8(0, a, b); |
10695 | assert_eq_m512i(r, _mm512_setzero_si512()); |
10696 | let r = _mm512_maskz_adds_epi8( |
10697 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, |
10698 | a, |
10699 | b, |
10700 | ); |
10701 | #[rustfmt::skip] |
10702 | let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
10703 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
10704 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
10705 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX); |
10706 | assert_eq_m512i(r, e); |
10707 | } |
10708 | |
10709 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10710 | unsafe fn test_mm256_mask_adds_epi8() { |
10711 | let a = _mm256_set1_epi8(1); |
10712 | let b = _mm256_set1_epi8(i8::MAX); |
10713 | let r = _mm256_mask_adds_epi8(a, 0, a, b); |
10714 | assert_eq_m256i(r, a); |
10715 | let r = _mm256_mask_adds_epi8(a, 0b00000000_00000000_00000000_00001111, a, b); |
10716 | #[rustfmt::skip] |
10717 | let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
10718 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX); |
10719 | assert_eq_m256i(r, e); |
10720 | } |
10721 | |
10722 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10723 | unsafe fn test_mm256_maskz_adds_epi8() { |
10724 | let a = _mm256_set1_epi8(1); |
10725 | let b = _mm256_set1_epi8(i8::MAX); |
10726 | let r = _mm256_maskz_adds_epi8(0, a, b); |
10727 | assert_eq_m256i(r, _mm256_setzero_si256()); |
10728 | let r = _mm256_maskz_adds_epi8(0b00000000_00000000_00000000_00001111, a, b); |
10729 | #[rustfmt::skip] |
10730 | let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
10731 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX); |
10732 | assert_eq_m256i(r, e); |
10733 | } |
10734 | |
10735 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10736 | unsafe fn test_mm_mask_adds_epi8() { |
10737 | let a = _mm_set1_epi8(1); |
10738 | let b = _mm_set1_epi8(i8::MAX); |
10739 | let r = _mm_mask_adds_epi8(a, 0, a, b); |
10740 | assert_eq_m128i(r, a); |
10741 | let r = _mm_mask_adds_epi8(a, 0b00000000_00001111, a, b); |
10742 | #[rustfmt::skip] |
10743 | let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX); |
10744 | assert_eq_m128i(r, e); |
10745 | } |
10746 | |
10747 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10748 | unsafe fn test_mm_maskz_adds_epi8() { |
10749 | let a = _mm_set1_epi8(1); |
10750 | let b = _mm_set1_epi8(i8::MAX); |
10751 | let r = _mm_maskz_adds_epi8(0, a, b); |
10752 | assert_eq_m128i(r, _mm_setzero_si128()); |
10753 | let r = _mm_maskz_adds_epi8(0b00000000_00001111, a, b); |
10754 | #[rustfmt::skip] |
10755 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX); |
10756 | assert_eq_m128i(r, e); |
10757 | } |
10758 | |
10759 | #[simd_test(enable = "avx512bw" )] |
10760 | unsafe fn test_mm512_sub_epi16() { |
10761 | let a = _mm512_set1_epi16(1); |
10762 | let b = _mm512_set1_epi16(2); |
10763 | let r = _mm512_sub_epi16(a, b); |
10764 | let e = _mm512_set1_epi16(-1); |
10765 | assert_eq_m512i(r, e); |
10766 | } |
10767 | |
10768 | #[simd_test(enable = "avx512bw" )] |
10769 | unsafe fn test_mm512_mask_sub_epi16() { |
10770 | let a = _mm512_set1_epi16(1); |
10771 | let b = _mm512_set1_epi16(2); |
10772 | let r = _mm512_mask_sub_epi16(a, 0, a, b); |
10773 | assert_eq_m512i(r, a); |
10774 | let r = _mm512_mask_sub_epi16(a, 0b00000000_11111111_00000000_11111111, a, b); |
10775 | #[rustfmt::skip] |
10776 | let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, |
10777 | 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1); |
10778 | assert_eq_m512i(r, e); |
10779 | } |
10780 | |
10781 | #[simd_test(enable = "avx512bw" )] |
10782 | unsafe fn test_mm512_maskz_sub_epi16() { |
10783 | let a = _mm512_set1_epi16(1); |
10784 | let b = _mm512_set1_epi16(2); |
10785 | let r = _mm512_maskz_sub_epi16(0, a, b); |
10786 | assert_eq_m512i(r, _mm512_setzero_si512()); |
10787 | let r = _mm512_maskz_sub_epi16(0b00000000_11111111_00000000_11111111, a, b); |
10788 | #[rustfmt::skip] |
10789 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, |
10790 | 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); |
10791 | assert_eq_m512i(r, e); |
10792 | } |
10793 | |
10794 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10795 | unsafe fn test_mm256_mask_sub_epi16() { |
10796 | let a = _mm256_set1_epi16(1); |
10797 | let b = _mm256_set1_epi16(2); |
10798 | let r = _mm256_mask_sub_epi16(a, 0, a, b); |
10799 | assert_eq_m256i(r, a); |
10800 | let r = _mm256_mask_sub_epi16(a, 0b00000000_11111111, a, b); |
10801 | let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1); |
10802 | assert_eq_m256i(r, e); |
10803 | } |
10804 | |
10805 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10806 | unsafe fn test_mm256_maskz_sub_epi16() { |
10807 | let a = _mm256_set1_epi16(1); |
10808 | let b = _mm256_set1_epi16(2); |
10809 | let r = _mm256_maskz_sub_epi16(0, a, b); |
10810 | assert_eq_m256i(r, _mm256_setzero_si256()); |
10811 | let r = _mm256_maskz_sub_epi16(0b00000000_11111111, a, b); |
10812 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); |
10813 | assert_eq_m256i(r, e); |
10814 | } |
10815 | |
10816 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10817 | unsafe fn test_mm_mask_sub_epi16() { |
10818 | let a = _mm_set1_epi16(1); |
10819 | let b = _mm_set1_epi16(2); |
10820 | let r = _mm_mask_sub_epi16(a, 0, a, b); |
10821 | assert_eq_m128i(r, a); |
10822 | let r = _mm_mask_sub_epi16(a, 0b00001111, a, b); |
10823 | let e = _mm_set_epi16(1, 1, 1, 1, -1, -1, -1, -1); |
10824 | assert_eq_m128i(r, e); |
10825 | } |
10826 | |
10827 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10828 | unsafe fn test_mm_maskz_sub_epi16() { |
10829 | let a = _mm_set1_epi16(1); |
10830 | let b = _mm_set1_epi16(2); |
10831 | let r = _mm_maskz_sub_epi16(0, a, b); |
10832 | assert_eq_m128i(r, _mm_setzero_si128()); |
10833 | let r = _mm_maskz_sub_epi16(0b00001111, a, b); |
10834 | let e = _mm_set_epi16(0, 0, 0, 0, -1, -1, -1, -1); |
10835 | assert_eq_m128i(r, e); |
10836 | } |
10837 | |
10838 | #[simd_test(enable = "avx512bw" )] |
10839 | unsafe fn test_mm512_sub_epi8() { |
10840 | let a = _mm512_set1_epi8(1); |
10841 | let b = _mm512_set1_epi8(2); |
10842 | let r = _mm512_sub_epi8(a, b); |
10843 | let e = _mm512_set1_epi8(-1); |
10844 | assert_eq_m512i(r, e); |
10845 | } |
10846 | |
10847 | #[simd_test(enable = "avx512bw" )] |
10848 | unsafe fn test_mm512_mask_sub_epi8() { |
10849 | let a = _mm512_set1_epi8(1); |
10850 | let b = _mm512_set1_epi8(2); |
10851 | let r = _mm512_mask_sub_epi8(a, 0, a, b); |
10852 | assert_eq_m512i(r, a); |
10853 | let r = _mm512_mask_sub_epi8( |
10854 | a, |
10855 | 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, |
10856 | a, |
10857 | b, |
10858 | ); |
10859 | #[rustfmt::skip] |
10860 | let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, |
10861 | 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, |
10862 | 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, |
10863 | 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1); |
10864 | assert_eq_m512i(r, e); |
10865 | } |
10866 | |
10867 | #[simd_test(enable = "avx512bw" )] |
10868 | unsafe fn test_mm512_maskz_sub_epi8() { |
10869 | let a = _mm512_set1_epi8(1); |
10870 | let b = _mm512_set1_epi8(2); |
10871 | let r = _mm512_maskz_sub_epi8(0, a, b); |
10872 | assert_eq_m512i(r, _mm512_setzero_si512()); |
10873 | let r = _mm512_maskz_sub_epi8( |
10874 | 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, |
10875 | a, |
10876 | b, |
10877 | ); |
10878 | #[rustfmt::skip] |
10879 | let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, |
10880 | 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, |
10881 | 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, |
10882 | 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); |
10883 | assert_eq_m512i(r, e); |
10884 | } |
10885 | |
10886 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10887 | unsafe fn test_mm256_mask_sub_epi8() { |
10888 | let a = _mm256_set1_epi8(1); |
10889 | let b = _mm256_set1_epi8(2); |
10890 | let r = _mm256_mask_sub_epi8(a, 0, a, b); |
10891 | assert_eq_m256i(r, a); |
10892 | let r = _mm256_mask_sub_epi8(a, 0b00000000_11111111_00000000_11111111, a, b); |
10893 | #[rustfmt::skip] |
10894 | let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, |
10895 | 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1); |
10896 | assert_eq_m256i(r, e); |
10897 | } |
10898 | |
10899 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10900 | unsafe fn test_mm256_maskz_sub_epi8() { |
10901 | let a = _mm256_set1_epi8(1); |
10902 | let b = _mm256_set1_epi8(2); |
10903 | let r = _mm256_maskz_sub_epi8(0, a, b); |
10904 | assert_eq_m256i(r, _mm256_setzero_si256()); |
10905 | let r = _mm256_maskz_sub_epi8(0b00000000_11111111_00000000_11111111, a, b); |
10906 | #[rustfmt::skip] |
10907 | let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, |
10908 | 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); |
10909 | assert_eq_m256i(r, e); |
10910 | } |
10911 | |
10912 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10913 | unsafe fn test_mm_mask_sub_epi8() { |
10914 | let a = _mm_set1_epi8(1); |
10915 | let b = _mm_set1_epi8(2); |
10916 | let r = _mm_mask_sub_epi8(a, 0, a, b); |
10917 | assert_eq_m128i(r, a); |
10918 | let r = _mm_mask_sub_epi8(a, 0b00000000_11111111, a, b); |
10919 | let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1); |
10920 | assert_eq_m128i(r, e); |
10921 | } |
10922 | |
10923 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10924 | unsafe fn test_mm_maskz_sub_epi8() { |
10925 | let a = _mm_set1_epi8(1); |
10926 | let b = _mm_set1_epi8(2); |
10927 | let r = _mm_maskz_sub_epi8(0, a, b); |
10928 | assert_eq_m128i(r, _mm_setzero_si128()); |
10929 | let r = _mm_maskz_sub_epi8(0b00000000_11111111, a, b); |
10930 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); |
10931 | assert_eq_m128i(r, e); |
10932 | } |
10933 | |
10934 | #[simd_test(enable = "avx512bw" )] |
10935 | unsafe fn test_mm512_subs_epu16() { |
10936 | let a = _mm512_set1_epi16(1); |
10937 | let b = _mm512_set1_epi16(u16::MAX as i16); |
10938 | let r = _mm512_subs_epu16(a, b); |
10939 | let e = _mm512_set1_epi16(0); |
10940 | assert_eq_m512i(r, e); |
10941 | } |
10942 | |
10943 | #[simd_test(enable = "avx512bw" )] |
10944 | unsafe fn test_mm512_mask_subs_epu16() { |
10945 | let a = _mm512_set1_epi16(1); |
10946 | let b = _mm512_set1_epi16(u16::MAX as i16); |
10947 | let r = _mm512_mask_subs_epu16(a, 0, a, b); |
10948 | assert_eq_m512i(r, a); |
10949 | let r = _mm512_mask_subs_epu16(a, 0b00000000_00000000_00000000_00001111, a, b); |
10950 | #[rustfmt::skip] |
10951 | let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
10952 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); |
10953 | assert_eq_m512i(r, e); |
10954 | } |
10955 | |
10956 | #[simd_test(enable = "avx512bw" )] |
10957 | unsafe fn test_mm512_maskz_subs_epu16() { |
10958 | let a = _mm512_set1_epi16(1); |
10959 | let b = _mm512_set1_epi16(u16::MAX as i16); |
10960 | let r = _mm512_maskz_subs_epu16(0, a, b); |
10961 | assert_eq_m512i(r, _mm512_setzero_si512()); |
10962 | let r = _mm512_maskz_subs_epu16(0b00000000_00000000_00000000_00001111, a, b); |
10963 | #[rustfmt::skip] |
10964 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
10965 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
10966 | assert_eq_m512i(r, e); |
10967 | } |
10968 | |
10969 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10970 | unsafe fn test_mm256_mask_subs_epu16() { |
10971 | let a = _mm256_set1_epi16(1); |
10972 | let b = _mm256_set1_epi16(u16::MAX as i16); |
10973 | let r = _mm256_mask_subs_epu16(a, 0, a, b); |
10974 | assert_eq_m256i(r, a); |
10975 | let r = _mm256_mask_subs_epu16(a, 0b00000000_00001111, a, b); |
10976 | let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); |
10977 | assert_eq_m256i(r, e); |
10978 | } |
10979 | |
10980 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10981 | unsafe fn test_mm256_maskz_subs_epu16() { |
10982 | let a = _mm256_set1_epi16(1); |
10983 | let b = _mm256_set1_epi16(u16::MAX as i16); |
10984 | let r = _mm256_maskz_subs_epu16(0, a, b); |
10985 | assert_eq_m256i(r, _mm256_setzero_si256()); |
10986 | let r = _mm256_maskz_subs_epu16(0b00000000_00001111, a, b); |
10987 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
10988 | assert_eq_m256i(r, e); |
10989 | } |
10990 | |
10991 | #[simd_test(enable = "avx512bw,avx512vl" )] |
10992 | unsafe fn test_mm_mask_subs_epu16() { |
10993 | let a = _mm_set1_epi16(1); |
10994 | let b = _mm_set1_epi16(u16::MAX as i16); |
10995 | let r = _mm_mask_subs_epu16(a, 0, a, b); |
10996 | assert_eq_m128i(r, a); |
10997 | let r = _mm_mask_subs_epu16(a, 0b00001111, a, b); |
10998 | let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0); |
10999 | assert_eq_m128i(r, e); |
11000 | } |
11001 | |
11002 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11003 | unsafe fn test_mm_maskz_subs_epu16() { |
11004 | let a = _mm_set1_epi16(1); |
11005 | let b = _mm_set1_epi16(u16::MAX as i16); |
11006 | let r = _mm_maskz_subs_epu16(0, a, b); |
11007 | assert_eq_m128i(r, _mm_setzero_si128()); |
11008 | let r = _mm_maskz_subs_epu16(0b00001111, a, b); |
11009 | let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0); |
11010 | assert_eq_m128i(r, e); |
11011 | } |
11012 | |
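// `subs_epu16` subtracts with unsigned saturation: 1 - u16::MAX would
// underflow, so every selected lane in the tests below saturates to 0.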
11013 | #[simd_test(enable = "avx512bw" )] |
11014 | unsafe fn test_mm512_subs_epu8() { |
11015 | let a = _mm512_set1_epi8(1); |
11016 | let b = _mm512_set1_epi8(u8::MAX as i8); |
11017 | let r = _mm512_subs_epu8(a, b); |
11018 | let e = _mm512_set1_epi8(0); |
11019 | assert_eq_m512i(r, e); |
11020 | } |
11021 | |
11022 | #[simd_test(enable = "avx512bw" )] |
11023 | unsafe fn test_mm512_mask_subs_epu8() { |
11024 | let a = _mm512_set1_epi8(1); |
11025 | let b = _mm512_set1_epi8(u8::MAX as i8); |
11026 | let r = _mm512_mask_subs_epu8(a, 0, a, b); |
11027 | assert_eq_m512i(r, a); |
11028 | let r = _mm512_mask_subs_epu8( |
11029 | a, |
11030 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, |
11031 | a, |
11032 | b, |
11033 | ); |
11034 | #[rustfmt::skip] |
11035 | let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
11036 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
11037 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
11038 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); |
11039 | assert_eq_m512i(r, e); |
11040 | } |
11041 | |
11042 | #[simd_test(enable = "avx512bw" )] |
11043 | unsafe fn test_mm512_maskz_subs_epu8() { |
11044 | let a = _mm512_set1_epi8(1); |
11045 | let b = _mm512_set1_epi8(u8::MAX as i8); |
11046 | let r = _mm512_maskz_subs_epu8(0, a, b); |
11047 | assert_eq_m512i(r, _mm512_setzero_si512()); |
11048 | let r = _mm512_maskz_subs_epu8( |
11049 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, |
11050 | a, |
11051 | b, |
11052 | ); |
11053 | #[rustfmt::skip] |
11054 | let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
11055 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
11056 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
11057 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
11058 | assert_eq_m512i(r, e); |
11059 | } |
11060 | |
11061 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11062 | unsafe fn test_mm256_mask_subs_epu8() { |
11063 | let a = _mm256_set1_epi8(1); |
11064 | let b = _mm256_set1_epi8(u8::MAX as i8); |
11065 | let r = _mm256_mask_subs_epu8(a, 0, a, b); |
11066 | assert_eq_m256i(r, a); |
11067 | let r = _mm256_mask_subs_epu8(a, 0b00000000_00000000_00000000_00001111, a, b); |
11068 | #[rustfmt::skip] |
11069 | let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
11070 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); |
11071 | assert_eq_m256i(r, e); |
11072 | } |
11073 | |
11074 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11075 | unsafe fn test_mm256_maskz_subs_epu8() { |
11076 | let a = _mm256_set1_epi8(1); |
11077 | let b = _mm256_set1_epi8(u8::MAX as i8); |
11078 | let r = _mm256_maskz_subs_epu8(0, a, b); |
11079 | assert_eq_m256i(r, _mm256_setzero_si256()); |
11080 | let r = _mm256_maskz_subs_epu8(0b00000000_00000000_00000000_00001111, a, b); |
11081 | #[rustfmt::skip] |
11082 | let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
11083 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
11084 | assert_eq_m256i(r, e); |
11085 | } |
11086 | |
11087 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11088 | unsafe fn test_mm_mask_subs_epu8() { |
11089 | let a = _mm_set1_epi8(1); |
11090 | let b = _mm_set1_epi8(u8::MAX as i8); |
11091 | let r = _mm_mask_subs_epu8(a, 0, a, b); |
11092 | assert_eq_m128i(r, a); |
11093 | let r = _mm_mask_subs_epu8(a, 0b00000000_00001111, a, b); |
11094 | let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); |
11095 | assert_eq_m128i(r, e); |
11096 | } |
11097 | |
11098 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11099 | unsafe fn test_mm_maskz_subs_epu8() { |
11100 | let a = _mm_set1_epi8(1); |
11101 | let b = _mm_set1_epi8(u8::MAX as i8); |
11102 | let r = _mm_maskz_subs_epu8(0, a, b); |
11103 | assert_eq_m128i(r, _mm_setzero_si128()); |
11104 | let r = _mm_maskz_subs_epu8(0b00000000_00001111, a, b); |
11105 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
11106 | assert_eq_m128i(r, e); |
11107 | } |
11108 | |
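// `subs_epi16` subtracts with signed saturation: -1 - i16::MAX lands exactly
// on i16::MIN, the smallest representable value, which is what the selected
// lanes are checked against below.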
11109 | #[simd_test(enable = "avx512bw" )] |
11110 | unsafe fn test_mm512_subs_epi16() { |
11111 | let a = _mm512_set1_epi16(-1); |
11112 | let b = _mm512_set1_epi16(i16::MAX); |
11113 | let r = _mm512_subs_epi16(a, b); |
11114 | let e = _mm512_set1_epi16(i16::MIN); |
11115 | assert_eq_m512i(r, e); |
11116 | } |
11117 | |
11118 | #[simd_test(enable = "avx512bw" )] |
11119 | unsafe fn test_mm512_mask_subs_epi16() { |
11120 | let a = _mm512_set1_epi16(-1); |
11121 | let b = _mm512_set1_epi16(i16::MAX); |
11122 | let r = _mm512_mask_subs_epi16(a, 0, a, b); |
11123 | assert_eq_m512i(r, a); |
11124 | let r = _mm512_mask_subs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b); |
11125 | #[rustfmt::skip] |
11126 | let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
11127 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN); |
11128 | assert_eq_m512i(r, e); |
11129 | } |
11130 | |
11131 | #[simd_test(enable = "avx512bw" )] |
11132 | unsafe fn test_mm512_maskz_subs_epi16() { |
11133 | let a = _mm512_set1_epi16(-1); |
11134 | let b = _mm512_set1_epi16(i16::MAX); |
11135 | let r = _mm512_maskz_subs_epi16(0, a, b); |
11136 | assert_eq_m512i(r, _mm512_setzero_si512()); |
11137 | let r = _mm512_maskz_subs_epi16(0b00000000_00000000_00000000_00001111, a, b); |
11138 | #[rustfmt::skip] |
11139 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
11140 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN); |
11141 | assert_eq_m512i(r, e); |
11142 | } |
11143 | |
11144 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11145 | unsafe fn test_mm256_mask_subs_epi16() { |
11146 | let a = _mm256_set1_epi16(-1); |
11147 | let b = _mm256_set1_epi16(i16::MAX); |
11148 | let r = _mm256_mask_subs_epi16(a, 0, a, b); |
11149 | assert_eq_m256i(r, a); |
11150 | let r = _mm256_mask_subs_epi16(a, 0b00000000_00001111, a, b); |
11151 | #[rustfmt::skip] |
11152 | let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN); |
11153 | assert_eq_m256i(r, e); |
11154 | } |
11155 | |
11156 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11157 | unsafe fn test_mm256_maskz_subs_epi16() { |
11158 | let a = _mm256_set1_epi16(-1); |
11159 | let b = _mm256_set1_epi16(i16::MAX); |
11160 | let r = _mm256_maskz_subs_epi16(0, a, b); |
11161 | assert_eq_m256i(r, _mm256_setzero_si256()); |
11162 | let r = _mm256_maskz_subs_epi16(0b00000000_00001111, a, b); |
11163 | #[rustfmt::skip] |
11164 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN); |
11165 | assert_eq_m256i(r, e); |
11166 | } |
11167 | |
11168 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11169 | unsafe fn test_mm_mask_subs_epi16() { |
11170 | let a = _mm_set1_epi16(-1); |
11171 | let b = _mm_set1_epi16(i16::MAX); |
11172 | let r = _mm_mask_subs_epi16(a, 0, a, b); |
11173 | assert_eq_m128i(r, a); |
11174 | let r = _mm_mask_subs_epi16(a, 0b00001111, a, b); |
11175 | let e = _mm_set_epi16(-1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN); |
11176 | assert_eq_m128i(r, e); |
11177 | } |
11178 | |
11179 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11180 | unsafe fn test_mm_maskz_subs_epi16() { |
11181 | let a = _mm_set1_epi16(-1); |
11182 | let b = _mm_set1_epi16(i16::MAX); |
11183 | let r = _mm_maskz_subs_epi16(0, a, b); |
11184 | assert_eq_m128i(r, _mm_setzero_si128()); |
11185 | let r = _mm_maskz_subs_epi16(0b00001111, a, b); |
11186 | let e = _mm_set_epi16(0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN); |
11187 | assert_eq_m128i(r, e); |
11188 | } |
11189 | |
11190 | #[simd_test(enable = "avx512bw" )] |
11191 | unsafe fn test_mm512_subs_epi8() { |
11192 | let a = _mm512_set1_epi8(-1); |
11193 | let b = _mm512_set1_epi8(i8::MAX); |
11194 | let r = _mm512_subs_epi8(a, b); |
11195 | let e = _mm512_set1_epi8(i8::MIN); |
11196 | assert_eq_m512i(r, e); |
11197 | } |
11198 | |
11199 | #[simd_test(enable = "avx512bw" )] |
11200 | unsafe fn test_mm512_mask_subs_epi8() { |
11201 | let a = _mm512_set1_epi8(-1); |
11202 | let b = _mm512_set1_epi8(i8::MAX); |
11203 | let r = _mm512_mask_subs_epi8(a, 0, a, b); |
11204 | assert_eq_m512i(r, a); |
11205 | let r = _mm512_mask_subs_epi8( |
11206 | a, |
11207 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, |
11208 | a, |
11209 | b, |
11210 | ); |
11211 | #[rustfmt::skip] |
11212 | let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
11213 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
11214 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
11215 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN); |
11216 | assert_eq_m512i(r, e); |
11217 | } |
11218 | |
11219 | #[simd_test(enable = "avx512bw" )] |
11220 | unsafe fn test_mm512_maskz_subs_epi8() { |
11221 | let a = _mm512_set1_epi8(-1); |
11222 | let b = _mm512_set1_epi8(i8::MAX); |
11223 | let r = _mm512_maskz_subs_epi8(0, a, b); |
11224 | assert_eq_m512i(r, _mm512_setzero_si512()); |
11225 | let r = _mm512_maskz_subs_epi8( |
11226 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, |
11227 | a, |
11228 | b, |
11229 | ); |
11230 | #[rustfmt::skip] |
11231 | let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
11232 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
11233 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
11234 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN); |
11235 | assert_eq_m512i(r, e); |
11236 | } |
11237 | |
11238 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11239 | unsafe fn test_mm256_mask_subs_epi8() { |
11240 | let a = _mm256_set1_epi8(-1); |
11241 | let b = _mm256_set1_epi8(i8::MAX); |
11242 | let r = _mm256_mask_subs_epi8(a, 0, a, b); |
11243 | assert_eq_m256i(r, a); |
11244 | let r = _mm256_mask_subs_epi8(a, 0b00000000_00000000_00000000_00001111, a, b); |
11245 | #[rustfmt::skip] |
11246 | let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
11247 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN); |
11248 | assert_eq_m256i(r, e); |
11249 | } |
11250 | |
11251 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11252 | unsafe fn test_mm256_maskz_subs_epi8() { |
11253 | let a = _mm256_set1_epi8(-1); |
11254 | let b = _mm256_set1_epi8(i8::MAX); |
11255 | let r = _mm256_maskz_subs_epi8(0, a, b); |
11256 | assert_eq_m256i(r, _mm256_setzero_si256()); |
11257 | let r = _mm256_maskz_subs_epi8(0b00000000_00000000_00000000_00001111, a, b); |
11258 | #[rustfmt::skip] |
11259 | let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
11260 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN); |
11261 | assert_eq_m256i(r, e); |
11262 | } |
11263 | |
11264 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11265 | unsafe fn test_mm_mask_subs_epi8() { |
11266 | let a = _mm_set1_epi8(-1); |
11267 | let b = _mm_set1_epi8(i8::MAX); |
11268 | let r = _mm_mask_subs_epi8(a, 0, a, b); |
11269 | assert_eq_m128i(r, a); |
11270 | let r = _mm_mask_subs_epi8(a, 0b00000000_00001111, a, b); |
11271 | #[rustfmt::skip] |
11272 | let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN); |
11273 | assert_eq_m128i(r, e); |
11274 | } |
11275 | |
11276 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11277 | unsafe fn test_mm_maskz_subs_epi8() { |
11278 | let a = _mm_set1_epi8(-1); |
11279 | let b = _mm_set1_epi8(i8::MAX); |
11280 | let r = _mm_maskz_subs_epi8(0, a, b); |
11281 | assert_eq_m128i(r, _mm_setzero_si128()); |
11282 | let r = _mm_maskz_subs_epi8(0b00000000_00001111, a, b); |
11283 | #[rustfmt::skip] |
11284 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN); |
11285 | assert_eq_m128i(r, e); |
11286 | } |
11287 | |
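// `mulhi_epu16`/`mulhi_epi16` return the high 16 bits of the 32-bit product;
// for 1 * 1 the high half is 0, so selected lanes below are expected to be 0.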
11288 | #[simd_test(enable = "avx512bw" )] |
11289 | unsafe fn test_mm512_mulhi_epu16() { |
11290 | let a = _mm512_set1_epi16(1); |
11291 | let b = _mm512_set1_epi16(1); |
11292 | let r = _mm512_mulhi_epu16(a, b); |
11293 | let e = _mm512_set1_epi16(0); |
11294 | assert_eq_m512i(r, e); |
11295 | } |
11296 | |
11297 | #[simd_test(enable = "avx512bw" )] |
11298 | unsafe fn test_mm512_mask_mulhi_epu16() { |
11299 | let a = _mm512_set1_epi16(1); |
11300 | let b = _mm512_set1_epi16(1); |
11301 | let r = _mm512_mask_mulhi_epu16(a, 0, a, b); |
11302 | assert_eq_m512i(r, a); |
11303 | let r = _mm512_mask_mulhi_epu16(a, 0b00000000_00000000_00000000_00001111, a, b); |
11304 | #[rustfmt::skip] |
11305 | let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
11306 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); |
11307 | assert_eq_m512i(r, e); |
11308 | } |
11309 | |
11310 | #[simd_test(enable = "avx512bw" )] |
11311 | unsafe fn test_mm512_maskz_mulhi_epu16() { |
11312 | let a = _mm512_set1_epi16(1); |
11313 | let b = _mm512_set1_epi16(1); |
11314 | let r = _mm512_maskz_mulhi_epu16(0, a, b); |
11315 | assert_eq_m512i(r, _mm512_setzero_si512()); |
11316 | let r = _mm512_maskz_mulhi_epu16(0b00000000_00000000_00000000_00001111, a, b); |
11317 | #[rustfmt::skip] |
11318 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
11319 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
11320 | assert_eq_m512i(r, e); |
11321 | } |
11322 | |
11323 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11324 | unsafe fn test_mm256_mask_mulhi_epu16() { |
11325 | let a = _mm256_set1_epi16(1); |
11326 | let b = _mm256_set1_epi16(1); |
11327 | let r = _mm256_mask_mulhi_epu16(a, 0, a, b); |
11328 | assert_eq_m256i(r, a); |
11329 | let r = _mm256_mask_mulhi_epu16(a, 0b00000000_00001111, a, b); |
11330 | let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); |
11331 | assert_eq_m256i(r, e); |
11332 | } |
11333 | |
11334 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11335 | unsafe fn test_mm256_maskz_mulhi_epu16() { |
11336 | let a = _mm256_set1_epi16(1); |
11337 | let b = _mm256_set1_epi16(1); |
11338 | let r = _mm256_maskz_mulhi_epu16(0, a, b); |
11339 | assert_eq_m256i(r, _mm256_setzero_si256()); |
11340 | let r = _mm256_maskz_mulhi_epu16(0b00000000_00001111, a, b); |
11341 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
11342 | assert_eq_m256i(r, e); |
11343 | } |
11344 | |
11345 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11346 | unsafe fn test_mm_mask_mulhi_epu16() { |
11347 | let a = _mm_set1_epi16(1); |
11348 | let b = _mm_set1_epi16(1); |
11349 | let r = _mm_mask_mulhi_epu16(a, 0, a, b); |
11350 | assert_eq_m128i(r, a); |
11351 | let r = _mm_mask_mulhi_epu16(a, 0b00001111, a, b); |
11352 | let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0); |
11353 | assert_eq_m128i(r, e); |
11354 | } |
11355 | |
11356 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11357 | unsafe fn test_mm_maskz_mulhi_epu16() { |
11358 | let a = _mm_set1_epi16(1); |
11359 | let b = _mm_set1_epi16(1); |
11360 | let r = _mm_maskz_mulhi_epu16(0, a, b); |
11361 | assert_eq_m128i(r, _mm_setzero_si128()); |
11362 | let r = _mm_maskz_mulhi_epu16(0b00001111, a, b); |
11363 | let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0); |
11364 | assert_eq_m128i(r, e); |
11365 | } |
11366 | |
11367 | #[simd_test(enable = "avx512bw" )] |
11368 | unsafe fn test_mm512_mulhi_epi16() { |
11369 | let a = _mm512_set1_epi16(1); |
11370 | let b = _mm512_set1_epi16(1); |
11371 | let r = _mm512_mulhi_epi16(a, b); |
11372 | let e = _mm512_set1_epi16(0); |
11373 | assert_eq_m512i(r, e); |
11374 | } |
11375 | |
11376 | #[simd_test(enable = "avx512bw" )] |
11377 | unsafe fn test_mm512_mask_mulhi_epi16() { |
11378 | let a = _mm512_set1_epi16(1); |
11379 | let b = _mm512_set1_epi16(1); |
11380 | let r = _mm512_mask_mulhi_epi16(a, 0, a, b); |
11381 | assert_eq_m512i(r, a); |
11382 | let r = _mm512_mask_mulhi_epi16(a, 0b00000000_00000000_00000000_00001111, a, b); |
11383 | #[rustfmt::skip] |
11384 | let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
11385 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); |
11386 | assert_eq_m512i(r, e); |
11387 | } |
11388 | |
11389 | #[simd_test(enable = "avx512bw" )] |
11390 | unsafe fn test_mm512_maskz_mulhi_epi16() { |
11391 | let a = _mm512_set1_epi16(1); |
11392 | let b = _mm512_set1_epi16(1); |
11393 | let r = _mm512_maskz_mulhi_epi16(0, a, b); |
11394 | assert_eq_m512i(r, _mm512_setzero_si512()); |
11395 | let r = _mm512_maskz_mulhi_epi16(0b00000000_00000000_00000000_00001111, a, b); |
11396 | #[rustfmt::skip] |
11397 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
11398 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
11399 | assert_eq_m512i(r, e); |
11400 | } |
11401 | |
11402 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11403 | unsafe fn test_mm256_mask_mulhi_epi16() { |
11404 | let a = _mm256_set1_epi16(1); |
11405 | let b = _mm256_set1_epi16(1); |
11406 | let r = _mm256_mask_mulhi_epi16(a, 0, a, b); |
11407 | assert_eq_m256i(r, a); |
11408 | let r = _mm256_mask_mulhi_epi16(a, 0b00000000_00001111, a, b); |
11409 | let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); |
11410 | assert_eq_m256i(r, e); |
11411 | } |
11412 | |
11413 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11414 | unsafe fn test_mm256_maskz_mulhi_epi16() { |
11415 | let a = _mm256_set1_epi16(1); |
11416 | let b = _mm256_set1_epi16(1); |
11417 | let r = _mm256_maskz_mulhi_epi16(0, a, b); |
11418 | assert_eq_m256i(r, _mm256_setzero_si256()); |
11419 | let r = _mm256_maskz_mulhi_epi16(0b00000000_00001111, a, b); |
11420 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
11421 | assert_eq_m256i(r, e); |
11422 | } |
11423 | |
11424 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11425 | unsafe fn test_mm_mask_mulhi_epi16() { |
11426 | let a = _mm_set1_epi16(1); |
11427 | let b = _mm_set1_epi16(1); |
11428 | let r = _mm_mask_mulhi_epi16(a, 0, a, b); |
11429 | assert_eq_m128i(r, a); |
11430 | let r = _mm_mask_mulhi_epi16(a, 0b00001111, a, b); |
11431 | let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0); |
11432 | assert_eq_m128i(r, e); |
11433 | } |
11434 | |
11435 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11436 | unsafe fn test_mm_maskz_mulhi_epi16() { |
11437 | let a = _mm_set1_epi16(1); |
11438 | let b = _mm_set1_epi16(1); |
11439 | let r = _mm_maskz_mulhi_epi16(0, a, b); |
11440 | assert_eq_m128i(r, _mm_setzero_si128()); |
11441 | let r = _mm_maskz_mulhi_epi16(0b00001111, a, b); |
11442 | let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0); |
11443 | assert_eq_m128i(r, e); |
11444 | } |
11445 | |
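// `mulhrs_epi16` (vpmulhrsw) computes (((a * b) >> 14) + 1) >> 1 per lane and
// keeps the low 16 bits; for 1 * 1 this rounds down to 0, so selected lanes
// below are expected to be 0.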
11446 | #[simd_test(enable = "avx512bw" )] |
11447 | unsafe fn test_mm512_mulhrs_epi16() { |
11448 | let a = _mm512_set1_epi16(1); |
11449 | let b = _mm512_set1_epi16(1); |
11450 | let r = _mm512_mulhrs_epi16(a, b); |
11451 | let e = _mm512_set1_epi16(0); |
11452 | assert_eq_m512i(r, e); |
11453 | } |
11454 | |
11455 | #[simd_test(enable = "avx512bw" )] |
11456 | unsafe fn test_mm512_mask_mulhrs_epi16() { |
11457 | let a = _mm512_set1_epi16(1); |
11458 | let b = _mm512_set1_epi16(1); |
11459 | let r = _mm512_mask_mulhrs_epi16(a, 0, a, b); |
11460 | assert_eq_m512i(r, a); |
11461 | let r = _mm512_mask_mulhrs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b); |
11462 | #[rustfmt::skip] |
11463 | let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
11464 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); |
11465 | assert_eq_m512i(r, e); |
11466 | } |
11467 | |
11468 | #[simd_test(enable = "avx512bw" )] |
11469 | unsafe fn test_mm512_maskz_mulhrs_epi16() { |
11470 | let a = _mm512_set1_epi16(1); |
11471 | let b = _mm512_set1_epi16(1); |
11472 | let r = _mm512_maskz_mulhrs_epi16(0, a, b); |
11473 | assert_eq_m512i(r, _mm512_setzero_si512()); |
11474 | let r = _mm512_maskz_mulhrs_epi16(0b00000000_00000000_00000000_00001111, a, b); |
11475 | #[rustfmt::skip] |
11476 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
11477 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
11478 | assert_eq_m512i(r, e); |
11479 | } |
11480 | |
11481 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11482 | unsafe fn test_mm256_mask_mulhrs_epi16() { |
11483 | let a = _mm256_set1_epi16(1); |
11484 | let b = _mm256_set1_epi16(1); |
11485 | let r = _mm256_mask_mulhrs_epi16(a, 0, a, b); |
11486 | assert_eq_m256i(r, a); |
11487 | let r = _mm256_mask_mulhrs_epi16(a, 0b00000000_00001111, a, b); |
11488 | let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); |
11489 | assert_eq_m256i(r, e); |
11490 | } |
11491 | |
11492 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11493 | unsafe fn test_mm256_maskz_mulhrs_epi16() { |
11494 | let a = _mm256_set1_epi16(1); |
11495 | let b = _mm256_set1_epi16(1); |
11496 | let r = _mm256_maskz_mulhrs_epi16(0, a, b); |
11497 | assert_eq_m256i(r, _mm256_setzero_si256()); |
11498 | let r = _mm256_maskz_mulhrs_epi16(0b00000000_00001111, a, b); |
11499 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
11500 | assert_eq_m256i(r, e); |
11501 | } |
11502 | |
11503 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11504 | unsafe fn test_mm_mask_mulhrs_epi16() { |
11505 | let a = _mm_set1_epi16(1); |
11506 | let b = _mm_set1_epi16(1); |
11507 | let r = _mm_mask_mulhrs_epi16(a, 0, a, b); |
11508 | assert_eq_m128i(r, a); |
11509 | let r = _mm_mask_mulhrs_epi16(a, 0b00001111, a, b); |
11510 | let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0); |
11511 | assert_eq_m128i(r, e); |
11512 | } |
11513 | |
11514 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11515 | unsafe fn test_mm_maskz_mulhrs_epi16() { |
11516 | let a = _mm_set1_epi16(1); |
11517 | let b = _mm_set1_epi16(1); |
11518 | let r = _mm_maskz_mulhrs_epi16(0, a, b); |
11519 | assert_eq_m128i(r, _mm_setzero_si128()); |
11520 | let r = _mm_maskz_mulhrs_epi16(0b00001111, a, b); |
11521 | let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0); |
11522 | assert_eq_m128i(r, e); |
11523 | } |
11524 | |
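// `mullo_epi16` keeps the low 16 bits of the product, so 1 * 1 yields 1 in
// every selected lane below (and in every lane of the unmasked test).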
11525 | #[simd_test(enable = "avx512bw" )] |
11526 | unsafe fn test_mm512_mullo_epi16() { |
11527 | let a = _mm512_set1_epi16(1); |
11528 | let b = _mm512_set1_epi16(1); |
11529 | let r = _mm512_mullo_epi16(a, b); |
11530 | let e = _mm512_set1_epi16(1); |
11531 | assert_eq_m512i(r, e); |
11532 | } |
11533 | |
11534 | #[simd_test(enable = "avx512bw" )] |
11535 | unsafe fn test_mm512_mask_mullo_epi16() { |
11536 | let a = _mm512_set1_epi16(1); |
11537 | let b = _mm512_set1_epi16(1); |
11538 | let r = _mm512_mask_mullo_epi16(a, 0, a, b); |
11539 | assert_eq_m512i(r, a); |
11540 | let r = _mm512_mask_mullo_epi16(a, 0b00000000_00000000_00000000_00001111, a, b); |
11541 | #[rustfmt::skip] |
11542 | let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
11543 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); |
11544 | assert_eq_m512i(r, e); |
11545 | } |
11546 | |
11547 | #[simd_test(enable = "avx512bw" )] |
11548 | unsafe fn test_mm512_maskz_mullo_epi16() { |
11549 | let a = _mm512_set1_epi16(1); |
11550 | let b = _mm512_set1_epi16(1); |
11551 | let r = _mm512_maskz_mullo_epi16(0, a, b); |
11552 | assert_eq_m512i(r, _mm512_setzero_si512()); |
11553 | let r = _mm512_maskz_mullo_epi16(0b00000000_00000000_00000000_00001111, a, b); |
11554 | #[rustfmt::skip] |
11555 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
11556 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1); |
11557 | assert_eq_m512i(r, e); |
11558 | } |
11559 | |
11560 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11561 | unsafe fn test_mm256_mask_mullo_epi16() { |
11562 | let a = _mm256_set1_epi16(1); |
11563 | let b = _mm256_set1_epi16(1); |
11564 | let r = _mm256_mask_mullo_epi16(a, 0, a, b); |
11565 | assert_eq_m256i(r, a); |
11566 | let r = _mm256_mask_mullo_epi16(a, 0b00000000_00001111, a, b); |
11567 | let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); |
11568 | assert_eq_m256i(r, e); |
11569 | } |
11570 | |
11571 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11572 | unsafe fn test_mm256_maskz_mullo_epi16() { |
11573 | let a = _mm256_set1_epi16(1); |
11574 | let b = _mm256_set1_epi16(1); |
11575 | let r = _mm256_maskz_mullo_epi16(0, a, b); |
11576 | assert_eq_m256i(r, _mm256_setzero_si256()); |
11577 | let r = _mm256_maskz_mullo_epi16(0b00000000_00001111, a, b); |
11578 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1); |
11579 | assert_eq_m256i(r, e); |
11580 | } |
11581 | |
11582 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11583 | unsafe fn test_mm_mask_mullo_epi16() { |
11584 | let a = _mm_set1_epi16(1); |
11585 | let b = _mm_set1_epi16(1); |
11586 | let r = _mm_mask_mullo_epi16(a, 0, a, b); |
11587 | assert_eq_m128i(r, a); |
11588 | let r = _mm_mask_mullo_epi16(a, 0b00001111, a, b); |
11589 | let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1); |
11590 | assert_eq_m128i(r, e); |
11591 | } |
11592 | |
11593 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11594 | unsafe fn test_mm_maskz_mullo_epi16() { |
11595 | let a = _mm_set1_epi16(1); |
11596 | let b = _mm_set1_epi16(1); |
11597 | let r = _mm_maskz_mullo_epi16(0, a, b); |
11598 | assert_eq_m128i(r, _mm_setzero_si128()); |
11599 | let r = _mm_maskz_mullo_epi16(0b00001111, a, b); |
11600 | let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1); |
11601 | assert_eq_m128i(r, e); |
11602 | } |
11603 | |
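// The max tests below pit an ascending ramp in `a` (0..=15 per 16-element
// group) against a descending ramp in `b` (15..=0); read in set-argument
// order, the elementwise maximum is 15..=8 followed by 8..=15.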
11604 | #[simd_test(enable = "avx512bw" )] |
11605 | unsafe fn test_mm512_max_epu16() { |
11606 | #[rustfmt::skip] |
11607 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11608 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11609 | #[rustfmt::skip] |
11610 | let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11611 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
11612 | let r = _mm512_max_epu16(a, b); |
11613 | #[rustfmt::skip] |
11614 | let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15, |
11615 | 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15); |
11616 | assert_eq_m512i(r, e); |
11617 | } |
11618 | |
11619 | #[simd_test(enable = "avx512bw" )] |
11620 | unsafe fn test_mm512_mask_max_epu16() { |
11621 | #[rustfmt::skip] |
11622 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11623 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11624 | #[rustfmt::skip] |
11625 | let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11626 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
11627 | let r = _mm512_mask_max_epu16(a, 0, a, b); |
11628 | assert_eq_m512i(r, a); |
11629 | let r = _mm512_mask_max_epu16(a, 0b00000000_11111111_00000000_11111111, a, b); |
11630 | #[rustfmt::skip] |
11631 | let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11632 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11633 | assert_eq_m512i(r, e); |
11634 | } |
11635 | |
11636 | #[simd_test(enable = "avx512bw" )] |
11637 | unsafe fn test_mm512_maskz_max_epu16() { |
11638 | #[rustfmt::skip] |
11639 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11640 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11641 | #[rustfmt::skip] |
11642 | let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11643 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
11644 | let r = _mm512_maskz_max_epu16(0, a, b); |
11645 | assert_eq_m512i(r, _mm512_setzero_si512()); |
11646 | let r = _mm512_maskz_max_epu16(0b00000000_11111111_00000000_11111111, a, b); |
11647 | #[rustfmt::skip] |
11648 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, |
11649 | 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); |
11650 | assert_eq_m512i(r, e); |
11651 | } |
11652 | |
11653 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11654 | unsafe fn test_mm256_mask_max_epu16() { |
11655 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11656 | let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
11657 | let r = _mm256_mask_max_epu16(a, 0, a, b); |
11658 | assert_eq_m256i(r, a); |
11659 | let r = _mm256_mask_max_epu16(a, 0b00000000_11111111, a, b); |
11660 | let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11661 | assert_eq_m256i(r, e); |
11662 | } |
11663 | |
11664 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11665 | unsafe fn test_mm256_maskz_max_epu16() { |
11666 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11667 | let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
11668 | let r = _mm256_maskz_max_epu16(0, a, b); |
11669 | assert_eq_m256i(r, _mm256_setzero_si256()); |
11670 | let r = _mm256_maskz_max_epu16(0b00000000_11111111, a, b); |
11671 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); |
11672 | assert_eq_m256i(r, e); |
11673 | } |
11674 | |
11675 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11676 | unsafe fn test_mm_mask_max_epu16() { |
11677 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
11678 | let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); |
11679 | let r = _mm_mask_max_epu16(a, 0, a, b); |
11680 | assert_eq_m128i(r, a); |
11681 | let r = _mm_mask_max_epu16(a, 0b00001111, a, b); |
11682 | let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
11683 | assert_eq_m128i(r, e); |
11684 | } |
11685 | |
11686 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11687 | unsafe fn test_mm_maskz_max_epu16() { |
11688 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
11689 | let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); |
11690 | let r = _mm_maskz_max_epu16(0, a, b); |
11691 | assert_eq_m128i(r, _mm_setzero_si128()); |
11692 | let r = _mm_maskz_max_epu16(0b00001111, a, b); |
11693 | let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7); |
11694 | assert_eq_m128i(r, e); |
11695 | } |
11696 | |
11697 | #[simd_test(enable = "avx512bw" )] |
11698 | unsafe fn test_mm512_max_epu8() { |
11699 | #[rustfmt::skip] |
11700 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11701 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11702 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11703 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11704 | #[rustfmt::skip] |
11705 | let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11706 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11707 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11708 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
11709 | let r = _mm512_max_epu8(a, b); |
11710 | #[rustfmt::skip] |
11711 | let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15, |
11712 | 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15, |
11713 | 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15, |
11714 | 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15); |
11715 | assert_eq_m512i(r, e); |
11716 | } |
11717 | |
11718 | #[simd_test(enable = "avx512bw" )] |
11719 | unsafe fn test_mm512_mask_max_epu8() { |
11720 | #[rustfmt::skip] |
11721 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11722 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11723 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11724 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11725 | #[rustfmt::skip] |
11726 | let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11727 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11728 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11729 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
11730 | let r = _mm512_mask_max_epu8(a, 0, a, b); |
11731 | assert_eq_m512i(r, a); |
11732 | let r = _mm512_mask_max_epu8( |
11733 | a, |
11734 | 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, |
11735 | a, |
11736 | b, |
11737 | ); |
11738 | #[rustfmt::skip] |
11739 | let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11740 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11741 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11742 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11743 | assert_eq_m512i(r, e); |
11744 | } |
11745 | |
11746 | #[simd_test(enable = "avx512bw" )] |
11747 | unsafe fn test_mm512_maskz_max_epu8() { |
11748 | #[rustfmt::skip] |
11749 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11750 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11751 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11752 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11753 | #[rustfmt::skip] |
11754 | let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11755 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11756 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11757 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
11758 | let r = _mm512_maskz_max_epu8(0, a, b); |
11759 | assert_eq_m512i(r, _mm512_setzero_si512()); |
11760 | let r = _mm512_maskz_max_epu8( |
11761 | 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, |
11762 | a, |
11763 | b, |
11764 | ); |
11765 | #[rustfmt::skip] |
11766 | let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, |
11767 | 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, |
11768 | 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, |
11769 | 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); |
11770 | assert_eq_m512i(r, e); |
11771 | } |
11772 | |
11773 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11774 | unsafe fn test_mm256_mask_max_epu8() { |
11775 | #[rustfmt::skip] |
11776 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11777 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11778 | #[rustfmt::skip] |
11779 | let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11780 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
11781 | let r = _mm256_mask_max_epu8(a, 0, a, b); |
11782 | assert_eq_m256i(r, a); |
11783 | let r = _mm256_mask_max_epu8(a, 0b00000000_11111111_00000000_11111111, a, b); |
11784 | #[rustfmt::skip] |
11785 | let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11786 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11787 | assert_eq_m256i(r, e); |
11788 | } |
11789 | |
11790 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11791 | unsafe fn test_mm256_maskz_max_epu8() { |
11792 | #[rustfmt::skip] |
11793 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11794 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11795 | #[rustfmt::skip] |
11796 | let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11797 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
11798 | let r = _mm256_maskz_max_epu8(0, a, b); |
11799 | assert_eq_m256i(r, _mm256_setzero_si256()); |
11800 | let r = _mm256_maskz_max_epu8(0b00000000_11111111_00000000_11111111, a, b); |
11801 | #[rustfmt::skip] |
11802 | let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, |
11803 | 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); |
11804 | assert_eq_m256i(r, e); |
11805 | } |
11806 | |
11807 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11808 | unsafe fn test_mm_mask_max_epu8() { |
11809 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11810 | let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
11811 | let r = _mm_mask_max_epu8(a, 0, a, b); |
11812 | assert_eq_m128i(r, a); |
11813 | let r = _mm_mask_max_epu8(a, 0b00000000_11111111, a, b); |
11814 | let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11815 | assert_eq_m128i(r, e); |
11816 | } |
11817 | |
11818 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11819 | unsafe fn test_mm_maskz_max_epu8() { |
11820 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11821 | let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
11822 | let r = _mm_maskz_max_epu8(0, a, b); |
11823 | assert_eq_m128i(r, _mm_setzero_si128()); |
11824 | let r = _mm_maskz_max_epu8(0b00000000_11111111, a, b); |
11825 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); |
11826 | assert_eq_m128i(r, e); |
11827 | } |
11828 | |
11829 | #[simd_test(enable = "avx512bw" )] |
11830 | unsafe fn test_mm512_max_epi16() { |
11831 | #[rustfmt::skip] |
11832 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11833 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11834 | #[rustfmt::skip] |
11835 | let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11836 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
11837 | let r = _mm512_max_epi16(a, b); |
11838 | #[rustfmt::skip] |
11839 | let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15, |
11840 | 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15); |
11841 | assert_eq_m512i(r, e); |
11842 | } |
11843 | |
11844 | #[simd_test(enable = "avx512bw" )] |
11845 | unsafe fn test_mm512_mask_max_epi16() { |
11846 | #[rustfmt::skip] |
11847 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11848 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11849 | #[rustfmt::skip] |
11850 | let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11851 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
11852 | let r = _mm512_mask_max_epi16(a, 0, a, b); |
11853 | assert_eq_m512i(r, a); |
11854 | let r = _mm512_mask_max_epi16(a, 0b00000000_11111111_00000000_11111111, a, b); |
11855 | #[rustfmt::skip] |
11856 | let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11857 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11858 | assert_eq_m512i(r, e); |
11859 | } |
11860 | |
11861 | #[simd_test(enable = "avx512bw" )] |
11862 | unsafe fn test_mm512_maskz_max_epi16() { |
11863 | #[rustfmt::skip] |
11864 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11865 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11866 | #[rustfmt::skip] |
11867 | let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11868 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
11869 | let r = _mm512_maskz_max_epi16(0, a, b); |
11870 | assert_eq_m512i(r, _mm512_setzero_si512()); |
11871 | let r = _mm512_maskz_max_epi16(0b00000000_11111111_00000000_11111111, a, b); |
11872 | #[rustfmt::skip] |
11873 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, |
11874 | 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); |
11875 | assert_eq_m512i(r, e); |
11876 | } |
11877 | |
11878 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11879 | unsafe fn test_mm256_mask_max_epi16() { |
11880 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11881 | let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
11882 | let r = _mm256_mask_max_epi16(a, 0, a, b); |
11883 | assert_eq_m256i(r, a); |
11884 | let r = _mm256_mask_max_epi16(a, 0b00000000_11111111, a, b); |
11885 | let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11886 | assert_eq_m256i(r, e); |
11887 | } |
11888 | |
11889 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11890 | unsafe fn test_mm256_maskz_max_epi16() { |
11891 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11892 | let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
11893 | let r = _mm256_maskz_max_epi16(0, a, b); |
11894 | assert_eq_m256i(r, _mm256_setzero_si256()); |
11895 | let r = _mm256_maskz_max_epi16(0b00000000_11111111, a, b); |
11896 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); |
11897 | assert_eq_m256i(r, e); |
11898 | } |
11899 | |
11900 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11901 | unsafe fn test_mm_mask_max_epi16() { |
11902 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
11903 | let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); |
11904 | let r = _mm_mask_max_epi16(a, 0, a, b); |
11905 | assert_eq_m128i(r, a); |
11906 | let r = _mm_mask_max_epi16(a, 0b00001111, a, b); |
11907 | let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
11908 | assert_eq_m128i(r, e); |
11909 | } |
11910 | |
11911 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11912 | unsafe fn test_mm_maskz_max_epi16() { |
11913 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
11914 | let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); |
11915 | let r = _mm_maskz_max_epi16(0, a, b); |
11916 | assert_eq_m128i(r, _mm_setzero_si128()); |
11917 | let r = _mm_maskz_max_epi16(0b00001111, a, b); |
11918 | let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7); |
11919 | assert_eq_m128i(r, e); |
11920 | } |
11921 | |
11922 | #[simd_test(enable = "avx512bw" )] |
11923 | unsafe fn test_mm512_max_epi8() { |
11924 | #[rustfmt::skip] |
11925 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11926 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11927 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11928 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11929 | #[rustfmt::skip] |
11930 | let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11931 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11932 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11933 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
11934 | let r = _mm512_max_epi8(a, b); |
11935 | #[rustfmt::skip] |
11936 | let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15, |
11937 | 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15, |
11938 | 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15, |
11939 | 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15); |
11940 | assert_eq_m512i(r, e); |
11941 | } |
11942 | |
11943 | #[simd_test(enable = "avx512bw" )] |
11944 | unsafe fn test_mm512_mask_max_epi8() { |
11945 | #[rustfmt::skip] |
11946 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11947 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11948 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11949 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11950 | #[rustfmt::skip] |
11951 | let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11952 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11953 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11954 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
11955 | let r = _mm512_mask_max_epi8(a, 0, a, b); |
11956 | assert_eq_m512i(r, a); |
11957 | let r = _mm512_mask_max_epi8( |
11958 | a, |
11959 | 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, |
11960 | a, |
11961 | b, |
11962 | ); |
11963 | #[rustfmt::skip] |
11964 | let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11965 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11966 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11967 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11968 | assert_eq_m512i(r, e); |
11969 | } |
11970 | |
11971 | #[simd_test(enable = "avx512bw" )] |
11972 | unsafe fn test_mm512_maskz_max_epi8() { |
11973 | #[rustfmt::skip] |
11974 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11975 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11976 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
11977 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
11978 | #[rustfmt::skip] |
11979 | let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11980 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11981 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
11982 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
11983 | let r = _mm512_maskz_max_epi8(0, a, b); |
11984 | assert_eq_m512i(r, _mm512_setzero_si512()); |
11985 | let r = _mm512_maskz_max_epi8( |
11986 | 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, |
11987 | a, |
11988 | b, |
11989 | ); |
11990 | #[rustfmt::skip] |
11991 | let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, |
11992 | 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, |
11993 | 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, |
11994 | 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); |
11995 | assert_eq_m512i(r, e); |
11996 | } |
11997 | |
11998 | #[simd_test(enable = "avx512bw,avx512vl" )] |
11999 | unsafe fn test_mm256_mask_max_epi8() { |
12000 | #[rustfmt::skip] |
12001 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12002 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12003 | #[rustfmt::skip] |
12004 | let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12005 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12006 | let r = _mm256_mask_max_epi8(a, 0, a, b); |
12007 | assert_eq_m256i(r, a); |
12008 | let r = _mm256_mask_max_epi8(a, 0b00000000_11111111_00000000_11111111, a, b); |
12009 | #[rustfmt::skip] |
12010 | let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12011 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12012 | assert_eq_m256i(r, e); |
12013 | } |
12014 | |
12015 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12016 | unsafe fn test_mm256_maskz_max_epi8() { |
12017 | #[rustfmt::skip] |
12018 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12019 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12020 | #[rustfmt::skip] |
12021 | let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12022 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12023 | let r = _mm256_maskz_max_epi8(0, a, b); |
12024 | assert_eq_m256i(r, _mm256_setzero_si256()); |
12025 | let r = _mm256_maskz_max_epi8(0b00000000_11111111_00000000_11111111, a, b); |
12026 | #[rustfmt::skip] |
12027 | let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, |
12028 | 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); |
12029 | assert_eq_m256i(r, e); |
12030 | } |
12031 | |
12032 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12033 | unsafe fn test_mm_mask_max_epi8() { |
12034 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12035 | let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12036 | let r = _mm_mask_max_epi8(a, 0, a, b); |
12037 | assert_eq_m128i(r, a); |
12038 | let r = _mm_mask_max_epi8(a, 0b00000000_11111111, a, b); |
12039 | let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12040 | assert_eq_m128i(r, e); |
12041 | } |
12042 | |
12043 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12044 | unsafe fn test_mm_maskz_max_epi8() { |
12045 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12046 | let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12047 | let r = _mm_maskz_max_epi8(0, a, b); |
12048 | assert_eq_m128i(r, _mm_setzero_si128()); |
12049 | let r = _mm_maskz_max_epi8(0b00000000_11111111, a, b); |
12050 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); |
12051 | assert_eq_m128i(r, e); |
12052 | } |
12053 | |
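// As with the max tests above, `a` ramps up while `b` ramps down; read in
// set-argument order, the elementwise minimum is 0..=7 followed by 7..=0.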
12054 | #[simd_test(enable = "avx512bw" )] |
12055 | unsafe fn test_mm512_min_epu16() { |
12056 | #[rustfmt::skip] |
12057 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12058 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12059 | #[rustfmt::skip] |
12060 | let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12061 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12062 | let r = _mm512_min_epu16(a, b); |
12063 | #[rustfmt::skip] |
12064 | let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, |
12065 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); |
12066 | assert_eq_m512i(r, e); |
12067 | } |
12068 | |
12069 | #[simd_test(enable = "avx512bw" )] |
12070 | unsafe fn test_mm512_mask_min_epu16() { |
12071 | #[rustfmt::skip] |
12072 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12073 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12074 | #[rustfmt::skip] |
12075 | let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12076 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12077 | let r = _mm512_mask_min_epu16(a, 0, a, b); |
12078 | assert_eq_m512i(r, a); |
12079 | let r = _mm512_mask_min_epu16(a, 0b00000000_11111111_00000000_11111111, a, b); |
12080 | #[rustfmt::skip] |
12081 | let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, |
12082 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); |
12083 | assert_eq_m512i(r, e); |
12084 | } |
12085 | |
12086 | #[simd_test(enable = "avx512bw" )] |
12087 | unsafe fn test_mm512_maskz_min_epu16() { |
12088 | #[rustfmt::skip] |
12089 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12090 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12091 | #[rustfmt::skip] |
12092 | let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12093 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12094 | let r = _mm512_maskz_min_epu16(0, a, b); |
12095 | assert_eq_m512i(r, _mm512_setzero_si512()); |
12096 | let r = _mm512_maskz_min_epu16(0b00000000_11111111_00000000_11111111, a, b); |
12097 | #[rustfmt::skip] |
12098 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, |
12099 | 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); |
12100 | assert_eq_m512i(r, e); |
12101 | } |
12102 | |
12103 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12104 | unsafe fn test_mm256_mask_min_epu16() { |
12105 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12106 | let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12107 | let r = _mm256_mask_min_epu16(a, 0, a, b); |
12108 | assert_eq_m256i(r, a); |
12109 | let r = _mm256_mask_min_epu16(a, 0b00000000_11111111, a, b); |
12110 | let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); |
12111 | assert_eq_m256i(r, e); |
12112 | } |
12113 | |
12114 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12115 | unsafe fn test_mm256_maskz_min_epu16() { |
12116 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12117 | let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12118 | let r = _mm256_maskz_min_epu16(0, a, b); |
12119 | assert_eq_m256i(r, _mm256_setzero_si256()); |
12120 | let r = _mm256_maskz_min_epu16(0b00000000_11111111, a, b); |
12121 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); |
12122 | assert_eq_m256i(r, e); |
12123 | } |
12124 | |
12125 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12126 | unsafe fn test_mm_mask_min_epu16() { |
12127 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
12128 | let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); |
12129 | let r = _mm_mask_min_epu16(a, 0, a, b); |
12130 | assert_eq_m128i(r, a); |
12131 | let r = _mm_mask_min_epu16(a, 0b00001111, a, b); |
12132 | let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0); |
12133 | assert_eq_m128i(r, e); |
12134 | } |
12135 | |
12136 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12137 | unsafe fn test_mm_maskz_min_epu16() { |
12138 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
12139 | let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); |
12140 | let r = _mm_maskz_min_epu16(0, a, b); |
12141 | assert_eq_m128i(r, _mm_setzero_si128()); |
12142 | let r = _mm_maskz_min_epu16(0b00001111, a, b); |
12143 | let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0); |
12144 | assert_eq_m128i(r, e); |
12145 | } |
12146 | |
12147 | #[simd_test(enable = "avx512bw" )] |
12148 | unsafe fn test_mm512_min_epu8() { |
12149 | #[rustfmt::skip] |
12150 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12151 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12152 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12153 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12154 | #[rustfmt::skip] |
12155 | let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12156 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12157 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12158 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12159 | let r = _mm512_min_epu8(a, b); |
12160 | #[rustfmt::skip] |
12161 | let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, |
12162 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, |
12163 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, |
12164 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); |
12165 | assert_eq_m512i(r, e); |
12166 | } |
12167 | |
12168 | #[simd_test(enable = "avx512bw" )] |
12169 | unsafe fn test_mm512_mask_min_epu8() { |
12170 | #[rustfmt::skip] |
12171 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12172 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12173 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12174 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12175 | #[rustfmt::skip] |
12176 | let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12177 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12178 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12179 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12180 | let r = _mm512_mask_min_epu8(a, 0, a, b); |
12181 | assert_eq_m512i(r, a); |
12182 | let r = _mm512_mask_min_epu8( |
12183 | a, |
12184 | 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, |
12185 | a, |
12186 | b, |
12187 | ); |
12188 | #[rustfmt::skip] |
12189 | let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, |
12190 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, |
12191 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, |
12192 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); |
12193 | assert_eq_m512i(r, e); |
12194 | } |
12195 | |
12196 | #[simd_test(enable = "avx512bw" )] |
12197 | unsafe fn test_mm512_maskz_min_epu8() { |
12198 | #[rustfmt::skip] |
12199 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12200 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12201 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12202 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12203 | #[rustfmt::skip] |
12204 | let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12205 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12206 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12207 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12208 | let r = _mm512_maskz_min_epu8(0, a, b); |
12209 | assert_eq_m512i(r, _mm512_setzero_si512()); |
12210 | let r = _mm512_maskz_min_epu8( |
12211 | 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, |
12212 | a, |
12213 | b, |
12214 | ); |
12215 | #[rustfmt::skip] |
12216 | let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, |
12217 | 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, |
12218 | 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, |
12219 | 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); |
12220 | assert_eq_m512i(r, e); |
12221 | } |
12222 | |
12223 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12224 | unsafe fn test_mm256_mask_min_epu8() { |
12225 | #[rustfmt::skip] |
12226 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12227 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12228 | #[rustfmt::skip] |
12229 | let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12230 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12231 | let r = _mm256_mask_min_epu8(a, 0, a, b); |
12232 | assert_eq_m256i(r, a); |
12233 | let r = _mm256_mask_min_epu8(a, 0b00000000_11111111_00000000_11111111, a, b); |
12234 | #[rustfmt::skip] |
12235 | let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, |
12236 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); |
12237 | assert_eq_m256i(r, e); |
12238 | } |
12239 | |
12240 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12241 | unsafe fn test_mm256_maskz_min_epu8() { |
12242 | #[rustfmt::skip] |
12243 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12244 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12245 | #[rustfmt::skip] |
12246 | let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12247 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12248 | let r = _mm256_maskz_min_epu8(0, a, b); |
12249 | assert_eq_m256i(r, _mm256_setzero_si256()); |
12250 | let r = _mm256_maskz_min_epu8(0b00000000_11111111_00000000_11111111, a, b); |
12251 | #[rustfmt::skip] |
12252 | let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, |
12253 | 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); |
12254 | assert_eq_m256i(r, e); |
12255 | } |
12256 | |
12257 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12258 | unsafe fn test_mm_mask_min_epu8() { |
12259 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12260 | let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12261 | let r = _mm_mask_min_epu8(a, 0, a, b); |
12262 | assert_eq_m128i(r, a); |
12263 | let r = _mm_mask_min_epu8(a, 0b00000000_11111111, a, b); |
12264 | let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); |
12265 | assert_eq_m128i(r, e); |
12266 | } |
12267 | |
12268 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12269 | unsafe fn test_mm_maskz_min_epu8() { |
12270 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12271 | let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12272 | let r = _mm_maskz_min_epu8(0, a, b); |
12273 | assert_eq_m128i(r, _mm_setzero_si128()); |
12274 | let r = _mm_maskz_min_epu8(0b00000000_11111111, a, b); |
12275 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); |
12276 | assert_eq_m128i(r, e); |
12277 | } |
12278 | |
12279 | #[simd_test(enable = "avx512bw" )] |
12280 | unsafe fn test_mm512_min_epi16() { |
12281 | #[rustfmt::skip] |
12282 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12283 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12284 | #[rustfmt::skip] |
12285 | let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12286 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12287 | let r = _mm512_min_epi16(a, b); |
12288 | #[rustfmt::skip] |
12289 | let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, |
12290 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); |
12291 | assert_eq_m512i(r, e); |
12292 | } |
12293 | |
12294 | #[simd_test(enable = "avx512bw" )] |
12295 | unsafe fn test_mm512_mask_min_epi16() { |
12296 | #[rustfmt::skip] |
12297 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12298 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12299 | #[rustfmt::skip] |
12300 | let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12301 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12302 | let r = _mm512_mask_min_epi16(a, 0, a, b); |
12303 | assert_eq_m512i(r, a); |
12304 | let r = _mm512_mask_min_epi16(a, 0b00000000_11111111_00000000_11111111, a, b); |
12305 | #[rustfmt::skip] |
12306 | let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, |
12307 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); |
12308 | assert_eq_m512i(r, e); |
12309 | } |
12310 | |
12311 | #[simd_test(enable = "avx512bw" )] |
12312 | unsafe fn test_mm512_maskz_min_epi16() { |
12313 | #[rustfmt::skip] |
12314 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12315 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12316 | #[rustfmt::skip] |
12317 | let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12318 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12319 | let r = _mm512_maskz_min_epi16(0, a, b); |
12320 | assert_eq_m512i(r, _mm512_setzero_si512()); |
12321 | let r = _mm512_maskz_min_epi16(0b00000000_11111111_00000000_11111111, a, b); |
12322 | #[rustfmt::skip] |
12323 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, |
12324 | 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); |
12325 | assert_eq_m512i(r, e); |
12326 | } |
12327 | |
12328 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12329 | unsafe fn test_mm256_mask_min_epi16() { |
12330 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12331 | let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12332 | let r = _mm256_mask_min_epi16(a, 0, a, b); |
12333 | assert_eq_m256i(r, a); |
12334 | let r = _mm256_mask_min_epi16(a, 0b00000000_11111111, a, b); |
12335 | let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); |
12336 | assert_eq_m256i(r, e); |
12337 | } |
12338 | |
12339 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12340 | unsafe fn test_mm256_maskz_min_epi16() { |
12341 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12342 | let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12343 | let r = _mm256_maskz_min_epi16(0, a, b); |
12344 | assert_eq_m256i(r, _mm256_setzero_si256()); |
12345 | let r = _mm256_maskz_min_epi16(0b00000000_11111111, a, b); |
12346 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); |
12347 | assert_eq_m256i(r, e); |
12348 | } |
12349 | |
12350 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12351 | unsafe fn test_mm_mask_min_epi16() { |
12352 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
12353 | let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); |
12354 | let r = _mm_mask_min_epi16(a, 0, a, b); |
12355 | assert_eq_m128i(r, a); |
12356 | let r = _mm_mask_min_epi16(a, 0b00001111, a, b); |
12357 | let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0); |
12358 | assert_eq_m128i(r, e); |
12359 | } |
12360 | |
12361 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12362 | unsafe fn test_mm_maskz_min_epi16() { |
12363 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
12364 | let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); |
12365 | let r = _mm_maskz_min_epi16(0, a, b); |
12366 | assert_eq_m128i(r, _mm_setzero_si128()); |
12367 | let r = _mm_maskz_min_epi16(0b00001111, a, b); |
12368 | let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0); |
12369 | assert_eq_m128i(r, e); |
12370 | } |
12371 | |
12372 | #[simd_test(enable = "avx512bw" )] |
12373 | unsafe fn test_mm512_min_epi8() { |
12374 | #[rustfmt::skip] |
12375 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12376 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12377 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12378 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12379 | #[rustfmt::skip] |
12380 | let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12381 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12382 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12383 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12384 | let r = _mm512_min_epi8(a, b); |
12385 | #[rustfmt::skip] |
12386 | let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, |
12387 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, |
12388 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, |
12389 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); |
12390 | assert_eq_m512i(r, e); |
12391 | } |
12392 | |
12393 | #[simd_test(enable = "avx512bw" )] |
12394 | unsafe fn test_mm512_mask_min_epi8() { |
12395 | #[rustfmt::skip] |
12396 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12397 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12398 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12399 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12400 | #[rustfmt::skip] |
12401 | let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12402 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12403 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12404 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12405 | let r = _mm512_mask_min_epi8(a, 0, a, b); |
12406 | assert_eq_m512i(r, a); |
12407 | let r = _mm512_mask_min_epi8( |
12408 | a, |
12409 | 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, |
12410 | a, |
12411 | b, |
12412 | ); |
12413 | #[rustfmt::skip] |
12414 | let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, |
12415 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, |
12416 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, |
12417 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); |
12418 | assert_eq_m512i(r, e); |
12419 | } |
12420 | |
12421 | #[simd_test(enable = "avx512bw" )] |
12422 | unsafe fn test_mm512_maskz_min_epi8() { |
12423 | #[rustfmt::skip] |
12424 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12425 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12426 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12427 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12428 | #[rustfmt::skip] |
12429 | let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12430 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12431 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12432 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12433 | let r = _mm512_maskz_min_epi8(0, a, b); |
12434 | assert_eq_m512i(r, _mm512_setzero_si512()); |
12435 | let r = _mm512_maskz_min_epi8( |
12436 | 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, |
12437 | a, |
12438 | b, |
12439 | ); |
12440 | #[rustfmt::skip] |
12441 | let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, |
12442 | 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, |
12443 | 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, |
12444 | 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); |
12445 | assert_eq_m512i(r, e); |
12446 | } |
12447 | |
12448 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12449 | unsafe fn test_mm256_mask_min_epi8() { |
12450 | #[rustfmt::skip] |
12451 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12452 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12453 | #[rustfmt::skip] |
12454 | let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12455 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12456 | let r = _mm256_mask_min_epi8(a, 0, a, b); |
12457 | assert_eq_m256i(r, a); |
12458 | let r = _mm256_mask_min_epi8(a, 0b00000000_11111111_00000000_11111111, a, b); |
12459 | #[rustfmt::skip] |
12460 | let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, |
12461 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); |
12462 | assert_eq_m256i(r, e); |
12463 | } |
12464 | |
12465 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12466 | unsafe fn test_mm256_maskz_min_epi8() { |
12467 | #[rustfmt::skip] |
12468 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
12469 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12470 | #[rustfmt::skip] |
12471 | let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
12472 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12473 | let r = _mm256_maskz_min_epi8(0, a, b); |
12474 | assert_eq_m256i(r, _mm256_setzero_si256()); |
12475 | let r = _mm256_maskz_min_epi8(0b00000000_11111111_00000000_11111111, a, b); |
12476 | #[rustfmt::skip] |
12477 | let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, |
12478 | 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); |
12479 | assert_eq_m256i(r, e); |
12480 | } |
12481 | |
12482 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12483 | unsafe fn test_mm_mask_min_epi8() { |
12484 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12485 | let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12486 | let r = _mm_mask_min_epi8(a, 0, a, b); |
12487 | assert_eq_m128i(r, a); |
12488 | let r = _mm_mask_min_epi8(a, 0b00000000_11111111, a, b); |
12489 | let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); |
12490 | assert_eq_m128i(r, e); |
12491 | } |
12492 | |
12493 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12494 | unsafe fn test_mm_maskz_min_epi8() { |
12495 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
12496 | let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
12497 | let r = _mm_maskz_min_epi8(0, a, b); |
12498 | assert_eq_m128i(r, _mm_setzero_si128()); |
12499 | let r = _mm_maskz_min_epi8(0b00000000_11111111, a, b); |
12500 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); |
12501 | assert_eq_m128i(r, e); |
12502 | } |
12503 | |
12504 | #[simd_test(enable = "avx512bw" )] |
12505 | unsafe fn test_mm512_cmplt_epu16_mask() { |
12506 | let a = _mm512_set1_epi16(-2); |
12507 | let b = _mm512_set1_epi16(-1); |
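// Compared as unsigned 16-bit values, -2 is 0xFFFE and -1 is 0xFFFF, so a < b holds in every lane.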
12508 | let m = _mm512_cmplt_epu16_mask(a, b); |
12509 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
12510 | } |
12511 | |
12512 | #[simd_test(enable = "avx512bw" )] |
12513 | unsafe fn test_mm512_mask_cmplt_epu16_mask() { |
12514 | let a = _mm512_set1_epi16(-2); |
12515 | let b = _mm512_set1_epi16(-1); |
12516 | let mask = 0b01010101_01010101_01010101_01010101; |
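// Only lanes whose mask bit is set can report the comparison; all other result bits stay zero.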
12517 | let r = _mm512_mask_cmplt_epu16_mask(mask, a, b); |
12518 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
12519 | } |
12520 | |
12521 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12522 | unsafe fn test_mm256_cmplt_epu16_mask() { |
12523 | let a = _mm256_set1_epi16(-2); |
12524 | let b = _mm256_set1_epi16(-1); |
12525 | let m = _mm256_cmplt_epu16_mask(a, b); |
12526 | assert_eq!(m, 0b11111111_11111111); |
12527 | } |
12528 | |
12529 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12530 | unsafe fn test_mm256_mask_cmplt_epu16_mask() { |
12531 | let a = _mm256_set1_epi16(-2); |
12532 | let b = _mm256_set1_epi16(-1); |
12533 | let mask = 0b01010101_01010101; |
12534 | let r = _mm256_mask_cmplt_epu16_mask(mask, a, b); |
12535 | assert_eq!(r, 0b01010101_01010101); |
12536 | } |
12537 | |
12538 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12539 | unsafe fn test_mm_cmplt_epu16_mask() { |
12540 | let a = _mm_set1_epi16(-2); |
12541 | let b = _mm_set1_epi16(-1); |
12542 | let m = _mm_cmplt_epu16_mask(a, b); |
12543 | assert_eq!(m, 0b11111111); |
12544 | } |
12545 | |
12546 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12547 | unsafe fn test_mm_mask_cmplt_epu16_mask() { |
12548 | let a = _mm_set1_epi16(-2); |
12549 | let b = _mm_set1_epi16(-1); |
12550 | let mask = 0b01010101; |
12551 | let r = _mm_mask_cmplt_epu16_mask(mask, a, b); |
12552 | assert_eq!(r, 0b01010101); |
12553 | } |
12554 | |
12555 | #[simd_test(enable = "avx512bw" )] |
12556 | unsafe fn test_mm512_cmplt_epu8_mask() { |
12557 | let a = _mm512_set1_epi8(-2); |
12558 | let b = _mm512_set1_epi8(-1); |
12559 | let m = _mm512_cmplt_epu8_mask(a, b); |
12560 | assert_eq!( |
12561 | m, |
12562 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 |
12563 | ); |
12564 | } |
12565 | |
12566 | #[simd_test(enable = "avx512bw" )] |
12567 | unsafe fn test_mm512_mask_cmplt_epu8_mask() { |
12568 | let a = _mm512_set1_epi8(-2); |
12569 | let b = _mm512_set1_epi8(-1); |
12570 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; |
12571 | let r = _mm512_mask_cmplt_epu8_mask(mask, a, b); |
12572 | assert_eq!( |
12573 | r, |
12574 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 |
12575 | ); |
12576 | } |
12577 | |
12578 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12579 | unsafe fn test_mm256_cmplt_epu8_mask() { |
12580 | let a = _mm256_set1_epi8(-2); |
12581 | let b = _mm256_set1_epi8(-1); |
12582 | let m = _mm256_cmplt_epu8_mask(a, b); |
12583 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
12584 | } |
12585 | |
12586 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12587 | unsafe fn test_mm256_mask_cmplt_epu8_mask() { |
12588 | let a = _mm256_set1_epi8(-2); |
12589 | let b = _mm256_set1_epi8(-1); |
12590 | let mask = 0b01010101_01010101_01010101_01010101; |
12591 | let r = _mm256_mask_cmplt_epu8_mask(mask, a, b); |
12592 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
12593 | } |
12594 | |
12595 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12596 | unsafe fn test_mm_cmplt_epu8_mask() { |
12597 | let a = _mm_set1_epi8(-2); |
12598 | let b = _mm_set1_epi8(-1); |
12599 | let m = _mm_cmplt_epu8_mask(a, b); |
12600 | assert_eq!(m, 0b11111111_11111111); |
12601 | } |
12602 | |
12603 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12604 | unsafe fn test_mm_mask_cmplt_epu8_mask() { |
12605 | let a = _mm_set1_epi8(-2); |
12606 | let b = _mm_set1_epi8(-1); |
12607 | let mask = 0b01010101_01010101; |
12608 | let r = _mm_mask_cmplt_epu8_mask(mask, a, b); |
12609 | assert_eq!(r, 0b01010101_01010101); |
12610 | } |
12611 | |
12612 | #[simd_test(enable = "avx512bw" )] |
12613 | unsafe fn test_mm512_cmplt_epi16_mask() { |
12614 | let a = _mm512_set1_epi16(-2); |
12615 | let b = _mm512_set1_epi16(-1); |
12616 | let m = _mm512_cmplt_epi16_mask(a, b); |
12617 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
12618 | } |
12619 | |
12620 | #[simd_test(enable = "avx512bw" )] |
12621 | unsafe fn test_mm512_mask_cmplt_epi16_mask() { |
12622 | let a = _mm512_set1_epi16(-2); |
12623 | let b = _mm512_set1_epi16(-1); |
12624 | let mask = 0b01010101_01010101_01010101_01010101; |
12625 | let r = _mm512_mask_cmplt_epi16_mask(mask, a, b); |
12626 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
12627 | } |
12628 | |
12629 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12630 | unsafe fn test_mm256_cmplt_epi16_mask() { |
12631 | let a = _mm256_set1_epi16(-2); |
12632 | let b = _mm256_set1_epi16(-1); |
12633 | let m = _mm256_cmplt_epi16_mask(a, b); |
12634 | assert_eq!(m, 0b11111111_11111111); |
12635 | } |
12636 | |
12637 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12638 | unsafe fn test_mm256_mask_cmplt_epi16_mask() { |
12639 | let a = _mm256_set1_epi16(-2); |
12640 | let b = _mm256_set1_epi16(-1); |
12641 | let mask = 0b01010101_01010101; |
12642 | let r = _mm256_mask_cmplt_epi16_mask(mask, a, b); |
12643 | assert_eq!(r, 0b01010101_01010101); |
12644 | } |
12645 | |
12646 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12647 | unsafe fn test_mm_cmplt_epi16_mask() { |
12648 | let a = _mm_set1_epi16(-2); |
12649 | let b = _mm_set1_epi16(-1); |
12650 | let m = _mm_cmplt_epi16_mask(a, b); |
12651 | assert_eq!(m, 0b11111111); |
12652 | } |
12653 | |
12654 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12655 | unsafe fn test_mm_mask_cmplt_epi16_mask() { |
12656 | let a = _mm_set1_epi16(-2); |
12657 | let b = _mm_set1_epi16(-1); |
12658 | let mask = 0b01010101; |
12659 | let r = _mm_mask_cmplt_epi16_mask(mask, a, b); |
12660 | assert_eq!(r, 0b01010101); |
12661 | } |
12662 | |
12663 | #[simd_test(enable = "avx512bw" )] |
12664 | unsafe fn test_mm512_cmplt_epi8_mask() { |
12665 | let a = _mm512_set1_epi8(-2); |
12666 | let b = _mm512_set1_epi8(-1); |
12667 | let m = _mm512_cmplt_epi8_mask(a, b); |
12668 | assert_eq!( |
12669 | m, |
12670 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 |
12671 | ); |
12672 | } |
12673 | |
12674 | #[simd_test(enable = "avx512bw" )] |
12675 | unsafe fn test_mm512_mask_cmplt_epi8_mask() { |
12676 | let a = _mm512_set1_epi8(-2); |
12677 | let b = _mm512_set1_epi8(-1); |
12678 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; |
12679 | let r = _mm512_mask_cmplt_epi8_mask(mask, a, b); |
12680 | assert_eq!( |
12681 | r, |
12682 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 |
12683 | ); |
12684 | } |
12685 | |
12686 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12687 | unsafe fn test_mm256_cmplt_epi8_mask() { |
12688 | let a = _mm256_set1_epi8(-2); |
12689 | let b = _mm256_set1_epi8(-1); |
12690 | let m = _mm256_cmplt_epi8_mask(a, b); |
12691 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
12692 | } |
12693 | |
12694 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12695 | unsafe fn test_mm256_mask_cmplt_epi8_mask() { |
12696 | let a = _mm256_set1_epi8(-2); |
12697 | let b = _mm256_set1_epi8(-1); |
12698 | let mask = 0b01010101_01010101_01010101_01010101; |
12699 | let r = _mm256_mask_cmplt_epi8_mask(mask, a, b); |
12700 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
12701 | } |
12702 | |
12703 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12704 | unsafe fn test_mm_cmplt_epi8_mask() { |
12705 | let a = _mm_set1_epi8(-2); |
12706 | let b = _mm_set1_epi8(-1); |
12707 | let m = _mm_cmplt_epi8_mask(a, b); |
12708 | assert_eq!(m, 0b11111111_11111111); |
12709 | } |
12710 | |
12711 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12712 | unsafe fn test_mm_mask_cmplt_epi8_mask() { |
12713 | let a = _mm_set1_epi8(-2); |
12714 | let b = _mm_set1_epi8(-1); |
12715 | let mask = 0b01010101_01010101; |
12716 | let r = _mm_mask_cmplt_epi8_mask(mask, a, b); |
12717 | assert_eq!(r, 0b01010101_01010101); |
12718 | } |
12719 | |
12720 | #[simd_test(enable = "avx512bw" )] |
12721 | unsafe fn test_mm512_cmpgt_epu16_mask() { |
12722 | let a = _mm512_set1_epi16(2); |
12723 | let b = _mm512_set1_epi16(1); |
12724 | let m = _mm512_cmpgt_epu16_mask(a, b); |
12725 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
12726 | } |
12727 | |
12728 | #[simd_test(enable = "avx512bw" )] |
12729 | unsafe fn test_mm512_mask_cmpgt_epu16_mask() { |
12730 | let a = _mm512_set1_epi16(2); |
12731 | let b = _mm512_set1_epi16(1); |
12732 | let mask = 0b01010101_01010101_01010101_01010101; |
12733 | let r = _mm512_mask_cmpgt_epu16_mask(mask, a, b); |
12734 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
12735 | } |
12736 | |
12737 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12738 | unsafe fn test_mm256_cmpgt_epu16_mask() { |
12739 | let a = _mm256_set1_epi16(2); |
12740 | let b = _mm256_set1_epi16(1); |
12741 | let m = _mm256_cmpgt_epu16_mask(a, b); |
12742 | assert_eq!(m, 0b11111111_11111111); |
12743 | } |
12744 | |
12745 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12746 | unsafe fn test_mm256_mask_cmpgt_epu16_mask() { |
12747 | let a = _mm256_set1_epi16(2); |
12748 | let b = _mm256_set1_epi16(1); |
12749 | let mask = 0b01010101_01010101; |
12750 | let r = _mm256_mask_cmpgt_epu16_mask(mask, a, b); |
12751 | assert_eq!(r, 0b01010101_01010101); |
12752 | } |
12753 | |
12754 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12755 | unsafe fn test_mm_cmpgt_epu16_mask() { |
12756 | let a = _mm_set1_epi16(2); |
12757 | let b = _mm_set1_epi16(1); |
12758 | let m = _mm_cmpgt_epu16_mask(a, b); |
12759 | assert_eq!(m, 0b11111111); |
12760 | } |
12761 | |
12762 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12763 | unsafe fn test_mm_mask_cmpgt_epu16_mask() { |
12764 | let a = _mm_set1_epi16(2); |
12765 | let b = _mm_set1_epi16(1); |
12766 | let mask = 0b01010101; |
12767 | let r = _mm_mask_cmpgt_epu16_mask(mask, a, b); |
12768 | assert_eq!(r, 0b01010101); |
12769 | } |
12770 | |
12771 | #[simd_test(enable = "avx512bw" )] |
12772 | unsafe fn test_mm512_cmpgt_epu8_mask() { |
12773 | let a = _mm512_set1_epi8(2); |
12774 | let b = _mm512_set1_epi8(1); |
12775 | let m = _mm512_cmpgt_epu8_mask(a, b); |
12776 | assert_eq!( |
12777 | m, |
12778 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 |
12779 | ); |
12780 | } |
12781 | |
12782 | #[simd_test(enable = "avx512bw" )] |
12783 | unsafe fn test_mm512_mask_cmpgt_epu8_mask() { |
12784 | let a = _mm512_set1_epi8(2); |
12785 | let b = _mm512_set1_epi8(1); |
12786 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; |
12787 | let r = _mm512_mask_cmpgt_epu8_mask(mask, a, b); |
12788 | assert_eq!( |
12789 | r, |
12790 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 |
12791 | ); |
12792 | } |
12793 | |
12794 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12795 | unsafe fn test_mm256_cmpgt_epu8_mask() { |
12796 | let a = _mm256_set1_epi8(2); |
12797 | let b = _mm256_set1_epi8(1); |
12798 | let m = _mm256_cmpgt_epu8_mask(a, b); |
12799 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
12800 | } |
12801 | |
12802 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12803 | unsafe fn test_mm256_mask_cmpgt_epu8_mask() { |
12804 | let a = _mm256_set1_epi8(2); |
12805 | let b = _mm256_set1_epi8(1); |
12806 | let mask = 0b01010101_01010101_01010101_01010101; |
12807 | let r = _mm256_mask_cmpgt_epu8_mask(mask, a, b); |
12808 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
12809 | } |
12810 | |
12811 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12812 | unsafe fn test_mm_cmpgt_epu8_mask() { |
12813 | let a = _mm_set1_epi8(2); |
12814 | let b = _mm_set1_epi8(1); |
12815 | let m = _mm_cmpgt_epu8_mask(a, b); |
12816 | assert_eq!(m, 0b11111111_11111111); |
12817 | } |
12818 | |
12819 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12820 | unsafe fn test_mm_mask_cmpgt_epu8_mask() { |
12821 | let a = _mm_set1_epi8(2); |
12822 | let b = _mm_set1_epi8(1); |
12823 | let mask = 0b01010101_01010101; |
12824 | let r = _mm_mask_cmpgt_epu8_mask(mask, a, b); |
12825 | assert_eq!(r, 0b01010101_01010101); |
12826 | } |
12827 | |
12828 | #[simd_test(enable = "avx512bw" )] |
12829 | unsafe fn test_mm512_cmpgt_epi16_mask() { |
12830 | let a = _mm512_set1_epi16(2); |
12831 | let b = _mm512_set1_epi16(-1); |
12832 | let m = _mm512_cmpgt_epi16_mask(a, b); |
12833 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
12834 | } |
12835 | |
12836 | #[simd_test(enable = "avx512bw" )] |
12837 | unsafe fn test_mm512_mask_cmpgt_epi16_mask() { |
12838 | let a = _mm512_set1_epi16(2); |
12839 | let b = _mm512_set1_epi16(-1); |
12840 | let mask = 0b01010101_01010101_01010101_01010101; |
12841 | let r = _mm512_mask_cmpgt_epi16_mask(mask, a, b); |
12842 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
12843 | } |
12844 | |
12845 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12846 | unsafe fn test_mm256_cmpgt_epi16_mask() { |
12847 | let a = _mm256_set1_epi16(2); |
12848 | let b = _mm256_set1_epi16(-1); |
12849 | let m = _mm256_cmpgt_epi16_mask(a, b); |
12850 | assert_eq!(m, 0b11111111_11111111); |
12851 | } |
12852 | |
12853 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12854 | unsafe fn test_mm256_mask_cmpgt_epi16_mask() { |
12855 | let a = _mm256_set1_epi16(2); |
12856 | let b = _mm256_set1_epi16(-1); |
12857 | let mask = 0b01010101_01010101; |
12858 | let r = _mm256_mask_cmpgt_epi16_mask(mask, a, b); |
12859 | assert_eq!(r, 0b01010101_01010101); |
12860 | } |
12861 | |
12862 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12863 | unsafe fn test_mm_cmpgt_epi16_mask() { |
12864 | let a = _mm_set1_epi16(2); |
12865 | let b = _mm_set1_epi16(-1); |
12866 | let m = _mm_cmpgt_epi16_mask(a, b); |
12867 | assert_eq!(m, 0b11111111); |
12868 | } |
12869 | |
12870 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12871 | unsafe fn test_mm_mask_cmpgt_epi16_mask() { |
12872 | let a = _mm_set1_epi16(2); |
12873 | let b = _mm_set1_epi16(-1); |
12874 | let mask = 0b01010101; |
12875 | let r = _mm_mask_cmpgt_epi16_mask(mask, a, b); |
12876 | assert_eq!(r, 0b01010101); |
12877 | } |
12878 | |
12879 | #[simd_test(enable = "avx512bw" )] |
12880 | unsafe fn test_mm512_cmpgt_epi8_mask() { |
12881 | let a = _mm512_set1_epi8(2); |
12882 | let b = _mm512_set1_epi8(-1); |
12883 | let m = _mm512_cmpgt_epi8_mask(a, b); |
12884 | assert_eq!( |
12885 | m, |
12886 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 |
12887 | ); |
12888 | } |
12889 | |
12890 | #[simd_test(enable = "avx512bw" )] |
12891 | unsafe fn test_mm512_mask_cmpgt_epi8_mask() { |
12892 | let a = _mm512_set1_epi8(2); |
12893 | let b = _mm512_set1_epi8(-1); |
12894 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; |
12895 | let r = _mm512_mask_cmpgt_epi8_mask(mask, a, b); |
12896 | assert_eq!( |
12897 | r, |
12898 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 |
12899 | ); |
12900 | } |
12901 | |
12902 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12903 | unsafe fn test_mm256_cmpgt_epi8_mask() { |
12904 | let a = _mm256_set1_epi8(2); |
12905 | let b = _mm256_set1_epi8(-1); |
12906 | let m = _mm256_cmpgt_epi8_mask(a, b); |
12907 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
12908 | } |
12909 | |
12910 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12911 | unsafe fn test_mm256_mask_cmpgt_epi8_mask() { |
12912 | let a = _mm256_set1_epi8(2); |
12913 | let b = _mm256_set1_epi8(-1); |
12914 | let mask = 0b01010101_01010101_01010101_01010101; |
12915 | let r = _mm256_mask_cmpgt_epi8_mask(mask, a, b); |
12916 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
12917 | } |
12918 | |
12919 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12920 | unsafe fn test_mm_cmpgt_epi8_mask() { |
12921 | let a = _mm_set1_epi8(2); |
12922 | let b = _mm_set1_epi8(-1); |
12923 | let m = _mm_cmpgt_epi8_mask(a, b); |
12924 | assert_eq!(m, 0b11111111_11111111); |
12925 | } |
12926 | |
12927 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12928 | unsafe fn test_mm_mask_cmpgt_epi8_mask() { |
12929 | let a = _mm_set1_epi8(2); |
12930 | let b = _mm_set1_epi8(-1); |
12931 | let mask = 0b01010101_01010101; |
12932 | let r = _mm_mask_cmpgt_epi8_mask(mask, a, b); |
12933 | assert_eq!(r, 0b01010101_01010101); |
12934 | } |
12935 | |
12936 | #[simd_test(enable = "avx512bw" )] |
12937 | unsafe fn test_mm512_cmple_epu16_mask() { |
12938 | let a = _mm512_set1_epi16(-1); |
12939 | let b = _mm512_set1_epi16(-1); |
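// Equal operands satisfy the <= comparison, so every mask bit is set.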
12940 | let m = _mm512_cmple_epu16_mask(a, b); |
12941 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
12942 | } |
12943 | |
12944 | #[simd_test(enable = "avx512bw" )] |
12945 | unsafe fn test_mm512_mask_cmple_epu16_mask() { |
12946 | let a = _mm512_set1_epi16(-1); |
12947 | let b = _mm512_set1_epi16(-1); |
12948 | let mask = 0b01010101_01010101_01010101_01010101; |
12949 | let r = _mm512_mask_cmple_epu16_mask(mask, a, b); |
12950 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
12951 | } |
12952 | |
12953 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12954 | unsafe fn test_mm256_cmple_epu16_mask() { |
12955 | let a = _mm256_set1_epi16(-1); |
12956 | let b = _mm256_set1_epi16(-1); |
12957 | let m = _mm256_cmple_epu16_mask(a, b); |
12958 | assert_eq!(m, 0b11111111_11111111); |
12959 | } |
12960 | |
12961 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12962 | unsafe fn test_mm256_mask_cmple_epu16_mask() { |
12963 | let a = _mm256_set1_epi16(-1); |
12964 | let b = _mm256_set1_epi16(-1); |
12965 | let mask = 0b01010101_01010101; |
12966 | let r = _mm256_mask_cmple_epu16_mask(mask, a, b); |
12967 | assert_eq!(r, 0b01010101_01010101); |
12968 | } |
12969 | |
12970 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12971 | unsafe fn test_mm_cmple_epu16_mask() { |
12972 | let a = _mm_set1_epi16(-1); |
12973 | let b = _mm_set1_epi16(-1); |
12974 | let m = _mm_cmple_epu16_mask(a, b); |
12975 | assert_eq!(m, 0b11111111); |
12976 | } |
12977 | |
12978 | #[simd_test(enable = "avx512bw,avx512vl" )] |
12979 | unsafe fn test_mm_mask_cmple_epu16_mask() { |
12980 | let a = _mm_set1_epi16(-1); |
12981 | let b = _mm_set1_epi16(-1); |
12982 | let mask = 0b01010101; |
12983 | let r = _mm_mask_cmple_epu16_mask(mask, a, b); |
12984 | assert_eq!(r, 0b01010101); |
12985 | } |
12986 | |
12987 | #[simd_test(enable = "avx512bw" )] |
12988 | unsafe fn test_mm512_cmple_epu8_mask() { |
12989 | let a = _mm512_set1_epi8(-1); |
12990 | let b = _mm512_set1_epi8(-1); |
12991 | let m = _mm512_cmple_epu8_mask(a, b); |
12992 | assert_eq!( |
12993 | m, |
12994 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 |
12995 | ); |
12996 | } |
12997 | |
12998 | #[simd_test(enable = "avx512bw" )] |
12999 | unsafe fn test_mm512_mask_cmple_epu8_mask() { |
13000 | let a = _mm512_set1_epi8(-1); |
13001 | let b = _mm512_set1_epi8(-1); |
13002 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; |
13003 | let r = _mm512_mask_cmple_epu8_mask(mask, a, b); |
13004 | assert_eq!( |
13005 | r, |
13006 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 |
13007 | ); |
13008 | } |
13009 | |
13010 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13011 | unsafe fn test_mm256_cmple_epu8_mask() { |
13012 | let a = _mm256_set1_epi8(-1); |
13013 | let b = _mm256_set1_epi8(-1); |
13014 | let m = _mm256_cmple_epu8_mask(a, b); |
13015 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
13016 | } |
13017 | |
13018 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13019 | unsafe fn test_mm256_mask_cmple_epu8_mask() { |
13020 | let a = _mm256_set1_epi8(-1); |
13021 | let b = _mm256_set1_epi8(-1); |
13022 | let mask = 0b01010101_01010101_01010101_01010101; |
13023 | let r = _mm256_mask_cmple_epu8_mask(mask, a, b); |
13024 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
13025 | } |
13026 | |
13027 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13028 | unsafe fn test_mm_cmple_epu8_mask() { |
13029 | let a = _mm_set1_epi8(-1); |
13030 | let b = _mm_set1_epi8(-1); |
13031 | let m = _mm_cmple_epu8_mask(a, b); |
13032 | assert_eq!(m, 0b11111111_11111111); |
13033 | } |
13034 | |
13035 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13036 | unsafe fn test_mm_mask_cmple_epu8_mask() { |
13037 | let a = _mm_set1_epi8(-1); |
13038 | let b = _mm_set1_epi8(-1); |
13039 | let mask = 0b01010101_01010101; |
13040 | let r = _mm_mask_cmple_epu8_mask(mask, a, b); |
13041 | assert_eq!(r, 0b01010101_01010101); |
13042 | } |
13043 | |
13044 | #[simd_test(enable = "avx512bw" )] |
13045 | unsafe fn test_mm512_cmple_epi16_mask() { |
13046 | let a = _mm512_set1_epi16(-1); |
13047 | let b = _mm512_set1_epi16(-1); |
13048 | let m = _mm512_cmple_epi16_mask(a, b); |
13049 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
13050 | } |
13051 | |
13052 | #[simd_test(enable = "avx512bw" )] |
13053 | unsafe fn test_mm512_mask_cmple_epi16_mask() { |
13054 | let a = _mm512_set1_epi16(-1); |
13055 | let b = _mm512_set1_epi16(-1); |
13056 | let mask = 0b01010101_01010101_01010101_01010101; |
13057 | let r = _mm512_mask_cmple_epi16_mask(mask, a, b); |
13058 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
13059 | } |
13060 | |
13061 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13062 | unsafe fn test_mm256_cmple_epi16_mask() { |
13063 | let a = _mm256_set1_epi16(-1); |
13064 | let b = _mm256_set1_epi16(-1); |
13065 | let m = _mm256_cmple_epi16_mask(a, b); |
13066 | assert_eq!(m, 0b11111111_11111111); |
13067 | } |
13068 | |
13069 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13070 | unsafe fn test_mm256_mask_cmple_epi16_mask() { |
13071 | let a = _mm256_set1_epi16(-1); |
13072 | let b = _mm256_set1_epi16(-1); |
13073 | let mask = 0b01010101_01010101; |
13074 | let r = _mm256_mask_cmple_epi16_mask(mask, a, b); |
13075 | assert_eq!(r, 0b01010101_01010101); |
13076 | } |
13077 | |
13078 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13079 | unsafe fn test_mm_cmple_epi16_mask() { |
13080 | let a = _mm_set1_epi16(-1); |
13081 | let b = _mm_set1_epi16(-1); |
13082 | let m = _mm_cmple_epi16_mask(a, b); |
13083 | assert_eq!(m, 0b11111111); |
13084 | } |
13085 | |
13086 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13087 | unsafe fn test_mm_mask_cmple_epi16_mask() { |
13088 | let a = _mm_set1_epi16(-1); |
13089 | let b = _mm_set1_epi16(-1); |
13090 | let mask = 0b01010101; |
13091 | let r = _mm_mask_cmple_epi16_mask(mask, a, b); |
13092 | assert_eq!(r, 0b01010101); |
13093 | } |
13094 | |
13095 | #[simd_test(enable = "avx512bw" )] |
13096 | unsafe fn test_mm512_cmple_epi8_mask() { |
13097 | let a = _mm512_set1_epi8(-1); |
13098 | let b = _mm512_set1_epi8(-1); |
13099 | let m = _mm512_cmple_epi8_mask(a, b); |
13100 | assert_eq!( |
13101 | m, |
13102 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 |
13103 | ); |
13104 | } |
13105 | |
13106 | #[simd_test(enable = "avx512bw" )] |
13107 | unsafe fn test_mm512_mask_cmple_epi8_mask() { |
13108 | let a = _mm512_set1_epi8(-1); |
13109 | let b = _mm512_set1_epi8(-1); |
13110 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; |
13111 | let r = _mm512_mask_cmple_epi8_mask(mask, a, b); |
13112 | assert_eq!( |
13113 | r, |
13114 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 |
13115 | ); |
13116 | } |
13117 | |
13118 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13119 | unsafe fn test_mm256_cmple_epi8_mask() { |
13120 | let a = _mm256_set1_epi8(-1); |
13121 | let b = _mm256_set1_epi8(-1); |
13122 | let m = _mm256_cmple_epi8_mask(a, b); |
13123 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
13124 | } |
13125 | |
13126 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13127 | unsafe fn test_mm256_mask_cmple_epi8_mask() { |
13128 | let a = _mm256_set1_epi8(-1); |
13129 | let b = _mm256_set1_epi8(-1); |
13130 | let mask = 0b01010101_01010101_01010101_01010101; |
13131 | let r = _mm256_mask_cmple_epi8_mask(mask, a, b); |
13132 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
13133 | } |
13134 | |
13135 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13136 | unsafe fn test_mm_cmple_epi8_mask() { |
13137 | let a = _mm_set1_epi8(-1); |
13138 | let b = _mm_set1_epi8(-1); |
13139 | let m = _mm_cmple_epi8_mask(a, b); |
13140 | assert_eq!(m, 0b11111111_11111111); |
13141 | } |
13142 | |
13143 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13144 | unsafe fn test_mm_mask_cmple_epi8_mask() { |
13145 | let a = _mm_set1_epi8(-1); |
13146 | let b = _mm_set1_epi8(-1); |
13147 | let mask = 0b01010101_01010101; |
13148 | let r = _mm_mask_cmple_epi8_mask(mask, a, b); |
13149 | assert_eq!(r, 0b01010101_01010101); |
13150 | } |
13151 | |
13152 | #[simd_test(enable = "avx512bw" )] |
13153 | unsafe fn test_mm512_cmpge_epu16_mask() { |
13154 | let a = _mm512_set1_epi16(1); |
13155 | let b = _mm512_set1_epi16(1); |
13156 | let m = _mm512_cmpge_epu16_mask(a, b); |
13157 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
13158 | } |
13159 | |
13160 | #[simd_test(enable = "avx512bw" )] |
13161 | unsafe fn test_mm512_mask_cmpge_epu16_mask() { |
13162 | let a = _mm512_set1_epi16(1); |
13163 | let b = _mm512_set1_epi16(1); |
13164 | let mask = 0b01010101_01010101_01010101_01010101; |
13165 | let r = _mm512_mask_cmpge_epu16_mask(mask, a, b); |
13166 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
13167 | } |
13168 | |
13169 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13170 | unsafe fn test_mm256_cmpge_epu16_mask() { |
13171 | let a = _mm256_set1_epi16(1); |
13172 | let b = _mm256_set1_epi16(1); |
13173 | let m = _mm256_cmpge_epu16_mask(a, b); |
13174 | assert_eq!(m, 0b11111111_11111111); |
13175 | } |
13176 | |
13177 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13178 | unsafe fn test_mm256_mask_cmpge_epu16_mask() { |
13179 | let a = _mm256_set1_epi16(1); |
13180 | let b = _mm256_set1_epi16(1); |
13181 | let mask = 0b01010101_01010101; |
13182 | let r = _mm256_mask_cmpge_epu16_mask(mask, a, b); |
13183 | assert_eq!(r, 0b01010101_01010101); |
13184 | } |
13185 | |
13186 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13187 | unsafe fn test_mm_cmpge_epu16_mask() { |
13188 | let a = _mm_set1_epi16(1); |
13189 | let b = _mm_set1_epi16(1); |
13190 | let m = _mm_cmpge_epu16_mask(a, b); |
13191 | assert_eq!(m, 0b11111111); |
13192 | } |
13193 | |
13194 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13195 | unsafe fn test_mm_mask_cmpge_epu16_mask() { |
13196 | let a = _mm_set1_epi16(1); |
13197 | let b = _mm_set1_epi16(1); |
13198 | let mask = 0b01010101; |
13199 | let r = _mm_mask_cmpge_epu16_mask(mask, a, b); |
13200 | assert_eq!(r, 0b01010101); |
13201 | } |
13202 | |
13203 | #[simd_test(enable = "avx512bw" )] |
13204 | unsafe fn test_mm512_cmpge_epu8_mask() { |
13205 | let a = _mm512_set1_epi8(1); |
13206 | let b = _mm512_set1_epi8(1); |
13207 | let m = _mm512_cmpge_epu8_mask(a, b); |
13208 | assert_eq!( |
13209 | m, |
13210 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 |
13211 | ); |
13212 | } |
13213 | |
13214 | #[simd_test(enable = "avx512bw" )] |
13215 | unsafe fn test_mm512_mask_cmpge_epu8_mask() { |
13216 | let a = _mm512_set1_epi8(1); |
13217 | let b = _mm512_set1_epi8(1); |
13218 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; |
13219 | let r = _mm512_mask_cmpge_epu8_mask(mask, a, b); |
13220 | assert_eq!( |
13221 | r, |
13222 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 |
13223 | ); |
13224 | } |
13225 | |
13226 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13227 | unsafe fn test_mm256_cmpge_epu8_mask() { |
13228 | let a = _mm256_set1_epi8(1); |
13229 | let b = _mm256_set1_epi8(1); |
13230 | let m = _mm256_cmpge_epu8_mask(a, b); |
13231 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
13232 | } |
13233 | |
13234 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13235 | unsafe fn test_mm256_mask_cmpge_epu8_mask() { |
13236 | let a = _mm256_set1_epi8(1); |
13237 | let b = _mm256_set1_epi8(1); |
13238 | let mask = 0b01010101_01010101_01010101_01010101; |
13239 | let r = _mm256_mask_cmpge_epu8_mask(mask, a, b); |
13240 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
13241 | } |
13242 | |
13243 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13244 | unsafe fn test_mm_cmpge_epu8_mask() { |
13245 | let a = _mm_set1_epi8(1); |
13246 | let b = _mm_set1_epi8(1); |
13247 | let m = _mm_cmpge_epu8_mask(a, b); |
13248 | assert_eq!(m, 0b11111111_11111111); |
13249 | } |
13250 | |
13251 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13252 | unsafe fn test_mm_mask_cmpge_epu8_mask() { |
13253 | let a = _mm_set1_epi8(1); |
13254 | let b = _mm_set1_epi8(1); |
13255 | let mask = 0b01010101_01010101; |
13256 | let r = _mm_mask_cmpge_epu8_mask(mask, a, b); |
13257 | assert_eq!(r, 0b01010101_01010101); |
13258 | } |
13259 | |
13260 | #[simd_test(enable = "avx512bw" )] |
13261 | unsafe fn test_mm512_cmpge_epi16_mask() { |
13262 | let a = _mm512_set1_epi16(-1); |
13263 | let b = _mm512_set1_epi16(-1); |
13264 | let m = _mm512_cmpge_epi16_mask(a, b); |
13265 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
13266 | } |
13267 | |
13268 | #[simd_test(enable = "avx512bw" )] |
13269 | unsafe fn test_mm512_mask_cmpge_epi16_mask() { |
13270 | let a = _mm512_set1_epi16(-1); |
13271 | let b = _mm512_set1_epi16(-1); |
13272 | let mask = 0b01010101_01010101_01010101_01010101; |
13273 | let r = _mm512_mask_cmpge_epi16_mask(mask, a, b); |
13274 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
13275 | } |
13276 | |
13277 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13278 | unsafe fn test_mm256_cmpge_epi16_mask() { |
13279 | let a = _mm256_set1_epi16(-1); |
13280 | let b = _mm256_set1_epi16(-1); |
13281 | let m = _mm256_cmpge_epi16_mask(a, b); |
13282 | assert_eq!(m, 0b11111111_11111111); |
13283 | } |
13284 | |
13285 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13286 | unsafe fn test_mm256_mask_cmpge_epi16_mask() { |
13287 | let a = _mm256_set1_epi16(-1); |
13288 | let b = _mm256_set1_epi16(-1); |
13289 | let mask = 0b01010101_01010101; |
13290 | let r = _mm256_mask_cmpge_epi16_mask(mask, a, b); |
13291 | assert_eq!(r, 0b01010101_01010101); |
13292 | } |
13293 | |
13294 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13295 | unsafe fn test_mm_cmpge_epi16_mask() { |
13296 | let a = _mm_set1_epi16(-1); |
13297 | let b = _mm_set1_epi16(-1); |
13298 | let m = _mm_cmpge_epi16_mask(a, b); |
13299 | assert_eq!(m, 0b11111111); |
13300 | } |
13301 | |
13302 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13303 | unsafe fn test_mm_mask_cmpge_epi16_mask() { |
13304 | let a = _mm_set1_epi16(-1); |
13305 | let b = _mm_set1_epi16(-1); |
13306 | let mask = 0b01010101; |
13307 | let r = _mm_mask_cmpge_epi16_mask(mask, a, b); |
13308 | assert_eq!(r, 0b01010101); |
13309 | } |
13310 | |
13311 | #[simd_test(enable = "avx512bw" )] |
13312 | unsafe fn test_mm512_cmpge_epi8_mask() { |
13313 | let a = _mm512_set1_epi8(-1); |
13314 | let b = _mm512_set1_epi8(-1); |
13315 | let m = _mm512_cmpge_epi8_mask(a, b); |
13316 | assert_eq!( |
13317 | m, |
13318 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 |
13319 | ); |
13320 | } |
13321 | |
13322 | #[simd_test(enable = "avx512bw" )] |
13323 | unsafe fn test_mm512_mask_cmpge_epi8_mask() { |
13324 | let a = _mm512_set1_epi8(-1); |
13325 | let b = _mm512_set1_epi8(-1); |
13326 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; |
13327 | let r = _mm512_mask_cmpge_epi8_mask(mask, a, b); |
13328 | assert_eq!( |
13329 | r, |
13330 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 |
13331 | ); |
13332 | } |
13333 | |
13334 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13335 | unsafe fn test_mm256_cmpge_epi8_mask() { |
13336 | let a = _mm256_set1_epi8(-1); |
13337 | let b = _mm256_set1_epi8(-1); |
13338 | let m = _mm256_cmpge_epi8_mask(a, b); |
13339 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
13340 | } |
13341 | |
13342 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13343 | unsafe fn test_mm256_mask_cmpge_epi8_mask() { |
13344 | let a = _mm256_set1_epi8(-1); |
13345 | let b = _mm256_set1_epi8(-1); |
13346 | let mask = 0b01010101_01010101_01010101_01010101; |
13347 | let r = _mm256_mask_cmpge_epi8_mask(mask, a, b); |
13348 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
13349 | } |
13350 | |
13351 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13352 | unsafe fn test_mm_cmpge_epi8_mask() { |
13353 | let a = _mm_set1_epi8(-1); |
13354 | let b = _mm_set1_epi8(-1); |
13355 | let m = _mm_cmpge_epi8_mask(a, b); |
13356 | assert_eq!(m, 0b11111111_11111111); |
13357 | } |
13358 | |
13359 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13360 | unsafe fn test_mm_mask_cmpge_epi8_mask() { |
13361 | let a = _mm_set1_epi8(-1); |
13362 | let b = _mm_set1_epi8(-1); |
13363 | let mask = 0b01010101_01010101; |
13364 | let r = _mm_mask_cmpge_epi8_mask(mask, a, b); |
13365 | assert_eq!(r, 0b01010101_01010101); |
13366 | } |
13367 | |
13368 | #[simd_test(enable = "avx512bw" )] |
13369 | unsafe fn test_mm512_cmpeq_epu16_mask() { |
13370 | let a = _mm512_set1_epi16(1); |
13371 | let b = _mm512_set1_epi16(1); |
13372 | let m = _mm512_cmpeq_epu16_mask(a, b); |
13373 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
13374 | } |
13375 | |
13376 | #[simd_test(enable = "avx512bw" )] |
13377 | unsafe fn test_mm512_mask_cmpeq_epu16_mask() { |
13378 | let a = _mm512_set1_epi16(1); |
13379 | let b = _mm512_set1_epi16(1); |
13380 | let mask = 0b01010101_01010101_01010101_01010101; |
13381 | let r = _mm512_mask_cmpeq_epu16_mask(mask, a, b); |
13382 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
13383 | } |
13384 | |
13385 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13386 | unsafe fn test_mm256_cmpeq_epu16_mask() { |
13387 | let a = _mm256_set1_epi16(1); |
13388 | let b = _mm256_set1_epi16(1); |
13389 | let m = _mm256_cmpeq_epu16_mask(a, b); |
13390 | assert_eq!(m, 0b11111111_11111111); |
13391 | } |
13392 | |
13393 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13394 | unsafe fn test_mm256_mask_cmpeq_epu16_mask() { |
13395 | let a = _mm256_set1_epi16(1); |
13396 | let b = _mm256_set1_epi16(1); |
13397 | let mask = 0b01010101_01010101; |
13398 | let r = _mm256_mask_cmpeq_epu16_mask(mask, a, b); |
13399 | assert_eq!(r, 0b01010101_01010101); |
13400 | } |
13401 | |
13402 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13403 | unsafe fn test_mm_cmpeq_epu16_mask() { |
13404 | let a = _mm_set1_epi16(1); |
13405 | let b = _mm_set1_epi16(1); |
13406 | let m = _mm_cmpeq_epu16_mask(a, b); |
13407 | assert_eq!(m, 0b11111111); |
13408 | } |
13409 | |
13410 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13411 | unsafe fn test_mm_mask_cmpeq_epu16_mask() { |
13412 | let a = _mm_set1_epi16(1); |
13413 | let b = _mm_set1_epi16(1); |
13414 | let mask = 0b01010101; |
13415 | let r = _mm_mask_cmpeq_epu16_mask(mask, a, b); |
13416 | assert_eq!(r, 0b01010101); |
13417 | } |
13418 | |
13419 | #[simd_test(enable = "avx512bw" )] |
13420 | unsafe fn test_mm512_cmpeq_epu8_mask() { |
13421 | let a = _mm512_set1_epi8(1); |
13422 | let b = _mm512_set1_epi8(1); |
13423 | let m = _mm512_cmpeq_epu8_mask(a, b); |
13424 | assert_eq!( |
13425 | m, |
13426 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 |
13427 | ); |
13428 | } |
13429 | |
13430 | #[simd_test(enable = "avx512bw" )] |
13431 | unsafe fn test_mm512_mask_cmpeq_epu8_mask() { |
13432 | let a = _mm512_set1_epi8(1); |
13433 | let b = _mm512_set1_epi8(1); |
13434 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; |
13435 | let r = _mm512_mask_cmpeq_epu8_mask(mask, a, b); |
13436 | assert_eq!( |
13437 | r, |
13438 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 |
13439 | ); |
13440 | } |
13441 | |
13442 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13443 | unsafe fn test_mm256_cmpeq_epu8_mask() { |
13444 | let a = _mm256_set1_epi8(1); |
13445 | let b = _mm256_set1_epi8(1); |
13446 | let m = _mm256_cmpeq_epu8_mask(a, b); |
13447 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
13448 | } |
13449 | |
13450 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13451 | unsafe fn test_mm256_mask_cmpeq_epu8_mask() { |
13452 | let a = _mm256_set1_epi8(1); |
13453 | let b = _mm256_set1_epi8(1); |
13454 | let mask = 0b01010101_01010101_01010101_01010101; |
13455 | let r = _mm256_mask_cmpeq_epu8_mask(mask, a, b); |
13456 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
13457 | } |
13458 | |
13459 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13460 | unsafe fn test_mm_cmpeq_epu8_mask() { |
13461 | let a = _mm_set1_epi8(1); |
13462 | let b = _mm_set1_epi8(1); |
13463 | let m = _mm_cmpeq_epu8_mask(a, b); |
13464 | assert_eq!(m, 0b11111111_11111111); |
13465 | } |
13466 | |
13467 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13468 | unsafe fn test_mm_mask_cmpeq_epu8_mask() { |
13469 | let a = _mm_set1_epi8(1); |
13470 | let b = _mm_set1_epi8(1); |
13471 | let mask = 0b01010101_01010101; |
13472 | let r = _mm_mask_cmpeq_epu8_mask(mask, a, b); |
13473 | assert_eq!(r, 0b01010101_01010101); |
13474 | } |
13475 | |
13476 | #[simd_test(enable = "avx512bw" )] |
13477 | unsafe fn test_mm512_cmpeq_epi16_mask() { |
13478 | let a = _mm512_set1_epi16(-1); |
13479 | let b = _mm512_set1_epi16(-1); |
13480 | let m = _mm512_cmpeq_epi16_mask(a, b); |
13481 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
13482 | } |
13483 | |
13484 | #[simd_test(enable = "avx512bw" )] |
13485 | unsafe fn test_mm512_mask_cmpeq_epi16_mask() { |
13486 | let a = _mm512_set1_epi16(-1); |
13487 | let b = _mm512_set1_epi16(-1); |
13488 | let mask = 0b01010101_01010101_01010101_01010101; |
13489 | let r = _mm512_mask_cmpeq_epi16_mask(mask, a, b); |
13490 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
13491 | } |
13492 | |
13493 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13494 | unsafe fn test_mm256_cmpeq_epi16_mask() { |
13495 | let a = _mm256_set1_epi16(-1); |
13496 | let b = _mm256_set1_epi16(-1); |
13497 | let m = _mm256_cmpeq_epi16_mask(a, b); |
13498 | assert_eq!(m, 0b11111111_11111111); |
13499 | } |
13500 | |
13501 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13502 | unsafe fn test_mm256_mask_cmpeq_epi16_mask() { |
13503 | let a = _mm256_set1_epi16(-1); |
13504 | let b = _mm256_set1_epi16(-1); |
13505 | let mask = 0b01010101_01010101; |
13506 | let r = _mm256_mask_cmpeq_epi16_mask(mask, a, b); |
13507 | assert_eq!(r, 0b01010101_01010101); |
13508 | } |
13509 | |
13510 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13511 | unsafe fn test_mm_cmpeq_epi16_mask() { |
13512 | let a = _mm_set1_epi16(-1); |
13513 | let b = _mm_set1_epi16(-1); |
13514 | let m = _mm_cmpeq_epi16_mask(a, b); |
13515 | assert_eq!(m, 0b11111111); |
13516 | } |
13517 | |
13518 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13519 | unsafe fn test_mm_mask_cmpeq_epi16_mask() { |
13520 | let a = _mm_set1_epi16(-1); |
13521 | let b = _mm_set1_epi16(-1); |
13522 | let mask = 0b01010101; |
13523 | let r = _mm_mask_cmpeq_epi16_mask(mask, a, b); |
13524 | assert_eq!(r, 0b01010101); |
13525 | } |
13526 | |
13527 | #[simd_test(enable = "avx512bw" )] |
13528 | unsafe fn test_mm512_cmpeq_epi8_mask() { |
13529 | let a = _mm512_set1_epi8(-1); |
13530 | let b = _mm512_set1_epi8(-1); |
13531 | let m = _mm512_cmpeq_epi8_mask(a, b); |
13532 | assert_eq!( |
13533 | m, |
13534 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 |
13535 | ); |
13536 | } |
13537 | |
13538 | #[simd_test(enable = "avx512bw" )] |
13539 | unsafe fn test_mm512_mask_cmpeq_epi8_mask() { |
13540 | let a = _mm512_set1_epi8(-1); |
13541 | let b = _mm512_set1_epi8(-1); |
13542 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; |
13543 | let r = _mm512_mask_cmpeq_epi8_mask(mask, a, b); |
13544 | assert_eq!( |
13545 | r, |
13546 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 |
13547 | ); |
13548 | } |
13549 | |
13550 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13551 | unsafe fn test_mm256_cmpeq_epi8_mask() { |
13552 | let a = _mm256_set1_epi8(-1); |
13553 | let b = _mm256_set1_epi8(-1); |
13554 | let m = _mm256_cmpeq_epi8_mask(a, b); |
13555 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
13556 | } |
13557 | |
13558 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13559 | unsafe fn test_mm256_mask_cmpeq_epi8_mask() { |
13560 | let a = _mm256_set1_epi8(-1); |
13561 | let b = _mm256_set1_epi8(-1); |
13562 | let mask = 0b01010101_01010101_01010101_01010101; |
13563 | let r = _mm256_mask_cmpeq_epi8_mask(mask, a, b); |
13564 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
13565 | } |
13566 | |
13567 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13568 | unsafe fn test_mm_cmpeq_epi8_mask() { |
13569 | let a = _mm_set1_epi8(-1); |
13570 | let b = _mm_set1_epi8(-1); |
13571 | let m = _mm_cmpeq_epi8_mask(a, b); |
13572 | assert_eq!(m, 0b11111111_11111111); |
13573 | } |
13574 | |
13575 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13576 | unsafe fn test_mm_mask_cmpeq_epi8_mask() { |
13577 | let a = _mm_set1_epi8(-1); |
13578 | let b = _mm_set1_epi8(-1); |
13579 | let mask = 0b01010101_01010101; |
13580 | let r = _mm_mask_cmpeq_epi8_mask(mask, a, b); |
13581 | assert_eq!(r, 0b01010101_01010101); |
13582 | } |
13583 | |
13584 | #[simd_test(enable = "avx512bw" )] |
13585 | unsafe fn test_mm512_cmpneq_epu16_mask() { |
13586 | let a = _mm512_set1_epi16(2); |
13587 | let b = _mm512_set1_epi16(1); |
13588 | let m = _mm512_cmpneq_epu16_mask(a, b); |
13589 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
13590 | } |
13591 | |
13592 | #[simd_test(enable = "avx512bw" )] |
13593 | unsafe fn test_mm512_mask_cmpneq_epu16_mask() { |
13594 | let a = _mm512_set1_epi16(2); |
13595 | let b = _mm512_set1_epi16(1); |
13596 | let mask = 0b01010101_01010101_01010101_01010101; |
13597 | let r = _mm512_mask_cmpneq_epu16_mask(mask, a, b); |
13598 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
13599 | } |
13600 | |
13601 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13602 | unsafe fn test_mm256_cmpneq_epu16_mask() { |
13603 | let a = _mm256_set1_epi16(2); |
13604 | let b = _mm256_set1_epi16(1); |
13605 | let m = _mm256_cmpneq_epu16_mask(a, b); |
13606 | assert_eq!(m, 0b11111111_11111111); |
13607 | } |
13608 | |
13609 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13610 | unsafe fn test_mm256_mask_cmpneq_epu16_mask() { |
13611 | let a = _mm256_set1_epi16(2); |
13612 | let b = _mm256_set1_epi16(1); |
13613 | let mask = 0b01010101_01010101; |
13614 | let r = _mm256_mask_cmpneq_epu16_mask(mask, a, b); |
13615 | assert_eq!(r, 0b01010101_01010101); |
13616 | } |
13617 | |
13618 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13619 | unsafe fn test_mm_cmpneq_epu16_mask() { |
13620 | let a = _mm_set1_epi16(2); |
13621 | let b = _mm_set1_epi16(1); |
13622 | let m = _mm_cmpneq_epu16_mask(a, b); |
13623 | assert_eq!(m, 0b11111111); |
13624 | } |
13625 | |
13626 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13627 | unsafe fn test_mm_mask_cmpneq_epu16_mask() { |
13628 | let a = _mm_set1_epi16(2); |
13629 | let b = _mm_set1_epi16(1); |
13630 | let mask = 0b01010101; |
13631 | let r = _mm_mask_cmpneq_epu16_mask(mask, a, b); |
13632 | assert_eq!(r, 0b01010101); |
13633 | } |
13634 | |
13635 | #[simd_test(enable = "avx512bw" )] |
13636 | unsafe fn test_mm512_cmpneq_epu8_mask() { |
13637 | let a = _mm512_set1_epi8(2); |
13638 | let b = _mm512_set1_epi8(1); |
13639 | let m = _mm512_cmpneq_epu8_mask(a, b); |
13640 | assert_eq!( |
13641 | m, |
13642 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 |
13643 | ); |
13644 | } |
13645 | |
13646 | #[simd_test(enable = "avx512bw" )] |
13647 | unsafe fn test_mm512_mask_cmpneq_epu8_mask() { |
13648 | let a = _mm512_set1_epi8(2); |
13649 | let b = _mm512_set1_epi8(1); |
13650 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; |
13651 | let r = _mm512_mask_cmpneq_epu8_mask(mask, a, b); |
13652 | assert_eq!( |
13653 | r, |
13654 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 |
13655 | ); |
13656 | } |
13657 | |
13658 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13659 | unsafe fn test_mm256_cmpneq_epu8_mask() { |
13660 | let a = _mm256_set1_epi8(2); |
13661 | let b = _mm256_set1_epi8(1); |
13662 | let m = _mm256_cmpneq_epu8_mask(a, b); |
13663 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
13664 | } |
13665 | |
13666 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13667 | unsafe fn test_mm256_mask_cmpneq_epu8_mask() { |
13668 | let a = _mm256_set1_epi8(2); |
13669 | let b = _mm256_set1_epi8(1); |
13670 | let mask = 0b01010101_01010101_01010101_01010101; |
13671 | let r = _mm256_mask_cmpneq_epu8_mask(mask, a, b); |
13672 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
13673 | } |
13674 | |
13675 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13676 | unsafe fn test_mm_cmpneq_epu8_mask() { |
13677 | let a = _mm_set1_epi8(2); |
13678 | let b = _mm_set1_epi8(1); |
13679 | let m = _mm_cmpneq_epu8_mask(a, b); |
13680 | assert_eq!(m, 0b11111111_11111111); |
13681 | } |
13682 | |
13683 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13684 | unsafe fn test_mm_mask_cmpneq_epu8_mask() { |
13685 | let a = _mm_set1_epi8(2); |
13686 | let b = _mm_set1_epi8(1); |
13687 | let mask = 0b01010101_01010101; |
13688 | let r = _mm_mask_cmpneq_epu8_mask(mask, a, b); |
13689 | assert_eq!(r, 0b01010101_01010101); |
13690 | } |
13691 | |
13692 | #[simd_test(enable = "avx512bw" )] |
13693 | unsafe fn test_mm512_cmpneq_epi16_mask() { |
13694 | let a = _mm512_set1_epi16(1); |
13695 | let b = _mm512_set1_epi16(-1); |
13696 | let m = _mm512_cmpneq_epi16_mask(a, b); |
13697 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
13698 | } |
13699 | |
13700 | #[simd_test(enable = "avx512bw" )] |
13701 | unsafe fn test_mm512_mask_cmpneq_epi16_mask() { |
13702 | let a = _mm512_set1_epi16(1); |
13703 | let b = _mm512_set1_epi16(-1); |
13704 | let mask = 0b01010101_01010101_01010101_01010101; |
13705 | let r = _mm512_mask_cmpneq_epi16_mask(mask, a, b); |
13706 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
13707 | } |
13708 | |
13709 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13710 | unsafe fn test_mm256_cmpneq_epi16_mask() { |
13711 | let a = _mm256_set1_epi16(1); |
13712 | let b = _mm256_set1_epi16(-1); |
13713 | let m = _mm256_cmpneq_epi16_mask(a, b); |
13714 | assert_eq!(m, 0b11111111_11111111); |
13715 | } |
13716 | |
13717 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13718 | unsafe fn test_mm256_mask_cmpneq_epi16_mask() { |
13719 | let a = _mm256_set1_epi16(1); |
13720 | let b = _mm256_set1_epi16(-1); |
13721 | let mask = 0b01010101_01010101; |
13722 | let r = _mm256_mask_cmpneq_epi16_mask(mask, a, b); |
13723 | assert_eq!(r, 0b01010101_01010101); |
13724 | } |
13725 | |
13726 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13727 | unsafe fn test_mm_cmpneq_epi16_mask() { |
13728 | let a = _mm_set1_epi16(1); |
13729 | let b = _mm_set1_epi16(-1); |
13730 | let m = _mm_cmpneq_epi16_mask(a, b); |
13731 | assert_eq!(m, 0b11111111); |
13732 | } |
13733 | |
13734 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13735 | unsafe fn test_mm_mask_cmpneq_epi16_mask() { |
13736 | let a = _mm_set1_epi16(1); |
13737 | let b = _mm_set1_epi16(-1); |
13738 | let mask = 0b01010101; |
13739 | let r = _mm_mask_cmpneq_epi16_mask(mask, a, b); |
13740 | assert_eq!(r, 0b01010101); |
13741 | } |
13742 | |
13743 | #[simd_test(enable = "avx512bw" )] |
13744 | unsafe fn test_mm512_cmpneq_epi8_mask() { |
13745 | let a = _mm512_set1_epi8(1); |
13746 | let b = _mm512_set1_epi8(-1); |
13747 | let m = _mm512_cmpneq_epi8_mask(a, b); |
13748 | assert_eq!( |
13749 | m, |
13750 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 |
13751 | ); |
13752 | } |
13753 | |
13754 | #[simd_test(enable = "avx512bw" )] |
13755 | unsafe fn test_mm512_mask_cmpneq_epi8_mask() { |
13756 | let a = _mm512_set1_epi8(1); |
13757 | let b = _mm512_set1_epi8(-1); |
13758 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; |
13759 | let r = _mm512_mask_cmpneq_epi8_mask(mask, a, b); |
13760 | assert_eq!( |
13761 | r, |
13762 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 |
13763 | ); |
13764 | } |
13765 | |
13766 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13767 | unsafe fn test_mm256_cmpneq_epi8_mask() { |
13768 | let a = _mm256_set1_epi8(1); |
13769 | let b = _mm256_set1_epi8(-1); |
13770 | let m = _mm256_cmpneq_epi8_mask(a, b); |
13771 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
13772 | } |
13773 | |
13774 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13775 | unsafe fn test_mm256_mask_cmpneq_epi8_mask() { |
13776 | let a = _mm256_set1_epi8(1); |
13777 | let b = _mm256_set1_epi8(-1); |
13778 | let mask = 0b01010101_01010101_01010101_01010101; |
13779 | let r = _mm256_mask_cmpneq_epi8_mask(mask, a, b); |
13780 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
13781 | } |
13782 | |
13783 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13784 | unsafe fn test_mm_cmpneq_epi8_mask() { |
13785 | let a = _mm_set1_epi8(1); |
13786 | let b = _mm_set1_epi8(-1); |
13787 | let m = _mm_cmpneq_epi8_mask(a, b); |
13788 | assert_eq!(m, 0b11111111_11111111); |
13789 | } |
13790 | |
13791 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13792 | unsafe fn test_mm_mask_cmpneq_epi8_mask() { |
13793 | let a = _mm_set1_epi8(1); |
13794 | let b = _mm_set1_epi8(-1); |
13795 | let mask = 0b01010101_01010101; |
13796 | let r = _mm_mask_cmpneq_epi8_mask(mask, a, b); |
13797 | assert_eq!(r, 0b01010101_01010101); |
13798 | } |
13799 | |
13800 | #[simd_test(enable = "avx512bw" )] |
13801 | unsafe fn test_mm512_cmp_epu16_mask() { |
13802 | let a = _mm512_set1_epi16(0); |
13803 | let b = _mm512_set1_epi16(1); |
13804 | let m = _mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b); |
13805 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
13806 | } |
13807 | |
13808 | #[simd_test(enable = "avx512bw" )] |
13809 | unsafe fn test_mm512_mask_cmp_epu16_mask() { |
13810 | let a = _mm512_set1_epi16(0); |
13811 | let b = _mm512_set1_epi16(1); |
13812 | let mask = 0b01010101_01010101_01010101_01010101; |
13813 | let r = _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b); |
13814 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
13815 | } |
13816 | |
13817 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13818 | unsafe fn test_mm256_cmp_epu16_mask() { |
13819 | let a = _mm256_set1_epi16(0); |
13820 | let b = _mm256_set1_epi16(1); |
13821 | let m = _mm256_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b); |
13822 | assert_eq!(m, 0b11111111_11111111); |
13823 | } |
13824 | |
13825 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13826 | unsafe fn test_mm256_mask_cmp_epu16_mask() { |
13827 | let a = _mm256_set1_epi16(0); |
13828 | let b = _mm256_set1_epi16(1); |
13829 | let mask = 0b01010101_01010101; |
13830 | let r = _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b); |
13831 | assert_eq!(r, 0b01010101_01010101); |
13832 | } |
13833 | |
13834 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13835 | unsafe fn test_mm_cmp_epu16_mask() { |
13836 | let a = _mm_set1_epi16(0); |
13837 | let b = _mm_set1_epi16(1); |
13838 | let m = _mm_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b); |
13839 | assert_eq!(m, 0b11111111); |
13840 | } |
13841 | |
13842 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13843 | unsafe fn test_mm_mask_cmp_epu16_mask() { |
13844 | let a = _mm_set1_epi16(0); |
13845 | let b = _mm_set1_epi16(1); |
13846 | let mask = 0b01010101; |
13847 | let r = _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b); |
13848 | assert_eq!(r, 0b01010101); |
13849 | } |
13850 | |
13851 | #[simd_test(enable = "avx512bw" )] |
13852 | unsafe fn test_mm512_cmp_epu8_mask() { |
13853 | let a = _mm512_set1_epi8(0); |
13854 | let b = _mm512_set1_epi8(1); |
13855 | let m = _mm512_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b); |
13856 | assert_eq!( |
13857 | m, |
13858 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 |
13859 | ); |
13860 | } |
13861 | |
13862 | #[simd_test(enable = "avx512bw" )] |
13863 | unsafe fn test_mm512_mask_cmp_epu8_mask() { |
13864 | let a = _mm512_set1_epi8(0); |
13865 | let b = _mm512_set1_epi8(1); |
13866 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; |
13867 | let r = _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b); |
13868 | assert_eq!( |
13869 | r, |
13870 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 |
13871 | ); |
13872 | } |
13873 | |
13874 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13875 | unsafe fn test_mm256_cmp_epu8_mask() { |
13876 | let a = _mm256_set1_epi8(0); |
13877 | let b = _mm256_set1_epi8(1); |
13878 | let m = _mm256_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b); |
13879 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
13880 | } |
13881 | |
13882 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13883 | unsafe fn test_mm256_mask_cmp_epu8_mask() { |
13884 | let a = _mm256_set1_epi8(0); |
13885 | let b = _mm256_set1_epi8(1); |
13886 | let mask = 0b01010101_01010101_01010101_01010101; |
13887 | let r = _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b); |
13888 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
13889 | } |
13890 | |
13891 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13892 | unsafe fn test_mm_cmp_epu8_mask() { |
13893 | let a = _mm_set1_epi8(0); |
13894 | let b = _mm_set1_epi8(1); |
13895 | let m = _mm_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b); |
13896 | assert_eq!(m, 0b11111111_11111111); |
13897 | } |
13898 | |
13899 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13900 | unsafe fn test_mm_mask_cmp_epu8_mask() { |
13901 | let a = _mm_set1_epi8(0); |
13902 | let b = _mm_set1_epi8(1); |
13903 | let mask = 0b01010101_01010101; |
13904 | let r = _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b); |
13905 | assert_eq!(r, 0b01010101_01010101); |
13906 | } |
13907 | |
13908 | #[simd_test(enable = "avx512bw" )] |
13909 | unsafe fn test_mm512_cmp_epi16_mask() { |
13910 | let a = _mm512_set1_epi16(0); |
13911 | let b = _mm512_set1_epi16(1); |
13912 | let m = _mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b); |
13913 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
13914 | } |
13915 | |
13916 | #[simd_test(enable = "avx512bw" )] |
13917 | unsafe fn test_mm512_mask_cmp_epi16_mask() { |
13918 | let a = _mm512_set1_epi16(0); |
13919 | let b = _mm512_set1_epi16(1); |
13920 | let mask = 0b01010101_01010101_01010101_01010101; |
13921 | let r = _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b); |
13922 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
13923 | } |
13924 | |
13925 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13926 | unsafe fn test_mm256_cmp_epi16_mask() { |
13927 | let a = _mm256_set1_epi16(0); |
13928 | let b = _mm256_set1_epi16(1); |
13929 | let m = _mm256_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b); |
13930 | assert_eq!(m, 0b11111111_11111111); |
13931 | } |
13932 | |
13933 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13934 | unsafe fn test_mm256_mask_cmp_epi16_mask() { |
13935 | let a = _mm256_set1_epi16(0); |
13936 | let b = _mm256_set1_epi16(1); |
13937 | let mask = 0b01010101_01010101; |
13938 | let r = _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b); |
13939 | assert_eq!(r, 0b01010101_01010101); |
13940 | } |
13941 | |
13942 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13943 | unsafe fn test_mm_cmp_epi16_mask() { |
13944 | let a = _mm_set1_epi16(0); |
13945 | let b = _mm_set1_epi16(1); |
13946 | let m = _mm_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b); |
13947 | assert_eq!(m, 0b11111111); |
13948 | } |
13949 | |
13950 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13951 | unsafe fn test_mm_mask_cmp_epi16_mask() { |
13952 | let a = _mm_set1_epi16(0); |
13953 | let b = _mm_set1_epi16(1); |
13954 | let mask = 0b01010101; |
13955 | let r = _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b); |
13956 | assert_eq!(r, 0b01010101); |
13957 | } |
13958 | |
13959 | #[simd_test(enable = "avx512bw" )] |
13960 | unsafe fn test_mm512_cmp_epi8_mask() { |
13961 | let a = _mm512_set1_epi8(0); |
13962 | let b = _mm512_set1_epi8(1); |
13963 | let m = _mm512_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b); |
13964 | assert_eq!( |
13965 | m, |
13966 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 |
13967 | ); |
13968 | } |
13969 | |
13970 | #[simd_test(enable = "avx512bw" )] |
13971 | unsafe fn test_mm512_mask_cmp_epi8_mask() { |
13972 | let a = _mm512_set1_epi8(0); |
13973 | let b = _mm512_set1_epi8(1); |
13974 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; |
13975 | let r = _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b); |
13976 | assert_eq!( |
13977 | r, |
13978 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 |
13979 | ); |
13980 | } |
13981 | |
13982 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13983 | unsafe fn test_mm256_cmp_epi8_mask() { |
13984 | let a = _mm256_set1_epi8(0); |
13985 | let b = _mm256_set1_epi8(1); |
13986 | let m = _mm256_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b); |
13987 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
13988 | } |
13989 | |
13990 | #[simd_test(enable = "avx512bw,avx512vl" )] |
13991 | unsafe fn test_mm256_mask_cmp_epi8_mask() { |
13992 | let a = _mm256_set1_epi8(0); |
13993 | let b = _mm256_set1_epi8(1); |
13994 | let mask = 0b01010101_01010101_01010101_01010101; |
13995 | let r = _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b); |
13996 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
13997 | } |
13998 | |
13999 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14000 | unsafe fn test_mm_cmp_epi8_mask() { |
14001 | let a = _mm_set1_epi8(0); |
14002 | let b = _mm_set1_epi8(1); |
14003 | let m = _mm_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b); |
14004 | assert_eq!(m, 0b11111111_11111111); |
14005 | } |
14006 | |
14007 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14008 | unsafe fn test_mm_mask_cmp_epi8_mask() { |
14009 | let a = _mm_set1_epi8(0); |
14010 | let b = _mm_set1_epi8(1); |
14011 | let mask = 0b01010101_01010101; |
14012 | let r = _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b); |
14013 | assert_eq!(r, 0b01010101_01010101); |
14014 | } |
14015 | |
14016 | #[simd_test(enable = "avx512bw" )] |
14017 | unsafe fn test_mm512_loadu_epi16() { |
14018 | #[rustfmt::skip] |
14019 | let a: [i16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; |
14020 | let r = _mm512_loadu_epi16(&a[0]); |
14021 | #[rustfmt::skip] |
14022 | let e = _mm512_set_epi16(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); |
14023 | assert_eq_m512i(r, e); |
14024 | } |
14025 | |
14026 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14027 | unsafe fn test_mm256_loadu_epi16() { |
14028 | let a: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; |
14029 | let r = _mm256_loadu_epi16(&a[0]); |
14030 | let e = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); |
14031 | assert_eq_m256i(r, e); |
14032 | } |
14033 | |
14034 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14035 | unsafe fn test_mm_loadu_epi16() { |
14036 | let a: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; |
14037 | let r = _mm_loadu_epi16(&a[0]); |
14038 | let e = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1); |
14039 | assert_eq_m128i(r, e); |
14040 | } |
14041 | |
14042 | #[simd_test(enable = "avx512bw" )] |
14043 | unsafe fn test_mm512_loadu_epi8() { |
14044 | #[rustfmt::skip] |
14045 | let a: [i8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, |
14046 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; |
14047 | let r = _mm512_loadu_epi8(&a[0]); |
14048 | #[rustfmt::skip] |
14049 | let e = _mm512_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, |
14050 | 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); |
14051 | assert_eq_m512i(r, e); |
14052 | } |
14053 | |
14054 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14055 | unsafe fn test_mm256_loadu_epi8() { |
14056 | #[rustfmt::skip] |
14057 | let a: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; |
14058 | let r = _mm256_loadu_epi8(&a[0]); |
14059 | #[rustfmt::skip] |
14060 | let e = _mm256_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); |
14061 | assert_eq_m256i(r, e); |
14062 | } |
14063 | |
14064 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14065 | unsafe fn test_mm_loadu_epi8() { |
14066 | let a: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; |
14067 | let r = _mm_loadu_epi8(&a[0]); |
14068 | let e = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); |
14069 | assert_eq_m128i(r, e); |
14070 | } |
14071 | |
14072 | #[simd_test(enable = "avx512bw" )] |
14073 | unsafe fn test_mm512_storeu_epi16() { |
14074 | let a = _mm512_set1_epi16(9); |
14075 | let mut r = _mm512_undefined_epi32(); |
14076 | _mm512_storeu_epi16(&mut r as *mut _ as *mut i16, a); |
14077 | assert_eq_m512i(r, a); |
14078 | } |
14079 | |
14080 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14081 | unsafe fn test_mm256_storeu_epi16() { |
14082 | let a = _mm256_set1_epi16(9); |
14083 | let mut r = _mm256_set1_epi32(0); |
14084 | _mm256_storeu_epi16(&mut r as *mut _ as *mut i16, a); |
14085 | assert_eq_m256i(r, a); |
14086 | } |
14087 | |
14088 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14089 | unsafe fn test_mm_storeu_epi16() { |
14090 | let a = _mm_set1_epi16(9); |
14091 | let mut r = _mm_set1_epi32(0); |
14092 | _mm_storeu_epi16(&mut r as *mut _ as *mut i16, a); |
14093 | assert_eq_m128i(r, a); |
14094 | } |
14095 | |
14096 | #[simd_test(enable = "avx512bw" )] |
14097 | unsafe fn test_mm512_storeu_epi8() { |
14098 | let a = _mm512_set1_epi8(9); |
14099 | let mut r = _mm512_undefined_epi32(); |
14100 | _mm512_storeu_epi8(&mut r as *mut _ as *mut i8, a); |
14101 | assert_eq_m512i(r, a); |
14102 | } |
14103 | |
14104 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14105 | unsafe fn test_mm256_storeu_epi8() { |
14106 | let a = _mm256_set1_epi8(9); |
14107 | let mut r = _mm256_set1_epi32(0); |
14108 | _mm256_storeu_epi8(&mut r as *mut _ as *mut i8, a); |
14109 | assert_eq_m256i(r, a); |
14110 | } |
14111 | |
14112 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14113 | unsafe fn test_mm_storeu_epi8() { |
14114 | let a = _mm_set1_epi8(9); |
14115 | let mut r = _mm_set1_epi32(0); |
14116 | _mm_storeu_epi8(&mut r as *mut _ as *mut i8, a); |
14117 | assert_eq_m128i(r, a); |
14118 | } |
14119 | |
14120 | #[simd_test(enable = "avx512f,avx512bw" )] |
14121 | unsafe fn test_mm512_mask_loadu_epi16() { |
14122 | let src = _mm512_set1_epi16(42); |
14123 | let a = &[ |
14124 | 1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, |
14125 | 24, 25, 26, 27, 28, 29, 30, 31, 32, |
14126 | ]; |
14127 | let p = a.as_ptr(); |
14128 | let m = 0b10101010_11001100_11101000_11001010; |
14129 | let r = _mm512_mask_loadu_epi16(src, m, black_box(p)); |
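// bit i of the mask selects element i from memory; cleared bits keep the corresponding element of src (42)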
14130 | let e = &[ |
14131 | 42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42, |
14132 | 23, 24, 42, 26, 42, 28, 42, 30, 42, 32, |
14133 | ]; |
14134 | let e = _mm512_loadu_epi16(e.as_ptr()); |
14135 | assert_eq_m512i(r, e); |
14136 | } |
14137 | |
14138 | #[simd_test(enable = "avx512f,avx512bw" )] |
14139 | unsafe fn test_mm512_maskz_loadu_epi16() { |
14140 | let a = &[ |
14141 | 1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, |
14142 | 24, 25, 26, 27, 28, 29, 30, 31, 32, |
14143 | ]; |
14144 | let p = a.as_ptr(); |
14145 | let m = 0b10101010_11001100_11101000_11001010; |
14146 | let r = _mm512_maskz_loadu_epi16(m, black_box(p)); |
14147 | let e = &[ |
14148 | 0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0, |
14149 | 26, 0, 28, 0, 30, 0, 32, |
14150 | ]; |
14151 | let e = _mm512_loadu_epi16(e.as_ptr()); |
14152 | assert_eq_m512i(r, e); |
14153 | } |
14154 | |
14155 | #[simd_test(enable = "avx512f,avx512bw" )] |
14156 | unsafe fn test_mm512_mask_storeu_epi16() { |
14157 | let mut r = [42_i16; 32]; |
14158 | let a = &[ |
14159 | 1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, |
14160 | 24, 25, 26, 27, 28, 29, 30, 31, 32, |
14161 | ]; |
14162 | let a = _mm512_loadu_epi16(a.as_ptr()); |
14163 | let m = 0b10101010_11001100_11101000_11001010; |
14164 | _mm512_mask_storeu_epi16(r.as_mut_ptr(), m, a); |
14165 | let e = &[ |
14166 | 42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42, |
14167 | 23, 24, 42, 26, 42, 28, 42, 30, 42, 32, |
14168 | ]; |
14169 | let e = _mm512_loadu_epi16(e.as_ptr()); |
14170 | assert_eq_m512i(_mm512_loadu_epi16(r.as_ptr()), e); |
14171 | } |
14172 | |
14173 | #[simd_test(enable = "avx512f,avx512bw" )] |
14174 | unsafe fn test_mm512_mask_loadu_epi8() { |
14175 | let src = _mm512_set1_epi8(42); |
14176 | let a = &[ |
14177 | 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, |
14178 | 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, |
14179 | 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, |
14180 | ]; |
14181 | let p = a.as_ptr(); |
14182 | let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010; |
14183 | let r = _mm512_mask_loadu_epi8(src, m, black_box(p)); |
14184 | let e = &[ |
14185 | 42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42, |
14186 | 23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44, |
14187 | 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42, |
14188 | ]; |
14189 | let e = _mm512_loadu_epi8(e.as_ptr()); |
14190 | assert_eq_m512i(r, e); |
14191 | } |
14192 | |
14193 | #[simd_test(enable = "avx512f,avx512bw" )] |
14194 | unsafe fn test_mm512_maskz_loadu_epi8() { |
14195 | let a = &[ |
14196 | 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, |
14197 | 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, |
14198 | 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, |
14199 | ]; |
14200 | let p = a.as_ptr(); |
14201 | let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010; |
14202 | let r = _mm512_maskz_loadu_epi8(m, black_box(p)); |
14203 | let e = &[ |
14204 | 0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0, |
14205 | 26, 0, 28, 0, 30, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 44, 45, 46, 47, 48, 49, |
14206 | 50, 51, 52, 53, 54, 55, 56, 0, 0, 0, 0, 0, 0, 0, 0, |
14207 | ]; |
14208 | let e = _mm512_loadu_epi8(e.as_ptr()); |
14209 | assert_eq_m512i(r, e); |
14210 | } |
14211 | |
14212 | #[simd_test(enable = "avx512f,avx512bw" )] |
14213 | unsafe fn test_mm512_mask_storeu_epi8() { |
14214 | let mut r = [42_i8; 64]; |
14215 | let a = &[ |
14216 | 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, |
14217 | 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, |
14218 | 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, |
14219 | ]; |
14220 | let a = _mm512_loadu_epi8(a.as_ptr()); |
14221 | let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010; |
14222 | _mm512_mask_storeu_epi8(r.as_mut_ptr(), m, a); |
14223 | let e = &[ |
14224 | 42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42, |
14225 | 23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44, |
14226 | 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42, |
14227 | ]; |
14228 | let e = _mm512_loadu_epi8(e.as_ptr()); |
14229 | assert_eq_m512i(_mm512_loadu_epi8(r.as_ptr()), e); |
14230 | } |
14231 | |
14232 | #[simd_test(enable = "avx512f,avx512bw,avx512vl" )] |
14233 | unsafe fn test_mm256_mask_loadu_epi16() { |
14234 | let src = _mm256_set1_epi16(42); |
14235 | let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; |
14236 | let p = a.as_ptr(); |
14237 | let m = 0b11101000_11001010; |
14238 | let r = _mm256_mask_loadu_epi16(src, m, black_box(p)); |
14239 | let e = &[ |
14240 | 42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, |
14241 | ]; |
14242 | let e = _mm256_loadu_epi16(e.as_ptr()); |
14243 | assert_eq_m256i(r, e); |
14244 | } |
14245 | |
14246 | #[simd_test(enable = "avx512f,avx512bw,avx512vl" )] |
14247 | unsafe fn test_mm256_maskz_loadu_epi16() { |
14248 | let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; |
14249 | let p = a.as_ptr(); |
14250 | let m = 0b11101000_11001010; |
14251 | let r = _mm256_maskz_loadu_epi16(m, black_box(p)); |
14252 | let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16]; |
14253 | let e = _mm256_loadu_epi16(e.as_ptr()); |
14254 | assert_eq_m256i(r, e); |
14255 | } |
14256 | |
14257 | #[simd_test(enable = "avx512f,avx512bw,avx512vl" )] |
14258 | unsafe fn test_mm256_mask_storeu_epi16() { |
14259 | let mut r = [42_i16; 16]; |
14260 | let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; |
14261 | let a = _mm256_loadu_epi16(a.as_ptr()); |
14262 | let m = 0b11101000_11001010; |
14263 | _mm256_mask_storeu_epi16(r.as_mut_ptr(), m, a); |
14264 | let e = &[ |
14265 | 42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, |
14266 | ]; |
14267 | let e = _mm256_loadu_epi16(e.as_ptr()); |
14268 | assert_eq_m256i(_mm256_loadu_epi16(r.as_ptr()), e); |
14269 | } |
14270 | |
14271 | #[simd_test(enable = "avx512f,avx512bw,avx512vl" )] |
14272 | unsafe fn test_mm256_mask_loadu_epi8() { |
14273 | let src = _mm256_set1_epi8(42); |
14274 | let a = &[ |
14275 | 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, |
14276 | 24, 25, 26, 27, 28, 29, 30, 31, 32, |
14277 | ]; |
14278 | let p = a.as_ptr(); |
14279 | let m = 0b10101010_11001100_11101000_11001010; |
14280 | let r = _mm256_mask_loadu_epi8(src, m, black_box(p)); |
14281 | let e = &[ |
14282 | 42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42, |
14283 | 23, 24, 42, 26, 42, 28, 42, 30, 42, 32, |
14284 | ]; |
14285 | let e = _mm256_loadu_epi8(e.as_ptr()); |
14286 | assert_eq_m256i(r, e); |
14287 | } |
14288 | |
14289 | #[simd_test(enable = "avx512f,avx512bw,avx512vl" )] |
14290 | unsafe fn test_mm256_maskz_loadu_epi8() { |
14291 | let a = &[ |
14292 | 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, |
14293 | 24, 25, 26, 27, 28, 29, 30, 31, 32, |
14294 | ]; |
14295 | let p = a.as_ptr(); |
14296 | let m = 0b10101010_11001100_11101000_11001010; |
14297 | let r = _mm256_maskz_loadu_epi8(m, black_box(p)); |
14298 | let e = &[ |
14299 | 0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0, |
14300 | 26, 0, 28, 0, 30, 0, 32, |
14301 | ]; |
14302 | let e = _mm256_loadu_epi8(e.as_ptr()); |
14303 | assert_eq_m256i(r, e); |
14304 | } |
14305 | |
14306 | #[simd_test(enable = "avx512f,avx512bw,avx512vl" )] |
14307 | unsafe fn test_mm256_mask_storeu_epi8() { |
14308 | let mut r = [42_i8; 32]; |
14309 | let a = &[ |
14310 | 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, |
14311 | 24, 25, 26, 27, 28, 29, 30, 31, 32, |
14312 | ]; |
14313 | let a = _mm256_loadu_epi8(a.as_ptr()); |
14314 | let m = 0b10101010_11001100_11101000_11001010; |
14315 | _mm256_mask_storeu_epi8(r.as_mut_ptr(), m, a); |
14316 | let e = &[ |
14317 | 42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42, |
14318 | 23, 24, 42, 26, 42, 28, 42, 30, 42, 32, |
14319 | ]; |
14320 | let e = _mm256_loadu_epi8(e.as_ptr()); |
14321 | assert_eq_m256i(_mm256_loadu_epi8(r.as_ptr()), e); |
14322 | } |
14323 | |
14324 | #[simd_test(enable = "avx512f,avx512bw,avx512vl" )] |
14325 | unsafe fn test_mm_mask_loadu_epi16() { |
14326 | let src = _mm_set1_epi16(42); |
14327 | let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8]; |
14328 | let p = a.as_ptr(); |
14329 | let m = 0b11001010; |
14330 | let r = _mm_mask_loadu_epi16(src, m, black_box(p)); |
14331 | let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8]; |
14332 | let e = _mm_loadu_epi16(e.as_ptr()); |
14333 | assert_eq_m128i(r, e); |
14334 | } |
14335 | |
14336 | #[simd_test(enable = "avx512f,avx512bw,avx512vl" )] |
14337 | unsafe fn test_mm_maskz_loadu_epi16() { |
14338 | let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8]; |
14339 | let p = a.as_ptr(); |
14340 | let m = 0b11001010; |
14341 | let r = _mm_maskz_loadu_epi16(m, black_box(p)); |
14342 | let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8]; |
14343 | let e = _mm_loadu_epi16(e.as_ptr()); |
14344 | assert_eq_m128i(r, e); |
14345 | } |
14346 | |
14347 | #[simd_test(enable = "avx512f,avx512bw,avx512vl" )] |
14348 | unsafe fn test_mm_mask_storeu_epi16() { |
14349 | let mut r = [42_i16; 8]; |
14350 | let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8]; |
14351 | let a = _mm_loadu_epi16(a.as_ptr()); |
14352 | let m = 0b11001010; |
14353 | _mm_mask_storeu_epi16(r.as_mut_ptr(), m, a); |
14354 | let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8]; |
14355 | let e = _mm_loadu_epi16(e.as_ptr()); |
14356 | assert_eq_m128i(_mm_loadu_epi16(r.as_ptr()), e); |
14357 | } |
14358 | |
14359 | #[simd_test(enable = "avx512f,avx512bw,avx512vl" )] |
14360 | unsafe fn test_mm_mask_loadu_epi8() { |
14361 | let src = _mm_set1_epi8(42); |
14362 | let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; |
14363 | let p = a.as_ptr(); |
14364 | let m = 0b11101000_11001010; |
14365 | let r = _mm_mask_loadu_epi8(src, m, black_box(p)); |
14366 | let e = &[ |
14367 | 42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, |
14368 | ]; |
14369 | let e = _mm_loadu_epi8(e.as_ptr()); |
14370 | assert_eq_m128i(r, e); |
14371 | } |
14372 | |
14373 | #[simd_test(enable = "avx512f,avx512bw,avx512vl" )] |
14374 | unsafe fn test_mm_maskz_loadu_epi8() { |
14375 | let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; |
14376 | let p = a.as_ptr(); |
14377 | let m = 0b11101000_11001010; |
14378 | let r = _mm_maskz_loadu_epi8(m, black_box(p)); |
14379 | let e = &[0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16]; |
14380 | let e = _mm_loadu_epi8(e.as_ptr()); |
14381 | assert_eq_m128i(r, e); |
14382 | } |
14383 | |
14384 | #[simd_test(enable = "avx512f,avx512bw,avx512vl" )] |
14385 | unsafe fn test_mm_mask_storeu_epi8() { |
14386 | let mut r = [42_i8; 16]; |
14387 | let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; |
14388 | let a = _mm_loadu_epi8(a.as_ptr()); |
14389 | let m = 0b11101000_11001010; |
14390 | _mm_mask_storeu_epi8(r.as_mut_ptr(), m, a); |
14391 | let e = &[ |
14392 | 42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, |
14393 | ]; |
14394 | let e = _mm_loadu_epi8(e.as_ptr()); |
14395 | assert_eq_m128i(_mm_loadu_epi8(r.as_ptr()), e); |
14396 | } |
14397 | |
14398 | #[simd_test(enable = "avx512bw" )] |
14399 | unsafe fn test_mm512_madd_epi16() { |
14400 | let a = _mm512_set1_epi16(1); |
14401 | let b = _mm512_set1_epi16(1); |
14402 | let r = _mm512_madd_epi16(a, b); |
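// madd multiplies 16-bit pairs and sums adjacent products into 32-bit lanes: 1*1 + 1*1 = 2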
14403 | let e = _mm512_set1_epi32(2); |
14404 | assert_eq_m512i(r, e); |
14405 | } |
14406 | |
14407 | #[simd_test(enable = "avx512bw" )] |
14408 | unsafe fn test_mm512_mask_madd_epi16() { |
14409 | let a = _mm512_set1_epi16(1); |
14410 | let b = _mm512_set1_epi16(1); |
14411 | let r = _mm512_mask_madd_epi16(a, 0, a, b); |
14412 | assert_eq_m512i(r, a); |
14413 | let r = _mm512_mask_madd_epi16(a, 0b00000000_00001111, a, b); |
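// unselected 32-bit lanes keep src (= a viewed as i32, i.e. 1 << 16 | 1); selected lanes hold 1*1 + 1*1 = 2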
14414 | let e = _mm512_set_epi32( |
14415 | 1 << 16 | 1, |
14416 | 1 << 16 | 1, |
14417 | 1 << 16 | 1, |
14418 | 1 << 16 | 1, |
14419 | 1 << 16 | 1, |
14420 | 1 << 16 | 1, |
14421 | 1 << 16 | 1, |
14422 | 1 << 16 | 1, |
14423 | 1 << 16 | 1, |
14424 | 1 << 16 | 1, |
14425 | 1 << 16 | 1, |
14426 | 1 << 16 | 1, |
14427 | 2, |
14428 | 2, |
14429 | 2, |
14430 | 2, |
14431 | ); |
14432 | assert_eq_m512i(r, e); |
14433 | } |
14434 | |
14435 | #[simd_test(enable = "avx512bw" )] |
14436 | unsafe fn test_mm512_maskz_madd_epi16() { |
14437 | let a = _mm512_set1_epi16(1); |
14438 | let b = _mm512_set1_epi16(1); |
14439 | let r = _mm512_maskz_madd_epi16(0, a, b); |
14440 | assert_eq_m512i(r, _mm512_setzero_si512()); |
14441 | let r = _mm512_maskz_madd_epi16(0b00000000_00001111, a, b); |
14442 | let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2); |
14443 | assert_eq_m512i(r, e); |
14444 | } |
14445 | |
14446 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14447 | unsafe fn test_mm256_mask_madd_epi16() { |
14448 | let a = _mm256_set1_epi16(1); |
14449 | let b = _mm256_set1_epi16(1); |
14450 | let r = _mm256_mask_madd_epi16(a, 0, a, b); |
14451 | assert_eq_m256i(r, a); |
14452 | let r = _mm256_mask_madd_epi16(a, 0b00001111, a, b); |
14453 | let e = _mm256_set_epi32( |
14454 | 1 << 16 | 1, |
14455 | 1 << 16 | 1, |
14456 | 1 << 16 | 1, |
14457 | 1 << 16 | 1, |
14458 | 2, |
14459 | 2, |
14460 | 2, |
14461 | 2, |
14462 | ); |
14463 | assert_eq_m256i(r, e); |
14464 | } |
14465 | |
14466 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14467 | unsafe fn test_mm256_maskz_madd_epi16() { |
14468 | let a = _mm256_set1_epi16(1); |
14469 | let b = _mm256_set1_epi16(1); |
14470 | let r = _mm256_maskz_madd_epi16(0, a, b); |
14471 | assert_eq_m256i(r, _mm256_setzero_si256()); |
14472 | let r = _mm256_maskz_madd_epi16(0b00001111, a, b); |
14473 | let e = _mm256_set_epi32(0, 0, 0, 0, 2, 2, 2, 2); |
14474 | assert_eq_m256i(r, e); |
14475 | } |
14476 | |
14477 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14478 | unsafe fn test_mm_mask_madd_epi16() { |
14479 | let a = _mm_set1_epi16(1); |
14480 | let b = _mm_set1_epi16(1); |
14481 | let r = _mm_mask_madd_epi16(a, 0, a, b); |
14482 | assert_eq_m128i(r, a); |
14483 | let r = _mm_mask_madd_epi16(a, 0b00001111, a, b); |
14484 | let e = _mm_set_epi32(2, 2, 2, 2); |
14485 | assert_eq_m128i(r, e); |
14486 | } |
14487 | |
14488 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14489 | unsafe fn test_mm_maskz_madd_epi16() { |
14490 | let a = _mm_set1_epi16(1); |
14491 | let b = _mm_set1_epi16(1); |
14492 | let r = _mm_maskz_madd_epi16(0, a, b); |
14493 | assert_eq_m128i(r, _mm_setzero_si128()); |
14494 | let r = _mm_maskz_madd_epi16(0b00001111, a, b); |
14495 | let e = _mm_set_epi32(2, 2, 2, 2); |
14496 | assert_eq_m128i(r, e); |
14497 | } |
14498 | |
14499 | #[simd_test(enable = "avx512bw" )] |
14500 | unsafe fn test_mm512_maddubs_epi16() { |
14501 | let a = _mm512_set1_epi8(1); |
14502 | let b = _mm512_set1_epi8(1); |
14503 | let r = _mm512_maddubs_epi16(a, b); |
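// maddubs multiplies unsigned bytes of a by signed bytes of b and horizontally adds adjacent products: 1*1 + 1*1 = 2 per 16-bit lane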
14504 | let e = _mm512_set1_epi16(2); |
14505 | assert_eq_m512i(r, e); |
14506 | } |
14507 | |
14508 | #[simd_test(enable = "avx512bw" )] |
14509 | unsafe fn test_mm512_mask_maddubs_epi16() { |
14510 | let a = _mm512_set1_epi8(1); |
14511 | let b = _mm512_set1_epi8(1); |
14512 | let src = _mm512_set1_epi16(1); |
14513 | let r = _mm512_mask_maddubs_epi16(src, 0, a, b); |
14514 | assert_eq_m512i(r, src); |
let r = _mm512_mask_maddubs_epi16(src, 0b00000000_00000000_00000000_00000001, a, b);
#[rustfmt::skip]
let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2);
14519 | assert_eq_m512i(r, e); |
14520 | } |
14521 | |
14522 | #[simd_test(enable = "avx512bw" )] |
14523 | unsafe fn test_mm512_maskz_maddubs_epi16() { |
14524 | let a = _mm512_set1_epi8(1); |
14525 | let b = _mm512_set1_epi8(1); |
14526 | let r = _mm512_maskz_maddubs_epi16(0, a, b); |
14527 | assert_eq_m512i(r, _mm512_setzero_si512()); |
14528 | let r = _mm512_maskz_maddubs_epi16(0b00000000_11111111_00000000_11111111, a, b); |
14529 | #[rustfmt::skip] |
14530 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, |
14531 | 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2); |
14532 | assert_eq_m512i(r, e); |
14533 | } |
14534 | |
14535 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14536 | unsafe fn test_mm256_mask_maddubs_epi16() { |
14537 | let a = _mm256_set1_epi8(1); |
14538 | let b = _mm256_set1_epi8(1); |
14539 | let src = _mm256_set1_epi16(1); |
14540 | let r = _mm256_mask_maddubs_epi16(src, 0, a, b); |
14541 | assert_eq_m256i(r, src); |
let r = _mm256_mask_maddubs_epi16(src, 0b00000000_00000001, a, b);
let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2);
14544 | assert_eq_m256i(r, e); |
14545 | } |
14546 | |
14547 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14548 | unsafe fn test_mm256_maskz_maddubs_epi16() { |
14549 | let a = _mm256_set1_epi8(1); |
14550 | let b = _mm256_set1_epi8(1); |
14551 | let r = _mm256_maskz_maddubs_epi16(0, a, b); |
14552 | assert_eq_m256i(r, _mm256_setzero_si256()); |
14553 | let r = _mm256_maskz_maddubs_epi16(0b00000000_11111111, a, b); |
14554 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2); |
14555 | assert_eq_m256i(r, e); |
14556 | } |
14557 | |
14558 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14559 | unsafe fn test_mm_mask_maddubs_epi16() { |
14560 | let a = _mm_set1_epi8(1); |
14561 | let b = _mm_set1_epi8(1); |
14562 | let src = _mm_set1_epi16(1); |
14563 | let r = _mm_mask_maddubs_epi16(src, 0, a, b); |
14564 | assert_eq_m128i(r, src); |
let r = _mm_mask_maddubs_epi16(src, 0b00000001, a, b);
let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 2);
14567 | assert_eq_m128i(r, e); |
14568 | } |
14569 | |
14570 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14571 | unsafe fn test_mm_maskz_maddubs_epi16() { |
14572 | let a = _mm_set1_epi8(1); |
14573 | let b = _mm_set1_epi8(1); |
14574 | let r = _mm_maskz_maddubs_epi16(0, a, b); |
14575 | assert_eq_m128i(r, _mm_setzero_si128()); |
14576 | let r = _mm_maskz_maddubs_epi16(0b00001111, a, b); |
14577 | let e = _mm_set_epi16(0, 0, 0, 0, 2, 2, 2, 2); |
14578 | assert_eq_m128i(r, e); |
14579 | } |
14580 | |
14581 | #[simd_test(enable = "avx512bw" )] |
14582 | unsafe fn test_mm512_packs_epi32() { |
14583 | let a = _mm512_set1_epi32(i32::MAX); |
14584 | let b = _mm512_set1_epi32(1); |
14585 | let r = _mm512_packs_epi32(a, b); |
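// packs_epi32 converts i32 to i16 with signed saturation (i32::MAX -> i16::MAX), interleaving a and b within each 128-bit lane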
14586 | #[rustfmt::skip] |
14587 | let e = _mm512_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, |
14588 | 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX); |
14589 | assert_eq_m512i(r, e); |
14590 | } |
14591 | |
14592 | #[simd_test(enable = "avx512bw" )] |
14593 | unsafe fn test_mm512_mask_packs_epi32() { |
14594 | let a = _mm512_set1_epi32(i32::MAX); |
14595 | let b = _mm512_set1_epi32(1 << 16 | 1); |
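// 1 << 16 | 1 reads as two i16 lanes of value 1, so lanes copied from src (= b) in the masked call compare as 1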
14596 | let r = _mm512_mask_packs_epi32(a, 0, a, b); |
14597 | assert_eq_m512i(r, a); |
14598 | let r = _mm512_mask_packs_epi32(b, 0b00000000_00000000_00000000_00001111, a, b); |
14599 | #[rustfmt::skip] |
14600 | let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
14601 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX); |
14602 | assert_eq_m512i(r, e); |
14603 | } |
14604 | |
14605 | #[simd_test(enable = "avx512bw" )] |
14606 | unsafe fn test_mm512_maskz_packs_epi32() { |
14607 | let a = _mm512_set1_epi32(i32::MAX); |
14608 | let b = _mm512_set1_epi32(1); |
14609 | let r = _mm512_maskz_packs_epi32(0, a, b); |
14610 | assert_eq_m512i(r, _mm512_setzero_si512()); |
14611 | let r = _mm512_maskz_packs_epi32(0b00000000_00000000_00000000_00001111, a, b); |
14612 | #[rustfmt::skip] |
14613 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
14614 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX); |
14615 | assert_eq_m512i(r, e); |
14616 | } |
14617 | |
14618 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14619 | unsafe fn test_mm256_mask_packs_epi32() { |
14620 | let a = _mm256_set1_epi32(i32::MAX); |
14621 | let b = _mm256_set1_epi32(1 << 16 | 1); |
14622 | let r = _mm256_mask_packs_epi32(a, 0, a, b); |
14623 | assert_eq_m256i(r, a); |
14624 | let r = _mm256_mask_packs_epi32(b, 0b00000000_00001111, a, b); |
14625 | #[rustfmt::skip] |
14626 | let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX); |
14627 | assert_eq_m256i(r, e); |
14628 | } |
14629 | |
14630 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14631 | unsafe fn test_mm256_maskz_packs_epi32() { |
14632 | let a = _mm256_set1_epi32(i32::MAX); |
14633 | let b = _mm256_set1_epi32(1); |
14634 | let r = _mm256_maskz_packs_epi32(0, a, b); |
14635 | assert_eq_m256i(r, _mm256_setzero_si256()); |
14636 | let r = _mm256_maskz_packs_epi32(0b00000000_00001111, a, b); |
14637 | #[rustfmt::skip] |
14638 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX); |
14639 | assert_eq_m256i(r, e); |
14640 | } |
14641 | |
14642 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14643 | unsafe fn test_mm_mask_packs_epi32() { |
14644 | let a = _mm_set1_epi32(i32::MAX); |
14645 | let b = _mm_set1_epi32(1 << 16 | 1); |
14646 | let r = _mm_mask_packs_epi32(a, 0, a, b); |
14647 | assert_eq_m128i(r, a); |
14648 | let r = _mm_mask_packs_epi32(b, 0b00001111, a, b); |
14649 | let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX); |
14650 | assert_eq_m128i(r, e); |
14651 | } |
14652 | |
14653 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14654 | unsafe fn test_mm_maskz_packs_epi32() { |
14655 | let a = _mm_set1_epi32(i32::MAX); |
14656 | let b = _mm_set1_epi32(1); |
14657 | let r = _mm_maskz_packs_epi32(0, a, b); |
14658 | assert_eq_m128i(r, _mm_setzero_si128()); |
14659 | let r = _mm_maskz_packs_epi32(0b00001111, a, b); |
14660 | let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX); |
14661 | assert_eq_m128i(r, e); |
14662 | } |
14663 | |
14664 | #[simd_test(enable = "avx512bw" )] |
14665 | unsafe fn test_mm512_packs_epi16() { |
14666 | let a = _mm512_set1_epi16(i16::MAX); |
14667 | let b = _mm512_set1_epi16(1); |
14668 | let r = _mm512_packs_epi16(a, b); |
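// packs_epi16 converts i16 to i8 with signed saturation (i16::MAX -> i8::MAX), interleaving a and b within each 128-bit lane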
14669 | #[rustfmt::skip] |
14670 | let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, |
14671 | 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, |
14672 | 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, |
14673 | 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX); |
14674 | assert_eq_m512i(r, e); |
14675 | } |
14676 | |
14677 | #[simd_test(enable = "avx512bw" )] |
14678 | unsafe fn test_mm512_mask_packs_epi16() { |
14679 | let a = _mm512_set1_epi16(i16::MAX); |
14680 | let b = _mm512_set1_epi16(1 << 8 | 1); |
14681 | let r = _mm512_mask_packs_epi16(a, 0, a, b); |
14682 | assert_eq_m512i(r, a); |
14683 | let r = _mm512_mask_packs_epi16( |
14684 | b, |
14685 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, |
14686 | a, |
14687 | b, |
14688 | ); |
14689 | #[rustfmt::skip] |
14690 | let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
14691 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
14692 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
14693 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX); |
14694 | assert_eq_m512i(r, e); |
14695 | } |
14696 | |
14697 | #[simd_test(enable = "avx512bw" )] |
14698 | unsafe fn test_mm512_maskz_packs_epi16() { |
14699 | let a = _mm512_set1_epi16(i16::MAX); |
14700 | let b = _mm512_set1_epi16(1); |
14701 | let r = _mm512_maskz_packs_epi16(0, a, b); |
14702 | assert_eq_m512i(r, _mm512_setzero_si512()); |
14703 | let r = _mm512_maskz_packs_epi16( |
14704 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, |
14705 | a, |
14706 | b, |
14707 | ); |
14708 | #[rustfmt::skip] |
14709 | let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
14710 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
14711 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
14712 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX); |
14713 | assert_eq_m512i(r, e); |
14714 | } |
14715 | |
14716 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14717 | unsafe fn test_mm256_mask_packs_epi16() { |
14718 | let a = _mm256_set1_epi16(i16::MAX); |
14719 | let b = _mm256_set1_epi16(1 << 8 | 1); |
14720 | let r = _mm256_mask_packs_epi16(a, 0, a, b); |
14721 | assert_eq_m256i(r, a); |
14722 | let r = _mm256_mask_packs_epi16(b, 0b00000000_00000000_00000000_00001111, a, b); |
14723 | #[rustfmt::skip] |
14724 | let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
14725 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX); |
14726 | assert_eq_m256i(r, e); |
14727 | } |
14728 | |
14729 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14730 | unsafe fn test_mm256_maskz_packs_epi16() { |
14731 | let a = _mm256_set1_epi16(i16::MAX); |
14732 | let b = _mm256_set1_epi16(1); |
14733 | let r = _mm256_maskz_packs_epi16(0, a, b); |
14734 | assert_eq_m256i(r, _mm256_setzero_si256()); |
14735 | let r = _mm256_maskz_packs_epi16(0b00000000_00000000_00000000_00001111, a, b); |
14736 | #[rustfmt::skip] |
14737 | let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
14738 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX); |
14739 | assert_eq_m256i(r, e); |
14740 | } |
14741 | |
14742 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14743 | unsafe fn test_mm_mask_packs_epi16() { |
14744 | let a = _mm_set1_epi16(i16::MAX); |
14745 | let b = _mm_set1_epi16(1 << 8 | 1); |
14746 | let r = _mm_mask_packs_epi16(a, 0, a, b); |
14747 | assert_eq_m128i(r, a); |
14748 | let r = _mm_mask_packs_epi16(b, 0b00000000_00001111, a, b); |
14749 | #[rustfmt::skip] |
14750 | let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX); |
14751 | assert_eq_m128i(r, e); |
14752 | } |
14753 | |
14754 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14755 | unsafe fn test_mm_maskz_packs_epi16() { |
14756 | let a = _mm_set1_epi16(i16::MAX); |
14757 | let b = _mm_set1_epi16(1); |
14758 | let r = _mm_maskz_packs_epi16(0, a, b); |
14759 | assert_eq_m128i(r, _mm_setzero_si128()); |
14760 | let r = _mm_maskz_packs_epi16(0b00000000_00001111, a, b); |
14761 | #[rustfmt::skip] |
14762 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX); |
14763 | assert_eq_m128i(r, e); |
14764 | } |
14765 | |
14766 | #[simd_test(enable = "avx512bw" )] |
14767 | unsafe fn test_mm512_packus_epi32() { |
14768 | let a = _mm512_set1_epi32(-1); |
14769 | let b = _mm512_set1_epi32(1); |
14770 | let r = _mm512_packus_epi32(a, b); |
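// packus_epi32 converts i32 to u16 with unsigned saturation, so -1 clamps to 0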
14771 | #[rustfmt::skip] |
14772 | let e = _mm512_set_epi16(1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, |
14773 | 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0); |
14774 | assert_eq_m512i(r, e); |
14775 | } |
14776 | |
14777 | #[simd_test(enable = "avx512bw" )] |
14778 | unsafe fn test_mm512_mask_packus_epi32() { |
14779 | let a = _mm512_set1_epi32(-1); |
14780 | let b = _mm512_set1_epi32(1 << 16 | 1); |
14781 | let r = _mm512_mask_packus_epi32(a, 0, a, b); |
14782 | assert_eq_m512i(r, a); |
14783 | let r = _mm512_mask_packus_epi32(b, 0b00000000_00000000_00000000_00001111, a, b); |
14784 | #[rustfmt::skip] |
14785 | let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
14786 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); |
14787 | assert_eq_m512i(r, e); |
14788 | } |
14789 | |
14790 | #[simd_test(enable = "avx512bw" )] |
14791 | unsafe fn test_mm512_maskz_packus_epi32() { |
14792 | let a = _mm512_set1_epi32(-1); |
14793 | let b = _mm512_set1_epi32(1); |
14794 | let r = _mm512_maskz_packus_epi32(0, a, b); |
14795 | assert_eq_m512i(r, _mm512_setzero_si512()); |
14796 | let r = _mm512_maskz_packus_epi32(0b00000000_00000000_00000000_00001111, a, b); |
14797 | #[rustfmt::skip] |
14798 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
14799 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
14800 | assert_eq_m512i(r, e); |
14801 | } |
14802 | |
14803 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14804 | unsafe fn test_mm256_mask_packus_epi32() { |
14805 | let a = _mm256_set1_epi32(-1); |
14806 | let b = _mm256_set1_epi32(1 << 16 | 1); |
14807 | let r = _mm256_mask_packus_epi32(a, 0, a, b); |
14808 | assert_eq_m256i(r, a); |
14809 | let r = _mm256_mask_packus_epi32(b, 0b00000000_00001111, a, b); |
14810 | let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); |
14811 | assert_eq_m256i(r, e); |
14812 | } |
14813 | |
14814 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14815 | unsafe fn test_mm256_maskz_packus_epi32() { |
14816 | let a = _mm256_set1_epi32(-1); |
14817 | let b = _mm256_set1_epi32(1); |
14818 | let r = _mm256_maskz_packus_epi32(0, a, b); |
14819 | assert_eq_m256i(r, _mm256_setzero_si256()); |
14820 | let r = _mm256_maskz_packus_epi32(0b00000000_00001111, a, b); |
14821 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
14822 | assert_eq_m256i(r, e); |
14823 | } |
14824 | |
14825 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14826 | unsafe fn test_mm_mask_packus_epi32() { |
14827 | let a = _mm_set1_epi32(-1); |
14828 | let b = _mm_set1_epi32(1 << 16 | 1); |
14829 | let r = _mm_mask_packus_epi32(a, 0, a, b); |
14830 | assert_eq_m128i(r, a); |
14831 | let r = _mm_mask_packus_epi32(b, 0b00001111, a, b); |
14832 | let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0); |
14833 | assert_eq_m128i(r, e); |
14834 | } |
14835 | |
14836 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14837 | unsafe fn test_mm_maskz_packus_epi32() { |
14838 | let a = _mm_set1_epi32(-1); |
14839 | let b = _mm_set1_epi32(1); |
14840 | let r = _mm_maskz_packus_epi32(0, a, b); |
14841 | assert_eq_m128i(r, _mm_setzero_si128()); |
14842 | let r = _mm_maskz_packus_epi32(0b00001111, a, b); |
14843 | let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0); |
14844 | assert_eq_m128i(r, e); |
14845 | } |
14846 | |
14847 | #[simd_test(enable = "avx512bw" )] |
14848 | unsafe fn test_mm512_packus_epi16() { |
14849 | let a = _mm512_set1_epi16(-1); |
14850 | let b = _mm512_set1_epi16(1); |
14851 | let r = _mm512_packus_epi16(a, b); |
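// packus_epi16 converts i16 to u8 with unsigned saturation, so -1 clamps to 0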
14852 | #[rustfmt::skip] |
14853 | let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, |
14854 | 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, |
14855 | 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, |
14856 | 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0); |
14857 | assert_eq_m512i(r, e); |
14858 | } |
14859 | |
14860 | #[simd_test(enable = "avx512bw" )] |
14861 | unsafe fn test_mm512_mask_packus_epi16() { |
14862 | let a = _mm512_set1_epi16(-1); |
14863 | let b = _mm512_set1_epi16(1 << 8 | 1); |
14864 | let r = _mm512_mask_packus_epi16(a, 0, a, b); |
14865 | assert_eq_m512i(r, a); |
14866 | let r = _mm512_mask_packus_epi16( |
14867 | b, |
14868 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, |
14869 | a, |
14870 | b, |
14871 | ); |
14872 | #[rustfmt::skip] |
14873 | let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
14874 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
14875 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
14876 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); |
14877 | assert_eq_m512i(r, e); |
14878 | } |
14879 | |
14880 | #[simd_test(enable = "avx512bw" )] |
14881 | unsafe fn test_mm512_maskz_packus_epi16() { |
14882 | let a = _mm512_set1_epi16(-1); |
14883 | let b = _mm512_set1_epi16(1); |
14884 | let r = _mm512_maskz_packus_epi16(0, a, b); |
14885 | assert_eq_m512i(r, _mm512_setzero_si512()); |
14886 | let r = _mm512_maskz_packus_epi16( |
14887 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, |
14888 | a, |
14889 | b, |
14890 | ); |
14891 | #[rustfmt::skip] |
14892 | let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
14893 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
14894 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
14895 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
14896 | assert_eq_m512i(r, e); |
14897 | } |
14898 | |
14899 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14900 | unsafe fn test_mm256_mask_packus_epi16() { |
14901 | let a = _mm256_set1_epi16(-1); |
14902 | let b = _mm256_set1_epi16(1 << 8 | 1); |
14903 | let r = _mm256_mask_packus_epi16(a, 0, a, b); |
14904 | assert_eq_m256i(r, a); |
14905 | let r = _mm256_mask_packus_epi16(b, 0b00000000_00000000_00000000_00001111, a, b); |
14906 | #[rustfmt::skip] |
14907 | let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
14908 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); |
14909 | assert_eq_m256i(r, e); |
14910 | } |
14911 | |
14912 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14913 | unsafe fn test_mm256_maskz_packus_epi16() { |
14914 | let a = _mm256_set1_epi16(-1); |
14915 | let b = _mm256_set1_epi16(1); |
14916 | let r = _mm256_maskz_packus_epi16(0, a, b); |
14917 | assert_eq_m256i(r, _mm256_setzero_si256()); |
14918 | let r = _mm256_maskz_packus_epi16(0b00000000_00000000_00000000_00001111, a, b); |
14919 | #[rustfmt::skip] |
14920 | let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
14921 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
14922 | assert_eq_m256i(r, e); |
14923 | } |
14924 | |
14925 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14926 | unsafe fn test_mm_mask_packus_epi16() { |
14927 | let a = _mm_set1_epi16(-1); |
14928 | let b = _mm_set1_epi16(1 << 8 | 1); |
14929 | let r = _mm_mask_packus_epi16(a, 0, a, b); |
14930 | assert_eq_m128i(r, a); |
14931 | let r = _mm_mask_packus_epi16(b, 0b00000000_00001111, a, b); |
14932 | let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); |
14933 | assert_eq_m128i(r, e); |
14934 | } |
14935 | |
14936 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14937 | unsafe fn test_mm_maskz_packus_epi16() { |
14938 | let a = _mm_set1_epi16(-1); |
14939 | let b = _mm_set1_epi16(1); |
14940 | let r = _mm_maskz_packus_epi16(0, a, b); |
14941 | assert_eq_m128i(r, _mm_setzero_si128()); |
14942 | let r = _mm_maskz_packus_epi16(0b00000000_00001111, a, b); |
14943 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
14944 | assert_eq_m128i(r, e); |
14945 | } |
14946 | |
14947 | #[simd_test(enable = "avx512bw" )] |
14948 | unsafe fn test_mm512_avg_epu16() { |
14949 | let a = _mm512_set1_epi16(1); |
14950 | let b = _mm512_set1_epi16(1); |
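// VPAVGW computes (a + b + 1) >> 1 on unsigned 16-bit lanes, so avg(1, 1) rounds up to 1.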
14951 | let r = _mm512_avg_epu16(a, b); |
14952 | let e = _mm512_set1_epi16(1); |
14953 | assert_eq_m512i(r, e); |
14954 | } |
14955 | |
14956 | #[simd_test(enable = "avx512bw" )] |
14957 | unsafe fn test_mm512_mask_avg_epu16() { |
14958 | let a = _mm512_set1_epi16(1); |
14959 | let b = _mm512_set1_epi16(1); |
14960 | let r = _mm512_mask_avg_epu16(a, 0, a, b); |
14961 | assert_eq_m512i(r, a); |
14962 | let r = _mm512_mask_avg_epu16(a, 0b00000000_00000000_00000000_00001111, a, b); |
14963 | #[rustfmt::skip] |
14964 | let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
14965 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); |
14966 | assert_eq_m512i(r, e); |
14967 | } |
14968 | |
14969 | #[simd_test(enable = "avx512bw" )] |
14970 | unsafe fn test_mm512_maskz_avg_epu16() { |
14971 | let a = _mm512_set1_epi16(1); |
14972 | let b = _mm512_set1_epi16(1); |
14973 | let r = _mm512_maskz_avg_epu16(0, a, b); |
14974 | assert_eq_m512i(r, _mm512_setzero_si512()); |
14975 | let r = _mm512_maskz_avg_epu16(0b00000000_00000000_00000000_00001111, a, b); |
14976 | #[rustfmt::skip] |
14977 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
14978 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1); |
14979 | assert_eq_m512i(r, e); |
14980 | } |
14981 | |
14982 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14983 | unsafe fn test_mm256_mask_avg_epu16() { |
14984 | let a = _mm256_set1_epi16(1); |
14985 | let b = _mm256_set1_epi16(1); |
14986 | let r = _mm256_mask_avg_epu16(a, 0, a, b); |
14987 | assert_eq_m256i(r, a); |
14988 | let r = _mm256_mask_avg_epu16(a, 0b00000000_00001111, a, b); |
14989 | let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); |
14990 | assert_eq_m256i(r, e); |
14991 | } |
14992 | |
14993 | #[simd_test(enable = "avx512bw,avx512vl" )] |
14994 | unsafe fn test_mm256_maskz_avg_epu16() { |
14995 | let a = _mm256_set1_epi16(1); |
14996 | let b = _mm256_set1_epi16(1); |
14997 | let r = _mm256_maskz_avg_epu16(0, a, b); |
14998 | assert_eq_m256i(r, _mm256_setzero_si256()); |
14999 | let r = _mm256_maskz_avg_epu16(0b00000000_00001111, a, b); |
15000 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1); |
15001 | assert_eq_m256i(r, e); |
15002 | } |
15003 | |
15004 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15005 | unsafe fn test_mm_mask_avg_epu16() { |
15006 | let a = _mm_set1_epi16(1); |
15007 | let b = _mm_set1_epi16(1); |
15008 | let r = _mm_mask_avg_epu16(a, 0, a, b); |
15009 | assert_eq_m128i(r, a); |
15010 | let r = _mm_mask_avg_epu16(a, 0b00001111, a, b); |
15011 | let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1); |
15012 | assert_eq_m128i(r, e); |
15013 | } |
15014 | |
15015 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15016 | unsafe fn test_mm_maskz_avg_epu16() { |
15017 | let a = _mm_set1_epi16(1); |
15018 | let b = _mm_set1_epi16(1); |
15019 | let r = _mm_maskz_avg_epu16(0, a, b); |
15020 | assert_eq_m128i(r, _mm_setzero_si128()); |
15021 | let r = _mm_maskz_avg_epu16(0b00001111, a, b); |
15022 | let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1); |
15023 | assert_eq_m128i(r, e); |
15024 | } |
15025 | |
15026 | #[simd_test(enable = "avx512bw" )] |
15027 | unsafe fn test_mm512_avg_epu8() { |
15028 | let a = _mm512_set1_epi8(1); |
15029 | let b = _mm512_set1_epi8(1); |
15030 | let r = _mm512_avg_epu8(a, b); |
15031 | let e = _mm512_set1_epi8(1); |
15032 | assert_eq_m512i(r, e); |
15033 | } |
15034 | |
15035 | #[simd_test(enable = "avx512bw" )] |
15036 | unsafe fn test_mm512_mask_avg_epu8() { |
15037 | let a = _mm512_set1_epi8(1); |
15038 | let b = _mm512_set1_epi8(1); |
15039 | let r = _mm512_mask_avg_epu8(a, 0, a, b); |
15040 | assert_eq_m512i(r, a); |
15041 | let r = _mm512_mask_avg_epu8( |
15042 | a, |
15043 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, |
15044 | a, |
15045 | b, |
15046 | ); |
15047 | #[rustfmt::skip] |
15048 | let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
15049 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
15050 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
15051 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); |
15052 | assert_eq_m512i(r, e); |
15053 | } |
15054 | |
15055 | #[simd_test(enable = "avx512bw" )] |
15056 | unsafe fn test_mm512_maskz_avg_epu8() { |
15057 | let a = _mm512_set1_epi8(1); |
15058 | let b = _mm512_set1_epi8(1); |
15059 | let r = _mm512_maskz_avg_epu8(0, a, b); |
15060 | assert_eq_m512i(r, _mm512_setzero_si512()); |
15061 | let r = _mm512_maskz_avg_epu8( |
15062 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, |
15063 | a, |
15064 | b, |
15065 | ); |
15066 | #[rustfmt::skip] |
15067 | let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
15068 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
15069 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
15070 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1); |
15071 | assert_eq_m512i(r, e); |
15072 | } |
15073 | |
15074 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15075 | unsafe fn test_mm256_mask_avg_epu8() { |
15076 | let a = _mm256_set1_epi8(1); |
15077 | let b = _mm256_set1_epi8(1); |
15078 | let r = _mm256_mask_avg_epu8(a, 0, a, b); |
15079 | assert_eq_m256i(r, a); |
15080 | let r = _mm256_mask_avg_epu8(a, 0b00000000_00000000_00000000_00001111, a, b); |
15081 | #[rustfmt::skip] |
15082 | let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
15083 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); |
15084 | assert_eq_m256i(r, e); |
15085 | } |
15086 | |
15087 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15088 | unsafe fn test_mm256_maskz_avg_epu8() { |
15089 | let a = _mm256_set1_epi8(1); |
15090 | let b = _mm256_set1_epi8(1); |
15091 | let r = _mm256_maskz_avg_epu8(0, a, b); |
15092 | assert_eq_m256i(r, _mm256_setzero_si256()); |
15093 | let r = _mm256_maskz_avg_epu8(0b00000000_00000000_00000000_00001111, a, b); |
15094 | #[rustfmt::skip] |
15095 | let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
15096 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1); |
15097 | assert_eq_m256i(r, e); |
15098 | } |
15099 | |
15100 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15101 | unsafe fn test_mm_mask_avg_epu8() { |
15102 | let a = _mm_set1_epi8(1); |
15103 | let b = _mm_set1_epi8(1); |
15104 | let r = _mm_mask_avg_epu8(a, 0, a, b); |
15105 | assert_eq_m128i(r, a); |
15106 | let r = _mm_mask_avg_epu8(a, 0b00000000_00001111, a, b); |
15107 | let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); |
15108 | assert_eq_m128i(r, e); |
15109 | } |
15110 | |
15111 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15112 | unsafe fn test_mm_maskz_avg_epu8() { |
15113 | let a = _mm_set1_epi8(1); |
15114 | let b = _mm_set1_epi8(1); |
15115 | let r = _mm_maskz_avg_epu8(0, a, b); |
15116 | assert_eq_m128i(r, _mm_setzero_si128()); |
15117 | let r = _mm_maskz_avg_epu8(0b00000000_00001111, a, b); |
15118 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1); |
15119 | assert_eq_m128i(r, e); |
15120 | } |
15121 | |
15122 | #[simd_test(enable = "avx512bw" )] |
15123 | unsafe fn test_mm512_sll_epi16() { |
15124 | let a = _mm512_set1_epi16(1 << 15); |
15125 | let count = _mm_set1_epi16(2); |
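// The shift amount is the low 64 bits of `count`; _mm_set1_epi16(2) puts 0x0002000200020002 there,
// which exceeds 15, so every lane is zeroed (shifting 1 << 15 left by 2 would overflow to 0 anyway).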
15126 | let r = _mm512_sll_epi16(a, count); |
15127 | let e = _mm512_set1_epi16(0); |
15128 | assert_eq_m512i(r, e); |
15129 | } |
15130 | |
15131 | #[simd_test(enable = "avx512bw" )] |
15132 | unsafe fn test_mm512_mask_sll_epi16() { |
15133 | let a = _mm512_set1_epi16(1 << 15); |
15134 | let count = _mm_set1_epi16(2); |
15135 | let r = _mm512_mask_sll_epi16(a, 0, a, count); |
15136 | assert_eq_m512i(r, a); |
15137 | let r = _mm512_mask_sll_epi16(a, 0b11111111_11111111_11111111_11111111, a, count); |
15138 | let e = _mm512_set1_epi16(0); |
15139 | assert_eq_m512i(r, e); |
15140 | } |
15141 | |
15142 | #[simd_test(enable = "avx512bw" )] |
15143 | unsafe fn test_mm512_maskz_sll_epi16() { |
15144 | let a = _mm512_set1_epi16(1 << 15); |
15145 | let count = _mm_set1_epi16(2); |
15146 | let r = _mm512_maskz_sll_epi16(0, a, count); |
15147 | assert_eq_m512i(r, _mm512_setzero_si512()); |
15148 | let r = _mm512_maskz_sll_epi16(0b11111111_11111111_11111111_11111111, a, count); |
15149 | let e = _mm512_set1_epi16(0); |
15150 | assert_eq_m512i(r, e); |
15151 | } |
15152 | |
15153 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15154 | unsafe fn test_mm256_mask_sll_epi16() { |
15155 | let a = _mm256_set1_epi16(1 << 15); |
15156 | let count = _mm_set1_epi16(2); |
15157 | let r = _mm256_mask_sll_epi16(a, 0, a, count); |
15158 | assert_eq_m256i(r, a); |
15159 | let r = _mm256_mask_sll_epi16(a, 0b11111111_11111111, a, count); |
15160 | let e = _mm256_set1_epi16(0); |
15161 | assert_eq_m256i(r, e); |
15162 | } |
15163 | |
15164 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15165 | unsafe fn test_mm256_maskz_sll_epi16() { |
15166 | let a = _mm256_set1_epi16(1 << 15); |
15167 | let count = _mm_set1_epi16(2); |
15168 | let r = _mm256_maskz_sll_epi16(0, a, count); |
15169 | assert_eq_m256i(r, _mm256_setzero_si256()); |
15170 | let r = _mm256_maskz_sll_epi16(0b11111111_11111111, a, count); |
15171 | let e = _mm256_set1_epi16(0); |
15172 | assert_eq_m256i(r, e); |
15173 | } |
15174 | |
15175 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15176 | unsafe fn test_mm_mask_sll_epi16() { |
15177 | let a = _mm_set1_epi16(1 << 15); |
15178 | let count = _mm_set1_epi16(2); |
15179 | let r = _mm_mask_sll_epi16(a, 0, a, count); |
15180 | assert_eq_m128i(r, a); |
15181 | let r = _mm_mask_sll_epi16(a, 0b11111111, a, count); |
15182 | let e = _mm_set1_epi16(0); |
15183 | assert_eq_m128i(r, e); |
15184 | } |
15185 | |
15186 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15187 | unsafe fn test_mm_maskz_sll_epi16() { |
15188 | let a = _mm_set1_epi16(1 << 15); |
15189 | let count = _mm_set1_epi16(2); |
15190 | let r = _mm_maskz_sll_epi16(0, a, count); |
15191 | assert_eq_m128i(r, _mm_setzero_si128()); |
15192 | let r = _mm_maskz_sll_epi16(0b11111111, a, count); |
15193 | let e = _mm_set1_epi16(0); |
15194 | assert_eq_m128i(r, e); |
15195 | } |
15196 | |
15197 | #[simd_test(enable = "avx512bw" )] |
15198 | unsafe fn test_mm512_slli_epi16() { |
15199 | let a = _mm512_set1_epi16(1 << 15); |
15200 | let r = _mm512_slli_epi16::<1>(a); |
15201 | let e = _mm512_set1_epi16(0); |
15202 | assert_eq_m512i(r, e); |
15203 | } |
15204 | |
15205 | #[simd_test(enable = "avx512bw" )] |
15206 | unsafe fn test_mm512_mask_slli_epi16() { |
15207 | let a = _mm512_set1_epi16(1 << 15); |
15208 | let r = _mm512_mask_slli_epi16::<1>(a, 0, a); |
15209 | assert_eq_m512i(r, a); |
15210 | let r = _mm512_mask_slli_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a); |
15211 | let e = _mm512_set1_epi16(0); |
15212 | assert_eq_m512i(r, e); |
15213 | } |
15214 | |
15215 | #[simd_test(enable = "avx512bw" )] |
15216 | unsafe fn test_mm512_maskz_slli_epi16() { |
15217 | let a = _mm512_set1_epi16(1 << 15); |
15218 | let r = _mm512_maskz_slli_epi16::<1>(0, a); |
15219 | assert_eq_m512i(r, _mm512_setzero_si512()); |
15220 | let r = _mm512_maskz_slli_epi16::<1>(0b11111111_11111111_11111111_11111111, a); |
15221 | let e = _mm512_set1_epi16(0); |
15222 | assert_eq_m512i(r, e); |
15223 | } |
15224 | |
15225 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15226 | unsafe fn test_mm256_mask_slli_epi16() { |
15227 | let a = _mm256_set1_epi16(1 << 15); |
15228 | let r = _mm256_mask_slli_epi16::<1>(a, 0, a); |
15229 | assert_eq_m256i(r, a); |
15230 | let r = _mm256_mask_slli_epi16::<1>(a, 0b11111111_11111111, a); |
15231 | let e = _mm256_set1_epi16(0); |
15232 | assert_eq_m256i(r, e); |
15233 | } |
15234 | |
15235 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15236 | unsafe fn test_mm256_maskz_slli_epi16() { |
15237 | let a = _mm256_set1_epi16(1 << 15); |
15238 | let r = _mm256_maskz_slli_epi16::<1>(0, a); |
15239 | assert_eq_m256i(r, _mm256_setzero_si256()); |
15240 | let r = _mm256_maskz_slli_epi16::<1>(0b11111111_11111111, a); |
15241 | let e = _mm256_set1_epi16(0); |
15242 | assert_eq_m256i(r, e); |
15243 | } |
15244 | |
15245 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15246 | unsafe fn test_mm_mask_slli_epi16() { |
15247 | let a = _mm_set1_epi16(1 << 15); |
15248 | let r = _mm_mask_slli_epi16::<1>(a, 0, a); |
15249 | assert_eq_m128i(r, a); |
15250 | let r = _mm_mask_slli_epi16::<1>(a, 0b11111111, a); |
15251 | let e = _mm_set1_epi16(0); |
15252 | assert_eq_m128i(r, e); |
15253 | } |
15254 | |
15255 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15256 | unsafe fn test_mm_maskz_slli_epi16() { |
15257 | let a = _mm_set1_epi16(1 << 15); |
15258 | let r = _mm_maskz_slli_epi16::<1>(0, a); |
15259 | assert_eq_m128i(r, _mm_setzero_si128()); |
15260 | let r = _mm_maskz_slli_epi16::<1>(0b11111111, a); |
15261 | let e = _mm_set1_epi16(0); |
15262 | assert_eq_m128i(r, e); |
15263 | } |
15264 | |
15265 | #[simd_test(enable = "avx512bw" )] |
15266 | unsafe fn test_mm512_sllv_epi16() { |
15267 | let a = _mm512_set1_epi16(1 << 15); |
15268 | let count = _mm512_set1_epi16(2); |
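// sllv shifts each lane by the corresponding lane of `count`; 1 << 15 shifted left by 2 loses its set bit, giving 0.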
15269 | let r = _mm512_sllv_epi16(a, count); |
15270 | let e = _mm512_set1_epi16(0); |
15271 | assert_eq_m512i(r, e); |
15272 | } |
15273 | |
15274 | #[simd_test(enable = "avx512bw" )] |
15275 | unsafe fn test_mm512_mask_sllv_epi16() { |
15276 | let a = _mm512_set1_epi16(1 << 15); |
15277 | let count = _mm512_set1_epi16(2); |
15278 | let r = _mm512_mask_sllv_epi16(a, 0, a, count); |
15279 | assert_eq_m512i(r, a); |
15280 | let r = _mm512_mask_sllv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count); |
15281 | let e = _mm512_set1_epi16(0); |
15282 | assert_eq_m512i(r, e); |
15283 | } |
15284 | |
15285 | #[simd_test(enable = "avx512bw" )] |
15286 | unsafe fn test_mm512_maskz_sllv_epi16() { |
15287 | let a = _mm512_set1_epi16(1 << 15); |
15288 | let count = _mm512_set1_epi16(2); |
15289 | let r = _mm512_maskz_sllv_epi16(0, a, count); |
15290 | assert_eq_m512i(r, _mm512_setzero_si512()); |
15291 | let r = _mm512_maskz_sllv_epi16(0b11111111_11111111_11111111_11111111, a, count); |
15292 | let e = _mm512_set1_epi16(0); |
15293 | assert_eq_m512i(r, e); |
15294 | } |
15295 | |
15296 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15297 | unsafe fn test_mm256_sllv_epi16() { |
15298 | let a = _mm256_set1_epi16(1 << 15); |
15299 | let count = _mm256_set1_epi16(2); |
15300 | let r = _mm256_sllv_epi16(a, count); |
15301 | let e = _mm256_set1_epi16(0); |
15302 | assert_eq_m256i(r, e); |
15303 | } |
15304 | |
15305 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15306 | unsafe fn test_mm256_mask_sllv_epi16() { |
15307 | let a = _mm256_set1_epi16(1 << 15); |
15308 | let count = _mm256_set1_epi16(2); |
15309 | let r = _mm256_mask_sllv_epi16(a, 0, a, count); |
15310 | assert_eq_m256i(r, a); |
15311 | let r = _mm256_mask_sllv_epi16(a, 0b11111111_11111111, a, count); |
15312 | let e = _mm256_set1_epi16(0); |
15313 | assert_eq_m256i(r, e); |
15314 | } |
15315 | |
15316 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15317 | unsafe fn test_mm256_maskz_sllv_epi16() { |
15318 | let a = _mm256_set1_epi16(1 << 15); |
15319 | let count = _mm256_set1_epi16(2); |
15320 | let r = _mm256_maskz_sllv_epi16(0, a, count); |
15321 | assert_eq_m256i(r, _mm256_setzero_si256()); |
15322 | let r = _mm256_maskz_sllv_epi16(0b11111111_11111111, a, count); |
15323 | let e = _mm256_set1_epi16(0); |
15324 | assert_eq_m256i(r, e); |
15325 | } |
15326 | |
15327 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15328 | unsafe fn test_mm_sllv_epi16() { |
15329 | let a = _mm_set1_epi16(1 << 15); |
15330 | let count = _mm_set1_epi16(2); |
15331 | let r = _mm_sllv_epi16(a, count); |
15332 | let e = _mm_set1_epi16(0); |
15333 | assert_eq_m128i(r, e); |
15334 | } |
15335 | |
15336 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15337 | unsafe fn test_mm_mask_sllv_epi16() { |
15338 | let a = _mm_set1_epi16(1 << 15); |
15339 | let count = _mm_set1_epi16(2); |
15340 | let r = _mm_mask_sllv_epi16(a, 0, a, count); |
15341 | assert_eq_m128i(r, a); |
15342 | let r = _mm_mask_sllv_epi16(a, 0b11111111, a, count); |
15343 | let e = _mm_set1_epi16(0); |
15344 | assert_eq_m128i(r, e); |
15345 | } |
15346 | |
15347 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15348 | unsafe fn test_mm_maskz_sllv_epi16() { |
15349 | let a = _mm_set1_epi16(1 << 15); |
15350 | let count = _mm_set1_epi16(2); |
15351 | let r = _mm_maskz_sllv_epi16(0, a, count); |
15352 | assert_eq_m128i(r, _mm_setzero_si128()); |
15353 | let r = _mm_maskz_sllv_epi16(0b11111111, a, count); |
15354 | let e = _mm_set1_epi16(0); |
15355 | assert_eq_m128i(r, e); |
15356 | } |
15357 | |
15358 | #[simd_test(enable = "avx512bw" )] |
15359 | unsafe fn test_mm512_srl_epi16() { |
15360 | let a = _mm512_set1_epi16(1 << 1); |
15361 | let count = _mm_set1_epi16(2); |
15362 | let r = _mm512_srl_epi16(a, count); |
15363 | let e = _mm512_set1_epi16(0); |
15364 | assert_eq_m512i(r, e); |
15365 | } |
15366 | |
15367 | #[simd_test(enable = "avx512bw" )] |
15368 | unsafe fn test_mm512_mask_srl_epi16() { |
15369 | let a = _mm512_set1_epi16(1 << 1); |
15370 | let count = _mm_set1_epi16(2); |
15371 | let r = _mm512_mask_srl_epi16(a, 0, a, count); |
15372 | assert_eq_m512i(r, a); |
15373 | let r = _mm512_mask_srl_epi16(a, 0b11111111_11111111_11111111_11111111, a, count); |
15374 | let e = _mm512_set1_epi16(0); |
15375 | assert_eq_m512i(r, e); |
15376 | } |
15377 | |
15378 | #[simd_test(enable = "avx512bw" )] |
15379 | unsafe fn test_mm512_maskz_srl_epi16() { |
15380 | let a = _mm512_set1_epi16(1 << 1); |
15381 | let count = _mm_set1_epi16(2); |
15382 | let r = _mm512_maskz_srl_epi16(0, a, count); |
15383 | assert_eq_m512i(r, _mm512_setzero_si512()); |
15384 | let r = _mm512_maskz_srl_epi16(0b11111111_11111111_11111111_11111111, a, count); |
15385 | let e = _mm512_set1_epi16(0); |
15386 | assert_eq_m512i(r, e); |
15387 | } |
15388 | |
15389 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15390 | unsafe fn test_mm256_mask_srl_epi16() { |
15391 | let a = _mm256_set1_epi16(1 << 1); |
15392 | let count = _mm_set1_epi16(2); |
15393 | let r = _mm256_mask_srl_epi16(a, 0, a, count); |
15394 | assert_eq_m256i(r, a); |
15395 | let r = _mm256_mask_srl_epi16(a, 0b11111111_11111111, a, count); |
15396 | let e = _mm256_set1_epi16(0); |
15397 | assert_eq_m256i(r, e); |
15398 | } |
15399 | |
15400 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15401 | unsafe fn test_mm256_maskz_srl_epi16() { |
15402 | let a = _mm256_set1_epi16(1 << 1); |
15403 | let count = _mm_set1_epi16(2); |
15404 | let r = _mm256_maskz_srl_epi16(0, a, count); |
15405 | assert_eq_m256i(r, _mm256_setzero_si256()); |
15406 | let r = _mm256_maskz_srl_epi16(0b11111111_11111111, a, count); |
15407 | let e = _mm256_set1_epi16(0); |
15408 | assert_eq_m256i(r, e); |
15409 | } |
15410 | |
15411 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15412 | unsafe fn test_mm_mask_srl_epi16() { |
15413 | let a = _mm_set1_epi16(1 << 1); |
15414 | let count = _mm_set1_epi16(2); |
15415 | let r = _mm_mask_srl_epi16(a, 0, a, count); |
15416 | assert_eq_m128i(r, a); |
15417 | let r = _mm_mask_srl_epi16(a, 0b11111111, a, count); |
15418 | let e = _mm_set1_epi16(0); |
15419 | assert_eq_m128i(r, e); |
15420 | } |
15421 | |
15422 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15423 | unsafe fn test_mm_maskz_srl_epi16() { |
15424 | let a = _mm_set1_epi16(1 << 1); |
15425 | let count = _mm_set1_epi16(2); |
15426 | let r = _mm_maskz_srl_epi16(0, a, count); |
15427 | assert_eq_m128i(r, _mm_setzero_si128()); |
15428 | let r = _mm_maskz_srl_epi16(0b11111111, a, count); |
15429 | let e = _mm_set1_epi16(0); |
15430 | assert_eq_m128i(r, e); |
15431 | } |
15432 | |
15433 | #[simd_test(enable = "avx512bw" )] |
15434 | unsafe fn test_mm512_srli_epi16() { |
15435 | let a = _mm512_set1_epi16(1 << 1); |
15436 | let r = _mm512_srli_epi16::<2>(a); |
15437 | let e = _mm512_set1_epi16(0); |
15438 | assert_eq_m512i(r, e); |
15439 | } |
15440 | |
15441 | #[simd_test(enable = "avx512bw" )] |
15442 | unsafe fn test_mm512_mask_srli_epi16() { |
15443 | let a = _mm512_set1_epi16(1 << 1); |
15444 | let r = _mm512_mask_srli_epi16::<2>(a, 0, a); |
15445 | assert_eq_m512i(r, a); |
15446 | let r = _mm512_mask_srli_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a); |
15447 | let e = _mm512_set1_epi16(0); |
15448 | assert_eq_m512i(r, e); |
15449 | } |
15450 | |
15451 | #[simd_test(enable = "avx512bw" )] |
15452 | unsafe fn test_mm512_maskz_srli_epi16() { |
15453 | let a = _mm512_set1_epi16(1 << 1); |
15454 | let r = _mm512_maskz_srli_epi16::<2>(0, a); |
15455 | assert_eq_m512i(r, _mm512_setzero_si512()); |
15456 | let r = _mm512_maskz_srli_epi16::<2>(0b11111111_11111111_11111111_11111111, a); |
15457 | let e = _mm512_set1_epi16(0); |
15458 | assert_eq_m512i(r, e); |
15459 | } |
15460 | |
15461 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15462 | unsafe fn test_mm256_mask_srli_epi16() { |
15463 | let a = _mm256_set1_epi16(1 << 1); |
15464 | let r = _mm256_mask_srli_epi16::<2>(a, 0, a); |
15465 | assert_eq_m256i(r, a); |
15466 | let r = _mm256_mask_srli_epi16::<2>(a, 0b11111111_11111111, a); |
15467 | let e = _mm256_set1_epi16(0); |
15468 | assert_eq_m256i(r, e); |
15469 | } |
15470 | |
15471 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15472 | unsafe fn test_mm256_maskz_srli_epi16() { |
15473 | let a = _mm256_set1_epi16(1 << 1); |
15474 | let r = _mm256_maskz_srli_epi16::<2>(0, a); |
15475 | assert_eq_m256i(r, _mm256_setzero_si256()); |
15476 | let r = _mm256_maskz_srli_epi16::<2>(0b11111111_11111111, a); |
15477 | let e = _mm256_set1_epi16(0); |
15478 | assert_eq_m256i(r, e); |
15479 | } |
15480 | |
15481 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15482 | unsafe fn test_mm_mask_srli_epi16() { |
15483 | let a = _mm_set1_epi16(1 << 1); |
15484 | let r = _mm_mask_srli_epi16::<2>(a, 0, a); |
15485 | assert_eq_m128i(r, a); |
15486 | let r = _mm_mask_srli_epi16::<2>(a, 0b11111111, a); |
15487 | let e = _mm_set1_epi16(0); |
15488 | assert_eq_m128i(r, e); |
15489 | } |
15490 | |
15491 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15492 | unsafe fn test_mm_maskz_srli_epi16() { |
15493 | let a = _mm_set1_epi16(1 << 1); |
15494 | let r = _mm_maskz_srli_epi16::<2>(0, a); |
15495 | assert_eq_m128i(r, _mm_setzero_si128()); |
15496 | let r = _mm_maskz_srli_epi16::<2>(0b11111111, a); |
15497 | let e = _mm_set1_epi16(0); |
15498 | assert_eq_m128i(r, e); |
15499 | } |
15500 | |
15501 | #[simd_test(enable = "avx512bw" )] |
15502 | unsafe fn test_mm512_srlv_epi16() { |
15503 | let a = _mm512_set1_epi16(1 << 1); |
15504 | let count = _mm512_set1_epi16(2); |
15505 | let r = _mm512_srlv_epi16(a, count); |
15506 | let e = _mm512_set1_epi16(0); |
15507 | assert_eq_m512i(r, e); |
15508 | } |
15509 | |
15510 | #[simd_test(enable = "avx512bw" )] |
15511 | unsafe fn test_mm512_mask_srlv_epi16() { |
15512 | let a = _mm512_set1_epi16(1 << 1); |
15513 | let count = _mm512_set1_epi16(2); |
15514 | let r = _mm512_mask_srlv_epi16(a, 0, a, count); |
15515 | assert_eq_m512i(r, a); |
15516 | let r = _mm512_mask_srlv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count); |
15517 | let e = _mm512_set1_epi16(0); |
15518 | assert_eq_m512i(r, e); |
15519 | } |
15520 | |
15521 | #[simd_test(enable = "avx512bw" )] |
15522 | unsafe fn test_mm512_maskz_srlv_epi16() { |
15523 | let a = _mm512_set1_epi16(1 << 1); |
15524 | let count = _mm512_set1_epi16(2); |
15525 | let r = _mm512_maskz_srlv_epi16(0, a, count); |
15526 | assert_eq_m512i(r, _mm512_setzero_si512()); |
15527 | let r = _mm512_maskz_srlv_epi16(0b11111111_11111111_11111111_11111111, a, count); |
15528 | let e = _mm512_set1_epi16(0); |
15529 | assert_eq_m512i(r, e); |
15530 | } |
15531 | |
15532 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15533 | unsafe fn test_mm256_srlv_epi16() { |
15534 | let a = _mm256_set1_epi16(1 << 1); |
15535 | let count = _mm256_set1_epi16(2); |
15536 | let r = _mm256_srlv_epi16(a, count); |
15537 | let e = _mm256_set1_epi16(0); |
15538 | assert_eq_m256i(r, e); |
15539 | } |
15540 | |
15541 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15542 | unsafe fn test_mm256_mask_srlv_epi16() { |
15543 | let a = _mm256_set1_epi16(1 << 1); |
15544 | let count = _mm256_set1_epi16(2); |
15545 | let r = _mm256_mask_srlv_epi16(a, 0, a, count); |
15546 | assert_eq_m256i(r, a); |
15547 | let r = _mm256_mask_srlv_epi16(a, 0b11111111_11111111, a, count); |
15548 | let e = _mm256_set1_epi16(0); |
15549 | assert_eq_m256i(r, e); |
15550 | } |
15551 | |
15552 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15553 | unsafe fn test_mm256_maskz_srlv_epi16() { |
15554 | let a = _mm256_set1_epi16(1 << 1); |
15555 | let count = _mm256_set1_epi16(2); |
15556 | let r = _mm256_maskz_srlv_epi16(0, a, count); |
15557 | assert_eq_m256i(r, _mm256_setzero_si256()); |
15558 | let r = _mm256_maskz_srlv_epi16(0b11111111_11111111, a, count); |
15559 | let e = _mm256_set1_epi16(0); |
15560 | assert_eq_m256i(r, e); |
15561 | } |
15562 | |
15563 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15564 | unsafe fn test_mm_srlv_epi16() { |
15565 | let a = _mm_set1_epi16(1 << 1); |
15566 | let count = _mm_set1_epi16(2); |
15567 | let r = _mm_srlv_epi16(a, count); |
15568 | let e = _mm_set1_epi16(0); |
15569 | assert_eq_m128i(r, e); |
15570 | } |
15571 | |
15572 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15573 | unsafe fn test_mm_mask_srlv_epi16() { |
15574 | let a = _mm_set1_epi16(1 << 1); |
15575 | let count = _mm_set1_epi16(2); |
15576 | let r = _mm_mask_srlv_epi16(a, 0, a, count); |
15577 | assert_eq_m128i(r, a); |
15578 | let r = _mm_mask_srlv_epi16(a, 0b11111111, a, count); |
15579 | let e = _mm_set1_epi16(0); |
15580 | assert_eq_m128i(r, e); |
15581 | } |
15582 | |
15583 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15584 | unsafe fn test_mm_maskz_srlv_epi16() { |
15585 | let a = _mm_set1_epi16(1 << 1); |
15586 | let count = _mm_set1_epi16(2); |
15587 | let r = _mm_maskz_srlv_epi16(0, a, count); |
15588 | assert_eq_m128i(r, _mm_setzero_si128()); |
15589 | let r = _mm_maskz_srlv_epi16(0b11111111, a, count); |
15590 | let e = _mm_set1_epi16(0); |
15591 | assert_eq_m128i(r, e); |
15592 | } |
15593 | |
15594 | #[simd_test(enable = "avx512bw" )] |
15595 | unsafe fn test_mm512_sra_epi16() { |
15596 | let a = _mm512_set1_epi16(8); |
15597 | let count = _mm_set1_epi16(1); |
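// The shift amount is the low 64 bits of `count` (0x0001000100010001 here), which exceeds 15,
// so each lane is filled with its sign bit: 0 for the positive input 8.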
15598 | let r = _mm512_sra_epi16(a, count); |
15599 | let e = _mm512_set1_epi16(0); |
15600 | assert_eq_m512i(r, e); |
15601 | } |
15602 | |
15603 | #[simd_test(enable = "avx512bw" )] |
15604 | unsafe fn test_mm512_mask_sra_epi16() { |
15605 | let a = _mm512_set1_epi16(8); |
15606 | let count = _mm_set1_epi16(1); |
15607 | let r = _mm512_mask_sra_epi16(a, 0, a, count); |
15608 | assert_eq_m512i(r, a); |
15609 | let r = _mm512_mask_sra_epi16(a, 0b11111111_11111111_11111111_11111111, a, count); |
15610 | let e = _mm512_set1_epi16(0); |
15611 | assert_eq_m512i(r, e); |
15612 | } |
15613 | |
15614 | #[simd_test(enable = "avx512bw" )] |
15615 | unsafe fn test_mm512_maskz_sra_epi16() { |
15616 | let a = _mm512_set1_epi16(8); |
15617 | let count = _mm_set1_epi16(1); |
15618 | let r = _mm512_maskz_sra_epi16(0, a, count); |
15619 | assert_eq_m512i(r, _mm512_setzero_si512()); |
15620 | let r = _mm512_maskz_sra_epi16(0b11111111_11111111_11111111_11111111, a, count); |
15621 | let e = _mm512_set1_epi16(0); |
15622 | assert_eq_m512i(r, e); |
15623 | } |
15624 | |
15625 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15626 | unsafe fn test_mm256_mask_sra_epi16() { |
15627 | let a = _mm256_set1_epi16(8); |
15628 | let count = _mm_set1_epi16(1); |
15629 | let r = _mm256_mask_sra_epi16(a, 0, a, count); |
15630 | assert_eq_m256i(r, a); |
15631 | let r = _mm256_mask_sra_epi16(a, 0b11111111_11111111, a, count); |
15632 | let e = _mm256_set1_epi16(0); |
15633 | assert_eq_m256i(r, e); |
15634 | } |
15635 | |
15636 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15637 | unsafe fn test_mm256_maskz_sra_epi16() { |
15638 | let a = _mm256_set1_epi16(8); |
15639 | let count = _mm_set1_epi16(1); |
15640 | let r = _mm256_maskz_sra_epi16(0, a, count); |
15641 | assert_eq_m256i(r, _mm256_setzero_si256()); |
15642 | let r = _mm256_maskz_sra_epi16(0b11111111_11111111, a, count); |
15643 | let e = _mm256_set1_epi16(0); |
15644 | assert_eq_m256i(r, e); |
15645 | } |
15646 | |
15647 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15648 | unsafe fn test_mm_mask_sra_epi16() { |
15649 | let a = _mm_set1_epi16(8); |
15650 | let count = _mm_set1_epi16(1); |
15651 | let r = _mm_mask_sra_epi16(a, 0, a, count); |
15652 | assert_eq_m128i(r, a); |
15653 | let r = _mm_mask_sra_epi16(a, 0b11111111, a, count); |
15654 | let e = _mm_set1_epi16(0); |
15655 | assert_eq_m128i(r, e); |
15656 | } |
15657 | |
15658 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15659 | unsafe fn test_mm_maskz_sra_epi16() { |
15660 | let a = _mm_set1_epi16(8); |
15661 | let count = _mm_set1_epi16(1); |
15662 | let r = _mm_maskz_sra_epi16(0, a, count); |
15663 | assert_eq_m128i(r, _mm_setzero_si128()); |
15664 | let r = _mm_maskz_sra_epi16(0b11111111, a, count); |
15665 | let e = _mm_set1_epi16(0); |
15666 | assert_eq_m128i(r, e); |
15667 | } |
15668 | |
15669 | #[simd_test(enable = "avx512bw" )] |
15670 | unsafe fn test_mm512_srai_epi16() { |
15671 | let a = _mm512_set1_epi16(8); |
15672 | let r = _mm512_srai_epi16::<2>(a); |
15673 | let e = _mm512_set1_epi16(2); |
15674 | assert_eq_m512i(r, e); |
15675 | } |
15676 | |
15677 | #[simd_test(enable = "avx512bw" )] |
15678 | unsafe fn test_mm512_mask_srai_epi16() { |
15679 | let a = _mm512_set1_epi16(8); |
15680 | let r = _mm512_mask_srai_epi16::<2>(a, 0, a); |
15681 | assert_eq_m512i(r, a); |
15682 | let r = _mm512_mask_srai_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a); |
15683 | let e = _mm512_set1_epi16(2); |
15684 | assert_eq_m512i(r, e); |
15685 | } |
15686 | |
15687 | #[simd_test(enable = "avx512bw" )] |
15688 | unsafe fn test_mm512_maskz_srai_epi16() { |
15689 | let a = _mm512_set1_epi16(8); |
15690 | let r = _mm512_maskz_srai_epi16::<2>(0, a); |
15691 | assert_eq_m512i(r, _mm512_setzero_si512()); |
15692 | let r = _mm512_maskz_srai_epi16::<2>(0b11111111_11111111_11111111_11111111, a); |
15693 | let e = _mm512_set1_epi16(2); |
15694 | assert_eq_m512i(r, e); |
15695 | } |
15696 | |
15697 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15698 | unsafe fn test_mm256_mask_srai_epi16() { |
15699 | let a = _mm256_set1_epi16(8); |
15700 | let r = _mm256_mask_srai_epi16::<2>(a, 0, a); |
15701 | assert_eq_m256i(r, a); |
15702 | let r = _mm256_mask_srai_epi16::<2>(a, 0b11111111_11111111, a); |
15703 | let e = _mm256_set1_epi16(2); |
15704 | assert_eq_m256i(r, e); |
15705 | } |
15706 | |
15707 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15708 | unsafe fn test_mm256_maskz_srai_epi16() { |
15709 | let a = _mm256_set1_epi16(8); |
15710 | let r = _mm256_maskz_srai_epi16::<2>(0, a); |
15711 | assert_eq_m256i(r, _mm256_setzero_si256()); |
15712 | let r = _mm256_maskz_srai_epi16::<2>(0b11111111_11111111, a); |
15713 | let e = _mm256_set1_epi16(2); |
15714 | assert_eq_m256i(r, e); |
15715 | } |
15716 | |
15717 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15718 | unsafe fn test_mm_mask_srai_epi16() { |
15719 | let a = _mm_set1_epi16(8); |
15720 | let r = _mm_mask_srai_epi16::<2>(a, 0, a); |
15721 | assert_eq_m128i(r, a); |
15722 | let r = _mm_mask_srai_epi16::<2>(a, 0b11111111, a); |
15723 | let e = _mm_set1_epi16(2); |
15724 | assert_eq_m128i(r, e); |
15725 | } |
15726 | |
15727 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15728 | unsafe fn test_mm_maskz_srai_epi16() { |
15729 | let a = _mm_set1_epi16(8); |
15730 | let r = _mm_maskz_srai_epi16::<2>(0, a); |
15731 | assert_eq_m128i(r, _mm_setzero_si128()); |
15732 | let r = _mm_maskz_srai_epi16::<2>(0b11111111, a); |
15733 | let e = _mm_set1_epi16(2); |
15734 | assert_eq_m128i(r, e); |
15735 | } |
15736 | |
15737 | #[simd_test(enable = "avx512bw" )] |
15738 | unsafe fn test_mm512_srav_epi16() { |
15739 | let a = _mm512_set1_epi16(8); |
15740 | let count = _mm512_set1_epi16(2); |
15741 | let r = _mm512_srav_epi16(a, count); |
15742 | let e = _mm512_set1_epi16(2); |
15743 | assert_eq_m512i(r, e); |
15744 | } |
15745 | |
15746 | #[simd_test(enable = "avx512bw" )] |
15747 | unsafe fn test_mm512_mask_srav_epi16() { |
15748 | let a = _mm512_set1_epi16(8); |
15749 | let count = _mm512_set1_epi16(2); |
15750 | let r = _mm512_mask_srav_epi16(a, 0, a, count); |
15751 | assert_eq_m512i(r, a); |
15752 | let r = _mm512_mask_srav_epi16(a, 0b11111111_11111111_11111111_11111111, a, count); |
15753 | let e = _mm512_set1_epi16(2); |
15754 | assert_eq_m512i(r, e); |
15755 | } |
15756 | |
15757 | #[simd_test(enable = "avx512bw" )] |
15758 | unsafe fn test_mm512_maskz_srav_epi16() { |
15759 | let a = _mm512_set1_epi16(8); |
15760 | let count = _mm512_set1_epi16(2); |
15761 | let r = _mm512_maskz_srav_epi16(0, a, count); |
15762 | assert_eq_m512i(r, _mm512_setzero_si512()); |
15763 | let r = _mm512_maskz_srav_epi16(0b11111111_11111111_11111111_11111111, a, count); |
15764 | let e = _mm512_set1_epi16(2); |
15765 | assert_eq_m512i(r, e); |
15766 | } |
15767 | |
15768 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15769 | unsafe fn test_mm256_srav_epi16() { |
15770 | let a = _mm256_set1_epi16(8); |
15771 | let count = _mm256_set1_epi16(2); |
15772 | let r = _mm256_srav_epi16(a, count); |
15773 | let e = _mm256_set1_epi16(2); |
15774 | assert_eq_m256i(r, e); |
15775 | } |
15776 | |
15777 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15778 | unsafe fn test_mm256_mask_srav_epi16() { |
15779 | let a = _mm256_set1_epi16(8); |
15780 | let count = _mm256_set1_epi16(2); |
15781 | let r = _mm256_mask_srav_epi16(a, 0, a, count); |
15782 | assert_eq_m256i(r, a); |
15783 | let r = _mm256_mask_srav_epi16(a, 0b11111111_11111111, a, count); |
15784 | let e = _mm256_set1_epi16(2); |
15785 | assert_eq_m256i(r, e); |
15786 | } |
15787 | |
15788 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15789 | unsafe fn test_mm256_maskz_srav_epi16() { |
15790 | let a = _mm256_set1_epi16(8); |
15791 | let count = _mm256_set1_epi16(2); |
15792 | let r = _mm256_maskz_srav_epi16(0, a, count); |
15793 | assert_eq_m256i(r, _mm256_setzero_si256()); |
15794 | let r = _mm256_maskz_srav_epi16(0b11111111_11111111, a, count); |
15795 | let e = _mm256_set1_epi16(2); |
15796 | assert_eq_m256i(r, e); |
15797 | } |
15798 | |
15799 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15800 | unsafe fn test_mm_srav_epi16() { |
15801 | let a = _mm_set1_epi16(8); |
15802 | let count = _mm_set1_epi16(2); |
15803 | let r = _mm_srav_epi16(a, count); |
15804 | let e = _mm_set1_epi16(2); |
15805 | assert_eq_m128i(r, e); |
15806 | } |
15807 | |
15808 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15809 | unsafe fn test_mm_mask_srav_epi16() { |
15810 | let a = _mm_set1_epi16(8); |
15811 | let count = _mm_set1_epi16(2); |
15812 | let r = _mm_mask_srav_epi16(a, 0, a, count); |
15813 | assert_eq_m128i(r, a); |
15814 | let r = _mm_mask_srav_epi16(a, 0b11111111, a, count); |
15815 | let e = _mm_set1_epi16(2); |
15816 | assert_eq_m128i(r, e); |
15817 | } |
15818 | |
15819 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15820 | unsafe fn test_mm_maskz_srav_epi16() { |
15821 | let a = _mm_set1_epi16(8); |
15822 | let count = _mm_set1_epi16(2); |
15823 | let r = _mm_maskz_srav_epi16(0, a, count); |
15824 | assert_eq_m128i(r, _mm_setzero_si128()); |
15825 | let r = _mm_maskz_srav_epi16(0b11111111, a, count); |
15826 | let e = _mm_set1_epi16(2); |
15827 | assert_eq_m128i(r, e); |
15828 | } |
15829 | |
15830 | #[simd_test(enable = "avx512bw" )] |
15831 | unsafe fn test_mm512_permutex2var_epi16() { |
15832 | #[rustfmt::skip] |
15833 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
15834 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
15835 | #[rustfmt::skip] |
15836 | let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5, |
15837 | 9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5); |
15838 | let b = _mm512_set1_epi16(100); |
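// Bits 4:0 of each idx lane select one of the 32 word lanes; bit 5 set picks the lane from `b` instead of `a`.
// With _mm512_set_epi16 listing the highest lane first, index 1 refers to the lane of `a` holding 30.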
15839 | let r = _mm512_permutex2var_epi16(a, idx, b); |
15840 | #[rustfmt::skip] |
15841 | let e = _mm512_set_epi16( |
15842 | 30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100, |
15843 | 22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100, |
15844 | ); |
15845 | assert_eq_m512i(r, e); |
15846 | } |
15847 | |
15848 | #[simd_test(enable = "avx512bw" )] |
15849 | unsafe fn test_mm512_mask_permutex2var_epi16() { |
15850 | #[rustfmt::skip] |
15851 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
15852 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
15853 | #[rustfmt::skip] |
15854 | let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5, |
15855 | 9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5); |
15856 | let b = _mm512_set1_epi16(100); |
15857 | let r = _mm512_mask_permutex2var_epi16(a, 0, idx, b); |
15858 | assert_eq_m512i(r, a); |
15859 | let r = _mm512_mask_permutex2var_epi16(a, 0b11111111_11111111_11111111_11111111, idx, b); |
15860 | #[rustfmt::skip] |
15861 | let e = _mm512_set_epi16( |
15862 | 30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100, |
15863 | 22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100, |
15864 | ); |
15865 | assert_eq_m512i(r, e); |
15866 | } |
15867 | |
15868 | #[simd_test(enable = "avx512bw" )] |
15869 | unsafe fn test_mm512_maskz_permutex2var_epi16() { |
15870 | #[rustfmt::skip] |
15871 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
15872 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
15873 | #[rustfmt::skip] |
15874 | let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5, |
15875 | 9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5); |
15876 | let b = _mm512_set1_epi16(100); |
15877 | let r = _mm512_maskz_permutex2var_epi16(0, a, idx, b); |
15878 | assert_eq_m512i(r, _mm512_setzero_si512()); |
15879 | let r = _mm512_maskz_permutex2var_epi16(0b11111111_11111111_11111111_11111111, a, idx, b); |
15880 | #[rustfmt::skip] |
15881 | let e = _mm512_set_epi16( |
15882 | 30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100, |
15883 | 22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100, |
15884 | ); |
15885 | assert_eq_m512i(r, e); |
15886 | } |
15887 | |
15888 | #[simd_test(enable = "avx512bw" )] |
15889 | unsafe fn test_mm512_mask2_permutex2var_epi16() { |
15890 | #[rustfmt::skip] |
15891 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
15892 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
15893 | #[rustfmt::skip] |
15894 | let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5, |
15895 | 9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5); |
15896 | let b = _mm512_set1_epi16(100); |
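// The mask2 variant copies lanes from `idx` (not `a`) when the corresponding mask bit is clear, hence r == idx below.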
15897 | let r = _mm512_mask2_permutex2var_epi16(a, idx, 0, b); |
15898 | assert_eq_m512i(r, idx); |
15899 | let r = _mm512_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111_11111111_11111111, b); |
15900 | #[rustfmt::skip] |
15901 | let e = _mm512_set_epi16( |
15902 | 30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100, |
15903 | 22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100, |
15904 | ); |
15905 | assert_eq_m512i(r, e); |
15906 | } |
15907 | |
15908 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15909 | unsafe fn test_mm256_permutex2var_epi16() { |
15910 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
15911 | #[rustfmt::skip] |
15912 | let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4); |
15913 | let b = _mm256_set1_epi16(100); |
15914 | let r = _mm256_permutex2var_epi16(a, idx, b); |
15915 | let e = _mm256_set_epi16( |
15916 | 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100, |
15917 | ); |
15918 | assert_eq_m256i(r, e); |
15919 | } |
15920 | |
15921 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15922 | unsafe fn test_mm256_mask_permutex2var_epi16() { |
15923 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
15924 | #[rustfmt::skip] |
15925 | let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4); |
15926 | let b = _mm256_set1_epi16(100); |
15927 | let r = _mm256_mask_permutex2var_epi16(a, 0, idx, b); |
15928 | assert_eq_m256i(r, a); |
15929 | let r = _mm256_mask_permutex2var_epi16(a, 0b11111111_11111111, idx, b); |
15930 | let e = _mm256_set_epi16( |
15931 | 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100, |
15932 | ); |
15933 | assert_eq_m256i(r, e); |
15934 | } |
15935 | |
15936 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15937 | unsafe fn test_mm256_maskz_permutex2var_epi16() { |
15938 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
15939 | #[rustfmt::skip] |
15940 | let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4); |
15941 | let b = _mm256_set1_epi16(100); |
15942 | let r = _mm256_maskz_permutex2var_epi16(0, a, idx, b); |
15943 | assert_eq_m256i(r, _mm256_setzero_si256()); |
15944 | let r = _mm256_maskz_permutex2var_epi16(0b11111111_11111111, a, idx, b); |
15945 | let e = _mm256_set_epi16( |
15946 | 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100, |
15947 | ); |
15948 | assert_eq_m256i(r, e); |
15949 | } |
15950 | |
15951 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15952 | unsafe fn test_mm256_mask2_permutex2var_epi16() { |
15953 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
15954 | #[rustfmt::skip] |
15955 | let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4); |
15956 | let b = _mm256_set1_epi16(100); |
15957 | let r = _mm256_mask2_permutex2var_epi16(a, idx, 0, b); |
15958 | assert_eq_m256i(r, idx); |
15959 | let r = _mm256_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111, b); |
15960 | #[rustfmt::skip] |
15961 | let e = _mm256_set_epi16( |
15962 | 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100, |
15963 | ); |
15964 | assert_eq_m256i(r, e); |
15965 | } |
15966 | |
15967 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15968 | unsafe fn test_mm_permutex2var_epi16() { |
15969 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
15970 | let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); |
15971 | let b = _mm_set1_epi16(100); |
15972 | let r = _mm_permutex2var_epi16(a, idx, b); |
15973 | let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100); |
15974 | assert_eq_m128i(r, e); |
15975 | } |
15976 | |
15977 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15978 | unsafe fn test_mm_mask_permutex2var_epi16() { |
15979 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
15980 | let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); |
15981 | let b = _mm_set1_epi16(100); |
15982 | let r = _mm_mask_permutex2var_epi16(a, 0, idx, b); |
15983 | assert_eq_m128i(r, a); |
15984 | let r = _mm_mask_permutex2var_epi16(a, 0b11111111, idx, b); |
15985 | let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100); |
15986 | assert_eq_m128i(r, e); |
15987 | } |
15988 | |
15989 | #[simd_test(enable = "avx512bw,avx512vl" )] |
15990 | unsafe fn test_mm_maskz_permutex2var_epi16() { |
15991 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
15992 | let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); |
15993 | let b = _mm_set1_epi16(100); |
15994 | let r = _mm_maskz_permutex2var_epi16(0, a, idx, b); |
15995 | assert_eq_m128i(r, _mm_setzero_si128()); |
15996 | let r = _mm_maskz_permutex2var_epi16(0b11111111, a, idx, b); |
15997 | let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100); |
15998 | assert_eq_m128i(r, e); |
15999 | } |
16000 | |
16001 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16002 | unsafe fn test_mm_mask2_permutex2var_epi16() { |
16003 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
16004 | let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); |
16005 | let b = _mm_set1_epi16(100); |
16006 | let r = _mm_mask2_permutex2var_epi16(a, idx, 0, b); |
16007 | assert_eq_m128i(r, idx); |
16008 | let r = _mm_mask2_permutex2var_epi16(a, idx, 0b11111111, b); |
16009 | let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100); |
16010 | assert_eq_m128i(r, e); |
16011 | } |
16012 | |
16013 | #[simd_test(enable = "avx512bw" )] |
16014 | unsafe fn test_mm512_permutexvar_epi16() { |
16015 | let idx = _mm512_set1_epi16(1); |
16016 | #[rustfmt::skip] |
16017 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
16018 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
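// idx = 1 selects lane 1 of `a` for every destination lane; with set_epi16 listing the highest lane first, lane 1 holds 30.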
16019 | let r = _mm512_permutexvar_epi16(idx, a); |
16020 | let e = _mm512_set1_epi16(30); |
16021 | assert_eq_m512i(r, e); |
16022 | } |
16023 | |
16024 | #[simd_test(enable = "avx512bw" )] |
16025 | unsafe fn test_mm512_mask_permutexvar_epi16() { |
16026 | let idx = _mm512_set1_epi16(1); |
16027 | #[rustfmt::skip] |
16028 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
16029 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
16030 | let r = _mm512_mask_permutexvar_epi16(a, 0, idx, a); |
16031 | assert_eq_m512i(r, a); |
16032 | let r = _mm512_mask_permutexvar_epi16(a, 0b11111111_11111111_11111111_11111111, idx, a); |
16033 | let e = _mm512_set1_epi16(30); |
16034 | assert_eq_m512i(r, e); |
16035 | } |
16036 | |
16037 | #[simd_test(enable = "avx512bw" )] |
16038 | unsafe fn test_mm512_maskz_permutexvar_epi16() { |
16039 | let idx = _mm512_set1_epi16(1); |
16040 | #[rustfmt::skip] |
16041 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
16042 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
16043 | let r = _mm512_maskz_permutexvar_epi16(0, idx, a); |
16044 | assert_eq_m512i(r, _mm512_setzero_si512()); |
16045 | let r = _mm512_maskz_permutexvar_epi16(0b11111111_11111111_11111111_11111111, idx, a); |
16046 | let e = _mm512_set1_epi16(30); |
16047 | assert_eq_m512i(r, e); |
16048 | } |
16049 | |
16050 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16051 | unsafe fn test_mm256_permutexvar_epi16() { |
16052 | let idx = _mm256_set1_epi16(1); |
16053 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
16054 | let r = _mm256_permutexvar_epi16(idx, a); |
16055 | let e = _mm256_set1_epi16(14); |
16056 | assert_eq_m256i(r, e); |
16057 | } |
16058 | |
16059 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16060 | unsafe fn test_mm256_mask_permutexvar_epi16() { |
16061 | let idx = _mm256_set1_epi16(1); |
16062 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
16063 | let r = _mm256_mask_permutexvar_epi16(a, 0, idx, a); |
16064 | assert_eq_m256i(r, a); |
16065 | let r = _mm256_mask_permutexvar_epi16(a, 0b11111111_11111111, idx, a); |
16066 | let e = _mm256_set1_epi16(14); |
16067 | assert_eq_m256i(r, e); |
16068 | } |
16069 | |
16070 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16071 | unsafe fn test_mm256_maskz_permutexvar_epi16() { |
16072 | let idx = _mm256_set1_epi16(1); |
16073 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
16074 | let r = _mm256_maskz_permutexvar_epi16(0, idx, a); |
16075 | assert_eq_m256i(r, _mm256_setzero_si256()); |
16076 | let r = _mm256_maskz_permutexvar_epi16(0b11111111_11111111, idx, a); |
16077 | let e = _mm256_set1_epi16(14); |
16078 | assert_eq_m256i(r, e); |
16079 | } |
16080 | |
16081 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16082 | unsafe fn test_mm_permutexvar_epi16() { |
16083 | let idx = _mm_set1_epi16(1); |
16084 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
16085 | let r = _mm_permutexvar_epi16(idx, a); |
16086 | let e = _mm_set1_epi16(6); |
16087 | assert_eq_m128i(r, e); |
16088 | } |
16089 | |
16090 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16091 | unsafe fn test_mm_mask_permutexvar_epi16() { |
16092 | let idx = _mm_set1_epi16(1); |
16093 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
16094 | let r = _mm_mask_permutexvar_epi16(a, 0, idx, a); |
16095 | assert_eq_m128i(r, a); |
16096 | let r = _mm_mask_permutexvar_epi16(a, 0b11111111, idx, a); |
16097 | let e = _mm_set1_epi16(6); |
16098 | assert_eq_m128i(r, e); |
16099 | } |
16100 | |
16101 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16102 | unsafe fn test_mm_maskz_permutexvar_epi16() { |
16103 | let idx = _mm_set1_epi16(1); |
16104 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
16105 | let r = _mm_maskz_permutexvar_epi16(0, idx, a); |
16106 | assert_eq_m128i(r, _mm_setzero_si128()); |
16107 | let r = _mm_maskz_permutexvar_epi16(0b11111111, idx, a); |
16108 | let e = _mm_set1_epi16(6); |
16109 | assert_eq_m128i(r, e); |
16110 | } |
16111 | |
16112 | #[simd_test(enable = "avx512bw" )] |
16113 | unsafe fn test_mm512_mask_blend_epi16() { |
16114 | let a = _mm512_set1_epi16(1); |
16115 | let b = _mm512_set1_epi16(2); |
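// A set mask bit selects the lane from `b`, a clear bit selects it from `a`.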
16116 | let r = _mm512_mask_blend_epi16(0b11111111_00000000_11111111_00000000, a, b); |
16117 | #[rustfmt::skip] |
16118 | let e = _mm512_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, |
16119 | 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1); |
16120 | assert_eq_m512i(r, e); |
16121 | } |
16122 | |
16123 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16124 | unsafe fn test_mm256_mask_blend_epi16() { |
16125 | let a = _mm256_set1_epi16(1); |
16126 | let b = _mm256_set1_epi16(2); |
16127 | let r = _mm256_mask_blend_epi16(0b11111111_00000000, a, b); |
16128 | let e = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1); |
16129 | assert_eq_m256i(r, e); |
16130 | } |
16131 | |
16132 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16133 | unsafe fn test_mm_mask_blend_epi16() { |
16134 | let a = _mm_set1_epi16(1); |
16135 | let b = _mm_set1_epi16(2); |
16136 | let r = _mm_mask_blend_epi16(0b11110000, a, b); |
16137 | let e = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1); |
16138 | assert_eq_m128i(r, e); |
16139 | } |
16140 | |
16141 | #[simd_test(enable = "avx512bw" )] |
16142 | unsafe fn test_mm512_mask_blend_epi8() { |
16143 | let a = _mm512_set1_epi8(1); |
16144 | let b = _mm512_set1_epi8(2); |
16145 | let r = _mm512_mask_blend_epi8( |
16146 | 0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000, |
16147 | a, |
16148 | b, |
16149 | ); |
16150 | #[rustfmt::skip] |
16151 | let e = _mm512_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, |
16152 | 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, |
16153 | 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, |
16154 | 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1); |
16155 | assert_eq_m512i(r, e); |
16156 | } |
16157 | |
16158 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16159 | unsafe fn test_mm256_mask_blend_epi8() { |
16160 | let a = _mm256_set1_epi8(1); |
16161 | let b = _mm256_set1_epi8(2); |
16162 | let r = _mm256_mask_blend_epi8(0b11111111_00000000_11111111_00000000, a, b); |
16163 | #[rustfmt::skip] |
16164 | let e = _mm256_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, |
16165 | 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1); |
16166 | assert_eq_m256i(r, e); |
16167 | } |
16168 | |
16169 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16170 | unsafe fn test_mm_mask_blend_epi8() { |
16171 | let a = _mm_set1_epi8(1); |
16172 | let b = _mm_set1_epi8(2); |
16173 | let r = _mm_mask_blend_epi8(0b11111111_00000000, a, b); |
16174 | let e = _mm_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1); |
16175 | assert_eq_m128i(r, e); |
16176 | } |
16177 | |
16178 | #[simd_test(enable = "avx512bw" )] |
16179 | unsafe fn test_mm512_broadcastw_epi16() { |
16180 | let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24); |
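// The lowest word of `a` is broadcast; _mm_set_epi16 lists the highest lane first, so that word is 24.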
16181 | let r = _mm512_broadcastw_epi16(a); |
16182 | let e = _mm512_set1_epi16(24); |
16183 | assert_eq_m512i(r, e); |
16184 | } |
16185 | |
16186 | #[simd_test(enable = "avx512bw" )] |
16187 | unsafe fn test_mm512_mask_broadcastw_epi16() { |
16188 | let src = _mm512_set1_epi16(1); |
16189 | let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24); |
16190 | let r = _mm512_mask_broadcastw_epi16(src, 0, a); |
16191 | assert_eq_m512i(r, src); |
16192 | let r = _mm512_mask_broadcastw_epi16(src, 0b11111111_11111111_11111111_11111111, a); |
16193 | let e = _mm512_set1_epi16(24); |
16194 | assert_eq_m512i(r, e); |
16195 | } |
16196 | |
16197 | #[simd_test(enable = "avx512bw" )] |
16198 | unsafe fn test_mm512_maskz_broadcastw_epi16() { |
16199 | let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24); |
16200 | let r = _mm512_maskz_broadcastw_epi16(0, a); |
16201 | assert_eq_m512i(r, _mm512_setzero_si512()); |
16202 | let r = _mm512_maskz_broadcastw_epi16(0b11111111_11111111_11111111_11111111, a); |
16203 | let e = _mm512_set1_epi16(24); |
16204 | assert_eq_m512i(r, e); |
16205 | } |
16206 | |
16207 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16208 | unsafe fn test_mm256_mask_broadcastw_epi16() { |
16209 | let src = _mm256_set1_epi16(1); |
16210 | let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24); |
16211 | let r = _mm256_mask_broadcastw_epi16(src, 0, a); |
16212 | assert_eq_m256i(r, src); |
16213 | let r = _mm256_mask_broadcastw_epi16(src, 0b11111111_11111111, a); |
16214 | let e = _mm256_set1_epi16(24); |
16215 | assert_eq_m256i(r, e); |
16216 | } |
16217 | |
16218 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16219 | unsafe fn test_mm256_maskz_broadcastw_epi16() { |
16220 | let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24); |
16221 | let r = _mm256_maskz_broadcastw_epi16(0, a); |
16222 | assert_eq_m256i(r, _mm256_setzero_si256()); |
16223 | let r = _mm256_maskz_broadcastw_epi16(0b11111111_11111111, a); |
16224 | let e = _mm256_set1_epi16(24); |
16225 | assert_eq_m256i(r, e); |
16226 | } |
16227 | |
16228 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16229 | unsafe fn test_mm_mask_broadcastw_epi16() { |
16230 | let src = _mm_set1_epi16(1); |
16231 | let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24); |
16232 | let r = _mm_mask_broadcastw_epi16(src, 0, a); |
16233 | assert_eq_m128i(r, src); |
16234 | let r = _mm_mask_broadcastw_epi16(src, 0b11111111, a); |
16235 | let e = _mm_set1_epi16(24); |
16236 | assert_eq_m128i(r, e); |
16237 | } |
16238 | |
16239 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16240 | unsafe fn test_mm_maskz_broadcastw_epi16() { |
16241 | let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24); |
16242 | let r = _mm_maskz_broadcastw_epi16(0, a); |
16243 | assert_eq_m128i(r, _mm_setzero_si128()); |
16244 | let r = _mm_maskz_broadcastw_epi16(0b11111111, a); |
16245 | let e = _mm_set1_epi16(24); |
16246 | assert_eq_m128i(r, e); |
16247 | } |
16248 | |
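// `vpbroadcastb` is the byte-granularity counterpart: element 0 of `a` (the last
// argument of `_mm_set_epi8`, here 32) is replicated into every byte.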
16249 | #[simd_test(enable = "avx512bw" )] |
16250 | unsafe fn test_mm512_broadcastb_epi8() { |
16251 | let a = _mm_set_epi8( |
16252 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, |
16253 | ); |
16254 | let r = _mm512_broadcastb_epi8(a); |
16255 | let e = _mm512_set1_epi8(32); |
16256 | assert_eq_m512i(r, e); |
16257 | } |
16258 | |
16259 | #[simd_test(enable = "avx512bw" )] |
16260 | unsafe fn test_mm512_mask_broadcastb_epi8() { |
16261 | let src = _mm512_set1_epi8(1); |
16262 | let a = _mm_set_epi8( |
16263 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, |
16264 | ); |
16265 | let r = _mm512_mask_broadcastb_epi8(src, 0, a); |
16266 | assert_eq_m512i(r, src); |
16267 | let r = _mm512_mask_broadcastb_epi8( |
16268 | src, |
16269 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
16270 | a, |
16271 | ); |
16272 | let e = _mm512_set1_epi8(32); |
16273 | assert_eq_m512i(r, e); |
16274 | } |
16275 | |
16276 | #[simd_test(enable = "avx512bw" )] |
16277 | unsafe fn test_mm512_maskz_broadcastb_epi8() { |
16278 | let a = _mm_set_epi8( |
16279 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, |
16280 | ); |
16281 | let r = _mm512_maskz_broadcastb_epi8(0, a); |
16282 | assert_eq_m512i(r, _mm512_setzero_si512()); |
16283 | let r = _mm512_maskz_broadcastb_epi8( |
16284 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
16285 | a, |
16286 | ); |
16287 | let e = _mm512_set1_epi8(32); |
16288 | assert_eq_m512i(r, e); |
16289 | } |
16290 | |
16291 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16292 | unsafe fn test_mm256_mask_broadcastb_epi8() { |
16293 | let src = _mm256_set1_epi8(1); |
16294 | let a = _mm_set_epi8( |
16295 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, |
16296 | ); |
16297 | let r = _mm256_mask_broadcastb_epi8(src, 0, a); |
16298 | assert_eq_m256i(r, src); |
16299 | let r = _mm256_mask_broadcastb_epi8(src, 0b11111111_11111111_11111111_11111111, a); |
16300 | let e = _mm256_set1_epi8(32); |
16301 | assert_eq_m256i(r, e); |
16302 | } |
16303 | |
16304 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16305 | unsafe fn test_mm256_maskz_broadcastb_epi8() { |
16306 | let a = _mm_set_epi8( |
16307 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, |
16308 | ); |
16309 | let r = _mm256_maskz_broadcastb_epi8(0, a); |
16310 | assert_eq_m256i(r, _mm256_setzero_si256()); |
16311 | let r = _mm256_maskz_broadcastb_epi8(0b11111111_11111111_11111111_11111111, a); |
16312 | let e = _mm256_set1_epi8(32); |
16313 | assert_eq_m256i(r, e); |
16314 | } |
16315 | |
16316 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16317 | unsafe fn test_mm_mask_broadcastb_epi8() { |
16318 | let src = _mm_set1_epi8(1); |
16319 | let a = _mm_set_epi8( |
16320 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, |
16321 | ); |
16322 | let r = _mm_mask_broadcastb_epi8(src, 0, a); |
16323 | assert_eq_m128i(r, src); |
16324 | let r = _mm_mask_broadcastb_epi8(src, 0b11111111_11111111, a); |
16325 | let e = _mm_set1_epi8(32); |
16326 | assert_eq_m128i(r, e); |
16327 | } |
16328 | |
16329 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16330 | unsafe fn test_mm_maskz_broadcastb_epi8() { |
16331 | let a = _mm_set_epi8( |
16332 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, |
16333 | ); |
16334 | let r = _mm_maskz_broadcastb_epi8(0, a); |
16335 | assert_eq_m128i(r, _mm_setzero_si128()); |
16336 | let r = _mm_maskz_broadcastb_epi8(0b11111111_11111111, a); |
16337 | let e = _mm_set1_epi8(32); |
16338 | assert_eq_m128i(r, e); |
16339 | } |
16340 | |
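// `vpunpckhwd` interleaves the upper four 16-bit elements of `a` and `b` within each
// 128-bit lane and never crosses lane boundaries, which is why the expected vectors
// below pair values such as 33 with 1 and 49 with 17 rather than forming a single
// full-width interleave.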
16341 | #[simd_test(enable = "avx512bw" )] |
16342 | unsafe fn test_mm512_unpackhi_epi16() { |
16343 | #[rustfmt::skip] |
16344 | let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
16345 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); |
16346 | #[rustfmt::skip] |
16347 | let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, |
16348 | 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); |
16349 | let r = _mm512_unpackhi_epi16(a, b); |
16350 | #[rustfmt::skip] |
16351 | let e = _mm512_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12, |
16352 | 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28); |
16353 | assert_eq_m512i(r, e); |
16354 | } |
16355 | |
16356 | #[simd_test(enable = "avx512bw" )] |
16357 | unsafe fn test_mm512_mask_unpackhi_epi16() { |
16358 | #[rustfmt::skip] |
16359 | let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
16360 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); |
16361 | #[rustfmt::skip] |
16362 | let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, |
16363 | 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); |
16364 | let r = _mm512_mask_unpackhi_epi16(a, 0, a, b); |
16365 | assert_eq_m512i(r, a); |
16366 | let r = _mm512_mask_unpackhi_epi16(a, 0b11111111_11111111_11111111_11111111, a, b); |
16367 | #[rustfmt::skip] |
16368 | let e = _mm512_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12, |
16369 | 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28); |
16370 | assert_eq_m512i(r, e); |
16371 | } |
16372 | |
16373 | #[simd_test(enable = "avx512bw" )] |
16374 | unsafe fn test_mm512_maskz_unpackhi_epi16() { |
16375 | #[rustfmt::skip] |
16376 | let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
16377 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); |
16378 | #[rustfmt::skip] |
16379 | let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, |
16380 | 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); |
16381 | let r = _mm512_maskz_unpackhi_epi16(0, a, b); |
16382 | assert_eq_m512i(r, _mm512_setzero_si512()); |
16383 | let r = _mm512_maskz_unpackhi_epi16(0b11111111_11111111_11111111_11111111, a, b); |
16384 | #[rustfmt::skip] |
16385 | let e = _mm512_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12, |
16386 | 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28); |
16387 | assert_eq_m512i(r, e); |
16388 | } |
16389 | |
16390 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16391 | unsafe fn test_mm256_mask_unpackhi_epi16() { |
16392 | let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
16393 | let b = _mm256_set_epi16( |
16394 | 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, |
16395 | ); |
16396 | let r = _mm256_mask_unpackhi_epi16(a, 0, a, b); |
16397 | assert_eq_m256i(r, a); |
16398 | let r = _mm256_mask_unpackhi_epi16(a, 0b11111111_11111111, a, b); |
16399 | let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12); |
16400 | assert_eq_m256i(r, e); |
16401 | } |
16402 | |
16403 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16404 | unsafe fn test_mm256_maskz_unpackhi_epi16() { |
16405 | let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
16406 | let b = _mm256_set_epi16( |
16407 | 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, |
16408 | ); |
16409 | let r = _mm256_maskz_unpackhi_epi16(0, a, b); |
16410 | assert_eq_m256i(r, _mm256_setzero_si256()); |
16411 | let r = _mm256_maskz_unpackhi_epi16(0b11111111_11111111, a, b); |
16412 | let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12); |
16413 | assert_eq_m256i(r, e); |
16414 | } |
16415 | |
16416 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16417 | unsafe fn test_mm_mask_unpackhi_epi16() { |
16418 | let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
16419 | let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40); |
16420 | let r = _mm_mask_unpackhi_epi16(a, 0, a, b); |
16421 | assert_eq_m128i(r, a); |
16422 | let r = _mm_mask_unpackhi_epi16(a, 0b11111111, a, b); |
16423 | let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4); |
16424 | assert_eq_m128i(r, e); |
16425 | } |
16426 | |
16427 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16428 | unsafe fn test_mm_maskz_unpackhi_epi16() { |
16429 | let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
16430 | let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40); |
16431 | let r = _mm_maskz_unpackhi_epi16(0, a, b); |
16432 | assert_eq_m128i(r, _mm_setzero_si128()); |
16433 | let r = _mm_maskz_unpackhi_epi16(0b11111111, a, b); |
16434 | let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4); |
16435 | assert_eq_m128i(r, e); |
16436 | } |
16437 | |
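// `vpunpckhbw` does the same at byte granularity: the upper eight bytes of `a` and
// `b` are interleaved independently within each 128-bit lane.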
16438 | #[simd_test(enable = "avx512bw" )] |
16439 | unsafe fn test_mm512_unpackhi_epi8() { |
16440 | #[rustfmt::skip] |
16441 | let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
16442 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, |
16443 | 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, |
16444 | 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); |
16445 | #[rustfmt::skip] |
16446 | let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, |
16447 | 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, |
16448 | 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, |
16449 | 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0); |
16450 | let r = _mm512_unpackhi_epi8(a, b); |
16451 | #[rustfmt::skip] |
16452 | let e = _mm512_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8, |
16453 | 81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24, |
16454 | 97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40, |
16455 | 113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56); |
16456 | assert_eq_m512i(r, e); |
16457 | } |
16458 | |
16459 | #[simd_test(enable = "avx512bw" )] |
16460 | unsafe fn test_mm512_mask_unpackhi_epi8() { |
16461 | #[rustfmt::skip] |
16462 | let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
16463 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, |
16464 | 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, |
16465 | 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); |
16466 | #[rustfmt::skip] |
16467 | let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, |
16468 | 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, |
16469 | 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, |
16470 | 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0); |
16471 | let r = _mm512_mask_unpackhi_epi8(a, 0, a, b); |
16472 | assert_eq_m512i(r, a); |
16473 | let r = _mm512_mask_unpackhi_epi8( |
16474 | a, |
16475 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
16476 | a, |
16477 | b, |
16478 | ); |
16479 | #[rustfmt::skip] |
16480 | let e = _mm512_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8, |
16481 | 81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24, |
16482 | 97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40, |
16483 | 113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56); |
16484 | assert_eq_m512i(r, e); |
16485 | } |
16486 | |
16487 | #[simd_test(enable = "avx512bw" )] |
16488 | unsafe fn test_mm512_maskz_unpackhi_epi8() { |
16489 | #[rustfmt::skip] |
16490 | let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
16491 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, |
16492 | 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, |
16493 | 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); |
16494 | #[rustfmt::skip] |
16495 | let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, |
16496 | 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, |
16497 | 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, |
16498 | 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0); |
16499 | let r = _mm512_maskz_unpackhi_epi8(0, a, b); |
16500 | assert_eq_m512i(r, _mm512_setzero_si512()); |
16501 | let r = _mm512_maskz_unpackhi_epi8( |
16502 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
16503 | a, |
16504 | b, |
16505 | ); |
16506 | #[rustfmt::skip] |
16507 | let e = _mm512_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8, |
16508 | 81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24, |
16509 | 97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40, |
16510 | 113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56); |
16511 | assert_eq_m512i(r, e); |
16512 | } |
16513 | |
16514 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16515 | unsafe fn test_mm256_mask_unpackhi_epi8() { |
16516 | #[rustfmt::skip] |
16517 | let a = _mm256_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
16518 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); |
16519 | #[rustfmt::skip] |
16520 | let b = _mm256_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, |
16521 | 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96); |
16522 | let r = _mm256_mask_unpackhi_epi8(a, 0, a, b); |
16523 | assert_eq_m256i(r, a); |
16524 | let r = _mm256_mask_unpackhi_epi8(a, 0b11111111_11111111_11111111_11111111, a, b); |
16525 | #[rustfmt::skip] |
16526 | let e = _mm256_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8, |
16527 | 81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24); |
16528 | assert_eq_m256i(r, e); |
16529 | } |
16530 | |
16531 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16532 | unsafe fn test_mm256_maskz_unpackhi_epi8() { |
16533 | #[rustfmt::skip] |
16534 | let a = _mm256_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
16535 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); |
16536 | #[rustfmt::skip] |
16537 | let b = _mm256_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, |
16538 | 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96); |
16539 | let r = _mm256_maskz_unpackhi_epi8(0, a, b); |
16540 | assert_eq_m256i(r, _mm256_setzero_si256()); |
16541 | let r = _mm256_maskz_unpackhi_epi8(0b11111111_11111111_11111111_11111111, a, b); |
16542 | #[rustfmt::skip] |
16543 | let e = _mm256_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8, |
16544 | 81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24); |
16545 | assert_eq_m256i(r, e); |
16546 | } |
16547 | |
16548 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16549 | unsafe fn test_mm_mask_unpackhi_epi8() { |
16550 | let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
16551 | let b = _mm_set_epi8( |
16552 | 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, |
16553 | ); |
16554 | let r = _mm_mask_unpackhi_epi8(a, 0, a, b); |
16555 | assert_eq_m128i(r, a); |
16556 | let r = _mm_mask_unpackhi_epi8(a, 0b11111111_11111111, a, b); |
16557 | let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8); |
16558 | assert_eq_m128i(r, e); |
16559 | } |
16560 | |
16561 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16562 | unsafe fn test_mm_maskz_unpackhi_epi8() { |
16563 | let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
16564 | let b = _mm_set_epi8( |
16565 | 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, |
16566 | ); |
16567 | let r = _mm_maskz_unpackhi_epi8(0, a, b); |
16568 | assert_eq_m128i(r, _mm_setzero_si128()); |
16569 | let r = _mm_maskz_unpackhi_epi8(0b11111111_11111111, a, b); |
16570 | let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8); |
16571 | assert_eq_m128i(r, e); |
16572 | } |
16573 | |
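// `vpunpcklwd` mirrors `vpunpckhwd` but interleaves the *lower* four 16-bit elements
// of each 128-bit lane of `a` and `b`.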
16574 | #[simd_test(enable = "avx512bw" )] |
16575 | unsafe fn test_mm512_unpacklo_epi16() { |
16576 | #[rustfmt::skip] |
16577 | let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
16578 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); |
16579 | #[rustfmt::skip] |
16580 | let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, |
16581 | 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); |
16582 | let r = _mm512_unpacklo_epi16(a, b); |
16583 | #[rustfmt::skip] |
16584 | let e = _mm512_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16, |
16585 | 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32); |
16586 | assert_eq_m512i(r, e); |
16587 | } |
16588 | |
16589 | #[simd_test(enable = "avx512bw" )] |
16590 | unsafe fn test_mm512_mask_unpacklo_epi16() { |
16591 | #[rustfmt::skip] |
16592 | let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
16593 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); |
16594 | #[rustfmt::skip] |
16595 | let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, |
16596 | 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); |
16597 | let r = _mm512_mask_unpacklo_epi16(a, 0, a, b); |
16598 | assert_eq_m512i(r, a); |
16599 | let r = _mm512_mask_unpacklo_epi16(a, 0b11111111_11111111_11111111_11111111, a, b); |
16600 | #[rustfmt::skip] |
16601 | let e = _mm512_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16, |
16602 | 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32); |
16603 | assert_eq_m512i(r, e); |
16604 | } |
16605 | |
16606 | #[simd_test(enable = "avx512bw" )] |
16607 | unsafe fn test_mm512_maskz_unpacklo_epi16() { |
16608 | #[rustfmt::skip] |
16609 | let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
16610 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); |
16611 | #[rustfmt::skip] |
16612 | let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, |
16613 | 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); |
16614 | let r = _mm512_maskz_unpacklo_epi16(0, a, b); |
16615 | assert_eq_m512i(r, _mm512_setzero_si512()); |
16616 | let r = _mm512_maskz_unpacklo_epi16(0b11111111_11111111_11111111_11111111, a, b); |
16617 | #[rustfmt::skip] |
16618 | let e = _mm512_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16, |
16619 | 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32); |
16620 | assert_eq_m512i(r, e); |
16621 | } |
16622 | |
16623 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16624 | unsafe fn test_mm256_mask_unpacklo_epi16() { |
16625 | let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
16626 | let b = _mm256_set_epi16( |
16627 | 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, |
16628 | ); |
16629 | let r = _mm256_mask_unpacklo_epi16(a, 0, a, b); |
16630 | assert_eq_m256i(r, a); |
16631 | let r = _mm256_mask_unpacklo_epi16(a, 0b11111111_11111111, a, b); |
16632 | let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16); |
16633 | assert_eq_m256i(r, e); |
16634 | } |
16635 | |
16636 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16637 | unsafe fn test_mm256_maskz_unpacklo_epi16() { |
16638 | let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
16639 | let b = _mm256_set_epi16( |
16640 | 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, |
16641 | ); |
16642 | let r = _mm256_maskz_unpacklo_epi16(0, a, b); |
16643 | assert_eq_m256i(r, _mm256_setzero_si256()); |
16644 | let r = _mm256_maskz_unpacklo_epi16(0b11111111_11111111, a, b); |
16645 | let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16); |
16646 | assert_eq_m256i(r, e); |
16647 | } |
16648 | |
16649 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16650 | unsafe fn test_mm_mask_unpacklo_epi16() { |
16651 | let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
16652 | let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40); |
16653 | let r = _mm_mask_unpacklo_epi16(a, 0, a, b); |
16654 | assert_eq_m128i(r, a); |
16655 | let r = _mm_mask_unpacklo_epi16(a, 0b11111111, a, b); |
16656 | let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8); |
16657 | assert_eq_m128i(r, e); |
16658 | } |
16659 | |
16660 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16661 | unsafe fn test_mm_maskz_unpacklo_epi16() { |
16662 | let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
16663 | let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40); |
16664 | let r = _mm_maskz_unpacklo_epi16(0, a, b); |
16665 | assert_eq_m128i(r, _mm_setzero_si128()); |
16666 | let r = _mm_maskz_unpacklo_epi16(0b11111111, a, b); |
16667 | let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8); |
16668 | assert_eq_m128i(r, e); |
16669 | } |
16670 | |
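// `vpunpcklbw` interleaves the lower eight bytes of each 128-bit lane of `a` and `b`.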
16671 | #[simd_test(enable = "avx512bw" )] |
16672 | unsafe fn test_mm512_unpacklo_epi8() { |
16673 | #[rustfmt::skip] |
16674 | let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
16675 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, |
16676 | 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, |
16677 | 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); |
16678 | #[rustfmt::skip] |
16679 | let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, |
16680 | 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, |
16681 | 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, |
16682 | 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0); |
16683 | let r = _mm512_unpacklo_epi8(a, b); |
16684 | #[rustfmt::skip] |
16685 | let e = _mm512_set_epi8(73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16, |
16686 | 89, 25, 90, 26, 91, 27, 92, 28, 93, 29, 94, 30, 95, 31, 96, 32, |
16687 | 105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48, |
16688 | 121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0, 64); |
16689 | assert_eq_m512i(r, e); |
16690 | } |
16691 | |
16692 | #[simd_test(enable = "avx512bw" )] |
16693 | unsafe fn test_mm512_mask_unpacklo_epi8() { |
16694 | #[rustfmt::skip] |
16695 | let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
16696 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, |
16697 | 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, |
16698 | 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); |
16699 | #[rustfmt::skip] |
16700 | let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, |
16701 | 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, |
16702 | 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, |
16703 | 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0); |
16704 | let r = _mm512_mask_unpacklo_epi8(a, 0, a, b); |
16705 | assert_eq_m512i(r, a); |
16706 | let r = _mm512_mask_unpacklo_epi8( |
16707 | a, |
16708 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
16709 | a, |
16710 | b, |
16711 | ); |
16712 | #[rustfmt::skip] |
16713 | let e = _mm512_set_epi8(73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16, |
16714 | 89, 25, 90, 26, 91, 27, 92, 28, 93, 29, 94, 30, 95, 31, 96, 32, |
16715 | 105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48, |
16716 | 121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0, 64); |
16717 | assert_eq_m512i(r, e); |
16718 | } |
16719 | |
16720 | #[simd_test(enable = "avx512bw" )] |
16721 | unsafe fn test_mm512_maskz_unpacklo_epi8() { |
16722 | #[rustfmt::skip] |
16723 | let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
16724 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, |
16725 | 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, |
16726 | 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); |
16727 | #[rustfmt::skip] |
16728 | let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, |
16729 | 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, |
16730 | 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, |
16731 | 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0); |
16732 | let r = _mm512_maskz_unpacklo_epi8(0, a, b); |
16733 | assert_eq_m512i(r, _mm512_setzero_si512()); |
16734 | let r = _mm512_maskz_unpacklo_epi8( |
16735 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
16736 | a, |
16737 | b, |
16738 | ); |
16739 | #[rustfmt::skip] |
16740 | let e = _mm512_set_epi8(73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16, |
16741 | 89, 25, 90, 26, 91, 27, 92, 28, 93, 29, 94, 30, 95, 31, 96, 32, |
16742 | 105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48, |
16743 | 121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0, 64); |
16744 | assert_eq_m512i(r, e); |
16745 | } |
16746 | |
16747 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16748 | unsafe fn test_mm256_mask_unpacklo_epi8() { |
16749 | #[rustfmt::skip] |
16750 | let a = _mm256_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
16751 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); |
16752 | #[rustfmt::skip] |
16753 | let b = _mm256_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, |
16754 | 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96); |
16755 | let r = _mm256_mask_unpacklo_epi8(a, 0, a, b); |
16756 | assert_eq_m256i(r, a); |
16757 | let r = _mm256_mask_unpacklo_epi8(a, 0b11111111_11111111_11111111_11111111, a, b); |
16758 | #[rustfmt::skip] |
16759 | let e = _mm256_set_epi8(73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16, |
16760 | 89, 25, 90, 26, 91, 27, 92, 28, 93, 29, 94, 30, 95, 31, 96, 32); |
16761 | assert_eq_m256i(r, e); |
16762 | } |
16763 | |
16764 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16765 | unsafe fn test_mm256_maskz_unpacklo_epi8() { |
16766 | #[rustfmt::skip] |
16767 | let a = _mm256_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
16768 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); |
16769 | #[rustfmt::skip] |
16770 | let b = _mm256_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, |
16771 | 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96); |
16772 | let r = _mm256_maskz_unpacklo_epi8(0, a, b); |
16773 | assert_eq_m256i(r, _mm256_setzero_si256()); |
16774 | let r = _mm256_maskz_unpacklo_epi8(0b11111111_11111111_11111111_11111111, a, b); |
16775 | #[rustfmt::skip] |
16776 | let e = _mm256_set_epi8(73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16, |
16777 | 89, 25, 90, 26, 91, 27, 92, 28, 93, 29, 94, 30, 95, 31, 96, 32); |
16778 | assert_eq_m256i(r, e); |
16779 | } |
16780 | |
16781 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16782 | unsafe fn test_mm_mask_unpacklo_epi8() { |
16783 | let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
16784 | let b = _mm_set_epi8( |
16785 | 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, |
16786 | ); |
16787 | let r = _mm_mask_unpacklo_epi8(a, 0, a, b); |
16788 | assert_eq_m128i(r, a); |
16789 | let r = _mm_mask_unpacklo_epi8(a, 0b11111111_11111111, a, b); |
16790 | let e = _mm_set_epi8( |
16791 | 73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16, |
16792 | ); |
16793 | assert_eq_m128i(r, e); |
16794 | } |
16795 | |
16796 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16797 | unsafe fn test_mm_maskz_unpacklo_epi8() { |
16798 | let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
16799 | let b = _mm_set_epi8( |
16800 | 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, |
16801 | ); |
16802 | let r = _mm_maskz_unpacklo_epi8(0, a, b); |
16803 | assert_eq_m128i(r, _mm_setzero_si128()); |
16804 | let r = _mm_maskz_unpacklo_epi8(0b11111111_11111111, a, b); |
16805 | let e = _mm_set_epi8( |
16806 | 73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16, |
16807 | ); |
16808 | assert_eq_m128i(r, e); |
16809 | } |
16810 | |
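// `_mm*_mask_mov_epi16`/`_epi8` are pure masked copies: element i of the result is
// a[i] when mask bit i is set, otherwise src[i] (or zero for the maskz variants).
// A zero mask therefore returns `src` (or all zeros), and an all-ones mask returns `a`.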
16811 | #[simd_test(enable = "avx512bw" )] |
16812 | unsafe fn test_mm512_mask_mov_epi16() { |
16813 | let src = _mm512_set1_epi16(1); |
16814 | let a = _mm512_set1_epi16(2); |
16815 | let r = _mm512_mask_mov_epi16(src, 0, a); |
16816 | assert_eq_m512i(r, src); |
16817 | let r = _mm512_mask_mov_epi16(src, 0b11111111_11111111_11111111_11111111, a); |
16818 | assert_eq_m512i(r, a); |
16819 | } |
16820 | |
16821 | #[simd_test(enable = "avx512bw" )] |
16822 | unsafe fn test_mm512_maskz_mov_epi16() { |
16823 | let a = _mm512_set1_epi16(2); |
16824 | let r = _mm512_maskz_mov_epi16(0, a); |
16825 | assert_eq_m512i(r, _mm512_setzero_si512()); |
16826 | let r = _mm512_maskz_mov_epi16(0b11111111_11111111_11111111_11111111, a); |
16827 | assert_eq_m512i(r, a); |
16828 | } |
16829 | |
16830 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16831 | unsafe fn test_mm256_mask_mov_epi16() { |
16832 | let src = _mm256_set1_epi16(1); |
16833 | let a = _mm256_set1_epi16(2); |
16834 | let r = _mm256_mask_mov_epi16(src, 0, a); |
16835 | assert_eq_m256i(r, src); |
16836 | let r = _mm256_mask_mov_epi16(src, 0b11111111_11111111, a); |
16837 | assert_eq_m256i(r, a); |
16838 | } |
16839 | |
16840 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16841 | unsafe fn test_mm256_maskz_mov_epi16() { |
16842 | let a = _mm256_set1_epi16(2); |
16843 | let r = _mm256_maskz_mov_epi16(0, a); |
16844 | assert_eq_m256i(r, _mm256_setzero_si256()); |
16845 | let r = _mm256_maskz_mov_epi16(0b11111111_11111111, a); |
16846 | assert_eq_m256i(r, a); |
16847 | } |
16848 | |
16849 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16850 | unsafe fn test_mm_mask_mov_epi16() { |
16851 | let src = _mm_set1_epi16(1); |
16852 | let a = _mm_set1_epi16(2); |
16853 | let r = _mm_mask_mov_epi16(src, 0, a); |
16854 | assert_eq_m128i(r, src); |
16855 | let r = _mm_mask_mov_epi16(src, 0b11111111, a); |
16856 | assert_eq_m128i(r, a); |
16857 | } |
16858 | |
16859 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16860 | unsafe fn test_mm_maskz_mov_epi16() { |
16861 | let a = _mm_set1_epi16(2); |
16862 | let r = _mm_maskz_mov_epi16(0, a); |
16863 | assert_eq_m128i(r, _mm_setzero_si128()); |
16864 | let r = _mm_maskz_mov_epi16(0b11111111, a); |
16865 | assert_eq_m128i(r, a); |
16866 | } |
16867 | |
16868 | #[simd_test(enable = "avx512bw" )] |
16869 | unsafe fn test_mm512_mask_mov_epi8() { |
16870 | let src = _mm512_set1_epi8(1); |
16871 | let a = _mm512_set1_epi8(2); |
16872 | let r = _mm512_mask_mov_epi8(src, 0, a); |
16873 | assert_eq_m512i(r, src); |
16874 | let r = _mm512_mask_mov_epi8( |
16875 | src, |
16876 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
16877 | a, |
16878 | ); |
16879 | assert_eq_m512i(r, a); |
16880 | } |
16881 | |
16882 | #[simd_test(enable = "avx512bw" )] |
16883 | unsafe fn test_mm512_maskz_mov_epi8() { |
16884 | let a = _mm512_set1_epi8(2); |
16885 | let r = _mm512_maskz_mov_epi8(0, a); |
16886 | assert_eq_m512i(r, _mm512_setzero_si512()); |
16887 | let r = _mm512_maskz_mov_epi8( |
16888 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
16889 | a, |
16890 | ); |
16891 | assert_eq_m512i(r, a); |
16892 | } |
16893 | |
16894 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16895 | unsafe fn test_mm256_mask_mov_epi8() { |
16896 | let src = _mm256_set1_epi8(1); |
16897 | let a = _mm256_set1_epi8(2); |
16898 | let r = _mm256_mask_mov_epi8(src, 0, a); |
16899 | assert_eq_m256i(r, src); |
16900 | let r = _mm256_mask_mov_epi8(src, 0b11111111_11111111_11111111_11111111, a); |
16901 | assert_eq_m256i(r, a); |
16902 | } |
16903 | |
16904 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16905 | unsafe fn test_mm256_maskz_mov_epi8() { |
16906 | let a = _mm256_set1_epi8(2); |
16907 | let r = _mm256_maskz_mov_epi8(0, a); |
16908 | assert_eq_m256i(r, _mm256_setzero_si256()); |
16909 | let r = _mm256_maskz_mov_epi8(0b11111111_11111111_11111111_11111111, a); |
16910 | assert_eq_m256i(r, a); |
16911 | } |
16912 | |
16913 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16914 | unsafe fn test_mm_mask_mov_epi8() { |
16915 | let src = _mm_set1_epi8(1); |
16916 | let a = _mm_set1_epi8(2); |
16917 | let r = _mm_mask_mov_epi8(src, 0, a); |
16918 | assert_eq_m128i(r, src); |
16919 | let r = _mm_mask_mov_epi8(src, 0b11111111_11111111, a); |
16920 | assert_eq_m128i(r, a); |
16921 | } |
16922 | |
16923 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16924 | unsafe fn test_mm_maskz_mov_epi8() { |
16925 | let a = _mm_set1_epi8(2); |
16926 | let r = _mm_maskz_mov_epi8(0, a); |
16927 | assert_eq_m128i(r, _mm_setzero_si128()); |
16928 | let r = _mm_maskz_mov_epi8(0b11111111_11111111, a); |
16929 | assert_eq_m128i(r, a); |
16930 | } |
16931 | |
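// The masked set1 intrinsics broadcast the scalar `a` only into the elements whose
// mask bit is set; the remaining elements come from `src` (mask variants) or are
// zeroed (maskz variants).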
16932 | #[simd_test(enable = "avx512bw" )] |
16933 | unsafe fn test_mm512_mask_set1_epi16() { |
16934 | let src = _mm512_set1_epi16(2); |
16935 | let a: i16 = 11; |
16936 | let r = _mm512_mask_set1_epi16(src, 0, a); |
16937 | assert_eq_m512i(r, src); |
16938 | let r = _mm512_mask_set1_epi16(src, 0b11111111_11111111_11111111_11111111, a); |
16939 | let e = _mm512_set1_epi16(11); |
16940 | assert_eq_m512i(r, e); |
16941 | } |
16942 | |
16943 | #[simd_test(enable = "avx512bw" )] |
16944 | unsafe fn test_mm512_maskz_set1_epi16() { |
16945 | let a: i16 = 11; |
16946 | let r = _mm512_maskz_set1_epi16(0, a); |
16947 | assert_eq_m512i(r, _mm512_setzero_si512()); |
16948 | let r = _mm512_maskz_set1_epi16(0b11111111_11111111_11111111_11111111, a); |
16949 | let e = _mm512_set1_epi16(11); |
16950 | assert_eq_m512i(r, e); |
16951 | } |
16952 | |
16953 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16954 | unsafe fn test_mm256_mask_set1_epi16() { |
16955 | let src = _mm256_set1_epi16(2); |
16956 | let a: i16 = 11; |
16957 | let r = _mm256_mask_set1_epi16(src, 0, a); |
16958 | assert_eq_m256i(r, src); |
16959 | let r = _mm256_mask_set1_epi16(src, 0b11111111_11111111, a); |
16960 | let e = _mm256_set1_epi16(11); |
16961 | assert_eq_m256i(r, e); |
16962 | } |
16963 | |
16964 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16965 | unsafe fn test_mm256_maskz_set1_epi16() { |
16966 | let a: i16 = 11; |
16967 | let r = _mm256_maskz_set1_epi16(0, a); |
16968 | assert_eq_m256i(r, _mm256_setzero_si256()); |
16969 | let r = _mm256_maskz_set1_epi16(0b11111111_11111111, a); |
16970 | let e = _mm256_set1_epi16(11); |
16971 | assert_eq_m256i(r, e); |
16972 | } |
16973 | |
16974 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16975 | unsafe fn test_mm_mask_set1_epi16() { |
16976 | let src = _mm_set1_epi16(2); |
16977 | let a: i16 = 11; |
16978 | let r = _mm_mask_set1_epi16(src, 0, a); |
16979 | assert_eq_m128i(r, src); |
16980 | let r = _mm_mask_set1_epi16(src, 0b11111111, a); |
16981 | let e = _mm_set1_epi16(11); |
16982 | assert_eq_m128i(r, e); |
16983 | } |
16984 | |
16985 | #[simd_test(enable = "avx512bw,avx512vl" )] |
16986 | unsafe fn test_mm_maskz_set1_epi16() { |
16987 | let a: i16 = 11; |
16988 | let r = _mm_maskz_set1_epi16(0, a); |
16989 | assert_eq_m128i(r, _mm_setzero_si128()); |
16990 | let r = _mm_maskz_set1_epi16(0b11111111, a); |
16991 | let e = _mm_set1_epi16(11); |
16992 | assert_eq_m128i(r, e); |
16993 | } |
16994 | |
16995 | #[simd_test(enable = "avx512bw" )] |
16996 | unsafe fn test_mm512_mask_set1_epi8() { |
16997 | let src = _mm512_set1_epi8(2); |
16998 | let a: i8 = 11; |
16999 | let r = _mm512_mask_set1_epi8(src, 0, a); |
17000 | assert_eq_m512i(r, src); |
17001 | let r = _mm512_mask_set1_epi8( |
17002 | src, |
17003 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
17004 | a, |
17005 | ); |
17006 | let e = _mm512_set1_epi8(11); |
17007 | assert_eq_m512i(r, e); |
17008 | } |
17009 | |
17010 | #[simd_test(enable = "avx512bw" )] |
17011 | unsafe fn test_mm512_maskz_set1_epi8() { |
17012 | let a: i8 = 11; |
17013 | let r = _mm512_maskz_set1_epi8(0, a); |
17014 | assert_eq_m512i(r, _mm512_setzero_si512()); |
17015 | let r = _mm512_maskz_set1_epi8( |
17016 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
17017 | a, |
17018 | ); |
17019 | let e = _mm512_set1_epi8(11); |
17020 | assert_eq_m512i(r, e); |
17021 | } |
17022 | |
17023 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17024 | unsafe fn test_mm256_mask_set1_epi8() { |
17025 | let src = _mm256_set1_epi8(2); |
17026 | let a: i8 = 11; |
17027 | let r = _mm256_mask_set1_epi8(src, 0, a); |
17028 | assert_eq_m256i(r, src); |
17029 | let r = _mm256_mask_set1_epi8(src, 0b11111111_11111111_11111111_11111111, a); |
17030 | let e = _mm256_set1_epi8(11); |
17031 | assert_eq_m256i(r, e); |
17032 | } |
17033 | |
17034 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17035 | unsafe fn test_mm256_maskz_set1_epi8() { |
17036 | let a: i8 = 11; |
17037 | let r = _mm256_maskz_set1_epi8(0, a); |
17038 | assert_eq_m256i(r, _mm256_setzero_si256()); |
17039 | let r = _mm256_maskz_set1_epi8(0b11111111_11111111_11111111_11111111, a); |
17040 | let e = _mm256_set1_epi8(11); |
17041 | assert_eq_m256i(r, e); |
17042 | } |
17043 | |
17044 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17045 | unsafe fn test_mm_mask_set1_epi8() { |
17046 | let src = _mm_set1_epi8(2); |
17047 | let a: i8 = 11; |
17048 | let r = _mm_mask_set1_epi8(src, 0, a); |
17049 | assert_eq_m128i(r, src); |
17050 | let r = _mm_mask_set1_epi8(src, 0b11111111_11111111, a); |
17051 | let e = _mm_set1_epi8(11); |
17052 | assert_eq_m128i(r, e); |
17053 | } |
17054 | |
17055 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17056 | unsafe fn test_mm_maskz_set1_epi8() { |
17057 | let a: i8 = 11; |
17058 | let r = _mm_maskz_set1_epi8(0, a); |
17059 | assert_eq_m128i(r, _mm_setzero_si128()); |
17060 | let r = _mm_maskz_set1_epi8(0b11111111_11111111, a); |
17061 | let e = _mm_set1_epi8(11); |
17062 | assert_eq_m128i(r, e); |
17063 | } |
17064 | |
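// For the shufflelo/shufflehi tests, the const generic 0b00_01_01_11 is a 4x2-bit
// selector read from the low bits up: destination words 0..3 of the affected
// quadword take source words 3, 1, 1, 0. `_mm*_shufflelo_epi16` applies this to the
// low quadword of every 128-bit lane and copies the high quadword through unchanged,
// producing the 7, 6, 6, 4 pattern visible in the expected vectors below.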
17065 | #[simd_test(enable = "avx512bw" )] |
17066 | unsafe fn test_mm512_shufflelo_epi16() { |
17067 | #[rustfmt::skip] |
17068 | let a = _mm512_set_epi16( |
17069 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
17070 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
17071 | ); |
17072 | #[rustfmt::skip] |
17073 | let e = _mm512_set_epi16( |
17074 | 0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12, |
17075 | 16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28, |
17076 | ); |
17077 | let r = _mm512_shufflelo_epi16::<0b00_01_01_11>(a); |
17078 | assert_eq_m512i(r, e); |
17079 | } |
17080 | |
17081 | #[simd_test(enable = "avx512bw" )] |
17082 | unsafe fn test_mm512_mask_shufflelo_epi16() { |
17083 | #[rustfmt::skip] |
17084 | let a = _mm512_set_epi16( |
17085 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
17086 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
17087 | ); |
17088 | let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a); |
17089 | assert_eq_m512i(r, a); |
17090 | let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>( |
17091 | a, |
17092 | 0b11111111_11111111_11111111_11111111, |
17093 | a, |
17094 | ); |
17095 | #[rustfmt::skip] |
17096 | let e = _mm512_set_epi16( |
17097 | 0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12, |
17098 | 16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28, |
17099 | ); |
17100 | assert_eq_m512i(r, e); |
17101 | } |
17102 | |
17103 | #[simd_test(enable = "avx512bw" )] |
17104 | unsafe fn test_mm512_maskz_shufflelo_epi16() { |
17105 | #[rustfmt::skip] |
17106 | let a = _mm512_set_epi16( |
17107 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
17108 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
17109 | ); |
17110 | let r = _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a); |
17111 | assert_eq_m512i(r, _mm512_setzero_si512()); |
17112 | let r = |
17113 | _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a); |
17114 | #[rustfmt::skip] |
17115 | let e = _mm512_set_epi16( |
17116 | 0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12, |
17117 | 16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28, |
17118 | ); |
17119 | assert_eq_m512i(r, e); |
17120 | } |
17121 | |
17122 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17123 | unsafe fn test_mm256_mask_shufflelo_epi16() { |
17124 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
17125 | let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a); |
17126 | assert_eq_m256i(r, a); |
17127 | let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a); |
17128 | let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12); |
17129 | assert_eq_m256i(r, e); |
17130 | } |
17131 | |
17132 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17133 | unsafe fn test_mm256_maskz_shufflelo_epi16() { |
17134 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
17135 | let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a); |
17136 | assert_eq_m256i(r, _mm256_setzero_si256()); |
17137 | let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111, a); |
17138 | let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12); |
17139 | assert_eq_m256i(r, e); |
17140 | } |
17141 | |
17142 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17143 | unsafe fn test_mm_mask_shufflelo_epi16() { |
17144 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
17145 | let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a); |
17146 | assert_eq_m128i(r, a); |
17147 | let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111, a); |
17148 | let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4); |
17149 | assert_eq_m128i(r, e); |
17150 | } |
17151 | |
17152 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17153 | unsafe fn test_mm_maskz_shufflelo_epi16() { |
17154 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
17155 | let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a); |
17156 | assert_eq_m128i(r, _mm_setzero_si128()); |
17157 | let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111, a); |
17158 | let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4); |
17159 | assert_eq_m128i(r, e); |
17160 | } |
17161 | |
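// `_mm*_shufflehi_epi16` uses the same immediate encoding but permutes the high
// quadword of each 128-bit lane, leaving the low quadword untouched (hence the
// 3, 2, 2, 0 pattern at the top of the expected vectors).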
17162 | #[simd_test(enable = "avx512bw" )] |
17163 | unsafe fn test_mm512_shufflehi_epi16() { |
17164 | #[rustfmt::skip] |
17165 | let a = _mm512_set_epi16( |
17166 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
17167 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
17168 | ); |
17169 | #[rustfmt::skip] |
17170 | let e = _mm512_set_epi16( |
17171 | 3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15, |
17172 | 19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31, |
17173 | ); |
17174 | let r = _mm512_shufflehi_epi16::<0b00_01_01_11>(a); |
17175 | assert_eq_m512i(r, e); |
17176 | } |
17177 | |
17178 | #[simd_test(enable = "avx512bw" )] |
17179 | unsafe fn test_mm512_mask_shufflehi_epi16() { |
17180 | #[rustfmt::skip] |
17181 | let a = _mm512_set_epi16( |
17182 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
17183 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
17184 | ); |
17185 | let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a); |
17186 | assert_eq_m512i(r, a); |
17187 | let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>( |
17188 | a, |
17189 | 0b11111111_11111111_11111111_11111111, |
17190 | a, |
17191 | ); |
17192 | #[rustfmt::skip] |
17193 | let e = _mm512_set_epi16( |
17194 | 3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15, |
17195 | 19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31, |
17196 | ); |
17197 | assert_eq_m512i(r, e); |
17198 | } |
17199 | |
17200 | #[simd_test(enable = "avx512bw" )] |
17201 | unsafe fn test_mm512_maskz_shufflehi_epi16() { |
17202 | #[rustfmt::skip] |
17203 | let a = _mm512_set_epi16( |
17204 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
17205 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
17206 | ); |
17207 | let r = _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a); |
17208 | assert_eq_m512i(r, _mm512_setzero_si512()); |
17209 | let r = |
17210 | _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a); |
17211 | #[rustfmt::skip] |
17212 | let e = _mm512_set_epi16( |
17213 | 3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15, |
17214 | 19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31, |
17215 | ); |
17216 | assert_eq_m512i(r, e); |
17217 | } |
17218 | |
17219 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17220 | unsafe fn test_mm256_mask_shufflehi_epi16() { |
17221 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
17222 | let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a); |
17223 | assert_eq_m256i(r, a); |
17224 | let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a); |
17225 | let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15); |
17226 | assert_eq_m256i(r, e); |
17227 | } |
17228 | |
17229 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17230 | unsafe fn test_mm256_maskz_shufflehi_epi16() { |
17231 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
17232 | let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a); |
17233 | assert_eq_m256i(r, _mm256_setzero_si256()); |
17234 | let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111, a); |
17235 | let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15); |
17236 | assert_eq_m256i(r, e); |
17237 | } |
17238 | |
17239 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17240 | unsafe fn test_mm_mask_shufflehi_epi16() { |
17241 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
17242 | let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a); |
17243 | assert_eq_m128i(r, a); |
17244 | let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111, a); |
17245 | let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7); |
17246 | assert_eq_m128i(r, e); |
17247 | } |
17248 | |
17249 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17250 | unsafe fn test_mm_maskz_shufflehi_epi16() { |
17251 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
17252 | let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a); |
17253 | assert_eq_m128i(r, _mm_setzero_si128()); |
17254 | let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111, a); |
17255 | let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7); |
17256 | assert_eq_m128i(r, e); |
17257 | } |
17258 | |
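// `vpshufb` treats each byte of `b` as an index: within every 128-bit lane, result
// byte i is a[b[i] & 0x0F], or zero when bit 7 of b[i] is set. With
// `b = set1_epi8(1)` every byte of a lane becomes that lane's byte 1, giving the
// runs of 14, 30, 46 and 62 expected below.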
17259 | #[simd_test(enable = "avx512bw" )] |
17260 | unsafe fn test_mm512_shuffle_epi8() { |
17261 | #[rustfmt::skip] |
17262 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
17263 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
17264 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, |
17265 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); |
17266 | let b = _mm512_set1_epi8(1); |
17267 | let r = _mm512_shuffle_epi8(a, b); |
17268 | #[rustfmt::skip] |
17269 | let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, |
17270 | 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, |
17271 | 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, |
17272 | 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62); |
17273 | assert_eq_m512i(r, e); |
17274 | } |
17275 | |
17276 | #[simd_test(enable = "avx512bw" )] |
17277 | unsafe fn test_mm512_mask_shuffle_epi8() { |
17278 | #[rustfmt::skip] |
17279 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
17280 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
17281 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, |
17282 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); |
17283 | let b = _mm512_set1_epi8(1); |
17284 | let r = _mm512_mask_shuffle_epi8(a, 0, a, b); |
17285 | assert_eq_m512i(r, a); |
17286 | let r = _mm512_mask_shuffle_epi8( |
17287 | a, |
17288 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
17289 | a, |
17290 | b, |
17291 | ); |
17292 | #[rustfmt::skip] |
17293 | let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, |
17294 | 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, |
17295 | 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, |
17296 | 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62); |
17297 | assert_eq_m512i(r, e); |
17298 | } |
17299 | |
17300 | #[simd_test(enable = "avx512bw" )] |
17301 | unsafe fn test_mm512_maskz_shuffle_epi8() { |
17302 | #[rustfmt::skip] |
17303 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
17304 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
17305 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, |
17306 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); |
17307 | let b = _mm512_set1_epi8(1); |
17308 | let r = _mm512_maskz_shuffle_epi8(0, a, b); |
17309 | assert_eq_m512i(r, _mm512_setzero_si512()); |
17310 | let r = _mm512_maskz_shuffle_epi8( |
17311 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
17312 | a, |
17313 | b, |
17314 | ); |
17315 | #[rustfmt::skip] |
17316 | let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, |
17317 | 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, |
17318 | 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, |
17319 | 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62); |
17320 | assert_eq_m512i(r, e); |
17321 | } |
17322 | |
17323 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17324 | unsafe fn test_mm256_mask_shuffle_epi8() { |
17325 | #[rustfmt::skip] |
17326 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
17327 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
17328 | let b = _mm256_set1_epi8(1); |
17329 | let r = _mm256_mask_shuffle_epi8(a, 0, a, b); |
17330 | assert_eq_m256i(r, a); |
17331 | let r = _mm256_mask_shuffle_epi8(a, 0b11111111_11111111_11111111_11111111, a, b); |
17332 | #[rustfmt::skip] |
17333 | let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, |
17334 | 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30); |
17335 | assert_eq_m256i(r, e); |
17336 | } |
17337 | |
17338 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17339 | unsafe fn test_mm256_maskz_shuffle_epi8() { |
17340 | #[rustfmt::skip] |
17341 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
17342 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); |
17343 | let b = _mm256_set1_epi8(1); |
17344 | let r = _mm256_maskz_shuffle_epi8(0, a, b); |
17345 | assert_eq_m256i(r, _mm256_setzero_si256()); |
17346 | let r = _mm256_maskz_shuffle_epi8(0b11111111_11111111_11111111_11111111, a, b); |
17347 | #[rustfmt::skip] |
17348 | let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, |
17349 | 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30); |
17350 | assert_eq_m256i(r, e); |
17351 | } |
17352 | |
17353 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17354 | unsafe fn test_mm_mask_shuffle_epi8() { |
17355 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
17356 | let b = _mm_set1_epi8(1); |
17357 | let r = _mm_mask_shuffle_epi8(a, 0, a, b); |
17358 | assert_eq_m128i(r, a); |
17359 | let r = _mm_mask_shuffle_epi8(a, 0b11111111_11111111, a, b); |
17360 | let e = _mm_set_epi8( |
17361 | 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, |
17362 | ); |
17363 | assert_eq_m128i(r, e); |
17364 | } |
17365 | |
17366 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17367 | unsafe fn test_mm_maskz_shuffle_epi8() { |
17368 | #[rustfmt::skip] |
17369 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
17370 | let b = _mm_set1_epi8(1); |
17371 | let r = _mm_maskz_shuffle_epi8(0, a, b); |
17372 | assert_eq_m128i(r, _mm_setzero_si128()); |
17373 | let r = _mm_maskz_shuffle_epi8(0b11111111_11111111, a, b); |
17374 | let e = _mm_set_epi8( |
17375 | 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, |
17376 | ); |
17377 | assert_eq_m128i(r, e); |
17378 | } |
17379 | |
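// `_mm*_test_epi16_mask`/`_epi8_mask` AND corresponding elements and set mask bit i
// when the result is non-zero; the masked variants additionally AND with the supplied
// mask `k`. Since every element of `a` and `b` here shares bit 0, the full mask is
// expected.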
17380 | #[simd_test(enable = "avx512bw" )] |
17381 | unsafe fn test_mm512_test_epi16_mask() { |
17382 | let a = _mm512_set1_epi16(1 << 0); |
17383 | let b = _mm512_set1_epi16(1 << 0 | 1 << 1); |
17384 | let r = _mm512_test_epi16_mask(a, b); |
17385 | let e: __mmask32 = 0b11111111_11111111_11111111_11111111; |
17386 | assert_eq!(r, e); |
17387 | } |
17388 | |
17389 | #[simd_test(enable = "avx512bw" )] |
17390 | unsafe fn test_mm512_mask_test_epi16_mask() { |
17391 | let a = _mm512_set1_epi16(1 << 0); |
17392 | let b = _mm512_set1_epi16(1 << 0 | 1 << 1); |
17393 | let r = _mm512_mask_test_epi16_mask(0, a, b); |
17394 | assert_eq!(r, 0); |
17395 | let r = _mm512_mask_test_epi16_mask(0b11111111_11111111_11111111_11111111, a, b); |
17396 | let e: __mmask32 = 0b11111111_11111111_11111111_11111111; |
17397 | assert_eq!(r, e); |
17398 | } |
17399 | |
17400 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17401 | unsafe fn test_mm256_test_epi16_mask() { |
17402 | let a = _mm256_set1_epi16(1 << 0); |
17403 | let b = _mm256_set1_epi16(1 << 0 | 1 << 1); |
17404 | let r = _mm256_test_epi16_mask(a, b); |
17405 | let e: __mmask16 = 0b11111111_11111111; |
17406 | assert_eq!(r, e); |
17407 | } |
17408 | |
17409 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17410 | unsafe fn test_mm256_mask_test_epi16_mask() { |
17411 | let a = _mm256_set1_epi16(1 << 0); |
17412 | let b = _mm256_set1_epi16(1 << 0 | 1 << 1); |
17413 | let r = _mm256_mask_test_epi16_mask(0, a, b); |
17414 | assert_eq!(r, 0); |
17415 | let r = _mm256_mask_test_epi16_mask(0b11111111_11111111, a, b); |
17416 | let e: __mmask16 = 0b11111111_11111111; |
17417 | assert_eq!(r, e); |
17418 | } |
17419 | |
17420 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17421 | unsafe fn test_mm_test_epi16_mask() { |
17422 | let a = _mm_set1_epi16(1 << 0); |
17423 | let b = _mm_set1_epi16(1 << 0 | 1 << 1); |
17424 | let r = _mm_test_epi16_mask(a, b); |
17425 | let e: __mmask8 = 0b11111111; |
17426 | assert_eq!(r, e); |
17427 | } |
17428 | |
17429 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17430 | unsafe fn test_mm_mask_test_epi16_mask() { |
17431 | let a = _mm_set1_epi16(1 << 0); |
17432 | let b = _mm_set1_epi16(1 << 0 | 1 << 1); |
17433 | let r = _mm_mask_test_epi16_mask(0, a, b); |
17434 | assert_eq!(r, 0); |
17435 | let r = _mm_mask_test_epi16_mask(0b11111111, a, b); |
17436 | let e: __mmask8 = 0b11111111; |
17437 | assert_eq!(r, e); |
17438 | } |
17439 | |
17440 | #[simd_test(enable = "avx512bw" )] |
17441 | unsafe fn test_mm512_test_epi8_mask() { |
17442 | let a = _mm512_set1_epi8(1 << 0); |
17443 | let b = _mm512_set1_epi8(1 << 0 | 1 << 1); |
17444 | let r = _mm512_test_epi8_mask(a, b); |
17445 | let e: __mmask64 = |
17446 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111; |
17447 | assert_eq!(r, e); |
17448 | } |
17449 | |
17450 | #[simd_test(enable = "avx512bw" )] |
17451 | unsafe fn test_mm512_mask_test_epi8_mask() { |
17452 | let a = _mm512_set1_epi8(1 << 0); |
17453 | let b = _mm512_set1_epi8(1 << 0 | 1 << 1); |
17454 | let r = _mm512_mask_test_epi8_mask(0, a, b); |
17455 | assert_eq!(r, 0); |
17456 | let r = _mm512_mask_test_epi8_mask( |
17457 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
17458 | a, |
17459 | b, |
17460 | ); |
17461 | let e: __mmask64 = |
17462 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111; |
17463 | assert_eq!(r, e); |
17464 | } |
17465 | |
17466 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17467 | unsafe fn test_mm256_test_epi8_mask() { |
17468 | let a = _mm256_set1_epi8(1 << 0); |
17469 | let b = _mm256_set1_epi8(1 << 0 | 1 << 1); |
17470 | let r = _mm256_test_epi8_mask(a, b); |
17471 | let e: __mmask32 = 0b11111111_11111111_11111111_11111111; |
17472 | assert_eq!(r, e); |
17473 | } |
17474 | |
17475 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17476 | unsafe fn test_mm256_mask_test_epi8_mask() { |
17477 | let a = _mm256_set1_epi8(1 << 0); |
17478 | let b = _mm256_set1_epi8(1 << 0 | 1 << 1); |
17479 | let r = _mm256_mask_test_epi8_mask(0, a, b); |
17480 | assert_eq!(r, 0); |
17481 | let r = _mm256_mask_test_epi8_mask(0b11111111_11111111_11111111_11111111, a, b); |
17482 | let e: __mmask32 = 0b11111111_11111111_11111111_11111111; |
17483 | assert_eq!(r, e); |
17484 | } |
17485 | |
17486 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17487 | unsafe fn test_mm_test_epi8_mask() { |
17488 | let a = _mm_set1_epi8(1 << 0); |
17489 | let b = _mm_set1_epi8(1 << 0 | 1 << 1); |
17490 | let r = _mm_test_epi8_mask(a, b); |
17491 | let e: __mmask16 = 0b11111111_11111111; |
17492 | assert_eq!(r, e); |
17493 | } |
17494 | |
17495 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17496 | unsafe fn test_mm_mask_test_epi8_mask() { |
17497 | let a = _mm_set1_epi8(1 << 0); |
17498 | let b = _mm_set1_epi8(1 << 0 | 1 << 1); |
17499 | let r = _mm_mask_test_epi8_mask(0, a, b); |
17500 | assert_eq!(r, 0); |
17501 | let r = _mm_mask_test_epi8_mask(0b11111111_11111111, a, b); |
17502 | let e: __mmask16 = 0b11111111_11111111; |
17503 | assert_eq!(r, e); |
17504 | } |
17505 | |
17506 | #[simd_test(enable = "avx512bw" )] |
17507 | unsafe fn test_mm512_testn_epi16_mask() { |
17508 | let a = _mm512_set1_epi16(1 << 0); |
17509 | let b = _mm512_set1_epi16(1 << 0 | 1 << 1); |
17510 | let r = _mm512_testn_epi16_mask(a, b); |
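// a & b is nonzero in every element, so testn leaves every mask bit clear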
17511 | let e: __mmask32 = 0b00000000_00000000_00000000_00000000; |
17512 | assert_eq!(r, e); |
17513 | } |
17514 | |
17515 | #[simd_test(enable = "avx512bw" )] |
17516 | unsafe fn test_mm512_mask_testn_epi16_mask() { |
17517 | let a = _mm512_set1_epi16(1 << 0); |
17518 | let b = _mm512_set1_epi16(1 << 0 | 1 << 1); |
17519 | let r = _mm512_mask_testn_epi16_mask(0, a, b); |
17520 | assert_eq!(r, 0); |
17521 | let r = _mm512_mask_testn_epi16_mask(0b11111111_11111111_11111111_11111111, a, b); |
17522 | let e: __mmask32 = 0b00000000_00000000_00000000_00000000; |
17523 | assert_eq!(r, e); |
17524 | } |
17525 | |
17526 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17527 | unsafe fn test_mm256_testn_epi16_mask() { |
17528 | let a = _mm256_set1_epi16(1 << 0); |
17529 | let b = _mm256_set1_epi16(1 << 0 | 1 << 1); |
17530 | let r = _mm256_testn_epi16_mask(a, b); |
17531 | let e: __mmask16 = 0b00000000_00000000; |
17532 | assert_eq!(r, e); |
17533 | } |
17534 | |
17535 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17536 | unsafe fn test_mm256_mask_testn_epi16_mask() { |
17537 | let a = _mm256_set1_epi16(1 << 0); |
17538 | let b = _mm256_set1_epi16(1 << 0 | 1 << 1); |
17539 | let r = _mm256_mask_testn_epi16_mask(0, a, b); |
17540 | assert_eq!(r, 0); |
17541 | let r = _mm256_mask_testn_epi16_mask(0b11111111_11111111, a, b); |
17542 | let e: __mmask16 = 0b00000000_00000000; |
17543 | assert_eq!(r, e); |
17544 | } |
17545 | |
17546 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17547 | unsafe fn test_mm_testn_epi16_mask() { |
17548 | let a = _mm_set1_epi16(1 << 0); |
17549 | let b = _mm_set1_epi16(1 << 0 | 1 << 1); |
17550 | let r = _mm_testn_epi16_mask(a, b); |
17551 | let e: __mmask8 = 0b00000000; |
17552 | assert_eq!(r, e); |
17553 | } |
17554 | |
17555 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17556 | unsafe fn test_mm_mask_testn_epi16_mask() { |
17557 | let a = _mm_set1_epi16(1 << 0); |
17558 | let b = _mm_set1_epi16(1 << 0 | 1 << 1); |
17559 | let r = _mm_mask_testn_epi16_mask(0, a, b); |
17560 | assert_eq!(r, 0); |
17561 | let r = _mm_mask_testn_epi16_mask(0b11111111, a, b); |
17562 | let e: __mmask8 = 0b00000000; |
17563 | assert_eq!(r, e); |
17564 | } |
17565 | |
17566 | #[simd_test(enable = "avx512bw" )] |
17567 | unsafe fn test_mm512_testn_epi8_mask() { |
17568 | let a = _mm512_set1_epi8(1 << 0); |
17569 | let b = _mm512_set1_epi8(1 << 0 | 1 << 1); |
17570 | let r = _mm512_testn_epi8_mask(a, b); |
17571 | let e: __mmask64 = |
17572 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000; |
17573 | assert_eq!(r, e); |
17574 | } |
17575 | |
17576 | #[simd_test(enable = "avx512bw" )] |
17577 | unsafe fn test_mm512_mask_testn_epi8_mask() { |
17578 | let a = _mm512_set1_epi8(1 << 0); |
17579 | let b = _mm512_set1_epi8(1 << 0 | 1 << 1); |
17580 | let r = _mm512_mask_testn_epi8_mask(0, a, b); |
17581 | assert_eq!(r, 0); |
17582 | let r = _mm512_mask_testn_epi8_mask( |
17583 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
17584 | a, |
17585 | b, |
17586 | ); |
17587 | let e: __mmask64 = |
17588 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000; |
17589 | assert_eq!(r, e); |
17590 | } |
17591 | |
17592 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17593 | unsafe fn test_mm256_testn_epi8_mask() { |
17594 | let a = _mm256_set1_epi8(1 << 0); |
17595 | let b = _mm256_set1_epi8(1 << 0 | 1 << 1); |
17596 | let r = _mm256_testn_epi8_mask(a, b); |
17597 | let e: __mmask32 = 0b00000000_00000000_00000000_00000000; |
17598 | assert_eq!(r, e); |
17599 | } |
17600 | |
17601 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17602 | unsafe fn test_mm256_mask_testn_epi8_mask() { |
17603 | let a = _mm256_set1_epi8(1 << 0); |
17604 | let b = _mm256_set1_epi8(1 << 0 | 1 << 1); |
17605 | let r = _mm256_mask_testn_epi8_mask(0, a, b); |
17606 | assert_eq!(r, 0); |
17607 | let r = _mm256_mask_testn_epi8_mask(0b11111111_11111111_11111111_11111111, a, b); |
17608 | let e: __mmask32 = 0b00000000_00000000_00000000_00000000; |
17609 | assert_eq!(r, e); |
17610 | } |
17611 | |
17612 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17613 | unsafe fn test_mm_testn_epi8_mask() { |
17614 | let a = _mm_set1_epi8(1 << 0); |
17615 | let b = _mm_set1_epi8(1 << 0 | 1 << 1); |
17616 | let r = _mm_testn_epi8_mask(a, b); |
17617 | let e: __mmask16 = 0b00000000_00000000; |
17618 | assert_eq!(r, e); |
17619 | } |
17620 | |
17621 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17622 | unsafe fn test_mm_mask_testn_epi8_mask() { |
17623 | let a = _mm_set1_epi8(1 << 0); |
17624 | let b = _mm_set1_epi8(1 << 0 | 1 << 1); |
17625 | let r = _mm_mask_testn_epi8_mask(0, a, b); |
17626 | assert_eq!(r, 0); |
17627 | let r = _mm_mask_testn_epi8_mask(0b11111111_11111111, a, b); |
17628 | let e: __mmask16 = 0b00000000_00000000; |
17629 | assert_eq!(r, e); |
17630 | } |
17631 | |
17632 | #[simd_test(enable = "avx512bw" )] |
17633 | unsafe fn test_store_mask64() { |
17634 | let a: __mmask64 = |
17635 | 0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000; |
17636 | let mut r = 0; |
17637 | _store_mask64(&mut r as *mut _ as *mut u64, a); |
17638 | assert_eq!(r, a); |
17639 | } |
17640 | |
17641 | #[simd_test(enable = "avx512bw" )] |
17642 | unsafe fn test_store_mask32() { |
17643 | let a: __mmask32 = 0b11111111_00000000_11111111_00000000; |
17644 | let mut r = 0; |
17645 | _store_mask32(&mut r as *mut _ as *mut u32, a); |
17646 | assert_eq!(r, a); |
17647 | } |
17648 | |
17649 | #[simd_test(enable = "avx512bw" )] |
17650 | unsafe fn test_load_mask64() { |
17651 | let p: __mmask64 = |
17652 | 0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000; |
17653 | let r = _load_mask64(&p); |
17654 | let e: __mmask64 = |
17655 | 0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000; |
17656 | assert_eq!(r, e); |
17657 | } |
17658 | |
17659 | #[simd_test(enable = "avx512bw" )] |
17660 | unsafe fn test_load_mask32() { |
17661 | let p: __mmask32 = 0b11111111_00000000_11111111_00000000; |
17662 | let r = _load_mask32(&p); |
17663 | let e: __mmask32 = 0b11111111_00000000_11111111_00000000; |
17664 | assert_eq!(r, e); |
17665 | } |
17666 | |
17667 | #[simd_test(enable = "avx512bw" )] |
17668 | unsafe fn test_mm512_sad_epu8() { |
17669 | let a = _mm512_set1_epi8(2); |
17670 | let b = _mm512_set1_epi8(4); |
17671 | let r = _mm512_sad_epu8(a, b); |
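// each 64-bit result is the sum of eight byte-wise absolute differences: 8 * |2 - 4| = 16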
17672 | let e = _mm512_set1_epi64(16); |
17673 | assert_eq_m512i(r, e); |
17674 | } |
17675 | |
17676 | #[simd_test(enable = "avx512bw" )] |
17677 | unsafe fn test_mm512_dbsad_epu8() { |
17678 | let a = _mm512_set1_epi8(2); |
17679 | let b = _mm512_set1_epi8(4); |
17680 | let r = _mm512_dbsad_epu8::<0>(a, b); |
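// each 16-bit result is the sum of four byte-wise absolute differences: 4 * |2 - 4| = 8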
17681 | let e = _mm512_set1_epi16(8); |
17682 | assert_eq_m512i(r, e); |
17683 | } |
17684 | |
17685 | #[simd_test(enable = "avx512bw" )] |
17686 | unsafe fn test_mm512_mask_dbsad_epu8() { |
17687 | let src = _mm512_set1_epi16(1); |
17688 | let a = _mm512_set1_epi8(2); |
17689 | let b = _mm512_set1_epi8(4); |
17690 | let r = _mm512_mask_dbsad_epu8::<0>(src, 0, a, b); |
17691 | assert_eq_m512i(r, src); |
17692 | let r = _mm512_mask_dbsad_epu8::<0>(src, 0b11111111_11111111_11111111_11111111, a, b); |
17693 | let e = _mm512_set1_epi16(8); |
17694 | assert_eq_m512i(r, e); |
17695 | } |
17696 | |
17697 | #[simd_test(enable = "avx512bw" )] |
17698 | unsafe fn test_mm512_maskz_dbsad_epu8() { |
17699 | let a = _mm512_set1_epi8(2); |
17700 | let b = _mm512_set1_epi8(4); |
17701 | let r = _mm512_maskz_dbsad_epu8::<0>(0, a, b); |
17702 | assert_eq_m512i(r, _mm512_setzero_si512()); |
17703 | let r = _mm512_maskz_dbsad_epu8::<0>(0b11111111_11111111_11111111_11111111, a, b); |
17704 | let e = _mm512_set1_epi16(8); |
17705 | assert_eq_m512i(r, e); |
17706 | } |
17707 | |
17708 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17709 | unsafe fn test_mm256_dbsad_epu8() { |
17710 | let a = _mm256_set1_epi8(2); |
17711 | let b = _mm256_set1_epi8(4); |
17712 | let r = _mm256_dbsad_epu8::<0>(a, b); |
17713 | let e = _mm256_set1_epi16(8); |
17714 | assert_eq_m256i(r, e); |
17715 | } |
17716 | |
17717 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17718 | unsafe fn test_mm256_mask_dbsad_epu8() { |
17719 | let src = _mm256_set1_epi16(1); |
17720 | let a = _mm256_set1_epi8(2); |
17721 | let b = _mm256_set1_epi8(4); |
17722 | let r = _mm256_mask_dbsad_epu8::<0>(src, 0, a, b); |
17723 | assert_eq_m256i(r, src); |
17724 | let r = _mm256_mask_dbsad_epu8::<0>(src, 0b11111111_11111111, a, b); |
17725 | let e = _mm256_set1_epi16(8); |
17726 | assert_eq_m256i(r, e); |
17727 | } |
17728 | |
17729 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17730 | unsafe fn test_mm256_maskz_dbsad_epu8() { |
17731 | let a = _mm256_set1_epi8(2); |
17732 | let b = _mm256_set1_epi8(4); |
17733 | let r = _mm256_maskz_dbsad_epu8::<0>(0, a, b); |
17734 | assert_eq_m256i(r, _mm256_setzero_si256()); |
17735 | let r = _mm256_maskz_dbsad_epu8::<0>(0b11111111_11111111, a, b); |
17736 | let e = _mm256_set1_epi16(8); |
17737 | assert_eq_m256i(r, e); |
17738 | } |
17739 | |
17740 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17741 | unsafe fn test_mm_dbsad_epu8() { |
17742 | let a = _mm_set1_epi8(2); |
17743 | let b = _mm_set1_epi8(4); |
17744 | let r = _mm_dbsad_epu8::<0>(a, b); |
17745 | let e = _mm_set1_epi16(8); |
17746 | assert_eq_m128i(r, e); |
17747 | } |
17748 | |
17749 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17750 | unsafe fn test_mm_mask_dbsad_epu8() { |
17751 | let src = _mm_set1_epi16(1); |
17752 | let a = _mm_set1_epi8(2); |
17753 | let b = _mm_set1_epi8(4); |
17754 | let r = _mm_mask_dbsad_epu8::<0>(src, 0, a, b); |
17755 | assert_eq_m128i(r, src); |
17756 | let r = _mm_mask_dbsad_epu8::<0>(src, 0b11111111, a, b); |
17757 | let e = _mm_set1_epi16(8); |
17758 | assert_eq_m128i(r, e); |
17759 | } |
17760 | |
17761 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17762 | unsafe fn test_mm_maskz_dbsad_epu8() { |
17763 | let a = _mm_set1_epi8(2); |
17764 | let b = _mm_set1_epi8(4); |
17765 | let r = _mm_maskz_dbsad_epu8::<0>(0, a, b); |
17766 | assert_eq_m128i(r, _mm_setzero_si128()); |
17767 | let r = _mm_maskz_dbsad_epu8::<0>(0b11111111, a, b); |
17768 | let e = _mm_set1_epi16(8); |
17769 | assert_eq_m128i(r, e); |
17770 | } |
17771 | |
17772 | #[simd_test(enable = "avx512bw" )] |
17773 | unsafe fn test_mm512_movepi16_mask() { |
17774 | let a = _mm512_set1_epi16(1 << 15); |
17775 | let r = _mm512_movepi16_mask(a); |
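// every element has its sign bit set, so every mask bit is set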
17776 | let e: __mmask32 = 0b11111111_11111111_11111111_11111111; |
17777 | assert_eq!(r, e); |
17778 | } |
17779 | |
17780 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17781 | unsafe fn test_mm256_movepi16_mask() { |
17782 | let a = _mm256_set1_epi16(1 << 15); |
17783 | let r = _mm256_movepi16_mask(a); |
17784 | let e: __mmask16 = 0b11111111_11111111; |
17785 | assert_eq!(r, e); |
17786 | } |
17787 | |
17788 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17789 | unsafe fn test_mm_movepi16_mask() { |
17790 | let a = _mm_set1_epi16(1 << 15); |
17791 | let r = _mm_movepi16_mask(a); |
17792 | let e: __mmask8 = 0b11111111; |
17793 | assert_eq!(r, e); |
17794 | } |
17795 | |
17796 | #[simd_test(enable = "avx512bw" )] |
17797 | unsafe fn test_mm512_movepi8_mask() { |
17798 | let a = _mm512_set1_epi8(1 << 7); |
17799 | let r = _mm512_movepi8_mask(a); |
17800 | let e: __mmask64 = |
17801 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111; |
17802 | assert_eq!(r, e); |
17803 | } |
17804 | |
17805 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17806 | unsafe fn test_mm256_movepi8_mask() { |
17807 | let a = _mm256_set1_epi8(1 << 7); |
17808 | let r = _mm256_movepi8_mask(a); |
17809 | let e: __mmask32 = 0b11111111_11111111_11111111_11111111; |
17810 | assert_eq!(r, e); |
17811 | } |
17812 | |
17813 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17814 | unsafe fn test_mm_movepi8_mask() { |
17815 | let a = _mm_set1_epi8(1 << 7); |
17816 | let r = _mm_movepi8_mask(a); |
17817 | let e: __mmask16 = 0b11111111_11111111; |
17818 | assert_eq!(r, e); |
17819 | } |
17820 | |
17821 | #[simd_test(enable = "avx512bw" )] |
17822 | unsafe fn test_mm512_movm_epi16() { |
17823 | let a: __mmask32 = 0b11111111_11111111_11111111_11111111; |
17824 | let r = _mm512_movm_epi16(a); |
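// every mask bit is set, so every element becomes all ones (spelled out below as the OR of all 16 bit positions)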
17825 | let e = _mm512_set1_epi16( |
17826 | 1 << 15 |
17827 | | 1 << 14 |
17828 | | 1 << 13 |
17829 | | 1 << 12 |
17830 | | 1 << 11 |
17831 | | 1 << 10 |
17832 | | 1 << 9 |
17833 | | 1 << 8 |
17834 | | 1 << 7 |
17835 | | 1 << 6 |
17836 | | 1 << 5 |
17837 | | 1 << 4 |
17838 | | 1 << 3 |
17839 | | 1 << 2 |
17840 | | 1 << 1 |
17841 | | 1 << 0, |
17842 | ); |
17843 | assert_eq_m512i(r, e); |
17844 | } |
17845 | |
17846 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17847 | unsafe fn test_mm256_movm_epi16() { |
17848 | let a: __mmask16 = 0b11111111_11111111; |
17849 | let r = _mm256_movm_epi16(a); |
17850 | let e = _mm256_set1_epi16( |
17851 | 1 << 15 |
17852 | | 1 << 14 |
17853 | | 1 << 13 |
17854 | | 1 << 12 |
17855 | | 1 << 11 |
17856 | | 1 << 10 |
17857 | | 1 << 9 |
17858 | | 1 << 8 |
17859 | | 1 << 7 |
17860 | | 1 << 6 |
17861 | | 1 << 5 |
17862 | | 1 << 4 |
17863 | | 1 << 3 |
17864 | | 1 << 2 |
17865 | | 1 << 1 |
17866 | | 1 << 0, |
17867 | ); |
17868 | assert_eq_m256i(r, e); |
17869 | } |
17870 | |
17871 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17872 | unsafe fn test_mm_movm_epi16() { |
17873 | let a: __mmask8 = 0b11111111; |
17874 | let r = _mm_movm_epi16(a); |
17875 | let e = _mm_set1_epi16( |
17876 | 1 << 15 |
17877 | | 1 << 14 |
17878 | | 1 << 13 |
17879 | | 1 << 12 |
17880 | | 1 << 11 |
17881 | | 1 << 10 |
17882 | | 1 << 9 |
17883 | | 1 << 8 |
17884 | | 1 << 7 |
17885 | | 1 << 6 |
17886 | | 1 << 5 |
17887 | | 1 << 4 |
17888 | | 1 << 3 |
17889 | | 1 << 2 |
17890 | | 1 << 1 |
17891 | | 1 << 0, |
17892 | ); |
17893 | assert_eq_m128i(r, e); |
17894 | } |
17895 | |
17896 | #[simd_test(enable = "avx512bw" )] |
17897 | unsafe fn test_mm512_movm_epi8() { |
17898 | let a: __mmask64 = |
17899 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111; |
17900 | let r = _mm512_movm_epi8(a); |
17901 | let e = |
17902 | _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0); |
17903 | assert_eq_m512i(r, e); |
17904 | } |
17905 | |
17906 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17907 | unsafe fn test_mm256_movm_epi8() { |
17908 | let a: __mmask32 = 0b11111111_11111111_11111111_11111111; |
17909 | let r = _mm256_movm_epi8(a); |
17910 | let e = |
17911 | _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0); |
17912 | assert_eq_m256i(r, e); |
17913 | } |
17914 | |
17915 | #[simd_test(enable = "avx512bw,avx512vl" )] |
17916 | unsafe fn test_mm_movm_epi8() { |
17917 | let a: __mmask16 = 0b11111111_11111111; |
17918 | let r = _mm_movm_epi8(a); |
17919 | let e = |
17920 | _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0); |
17921 | assert_eq_m128i(r, e); |
17922 | } |
17923 | |
17924 | #[simd_test(enable = "avx512bw" )] |
17925 | unsafe fn test_kadd_mask32() { |
17926 | let a: __mmask32 = 11; |
17927 | let b: __mmask32 = 22; |
17928 | let r = _kadd_mask32(a, b); |
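// kadd treats the masks as integers and adds them: 11 + 22 = 33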
17929 | let e: __mmask32 = 33; |
17930 | assert_eq!(r, e); |
17931 | } |
17932 | |
17933 | #[simd_test(enable = "avx512bw" )] |
17934 | unsafe fn test_kadd_mask64() { |
17935 | let a: __mmask64 = 11; |
17936 | let b: __mmask64 = 22; |
17937 | let r = _kadd_mask64(a, b); |
17938 | let e: __mmask64 = 33; |
17939 | assert_eq!(r, e); |
17940 | } |
17941 | |
17942 | #[simd_test(enable = "avx512bw" )] |
17943 | unsafe fn test_kand_mask32() { |
17944 | let a: __mmask32 = 0b11001100_00110011_11001100_00110011; |
17945 | let b: __mmask32 = 0b11001100_00110011_11001100_00110011; |
17946 | let r = _kand_mask32(a, b); |
17947 | let e: __mmask32 = 0b11001100_00110011_11001100_00110011; |
17948 | assert_eq!(r, e); |
17949 | } |
17950 | |
17951 | #[simd_test(enable = "avx512bw" )] |
17952 | unsafe fn test_kand_mask64() { |
17953 | let a: __mmask64 = |
17954 | 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011; |
17955 | let b: __mmask64 = |
17956 | 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011; |
17957 | let r = _kand_mask64(a, b); |
17958 | let e: __mmask64 = |
17959 | 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011; |
17960 | assert_eq!(r, e); |
17961 | } |
17962 | |
17963 | #[simd_test(enable = "avx512bw" )] |
17964 | unsafe fn test_knot_mask32() { |
17965 | let a: __mmask32 = 0b11001100_00110011_11001100_00110011; |
17966 | let r = _knot_mask32(a); |
17967 | let e: __mmask32 = 0b00110011_11001100_00110011_11001100; |
17968 | assert_eq!(r, e); |
17969 | } |
17970 | |
17971 | #[simd_test(enable = "avx512bw" )] |
17972 | unsafe fn test_knot_mask64() { |
17973 | let a: __mmask64 = |
17974 | 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011; |
17975 | let r = _knot_mask64(a); |
17976 | let e: __mmask64 = |
17977 | 0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100; |
17978 | assert_eq!(r, e); |
17979 | } |
17980 | |
17981 | #[simd_test(enable = "avx512bw" )] |
17982 | unsafe fn test_kandn_mask32() { |
17983 | let a: __mmask32 = 0b11001100_00110011_11001100_00110011; |
17984 | let b: __mmask32 = 0b11001100_00110011_11001100_00110011; |
17985 | let r = _kandn_mask32(a, b); |
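// kandn computes !a & b; with a == b every bit is cleared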
17986 | let e: __mmask32 = 0b00000000_00000000_00000000_00000000; |
17987 | assert_eq!(r, e); |
17988 | } |
17989 | |
17990 | #[simd_test(enable = "avx512bw" )] |
17991 | unsafe fn test_kandn_mask64() { |
17992 | let a: __mmask64 = |
17993 | 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011; |
17994 | let b: __mmask64 = |
17995 | 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011; |
17996 | let r = _kandn_mask64(a, b); |
17997 | let e: __mmask64 = |
17998 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000; |
17999 | assert_eq!(r, e); |
18000 | } |
18001 | |
18002 | #[simd_test(enable = "avx512bw" )] |
18003 | unsafe fn test_kor_mask32() { |
18004 | let a: __mmask32 = 0b00110011_11001100_00110011_11001100; |
18005 | let b: __mmask32 = 0b11001100_00110011_11001100_00110011; |
18006 | let r = _kor_mask32(a, b); |
18007 | let e: __mmask32 = 0b11111111_11111111_11111111_11111111; |
18008 | assert_eq!(r, e); |
18009 | } |
18010 | |
18011 | #[simd_test(enable = "avx512bw" )] |
18012 | unsafe fn test_kor_mask64() { |
18013 | let a: __mmask64 = |
18014 | 0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100; |
18015 | let b: __mmask64 = |
18016 | 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011; |
18017 | let r = _kor_mask64(a, b); |
18018 | let e: __mmask64 = |
18019 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111; |
18020 | assert_eq!(r, e); |
18021 | } |
18022 | |
18023 | #[simd_test(enable = "avx512bw" )] |
18024 | unsafe fn test_kxor_mask32() { |
18025 | let a: __mmask32 = 0b00110011_11001100_00110011_11001100; |
18026 | let b: __mmask32 = 0b11001100_00110011_11001100_00110011; |
18027 | let r = _kxor_mask32(a, b); |
18028 | let e: __mmask32 = 0b11111111_11111111_11111111_11111111; |
18029 | assert_eq!(r, e); |
18030 | } |
18031 | |
18032 | #[simd_test(enable = "avx512bw" )] |
18033 | unsafe fn test_kxor_mask64() { |
18034 | let a: __mmask64 = |
18035 | 0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100; |
18036 | let b: __mmask64 = |
18037 | 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011; |
18038 | let r = _kxor_mask64(a, b); |
18039 | let e: __mmask64 = |
18040 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111; |
18041 | assert_eq!(r, e); |
18042 | } |
18043 | |
18044 | #[simd_test(enable = "avx512bw" )] |
18045 | unsafe fn test_kxnor_mask32() { |
18046 | let a: __mmask32 = 0b00110011_11001100_00110011_11001100; |
18047 | let b: __mmask32 = 0b11001100_00110011_11001100_00110011; |
18048 | let r = _kxnor_mask32(a, b); |
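// a and b are bitwise complements, so a ^ b is all ones and the xnor is all zeros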
18049 | let e: __mmask32 = 0b00000000_00000000_00000000_00000000; |
18050 | assert_eq!(r, e); |
18051 | } |
18052 | |
18053 | #[simd_test(enable = "avx512bw" )] |
18054 | unsafe fn test_kxnor_mask64() { |
18055 | let a: __mmask64 = |
18056 | 0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100; |
18057 | let b: __mmask64 = |
18058 | 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011; |
18059 | let r = _kxnor_mask64(a, b); |
18060 | let e: __mmask64 = |
18061 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000; |
18062 | assert_eq!(r, e); |
18063 | } |
18064 | |
18065 | #[simd_test(enable = "avx512bw" )] |
18066 | unsafe fn test_mm512_cvtepi16_epi8() { |
18067 | let a = _mm512_set1_epi16(2); |
18068 | let r = _mm512_cvtepi16_epi8(a); |
18069 | let e = _mm256_set1_epi8(2); |
18070 | assert_eq_m256i(r, e); |
18071 | } |
18072 | |
18073 | #[simd_test(enable = "avx512bw" )] |
18074 | unsafe fn test_mm512_mask_cvtepi16_epi8() { |
18075 | let src = _mm256_set1_epi8(1); |
18076 | let a = _mm512_set1_epi16(2); |
18077 | let r = _mm512_mask_cvtepi16_epi8(src, 0, a); |
18078 | assert_eq_m256i(r, src); |
18079 | let r = _mm512_mask_cvtepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a); |
18080 | let e = _mm256_set1_epi8(2); |
18081 | assert_eq_m256i(r, e); |
18082 | } |
18083 | |
18084 | #[simd_test(enable = "avx512bw" )] |
18085 | unsafe fn test_mm512_maskz_cvtepi16_epi8() { |
18086 | let a = _mm512_set1_epi16(2); |
18087 | let r = _mm512_maskz_cvtepi16_epi8(0, a); |
18088 | assert_eq_m256i(r, _mm256_setzero_si256()); |
18089 | let r = _mm512_maskz_cvtepi16_epi8(0b11111111_11111111_11111111_11111111, a); |
18090 | let e = _mm256_set1_epi8(2); |
18091 | assert_eq_m256i(r, e); |
18092 | } |
18093 | |
18094 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18095 | unsafe fn test_mm256_cvtepi16_epi8() { |
18096 | let a = _mm256_set1_epi16(2); |
18097 | let r = _mm256_cvtepi16_epi8(a); |
18098 | let e = _mm_set1_epi8(2); |
18099 | assert_eq_m128i(r, e); |
18100 | } |
18101 | |
18102 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18103 | unsafe fn test_mm256_mask_cvtepi16_epi8() { |
18104 | let src = _mm_set1_epi8(1); |
18105 | let a = _mm256_set1_epi16(2); |
18106 | let r = _mm256_mask_cvtepi16_epi8(src, 0, a); |
18107 | assert_eq_m128i(r, src); |
18108 | let r = _mm256_mask_cvtepi16_epi8(src, 0b11111111_11111111, a); |
18109 | let e = _mm_set1_epi8(2); |
18110 | assert_eq_m128i(r, e); |
18111 | } |
18112 | |
18113 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18114 | unsafe fn test_mm256_maskz_cvtepi16_epi8() { |
18115 | let a = _mm256_set1_epi16(2); |
18116 | let r = _mm256_maskz_cvtepi16_epi8(0, a); |
18117 | assert_eq_m128i(r, _mm_setzero_si128()); |
18118 | let r = _mm256_maskz_cvtepi16_epi8(0b11111111_11111111, a); |
18119 | let e = _mm_set1_epi8(2); |
18120 | assert_eq_m128i(r, e); |
18121 | } |
18122 | |
18123 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18124 | unsafe fn test_mm_cvtepi16_epi8() { |
18125 | let a = _mm_set1_epi16(2); |
18126 | let r = _mm_cvtepi16_epi8(a); |
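// the eight truncated bytes land in the low 64 bits; the upper half of the result is zeroed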
18127 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2); |
18128 | assert_eq_m128i(r, e); |
18129 | } |
18130 | |
18131 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18132 | unsafe fn test_mm_mask_cvtepi16_epi8() { |
18133 | let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1); |
18134 | let a = _mm_set1_epi16(2); |
18135 | let r = _mm_mask_cvtepi16_epi8(src, 0, a); |
18136 | assert_eq_m128i(r, src); |
18137 | let r = _mm_mask_cvtepi16_epi8(src, 0b11111111, a); |
18138 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2); |
18139 | assert_eq_m128i(r, e); |
18140 | } |
18141 | |
18142 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18143 | unsafe fn test_mm_maskz_cvtepi16_epi8() { |
18144 | let a = _mm_set1_epi16(2); |
18145 | let r = _mm_maskz_cvtepi16_epi8(0, a); |
18146 | assert_eq_m128i(r, _mm_setzero_si128()); |
18147 | let r = _mm_maskz_cvtepi16_epi8(0b11111111, a); |
18148 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2); |
18149 | assert_eq_m128i(r, e); |
18150 | } |
18151 | |
18152 | #[simd_test(enable = "avx512bw" )] |
18153 | unsafe fn test_mm512_cvtsepi16_epi8() { |
18154 | let a = _mm512_set1_epi16(i16::MAX); |
18155 | let r = _mm512_cvtsepi16_epi8(a); |
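// signed saturation narrows i16::MAX to i8::MAX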
18156 | let e = _mm256_set1_epi8(i8::MAX); |
18157 | assert_eq_m256i(r, e); |
18158 | } |
18159 | |
18160 | #[simd_test(enable = "avx512bw" )] |
18161 | unsafe fn test_mm512_mask_cvtsepi16_epi8() { |
18162 | let src = _mm256_set1_epi8(1); |
18163 | let a = _mm512_set1_epi16(i16::MAX); |
18164 | let r = _mm512_mask_cvtsepi16_epi8(src, 0, a); |
18165 | assert_eq_m256i(r, src); |
18166 | let r = _mm512_mask_cvtsepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a); |
18167 | let e = _mm256_set1_epi8(i8::MAX); |
18168 | assert_eq_m256i(r, e); |
18169 | } |
18170 | |
18171 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18172 | unsafe fn test_mm256_cvtsepi16_epi8() { |
18173 | let a = _mm256_set1_epi16(i16::MAX); |
18174 | let r = _mm256_cvtsepi16_epi8(a); |
18175 | let e = _mm_set1_epi8(i8::MAX); |
18176 | assert_eq_m128i(r, e); |
18177 | } |
18178 | |
18179 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18180 | unsafe fn test_mm256_mask_cvtsepi16_epi8() { |
18181 | let src = _mm_set1_epi8(1); |
18182 | let a = _mm256_set1_epi16(i16::MAX); |
18183 | let r = _mm256_mask_cvtsepi16_epi8(src, 0, a); |
18184 | assert_eq_m128i(r, src); |
18185 | let r = _mm256_mask_cvtsepi16_epi8(src, 0b11111111_11111111, a); |
18186 | let e = _mm_set1_epi8(i8::MAX); |
18187 | assert_eq_m128i(r, e); |
18188 | } |
18189 | |
18190 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18191 | unsafe fn test_mm256_maskz_cvtsepi16_epi8() { |
18192 | let a = _mm256_set1_epi16(i16::MAX); |
18193 | let r = _mm256_maskz_cvtsepi16_epi8(0, a); |
18194 | assert_eq_m128i(r, _mm_setzero_si128()); |
18195 | let r = _mm256_maskz_cvtsepi16_epi8(0b11111111_11111111, a); |
18196 | let e = _mm_set1_epi8(i8::MAX); |
18197 | assert_eq_m128i(r, e); |
18198 | } |
18199 | |
18200 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18201 | unsafe fn test_mm_cvtsepi16_epi8() { |
18202 | let a = _mm_set1_epi16(i16::MAX); |
18203 | let r = _mm_cvtsepi16_epi8(a); |
18204 | #[rustfmt::skip] |
18205 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX); |
18206 | assert_eq_m128i(r, e); |
18207 | } |
18208 | |
18209 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18210 | unsafe fn test_mm_mask_cvtsepi16_epi8() { |
18211 | let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1); |
18212 | let a = _mm_set1_epi16(i16::MAX); |
18213 | let r = _mm_mask_cvtsepi16_epi8(src, 0, a); |
18214 | assert_eq_m128i(r, src); |
18215 | let r = _mm_mask_cvtsepi16_epi8(src, 0b11111111, a); |
18216 | #[rustfmt::skip] |
18217 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX); |
18218 | assert_eq_m128i(r, e); |
18219 | } |
18220 | |
18221 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18222 | unsafe fn test_mm_maskz_cvtsepi16_epi8() { |
18223 | let a = _mm_set1_epi16(i16::MAX); |
18224 | let r = _mm_maskz_cvtsepi16_epi8(0, a); |
18225 | assert_eq_m128i(r, _mm_setzero_si128()); |
18226 | let r = _mm_maskz_cvtsepi16_epi8(0b11111111, a); |
18227 | #[rustfmt::skip] |
18228 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX); |
18229 | assert_eq_m128i(r, e); |
18230 | } |
18231 | |
18232 | #[simd_test(enable = "avx512bw" )] |
18233 | unsafe fn test_mm512_maskz_cvtsepi16_epi8() { |
18234 | let a = _mm512_set1_epi16(i16::MAX); |
18235 | let r = _mm512_maskz_cvtsepi16_epi8(0, a); |
18236 | assert_eq_m256i(r, _mm256_setzero_si256()); |
18237 | let r = _mm512_maskz_cvtsepi16_epi8(0b11111111_11111111_11111111_11111111, a); |
18238 | let e = _mm256_set1_epi8(i8::MAX); |
18239 | assert_eq_m256i(r, e); |
18240 | } |
18241 | |
18242 | #[simd_test(enable = "avx512bw" )] |
18243 | unsafe fn test_mm512_cvtusepi16_epi8() { |
18244 | let a = _mm512_set1_epi16(i16::MIN); |
18245 | let r = _mm512_cvtusepi16_epi8(a); |
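// unsigned saturation: i16::MIN reinterpreted as u16 is 0x8000, which clamps to 0xFF (-1 as i8)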
18246 | let e = _mm256_set1_epi8(-1); |
18247 | assert_eq_m256i(r, e); |
18248 | } |
18249 | |
18250 | #[simd_test(enable = "avx512bw" )] |
18251 | unsafe fn test_mm512_mask_cvtusepi16_epi8() { |
18252 | let src = _mm256_set1_epi8(1); |
18253 | let a = _mm512_set1_epi16(i16::MIN); |
18254 | let r = _mm512_mask_cvtusepi16_epi8(src, 0, a); |
18255 | assert_eq_m256i(r, src); |
18256 | let r = _mm512_mask_cvtusepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a); |
18257 | let e = _mm256_set1_epi8(-1); |
18258 | assert_eq_m256i(r, e); |
18259 | } |
18260 | |
18261 | #[simd_test(enable = "avx512bw" )] |
18262 | unsafe fn test_mm512_maskz_cvtusepi16_epi8() { |
18263 | let a = _mm512_set1_epi16(i16::MIN); |
18264 | let r = _mm512_maskz_cvtusepi16_epi8(0, a); |
18265 | assert_eq_m256i(r, _mm256_setzero_si256()); |
18266 | let r = _mm512_maskz_cvtusepi16_epi8(0b11111111_11111111_11111111_11111111, a); |
18267 | let e = _mm256_set1_epi8(-1); |
18268 | assert_eq_m256i(r, e); |
18269 | } |
18270 | |
18271 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18272 | unsafe fn test_mm256_cvtusepi16_epi8() { |
18273 | let a = _mm256_set1_epi16(i16::MIN); |
18274 | let r = _mm256_cvtusepi16_epi8(a); |
18275 | let e = _mm_set1_epi8(-1); |
18276 | assert_eq_m128i(r, e); |
18277 | } |
18278 | |
18279 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18280 | unsafe fn test_mm256_mask_cvtusepi16_epi8() { |
18281 | let src = _mm_set1_epi8(1); |
18282 | let a = _mm256_set1_epi16(i16::MIN); |
18283 | let r = _mm256_mask_cvtusepi16_epi8(src, 0, a); |
18284 | assert_eq_m128i(r, src); |
18285 | let r = _mm256_mask_cvtusepi16_epi8(src, 0b11111111_11111111, a); |
18286 | let e = _mm_set1_epi8(-1); |
18287 | assert_eq_m128i(r, e); |
18288 | } |
18289 | |
18290 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18291 | unsafe fn test_mm256_maskz_cvtusepi16_epi8() { |
18292 | let a = _mm256_set1_epi16(i16::MIN); |
18293 | let r = _mm256_maskz_cvtusepi16_epi8(0, a); |
18294 | assert_eq_m128i(r, _mm_setzero_si128()); |
18295 | let r = _mm256_maskz_cvtusepi16_epi8(0b11111111_11111111, a); |
18296 | let e = _mm_set1_epi8(-1); |
18297 | assert_eq_m128i(r, e); |
18298 | } |
18299 | |
18300 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18301 | unsafe fn test_mm_cvtusepi16_epi8() { |
18302 | let a = _mm_set1_epi16(i16::MIN); |
18303 | let r = _mm_cvtusepi16_epi8(a); |
18304 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); |
18305 | assert_eq_m128i(r, e); |
18306 | } |
18307 | |
18308 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18309 | unsafe fn test_mm_mask_cvtusepi16_epi8() { |
18310 | let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1); |
18311 | let a = _mm_set1_epi16(i16::MIN); |
18312 | let r = _mm_mask_cvtusepi16_epi8(src, 0, a); |
18313 | assert_eq_m128i(r, src); |
18314 | let r = _mm_mask_cvtusepi16_epi8(src, 0b11111111, a); |
18315 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); |
18316 | assert_eq_m128i(r, e); |
18317 | } |
18318 | |
18319 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18320 | unsafe fn test_mm_maskz_cvtusepi16_epi8() { |
18321 | let a = _mm_set1_epi16(i16::MIN); |
18322 | let r = _mm_maskz_cvtusepi16_epi8(0, a); |
18323 | assert_eq_m128i(r, _mm_setzero_si128()); |
18324 | let r = _mm_maskz_cvtusepi16_epi8(0b11111111, a); |
18325 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); |
18326 | assert_eq_m128i(r, e); |
18327 | } |
18328 | |
18329 | #[simd_test(enable = "avx512bw" )] |
18330 | unsafe fn test_mm512_cvtepi8_epi16() { |
18331 | let a = _mm256_set1_epi8(2); |
18332 | let r = _mm512_cvtepi8_epi16(a); |
18333 | let e = _mm512_set1_epi16(2); |
18334 | assert_eq_m512i(r, e); |
18335 | } |
18336 | |
18337 | #[simd_test(enable = "avx512bw" )] |
18338 | unsafe fn test_mm512_mask_cvtepi8_epi16() { |
18339 | let src = _mm512_set1_epi16(1); |
18340 | let a = _mm256_set1_epi8(2); |
18341 | let r = _mm512_mask_cvtepi8_epi16(src, 0, a); |
18342 | assert_eq_m512i(r, src); |
18343 | let r = _mm512_mask_cvtepi8_epi16(src, 0b11111111_11111111_11111111_11111111, a); |
18344 | let e = _mm512_set1_epi16(2); |
18345 | assert_eq_m512i(r, e); |
18346 | } |
18347 | |
18348 | #[simd_test(enable = "avx512bw" )] |
18349 | unsafe fn test_mm512_maskz_cvtepi8_epi16() { |
18350 | let a = _mm256_set1_epi8(2); |
18351 | let r = _mm512_maskz_cvtepi8_epi16(0, a); |
18352 | assert_eq_m512i(r, _mm512_setzero_si512()); |
18353 | let r = _mm512_maskz_cvtepi8_epi16(0b11111111_11111111_11111111_11111111, a); |
18354 | let e = _mm512_set1_epi16(2); |
18355 | assert_eq_m512i(r, e); |
18356 | } |
18357 | |
18358 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18359 | unsafe fn test_mm256_mask_cvtepi8_epi16() { |
18360 | let src = _mm256_set1_epi16(1); |
18361 | let a = _mm_set1_epi8(2); |
18362 | let r = _mm256_mask_cvtepi8_epi16(src, 0, a); |
18363 | assert_eq_m256i(r, src); |
18364 | let r = _mm256_mask_cvtepi8_epi16(src, 0b11111111_11111111, a); |
18365 | let e = _mm256_set1_epi16(2); |
18366 | assert_eq_m256i(r, e); |
18367 | } |
18368 | |
18369 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18370 | unsafe fn test_mm256_maskz_cvtepi8_epi16() { |
18371 | let a = _mm_set1_epi8(2); |
18372 | let r = _mm256_maskz_cvtepi8_epi16(0, a); |
18373 | assert_eq_m256i(r, _mm256_setzero_si256()); |
18374 | let r = _mm256_maskz_cvtepi8_epi16(0b11111111_11111111, a); |
18375 | let e = _mm256_set1_epi16(2); |
18376 | assert_eq_m256i(r, e); |
18377 | } |
18378 | |
18379 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18380 | unsafe fn test_mm_mask_cvtepi8_epi16() { |
18381 | let src = _mm_set1_epi16(1); |
18382 | let a = _mm_set1_epi8(2); |
18383 | let r = _mm_mask_cvtepi8_epi16(src, 0, a); |
18384 | assert_eq_m128i(r, src); |
18385 | let r = _mm_mask_cvtepi8_epi16(src, 0b11111111, a); |
18386 | let e = _mm_set1_epi16(2); |
18387 | assert_eq_m128i(r, e); |
18388 | } |
18389 | |
18390 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18391 | unsafe fn test_mm_maskz_cvtepi8_epi16() { |
18392 | let a = _mm_set1_epi8(2); |
18393 | let r = _mm_maskz_cvtepi8_epi16(0, a); |
18394 | assert_eq_m128i(r, _mm_setzero_si128()); |
18395 | let r = _mm_maskz_cvtepi8_epi16(0b11111111, a); |
18396 | let e = _mm_set1_epi16(2); |
18397 | assert_eq_m128i(r, e); |
18398 | } |
18399 | |
18400 | #[simd_test(enable = "avx512bw" )] |
18401 | unsafe fn test_mm512_cvtepu8_epi16() { |
18402 | let a = _mm256_set1_epi8(2); |
18403 | let r = _mm512_cvtepu8_epi16(a); |
18404 | let e = _mm512_set1_epi16(2); |
18405 | assert_eq_m512i(r, e); |
18406 | } |
18407 | |
18408 | #[simd_test(enable = "avx512bw" )] |
18409 | unsafe fn test_mm512_mask_cvtepu8_epi16() { |
18410 | let src = _mm512_set1_epi16(1); |
18411 | let a = _mm256_set1_epi8(2); |
18412 | let r = _mm512_mask_cvtepu8_epi16(src, 0, a); |
18413 | assert_eq_m512i(r, src); |
18414 | let r = _mm512_mask_cvtepu8_epi16(src, 0b11111111_11111111_11111111_11111111, a); |
18415 | let e = _mm512_set1_epi16(2); |
18416 | assert_eq_m512i(r, e); |
18417 | } |
18418 | |
18419 | #[simd_test(enable = "avx512bw" )] |
18420 | unsafe fn test_mm512_maskz_cvtepu8_epi16() { |
18421 | let a = _mm256_set1_epi8(2); |
18422 | let r = _mm512_maskz_cvtepu8_epi16(0, a); |
18423 | assert_eq_m512i(r, _mm512_setzero_si512()); |
18424 | let r = _mm512_maskz_cvtepu8_epi16(0b11111111_11111111_11111111_11111111, a); |
18425 | let e = _mm512_set1_epi16(2); |
18426 | assert_eq_m512i(r, e); |
18427 | } |
18428 | |
18429 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18430 | unsafe fn test_mm256_mask_cvtepu8_epi16() { |
18431 | let src = _mm256_set1_epi16(1); |
18432 | let a = _mm_set1_epi8(2); |
18433 | let r = _mm256_mask_cvtepu8_epi16(src, 0, a); |
18434 | assert_eq_m256i(r, src); |
18435 | let r = _mm256_mask_cvtepu8_epi16(src, 0b11111111_11111111, a); |
18436 | let e = _mm256_set1_epi16(2); |
18437 | assert_eq_m256i(r, e); |
18438 | } |
18439 | |
18440 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18441 | unsafe fn test_mm256_maskz_cvtepu8_epi16() { |
18442 | let a = _mm_set1_epi8(2); |
18443 | let r = _mm256_maskz_cvtepu8_epi16(0, a); |
18444 | assert_eq_m256i(r, _mm256_setzero_si256()); |
18445 | let r = _mm256_maskz_cvtepu8_epi16(0b11111111_11111111, a); |
18446 | let e = _mm256_set1_epi16(2); |
18447 | assert_eq_m256i(r, e); |
18448 | } |
18449 | |
18450 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18451 | unsafe fn test_mm_mask_cvtepu8_epi16() { |
18452 | let src = _mm_set1_epi16(1); |
18453 | let a = _mm_set1_epi8(2); |
18454 | let r = _mm_mask_cvtepu8_epi16(src, 0, a); |
18455 | assert_eq_m128i(r, src); |
18456 | let r = _mm_mask_cvtepu8_epi16(src, 0b11111111, a); |
18457 | let e = _mm_set1_epi16(2); |
18458 | assert_eq_m128i(r, e); |
18459 | } |
18460 | |
18461 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18462 | unsafe fn test_mm_maskz_cvtepu8_epi16() { |
18463 | let a = _mm_set1_epi8(2); |
18464 | let r = _mm_maskz_cvtepu8_epi16(0, a); |
18465 | assert_eq_m128i(r, _mm_setzero_si128()); |
18466 | let r = _mm_maskz_cvtepu8_epi16(0b11111111, a); |
18467 | let e = _mm_set1_epi16(2); |
18468 | assert_eq_m128i(r, e); |
18469 | } |
18470 | |
18471 | #[simd_test(enable = "avx512bw" )] |
18472 | unsafe fn test_mm512_bslli_epi128() { |
18473 | #[rustfmt::skip] |
18474 | let a = _mm512_set_epi8( |
18475 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
18476 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
18477 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
18478 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
18479 | ); |
18480 | let r = _mm512_bslli_epi128::<9>(a); |
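// each 128-bit lane shifts left by 9 bytes: of the 1s at byte offsets 3, 7, 11 and 15, only the one at offset 3 survives, moving to offset 12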
18481 | #[rustfmt::skip] |
18482 | let e = _mm512_set_epi8( |
18483 | 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
18484 | 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
18485 | 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
18486 | 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
18487 | ); |
18488 | assert_eq_m512i(r, e); |
18489 | } |
18490 | |
18491 | #[simd_test(enable = "avx512bw" )] |
18492 | unsafe fn test_mm512_bsrli_epi128() { |
18493 | #[rustfmt::skip] |
18494 | let a = _mm512_set_epi8( |
18495 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
18496 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, |
18497 | 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, |
18498 | 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, |
18499 | ); |
18500 | let r = _mm512_bsrli_epi128::<3>(a); |
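// each 128-bit lane shifts right by 3 bytes: the three lowest bytes drop out and three zero bytes enter at the top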
18501 | #[rustfmt::skip] |
18502 | let e = _mm512_set_epi8( |
18503 | 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, |
18504 | 0, 0, 0, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, |
18505 | 0, 0, 0, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, |
18506 | 0, 0, 0, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, |
18507 | ); |
18508 | assert_eq_m512i(r, e); |
18509 | } |
18510 | |
18511 | #[simd_test(enable = "avx512bw" )] |
18512 | unsafe fn test_mm512_alignr_epi8() { |
18513 | #[rustfmt::skip] |
18514 | let a = _mm512_set_epi8( |
18515 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
18516 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
18517 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
18518 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
18519 | ); |
18520 | let b = _mm512_set1_epi8(1); |
18521 | let r = _mm512_alignr_epi8::<14>(a, b); |
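// per 128-bit lane the result is [a:b] shifted right by 14 bytes: the top two bytes of b (both 1) become the low bytes, followed by the low 14 bytes of a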
18522 | #[rustfmt::skip] |
18523 | let e = _mm512_set_epi8( |
18524 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, |
18525 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, |
18526 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, |
18527 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, |
18528 | ); |
18529 | assert_eq_m512i(r, e); |
18530 | } |
18531 | |
18532 | #[simd_test(enable = "avx512bw" )] |
18533 | unsafe fn test_mm512_mask_alignr_epi8() { |
18534 | #[rustfmt::skip] |
18535 | let a = _mm512_set_epi8( |
18536 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
18537 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
18538 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
18539 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
18540 | ); |
18541 | let b = _mm512_set1_epi8(1); |
18542 | let r = _mm512_mask_alignr_epi8::<14>(a, 0, a, b); |
18543 | assert_eq_m512i(r, a); |
18544 | let r = _mm512_mask_alignr_epi8::<14>( |
18545 | a, |
18546 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
18547 | a, |
18548 | b, |
18549 | ); |
18550 | #[rustfmt::skip] |
18551 | let e = _mm512_set_epi8( |
18552 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, |
18553 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, |
18554 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, |
18555 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, |
18556 | ); |
18557 | assert_eq_m512i(r, e); |
18558 | } |
18559 | |
18560 | #[simd_test(enable = "avx512bw" )] |
18561 | unsafe fn test_mm512_maskz_alignr_epi8() { |
18562 | #[rustfmt::skip] |
18563 | let a = _mm512_set_epi8( |
18564 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
18565 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
18566 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
18567 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
18568 | ); |
18569 | let b = _mm512_set1_epi8(1); |
18570 | let r = _mm512_maskz_alignr_epi8::<14>(0, a, b); |
18571 | assert_eq_m512i(r, _mm512_setzero_si512()); |
18572 | let r = _mm512_maskz_alignr_epi8::<14>( |
18573 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
18574 | a, |
18575 | b, |
18576 | ); |
18577 | #[rustfmt::skip] |
18578 | let e = _mm512_set_epi8( |
18579 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, |
18580 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, |
18581 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, |
18582 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, |
18583 | ); |
18584 | assert_eq_m512i(r, e); |
18585 | } |
18586 | |
18587 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18588 | unsafe fn test_mm256_mask_alignr_epi8() { |
18589 | #[rustfmt::skip] |
18590 | let a = _mm256_set_epi8( |
18591 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
18592 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
18593 | ); |
18594 | let b = _mm256_set1_epi8(1); |
18595 | let r = _mm256_mask_alignr_epi8::<14>(a, 0, a, b); |
18596 | assert_eq_m256i(r, a); |
18597 | let r = _mm256_mask_alignr_epi8::<14>(a, 0b11111111_11111111_11111111_11111111, a, b); |
18598 | #[rustfmt::skip] |
18599 | let e = _mm256_set_epi8( |
18600 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, |
18601 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, |
18602 | ); |
18603 | assert_eq_m256i(r, e); |
18604 | } |
18605 | |
18606 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18607 | unsafe fn test_mm256_maskz_alignr_epi8() { |
18608 | #[rustfmt::skip] |
18609 | let a = _mm256_set_epi8( |
18610 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
18611 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, |
18612 | ); |
18613 | let b = _mm256_set1_epi8(1); |
18614 | let r = _mm256_maskz_alignr_epi8::<14>(0, a, b); |
18615 | assert_eq_m256i(r, _mm256_setzero_si256()); |
18616 | let r = _mm256_maskz_alignr_epi8::<14>(0b11111111_11111111_11111111_11111111, a, b); |
18617 | #[rustfmt::skip] |
18618 | let e = _mm256_set_epi8( |
18619 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, |
18620 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, |
18621 | ); |
18622 | assert_eq_m256i(r, e); |
18623 | } |
18624 | |
18625 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18626 | unsafe fn test_mm_mask_alignr_epi8() { |
18627 | let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0); |
18628 | let b = _mm_set1_epi8(1); |
18629 | let r = _mm_mask_alignr_epi8::<14>(a, 0, a, b); |
18630 | assert_eq_m128i(r, a); |
18631 | let r = _mm_mask_alignr_epi8::<14>(a, 0b11111111_11111111, a, b); |
18632 | let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1); |
18633 | assert_eq_m128i(r, e); |
18634 | } |
18635 | |
18636 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18637 | unsafe fn test_mm_maskz_alignr_epi8() { |
18638 | let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0); |
18639 | let b = _mm_set1_epi8(1); |
18640 | let r = _mm_maskz_alignr_epi8::<14>(0, a, b); |
18641 | assert_eq_m128i(r, _mm_setzero_si128()); |
18642 | let r = _mm_maskz_alignr_epi8::<14>(0b11111111_11111111, a, b); |
18643 | let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1); |
18644 | assert_eq_m128i(r, e); |
18645 | } |
18646 | |
18647 | #[simd_test(enable = "avx512bw" )] |
18648 | unsafe fn test_mm512_mask_cvtsepi16_storeu_epi8() { |
18649 | let a = _mm512_set1_epi16(i16::MAX); |
18650 | let mut r = _mm256_undefined_si256(); |
18651 | _mm512_mask_cvtsepi16_storeu_epi8( |
18652 | &mut r as *mut _ as *mut i8, |
18653 | 0b11111111_11111111_11111111_11111111, |
18654 | a, |
18655 | ); |
18656 | let e = _mm256_set1_epi8(i8::MAX); |
18657 | assert_eq_m256i(r, e); |
18658 | } |
18659 | |
18660 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18661 | unsafe fn test_mm256_mask_cvtsepi16_storeu_epi8() { |
18662 | let a = _mm256_set1_epi16(i16::MAX); |
18663 | let mut r = _mm_undefined_si128(); |
18664 | _mm256_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a); |
18665 | let e = _mm_set1_epi8(i8::MAX); |
18666 | assert_eq_m128i(r, e); |
18667 | } |
18668 | |
18669 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18670 | unsafe fn test_mm_mask_cvtsepi16_storeu_epi8() { |
18671 | let a = _mm_set1_epi16(i16::MAX); |
18672 | let mut r = _mm_set1_epi8(0); |
18673 | _mm_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); |
18674 | #[rustfmt::skip] |
18675 | let e = _mm_set_epi8( |
18676 | 0, 0, 0, 0, 0, 0, 0, 0, |
18677 | i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, |
18678 | ); |
18679 | assert_eq_m128i(r, e); |
18680 | } |
18681 | |
18682 | #[simd_test(enable = "avx512bw" )] |
18683 | unsafe fn test_mm512_mask_cvtepi16_storeu_epi8() { |
18684 | let a = _mm512_set1_epi16(8); |
18685 | let mut r = _mm256_undefined_si256(); |
18686 | _mm512_mask_cvtepi16_storeu_epi8( |
18687 | &mut r as *mut _ as *mut i8, |
18688 | 0b11111111_11111111_11111111_11111111, |
18689 | a, |
18690 | ); |
18691 | let e = _mm256_set1_epi8(8); |
18692 | assert_eq_m256i(r, e); |
18693 | } |
18694 | |
18695 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18696 | unsafe fn test_mm256_mask_cvtepi16_storeu_epi8() { |
18697 | let a = _mm256_set1_epi16(8); |
18698 | let mut r = _mm_undefined_si128(); |
18699 | _mm256_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a); |
18700 | let e = _mm_set1_epi8(8); |
18701 | assert_eq_m128i(r, e); |
18702 | } |
18703 | |
18704 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18705 | unsafe fn test_mm_mask_cvtepi16_storeu_epi8() { |
18706 | let a = _mm_set1_epi16(8); |
18707 | let mut r = _mm_set1_epi8(0); |
18708 | _mm_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); |
18709 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8); |
18710 | assert_eq_m128i(r, e); |
18711 | } |
18712 | |
18713 | #[simd_test(enable = "avx512bw" )] |
18714 | unsafe fn test_mm512_mask_cvtusepi16_storeu_epi8() { |
18715 | let a = _mm512_set1_epi16(i16::MAX); |
18716 | let mut r = _mm256_undefined_si256(); |
18717 | _mm512_mask_cvtusepi16_storeu_epi8( |
18718 | &mut r as *mut _ as *mut i8, |
18719 | 0b11111111_11111111_11111111_11111111, |
18720 | a, |
18721 | ); |
18722 | let e = _mm256_set1_epi8(u8::MAX as i8); |
18723 | assert_eq_m256i(r, e); |
18724 | } |
18725 | |
18726 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18727 | unsafe fn test_mm256_mask_cvtusepi16_storeu_epi8() { |
18728 | let a = _mm256_set1_epi16(i16::MAX); |
18729 | let mut r = _mm_undefined_si128(); |
18730 | _mm256_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a); |
18731 | let e = _mm_set1_epi8(u8::MAX as i8); |
18732 | assert_eq_m128i(r, e); |
18733 | } |
18734 | |
18735 | #[simd_test(enable = "avx512bw,avx512vl" )] |
18736 | unsafe fn test_mm_mask_cvtusepi16_storeu_epi8() { |
18737 | let a = _mm_set1_epi16(i16::MAX); |
18738 | let mut r = _mm_set1_epi8(0); |
18739 | _mm_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); |
18740 | #[rustfmt::skip] |
18741 | let e = _mm_set_epi8( |
18742 | 0, 0, 0, 0, |
18743 | 0, 0, 0, 0, |
18744 | u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, |
18745 | u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, |
18746 | ); |
18747 | assert_eq_m128i(r, e); |
18748 | } |
18749 | } |
18750 | |