1 | use crate::{ |
2 | core_arch::{simd::*, x86::*}, |
3 | intrinsics::simd::*, |
4 | mem::transmute, |
5 | }; |
6 | |
// And
8 | |
9 | /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b |
10 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
11 | /// bit is not set). |
12 | /// |
13 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_pd&ig_expand=288) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_and_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let and: f64x2 = _mm_and_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, and, src.as_f64x2()))
    }
}
24 | |
25 | /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and |
26 | /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
27 | /// |
28 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_pd&ig_expand=289) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_maskz_and_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let and: f64x2 = _mm_and_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, and, f64x2::ZERO))
    }
}
39 | |
40 | /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b |
41 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
42 | /// bit is not set). |
43 | /// |
44 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_pd&ig_expand=291) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_and_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let and: f64x4 = _mm256_and_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, and, src.as_f64x4()))
    }
}
55 | |
56 | /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and |
57 | /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
58 | /// |
59 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_pd&ig_expand=292) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_maskz_and_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let and: f64x4 = _mm256_and_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, and, f64x4::ZERO))
    }
}
70 | |
71 | /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b |
72 | /// and store the results in dst. |
73 | /// |
74 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_pd&ig_expand=293) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandp))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_and_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_and(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) }
}
82 | |
83 | /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b |
84 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
85 | /// bit is not set). |
86 | /// |
87 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_pd&ig_expand=294) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_mask_and_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let and: f64x8 = _mm512_and_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, and, src.as_f64x8()))
    }
}
98 | |
99 | /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and |
100 | /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
101 | /// |
102 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_pd&ig_expand=295) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_maskz_and_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let and: f64x8 = _mm512_and_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, and, f64x8::ZERO))
    }
}
113 | |
114 | /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b |
115 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
116 | /// bit is not set). |
117 | /// |
118 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_ps&ig_expand=297) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_and_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let and: f32x4 = _mm_and_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, and, src.as_f32x4()))
    }
}
129 | |
130 | /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and |
131 | /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
132 | /// |
133 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_ps&ig_expand=298) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_maskz_and_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let and: f32x4 = _mm_and_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, and, f32x4::ZERO))
    }
}
144 | |
145 | /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b |
146 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
147 | /// bit is not set). |
148 | /// |
149 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_ps&ig_expand=300) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_and_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let and: f32x8 = _mm256_and_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, and, src.as_f32x8()))
    }
}
160 | |
161 | /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and |
162 | /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
163 | /// |
164 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_ps&ig_expand=301) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_maskz_and_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let and: f32x8 = _mm256_and_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, and, f32x8::ZERO))
    }
}
175 | |
176 | /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b |
177 | /// and store the results in dst. |
178 | /// |
179 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_ps&ig_expand=303) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_and_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(simd_and(
            transmute::<_, u32x16>(a),
            transmute::<_, u32x16>(b),
        ))
    }
}
192 | |
193 | /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b |
194 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
195 | /// bit is not set). |
196 | /// |
197 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_ps&ig_expand=304) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_mask_and_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let and: f32x16 = _mm512_and_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, and, src.as_f32x16()))
    }
}
208 | |
209 | /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and |
210 | /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
211 | /// |
212 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_ps&ig_expand=305) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_maskz_and_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let and: f32x16 = _mm512_and_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, and, f32x16::ZERO))
    }
}
223 | |
224 | // Andnot |
225 | |
226 | /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then |
227 | /// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the |
228 | /// corresponding bit is not set). |
229 | /// |
230 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_pd&ig_expand=326) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_andnot_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let andnot: f64x2 = _mm_andnot_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, andnot, src.as_f64x2()))
    }
}
241 | |
242 | /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then |
243 | /// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the |
244 | /// corresponding bit is not set). |
245 | /// |
246 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_pd&ig_expand=327) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_maskz_andnot_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let andnot: f64x2 = _mm_andnot_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, andnot, f64x2::ZERO))
    }
}
257 | |
258 | /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then |
259 | /// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the |
260 | /// corresponding bit is not set). |
261 | /// |
262 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_pd&ig_expand=329) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_andnot_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let andnot: f64x4 = _mm256_andnot_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, andnot, src.as_f64x4()))
    }
}
273 | |
274 | /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then |
275 | /// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the |
276 | /// corresponding bit is not set). |
277 | /// |
278 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_pd&ig_expand=330) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_maskz_andnot_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let andnot: f64x4 = _mm256_andnot_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, andnot, f64x4::ZERO))
    }
}
289 | |
290 | /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then |
291 | /// bitwise AND with b and store the results in dst. |
292 | /// |
293 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_pd&ig_expand=331) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnp))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_andnot_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { _mm512_and_pd(_mm512_xor_pd(a, transmute(_mm512_set1_epi64(-1))), b) }
}
301 | |
302 | /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then |
303 | /// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the |
304 | /// corresponding bit is not set). |
305 | /// |
306 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_pd&ig_expand=332) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_mask_andnot_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let andnot: f64x8 = _mm512_andnot_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, andnot, src.as_f64x8()))
    }
}
317 | |
318 | /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then |
319 | /// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the |
320 | /// corresponding bit is not set). |
321 | /// |
322 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_pd&ig_expand=333) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_maskz_andnot_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let andnot: f64x8 = _mm512_andnot_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, andnot, f64x8::ZERO))
    }
}
333 | |
334 | /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then |
335 | /// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the |
336 | /// corresponding bit is not set). |
337 | /// |
338 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_ps&ig_expand=335) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_andnot_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let andnot: f32x4 = _mm_andnot_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, andnot, src.as_f32x4()))
    }
}
349 | |
350 | /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then |
351 | /// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the |
352 | /// corresponding bit is not set). |
353 | /// |
354 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_ps&ig_expand=336) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_maskz_andnot_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let andnot: f32x4 = _mm_andnot_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, andnot, f32x4::ZERO))
    }
}
365 | |
366 | /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then |
367 | /// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the |
368 | /// corresponding bit is not set). |
369 | /// |
370 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_ps&ig_expand=338) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_andnot_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let andnot: f32x8 = _mm256_andnot_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, andnot, src.as_f32x8()))
    }
}
381 | |
382 | /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then |
383 | /// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the |
384 | /// corresponding bit is not set). |
385 | /// |
386 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_ps&ig_expand=339) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_maskz_andnot_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let andnot: f32x8 = _mm256_andnot_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, andnot, f32x8::ZERO))
    }
}
397 | |
398 | /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then |
399 | /// bitwise AND with b and store the results in dst. |
400 | /// |
401 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_ps&ig_expand=340) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_andnot_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { _mm512_and_ps(_mm512_xor_ps(a, transmute(_mm512_set1_epi32(-1))), b) }
}
409 | |
410 | /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then |
411 | /// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the |
412 | /// corresponding bit is not set). |
413 | /// |
414 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_ps&ig_expand=341) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_mask_andnot_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let andnot: f32x16 = _mm512_andnot_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, andnot, src.as_f32x16()))
    }
}
425 | |
426 | /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then |
427 | /// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the |
428 | /// corresponding bit is not set). |
429 | /// |
430 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_ps&ig_expand=342) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_maskz_andnot_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let andnot: f32x16 = _mm512_andnot_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, andnot, f32x16::ZERO))
    }
}
441 | |
442 | // Or |
443 | |
444 | /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b |
445 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
446 | /// bit is not set). |
447 | /// |
448 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_pd&ig_expand=4824) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_or_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let or: f64x2 = _mm_or_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, or, src.as_f64x2()))
    }
}
459 | |
460 | /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and |
461 | /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
462 | /// |
463 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_pd&ig_expand=4825) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_maskz_or_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let or: f64x2 = _mm_or_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, or, f64x2::ZERO))
    }
}
474 | |
475 | /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b |
476 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
477 | /// bit is not set). |
478 | /// |
479 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_pd&ig_expand=4827) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_or_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let or: f64x4 = _mm256_or_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, or, src.as_f64x4()))
    }
}
490 | |
491 | /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and |
492 | /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
493 | /// |
494 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_pd&ig_expand=4828) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_maskz_or_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let or: f64x4 = _mm256_or_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, or, f64x4::ZERO))
    }
}
505 | |
506 | /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b |
507 | /// and store the results in dst. |
508 | /// |
509 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_pd&ig_expand=4829) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorp))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_or_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_or(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) }
}
517 | |
518 | /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and |
519 | /// store the results in dst using writemask k (elements are copied from src if the corresponding |
520 | /// bit is not set). |
521 | /// |
522 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_pd&ig_expand=4830) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_mask_or_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let or: f64x8 = _mm512_or_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, or, src.as_f64x8()))
    }
}
533 | |
534 | /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and |
535 | /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
536 | /// |
537 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_pd&ig_expand=4831) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_maskz_or_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let or: f64x8 = _mm512_or_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, or, f64x8::ZERO))
    }
}
548 | |
549 | /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b |
550 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
551 | /// bit is not set). |
552 | /// |
553 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_ps&ig_expand=4833) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_or_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let or: f32x4 = _mm_or_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, or, src.as_f32x4()))
    }
}
564 | |
565 | /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and |
566 | /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
567 | /// |
568 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_ps&ig_expand=4834) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_maskz_or_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let or: f32x4 = _mm_or_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, or, f32x4::ZERO))
    }
}
579 | |
580 | /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b |
581 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
582 | /// bit is not set). |
583 | /// |
584 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_ps&ig_expand=4836) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_or_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let or: f32x8 = _mm256_or_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, or, src.as_f32x8()))
    }
}
595 | |
596 | /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and |
597 | /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
598 | /// |
599 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_ps&ig_expand=4837) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_maskz_or_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let or: f32x8 = _mm256_or_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, or, f32x8::ZERO))
    }
}
610 | |
611 | /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b |
612 | /// and store the results in dst. |
613 | /// |
614 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_ps&ig_expand=4838) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_or_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(simd_or(
            transmute::<_, u32x16>(a),
            transmute::<_, u32x16>(b),
        ))
    }
}
627 | |
628 | /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and |
629 | /// store the results in dst using writemask k (elements are copied from src if the corresponding |
630 | /// bit is not set). |
631 | /// |
632 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_ps&ig_expand=4839) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_mask_or_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let or: f32x16 = _mm512_or_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, or, src.as_f32x16()))
    }
}
643 | |
644 | /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and |
645 | /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
646 | /// |
647 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_ps&ig_expand=4840) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_maskz_or_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let or: f32x16 = _mm512_or_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, or, f32x16::ZERO))
    }
}
658 | |
659 | // Xor |
660 | |
661 | /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b |
662 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
663 | /// bit is not set). |
664 | /// |
665 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_pd&ig_expand=7094) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_xor_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let xor: f64x2 = _mm_xor_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, xor, src.as_f64x2()))
    }
}
676 | |
677 | /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and |
678 | /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
679 | /// |
680 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_pd&ig_expand=7095) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_maskz_xor_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let xor: f64x2 = _mm_xor_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, xor, f64x2::ZERO))
    }
}
691 | |
692 | /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b |
693 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
694 | /// bit is not set). |
695 | /// |
696 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_pd&ig_expand=7097) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_xor_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let xor: f64x4 = _mm256_xor_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, xor, src.as_f64x4()))
    }
}
707 | |
708 | /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and |
709 | /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
710 | /// |
711 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_pd&ig_expand=7098) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_maskz_xor_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let xor: f64x4 = _mm256_xor_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, xor, f64x4::ZERO))
    }
}
722 | |
723 | /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b |
724 | /// and store the results in dst. |
725 | /// |
726 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_pd&ig_expand=7102) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorp))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_xor_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_xor(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) }
}
734 | |
735 | /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and |
736 | /// store the results in dst using writemask k (elements are copied from src if the corresponding |
737 | /// bit is not set). |
738 | /// |
739 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_pd&ig_expand=7100) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_mask_xor_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let xor: f64x8 = _mm512_xor_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, xor, src.as_f64x8()))
    }
}
750 | |
751 | /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and |
752 | /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
753 | /// |
754 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_pd&ig_expand=7101) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_maskz_xor_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let xor: f64x8 = _mm512_xor_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, xor, f64x8::ZERO))
    }
}
765 | |
766 | /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b |
767 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
768 | /// bit is not set). |
769 | /// |
770 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_ps&ig_expand=7103) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_xor_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let xor: f32x4 = _mm_xor_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, xor, src.as_f32x4()))
    }
}
781 | |
782 | /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and |
783 | /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
784 | /// |
785 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_ps&ig_expand=7104) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_maskz_xor_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let xor: f32x4 = _mm_xor_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, xor, f32x4::ZERO))
    }
}
796 | |
797 | /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b |
798 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
799 | /// bit is not set). |
800 | /// |
801 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_ps&ig_expand=7106) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_xor_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let xor: f32x8 = _mm256_xor_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, xor, src.as_f32x8()))
    }
}
812 | |
813 | /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and |
814 | /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
815 | /// |
816 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_ps&ig_expand=7107) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_maskz_xor_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let xor: f32x8 = _mm256_xor_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, xor, f32x8::ZERO))
    }
}
827 | |
828 | /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b |
829 | /// and store the results in dst. |
830 | /// |
831 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_ps&ig_expand=7111) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_xor_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(simd_xor(
            transmute::<_, u32x16>(a),
            transmute::<_, u32x16>(b),
        ))
    }
}
844 | |
845 | /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and |
846 | /// store the results in dst using writemask k (elements are copied from src if the corresponding |
847 | /// bit is not set). |
848 | /// |
849 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_ps&ig_expand=7109) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_mask_xor_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let xor: f32x16 = _mm512_xor_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, xor, src.as_f32x16()))
    }
}
860 | |
861 | /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and |
862 | /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
863 | /// |
864 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_ps&ig_expand=7110) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_maskz_xor_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let xor: f32x16 = _mm512_xor_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, xor, f32x16::ZERO))
    }
}
875 | |
876 | // Broadcast |
877 | |
878 | /// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all |
879 | /// elements of dst. |
880 | /// |
881 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x2&ig_expand=509) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_broadcast_f32x2(a: __m128) -> __m256 {
    unsafe {
        let b: f32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}
891 | |
892 | /// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all |
893 | /// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set). |
894 | /// |
895 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x2&ig_expand=510) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_broadcast_f32x2(src: __m256, k: __mmask8, a: __m128) -> __m256 {
    unsafe {
        let b: f32x8 = _mm256_broadcast_f32x2(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
    }
}
906 | |
907 | /// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all |
908 | /// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
909 | /// |
910 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x2&ig_expand=511) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_maskz_broadcast_f32x2(k: __mmask8, a: __m128) -> __m256 {
    unsafe {
        let b: f32x8 = _mm256_broadcast_f32x2(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
    }
}
921 | |
922 | /// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all |
923 | /// elements of dst. |
924 | /// |
925 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x2&ig_expand=512) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_broadcast_f32x2(a: __m128) -> __m512 {
    unsafe {
        let b: f32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}
935 | |
936 | /// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all |
937 | /// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set). |
938 | /// |
939 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x2&ig_expand=513) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_mask_broadcast_f32x2(src: __m512, k: __mmask16, a: __m128) -> __m512 {
    unsafe {
        let b: f32x16 = _mm512_broadcast_f32x2(a).as_f32x16();
        transmute(simd_select_bitmask(k, b, src.as_f32x16()))
    }
}
950 | |
951 | /// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all |
952 | /// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
953 | /// |
954 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x2&ig_expand=514) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_maskz_broadcast_f32x2(k: __mmask16, a: __m128) -> __m512 {
    unsafe {
        let b: f32x16 = _mm512_broadcast_f32x2(a).as_f32x16();
        transmute(simd_select_bitmask(k, b, f32x16::ZERO))
    }
}
965 | |
966 | /// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all |
967 | /// elements of dst. |
968 | /// |
969 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x8&ig_expand=521) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_broadcast_f32x8(a: __m256) -> __m512 {
    unsafe {
        let b: f32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]);
        transmute(b)
    }
}
979 | |
980 | /// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all |
981 | /// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set). |
982 | /// |
983 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x8&ig_expand=522) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_mask_broadcast_f32x8(src: __m512, k: __mmask16, a: __m256) -> __m512 {
    unsafe {
        let b: f32x16 = _mm512_broadcast_f32x8(a).as_f32x16();
        transmute(simd_select_bitmask(k, b, src.as_f32x16()))
    }
}
993 | |
994 | /// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all |
995 | /// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
996 | /// |
997 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x8&ig_expand=523) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_maskz_broadcast_f32x8(k: __mmask16, a: __m256) -> __m512 {
    unsafe {
        let b: f32x16 = _mm512_broadcast_f32x8(a).as_f32x16();
        transmute(simd_select_bitmask(k, b, f32x16::ZERO))
    }
}
1007 | |
1008 | /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all |
1009 | /// elements of dst. |
1010 | /// |
1011 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f64x2&ig_expand=524) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_broadcast_f64x2(a: __m128d) -> __m256d {
    unsafe {
        let b: f64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
        transmute(b)
    }
}
1021 | |
1022 | /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all |
1023 | /// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set). |
1024 | /// |
1025 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f64x2&ig_expand=525) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_broadcast_f64x2(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
    unsafe {
        let b: f64x4 = _mm256_broadcast_f64x2(a).as_f64x4();
        transmute(simd_select_bitmask(k, b, src.as_f64x4()))
    }
}
1035 | |
1036 | /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all |
1037 | /// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
1038 | /// |
1039 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f64x2&ig_expand=526) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m256d {
    unsafe {
        let b: f64x4 = _mm256_broadcast_f64x2(a).as_f64x4();
        transmute(simd_select_bitmask(k, b, f64x4::ZERO))
    }
}
1049 | |
1050 | /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all |
1051 | /// elements of dst. |
1052 | /// |
1053 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x2&ig_expand=527) |
1054 | #[inline ] |
1055 | #[target_feature (enable = "avx512dq" )] |
1056 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1057 | pub fn _mm512_broadcast_f64x2(a: __m128d) -> __m512d { |
1058 | unsafe { |
1059 | let b: f64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]); |
        transmute(b)
1061 | } |
1062 | } |
1063 | |
1064 | /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all |
1065 | /// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set). |
1066 | /// |
1067 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x2&ig_expand=528) |
1068 | #[inline ] |
1069 | #[target_feature (enable = "avx512dq" )] |
1070 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1071 | pub fn _mm512_mask_broadcast_f64x2(src: __m512d, k: __mmask8, a: __m128d) -> __m512d { |
1072 | unsafe { |
1073 | let b: f64x8 = _mm512_broadcast_f64x2(a).as_f64x8(); |
        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
1075 | } |
1076 | } |
1077 | |
1078 | /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all |
1079 | /// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
1080 | /// |
1081 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x2&ig_expand=529) |
1082 | #[inline ] |
1083 | #[target_feature (enable = "avx512dq" )] |
1084 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1085 | pub fn _mm512_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m512d { |
1086 | unsafe { |
1087 | let b: f64x8 = _mm512_broadcast_f64x2(a).as_f64x8(); |
        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
1089 | } |
1090 | } |
1091 | |
1092 | /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst. |
1093 | /// |
1094 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcast_i32x2&ig_expand=533) |
1095 | #[inline ] |
1096 | #[target_feature (enable = "avx512dq,avx512vl" )] |
1097 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1098 | pub fn _mm_broadcast_i32x2(a: __m128i) -> __m128i { |
1099 | unsafe { |
1100 | let a: i32x4 = a.as_i32x4(); |
1101 | let b: i32x4 = simd_shuffle!(a, a, [0, 1, 0, 1]); |
        transmute(b)
1103 | } |
1104 | } |
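
// Illustrative sketch (values assumed): only the lower two 32-bit lanes of `a`
// take part; they are repeated across the destination:
//
//     let a = _mm_setr_epi32(1, 2, 3, 4);    // a = [1, 2, 3, 4]
//     let r = _mm_broadcast_i32x2(a);        // r = [1, 2, 1, 2]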
1105 | |
1106 | /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k |
1107 | /// (elements are copied from src if the corresponding bit is not set). |
1108 | /// |
1109 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcast_i32x2&ig_expand=534) |
1110 | #[inline ] |
1111 | #[target_feature (enable = "avx512dq,avx512vl" )] |
1112 | #[cfg_attr (test, assert_instr(vbroadcasti32x2))] |
1113 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1114 | pub fn _mm_mask_broadcast_i32x2(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { |
1115 | unsafe { |
1116 | let b: i32x4 = _mm_broadcast_i32x2(a).as_i32x4(); |
        transmute(simd_select_bitmask(k, b, src.as_i32x4()))
1118 | } |
1119 | } |
1120 | |
1121 | /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k |
1122 | /// (elements are zeroed out if the corresponding bit is not set). |
1123 | /// |
1124 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcast_i32x2&ig_expand=535) |
1125 | #[inline ] |
1126 | #[target_feature (enable = "avx512dq,avx512vl" )] |
1127 | #[cfg_attr (test, assert_instr(vbroadcasti32x2))] |
1128 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1129 | pub fn _mm_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m128i { |
1130 | unsafe { |
1131 | let b: i32x4 = _mm_broadcast_i32x2(a).as_i32x4(); |
        transmute(simd_select_bitmask(k, b, i32x4::ZERO))
1133 | } |
1134 | } |
1135 | |
1136 | /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst. |
1137 | /// |
1138 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x2&ig_expand=536) |
1139 | #[inline ] |
1140 | #[target_feature (enable = "avx512dq,avx512vl" )] |
1141 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1142 | pub fn _mm256_broadcast_i32x2(a: __m128i) -> __m256i { |
1143 | unsafe { |
1144 | let a: i32x4 = a.as_i32x4(); |
1145 | let b: i32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]); |
        transmute(b)
1147 | } |
1148 | } |
1149 | |
1150 | /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k |
1151 | /// (elements are copied from src if the corresponding bit is not set). |
1152 | /// |
1153 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x2&ig_expand=537) |
1154 | #[inline ] |
1155 | #[target_feature (enable = "avx512dq,avx512vl" )] |
1156 | #[cfg_attr (test, assert_instr(vbroadcasti32x2))] |
1157 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1158 | pub fn _mm256_mask_broadcast_i32x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { |
1159 | unsafe { |
1160 | let b: i32x8 = _mm256_broadcast_i32x2(a).as_i32x8(); |
        transmute(simd_select_bitmask(k, b, src.as_i32x8()))
1162 | } |
1163 | } |
1164 | |
1165 | /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k |
1166 | /// (elements are zeroed out if the corresponding bit is not set). |
1167 | /// |
1168 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x2&ig_expand=538) |
1169 | #[inline ] |
1170 | #[target_feature (enable = "avx512dq,avx512vl" )] |
1171 | #[cfg_attr (test, assert_instr(vbroadcasti32x2))] |
1172 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1173 | pub fn _mm256_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m256i { |
1174 | unsafe { |
1175 | let b: i32x8 = _mm256_broadcast_i32x2(a).as_i32x8(); |
        transmute(simd_select_bitmask(k, b, i32x8::ZERO))
1177 | } |
1178 | } |
1179 | |
1180 | /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst. |
1181 | /// |
1182 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x2&ig_expand=539) |
1183 | #[inline ] |
1184 | #[target_feature (enable = "avx512dq" )] |
1185 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1186 | pub fn _mm512_broadcast_i32x2(a: __m128i) -> __m512i { |
1187 | unsafe { |
1188 | let a: i32x4 = a.as_i32x4(); |
1189 | let b: i32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]); |
        transmute(b)
1191 | } |
1192 | } |
1193 | |
1194 | /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k |
1195 | /// (elements are copied from src if the corresponding bit is not set). |
1196 | /// |
1197 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x2&ig_expand=540) |
1198 | #[inline ] |
1199 | #[target_feature (enable = "avx512dq" )] |
1200 | #[cfg_attr (test, assert_instr(vbroadcasti32x2))] |
1201 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1202 | pub fn _mm512_mask_broadcast_i32x2(src: __m512i, k: __mmask16, a: __m128i) -> __m512i { |
1203 | unsafe { |
1204 | let b: i32x16 = _mm512_broadcast_i32x2(a).as_i32x16(); |
        transmute(simd_select_bitmask(k, b, src.as_i32x16()))
1206 | } |
1207 | } |
1208 | |
1209 | /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k |
1210 | /// (elements are zeroed out if the corresponding bit is not set). |
1211 | /// |
1212 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x2&ig_expand=541) |
1213 | #[inline ] |
1214 | #[target_feature (enable = "avx512dq" )] |
1215 | #[cfg_attr (test, assert_instr(vbroadcasti32x2))] |
1216 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1217 | pub fn _mm512_maskz_broadcast_i32x2(k: __mmask16, a: __m128i) -> __m512i { |
1218 | unsafe { |
1219 | let b: i32x16 = _mm512_broadcast_i32x2(a).as_i32x16(); |
        transmute(simd_select_bitmask(k, b, i32x16::ZERO))
1221 | } |
1222 | } |
1223 | |
1224 | /// Broadcasts the 8 packed 32-bit integers from a to all elements of dst. |
1225 | /// |
1226 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x8&ig_expand=548) |
1227 | #[inline ] |
1228 | #[target_feature (enable = "avx512dq" )] |
1229 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1230 | pub fn _mm512_broadcast_i32x8(a: __m256i) -> __m512i { |
1231 | unsafe { |
1232 | let a: i32x8 = a.as_i32x8(); |
1233 | let b: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]); |
        transmute(b)
1235 | } |
1236 | } |
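
// Illustrative sketch (values assumed): the whole 256-bit source is repeated into
// both halves of the 512-bit destination:
//
//     let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
//     let r = _mm512_broadcast_i32x8(a);
//     // r = [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]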
1237 | |
1238 | /// Broadcasts the 8 packed 32-bit integers from a to all elements of dst using writemask k |
1239 | /// (elements are copied from src if the corresponding bit is not set). |
1240 | /// |
1241 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x8&ig_expand=549) |
1242 | #[inline ] |
1243 | #[target_feature (enable = "avx512dq" )] |
1244 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1245 | pub fn _mm512_mask_broadcast_i32x8(src: __m512i, k: __mmask16, a: __m256i) -> __m512i { |
1246 | unsafe { |
1247 | let b: i32x16 = _mm512_broadcast_i32x8(a).as_i32x16(); |
        transmute(simd_select_bitmask(k, b, src.as_i32x16()))
1249 | } |
1250 | } |
1251 | |
1252 | /// Broadcasts the 8 packed 32-bit integers from a to all elements of dst using zeromask k |
1253 | /// (elements are zeroed out if the corresponding bit is not set). |
1254 | /// |
1255 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x8&ig_expand=550) |
1256 | #[inline ] |
1257 | #[target_feature (enable = "avx512dq" )] |
1258 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1259 | pub fn _mm512_maskz_broadcast_i32x8(k: __mmask16, a: __m256i) -> __m512i { |
1260 | unsafe { |
1261 | let b: i32x16 = _mm512_broadcast_i32x8(a).as_i32x16(); |
        transmute(simd_select_bitmask(k, b, i32x16::ZERO))
1263 | } |
1264 | } |
1265 | |
1266 | /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst. |
1267 | /// |
1268 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i64x2&ig_expand=551) |
1269 | #[inline ] |
1270 | #[target_feature (enable = "avx512dq,avx512vl" )] |
1271 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1272 | pub fn _mm256_broadcast_i64x2(a: __m128i) -> __m256i { |
1273 | unsafe { |
1274 | let a: i64x2 = a.as_i64x2(); |
1275 | let b: i64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]); |
        transmute(b)
1277 | } |
1278 | } |
1279 | |
1280 | /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using writemask k |
1281 | /// (elements are copied from src if the corresponding bit is not set). |
1282 | /// |
1283 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i64x2&ig_expand=552) |
1284 | #[inline ] |
1285 | #[target_feature (enable = "avx512dq,avx512vl" )] |
1286 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1287 | pub fn _mm256_mask_broadcast_i64x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { |
1288 | unsafe { |
1289 | let b: i64x4 = _mm256_broadcast_i64x2(a).as_i64x4(); |
        transmute(simd_select_bitmask(k, b, src.as_i64x4()))
1291 | } |
1292 | } |
1293 | |
1294 | /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using zeromask k |
1295 | /// (elements are zeroed out if the corresponding bit is not set). |
1296 | /// |
1297 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i64x2&ig_expand=553) |
1298 | #[inline ] |
1299 | #[target_feature (enable = "avx512dq,avx512vl" )] |
1300 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1301 | pub fn _mm256_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m256i { |
1302 | unsafe { |
1303 | let b: i64x4 = _mm256_broadcast_i64x2(a).as_i64x4(); |
        transmute(simd_select_bitmask(k, b, i64x4::ZERO))
1305 | } |
1306 | } |
1307 | |
1308 | /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst. |
1309 | /// |
1310 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x2&ig_expand=554) |
1311 | #[inline ] |
1312 | #[target_feature (enable = "avx512dq" )] |
1313 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1314 | pub fn _mm512_broadcast_i64x2(a: __m128i) -> __m512i { |
1315 | unsafe { |
1316 | let a: i64x2 = a.as_i64x2(); |
1317 | let b: i64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]); |
        transmute(b)
1319 | } |
1320 | } |
1321 | |
1322 | /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using writemask k |
1323 | /// (elements are copied from src if the corresponding bit is not set). |
1324 | /// |
1325 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x2&ig_expand=555) |
1326 | #[inline ] |
1327 | #[target_feature (enable = "avx512dq" )] |
1328 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1329 | pub fn _mm512_mask_broadcast_i64x2(src: __m512i, k: __mmask8, a: __m128i) -> __m512i { |
1330 | unsafe { |
1331 | let b: i64x8 = _mm512_broadcast_i64x2(a).as_i64x8(); |
        transmute(simd_select_bitmask(k, b, src.as_i64x8()))
1333 | } |
1334 | } |
1335 | |
1336 | /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using zeromask k |
1337 | /// (elements are zeroed out if the corresponding bit is not set). |
1338 | /// |
1339 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x2&ig_expand=556) |
1340 | #[inline ] |
1341 | #[target_feature (enable = "avx512dq" )] |
1342 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1343 | pub fn _mm512_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m512i { |
1344 | unsafe { |
1345 | let b: i64x8 = _mm512_broadcast_i64x2(a).as_i64x8(); |
        transmute(simd_select_bitmask(k, b, i64x8::ZERO))
1347 | } |
1348 | } |
1349 | |
1350 | // Extract |
1351 | |
1352 | /// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a, |
1353 | /// selected with IMM8, and stores the result in dst. |
1354 | /// |
1355 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x8_ps&ig_expand=2946) |
1356 | #[inline ] |
1357 | #[target_feature (enable = "avx512dq" )] |
1358 | #[rustc_legacy_const_generics (1)] |
1359 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1360 | pub fn _mm512_extractf32x8_ps<const IMM8: i32>(a: __m512) -> __m256 { |
1361 | unsafe { |
1362 | static_assert_uimm_bits!(IMM8, 1); |
1363 | match IMM8 & 1 { |
1364 | 0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]), |
1365 | _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]), |
1366 | } |
1367 | } |
1368 | } |
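
// Illustrative sketch (values assumed): IMM8 selects which 256-bit half of `a`
// becomes the result; only its lowest bit is significant:
//
//     let a = _mm512_setr_ps(
//         0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
//     );
//     let lo = _mm512_extractf32x8_ps::<0>(a); // [0., 1., ..., 7.]
//     let hi = _mm512_extractf32x8_ps::<1>(a); // [8., 9., ..., 15.]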
1369 | |
1370 | /// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a, |
1371 | /// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src |
1372 | /// if the corresponding bit is not set). |
1373 | /// |
1374 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x8_ps&ig_expand=2947) |
1375 | #[inline ] |
1376 | #[target_feature (enable = "avx512dq" )] |
1377 | #[cfg_attr (test, assert_instr(vextractf32x8, IMM8 = 1))] |
1378 | #[rustc_legacy_const_generics (3)] |
1379 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1380 | pub fn _mm512_mask_extractf32x8_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m512) -> __m256 { |
1381 | unsafe { |
1382 | static_assert_uimm_bits!(IMM8, 1); |
1383 | let b: __m256 = _mm512_extractf32x8_ps::<IMM8>(a); |
        transmute(simd_select_bitmask(k, b.as_f32x8(), src.as_f32x8()))
1385 | } |
1386 | } |
1387 | |
1388 | /// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a, |
1389 | /// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the |
1390 | /// corresponding bit is not set). |
1391 | /// |
1392 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x8_ps&ig_expand=2948) |
1393 | #[inline ] |
1394 | #[target_feature (enable = "avx512dq" )] |
1395 | #[cfg_attr (test, assert_instr(vextractf32x8, IMM8 = 1))] |
1396 | #[rustc_legacy_const_generics (2)] |
1397 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1398 | pub fn _mm512_maskz_extractf32x8_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m256 { |
1399 | unsafe { |
1400 | static_assert_uimm_bits!(IMM8, 1); |
1401 | let b: __m256 = _mm512_extractf32x8_ps::<IMM8>(a); |
        transmute(simd_select_bitmask(k, b.as_f32x8(), f32x8::ZERO))
1403 | } |
1404 | } |
1405 | |
1406 | /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a, |
1407 | /// selected with IMM8, and stores the result in dst. |
1408 | /// |
1409 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf64x2_pd&ig_expand=2949) |
1410 | #[inline ] |
1411 | #[target_feature (enable = "avx512dq,avx512vl" )] |
1412 | #[rustc_legacy_const_generics (1)] |
1413 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1414 | pub fn _mm256_extractf64x2_pd<const IMM8: i32>(a: __m256d) -> __m128d { |
1415 | unsafe { |
1416 | static_assert_uimm_bits!(IMM8, 1); |
1417 | match IMM8 & 1 { |
1418 | 0 => simd_shuffle!(a, a, [0, 1]), |
1419 | _ => simd_shuffle!(a, a, [2, 3]), |
1420 | } |
1421 | } |
1422 | } |
1423 | |
1424 | /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a, |
1425 | /// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src |
1426 | /// if the corresponding bit is not set). |
1427 | /// |
1428 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf64x2_pd&ig_expand=2950) |
1429 | #[inline ] |
1430 | #[target_feature (enable = "avx512dq,avx512vl" )] |
1431 | #[cfg_attr (test, assert_instr(vextractf64x2, IMM8 = 1))] |
1432 | #[rustc_legacy_const_generics (3)] |
1433 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1434 | pub fn _mm256_mask_extractf64x2_pd<const IMM8: i32>( |
1435 | src: __m128d, |
1436 | k: __mmask8, |
1437 | a: __m256d, |
1438 | ) -> __m128d { |
1439 | unsafe { |
1440 | static_assert_uimm_bits!(IMM8, 1); |
1441 | let b: __m128d = _mm256_extractf64x2_pd::<IMM8>(a); |
        transmute(simd_select_bitmask(k, b.as_f64x2(), src.as_f64x2()))
1443 | } |
1444 | } |
1445 | |
1446 | /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a, |
1447 | /// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the |
1448 | /// corresponding bit is not set). |
1449 | /// |
1450 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf64x2_pd&ig_expand=2951) |
1451 | #[inline ] |
1452 | #[target_feature (enable = "avx512dq,avx512vl" )] |
1453 | #[cfg_attr (test, assert_instr(vextractf64x2, IMM8 = 1))] |
1454 | #[rustc_legacy_const_generics (2)] |
1455 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1456 | pub fn _mm256_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m128d { |
1457 | unsafe { |
1458 | static_assert_uimm_bits!(IMM8, 1); |
1459 | let b: __m128d = _mm256_extractf64x2_pd::<IMM8>(a); |
        transmute(simd_select_bitmask(k, b.as_f64x2(), f64x2::ZERO))
1461 | } |
1462 | } |
1463 | |
1464 | /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a, |
1465 | /// selected with IMM8, and stores the result in dst. |
1466 | /// |
1467 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x2_pd&ig_expand=2952) |
1468 | #[inline ] |
1469 | #[target_feature (enable = "avx512dq" )] |
1470 | #[rustc_legacy_const_generics (1)] |
1471 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1472 | pub fn _mm512_extractf64x2_pd<const IMM8: i32>(a: __m512d) -> __m128d { |
1473 | unsafe { |
1474 | static_assert_uimm_bits!(IMM8, 2); |
1475 | match IMM8 & 3 { |
1476 | 0 => simd_shuffle!(a, a, [0, 1]), |
1477 | 1 => simd_shuffle!(a, a, [2, 3]), |
1478 | 2 => simd_shuffle!(a, a, [4, 5]), |
1479 | _ => simd_shuffle!(a, a, [6, 7]), |
1480 | } |
1481 | } |
1482 | } |
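
// Illustrative sketch (values assumed): the two low bits of IMM8 select one of
// the four 128-bit lanes of `a`:
//
//     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
//     let r = _mm512_extractf64x2_pd::<2>(a);  // r = [4.0, 5.0]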
1483 | |
1484 | /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a, |
1485 | /// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src |
1486 | /// if the corresponding bit is not set). |
1487 | /// |
1488 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x2_pd&ig_expand=2953) |
1489 | #[inline ] |
1490 | #[target_feature (enable = "avx512dq" )] |
1491 | #[cfg_attr (test, assert_instr(vextractf64x2, IMM8 = 3))] |
1492 | #[rustc_legacy_const_generics (3)] |
1493 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1494 | pub fn _mm512_mask_extractf64x2_pd<const IMM8: i32>( |
1495 | src: __m128d, |
1496 | k: __mmask8, |
1497 | a: __m512d, |
1498 | ) -> __m128d { |
1499 | unsafe { |
1500 | static_assert_uimm_bits!(IMM8, 2); |
1501 | let b: f64x2 = _mm512_extractf64x2_pd::<IMM8>(a).as_f64x2(); |
        transmute(simd_select_bitmask(k, b, src.as_f64x2()))
1503 | } |
1504 | } |
1505 | |
1506 | /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a, |
1507 | /// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the |
1508 | /// corresponding bit is not set). |
1509 | /// |
1510 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x2_pd&ig_expand=2954) |
1511 | #[inline ] |
1512 | #[target_feature (enable = "avx512dq" )] |
1513 | #[cfg_attr (test, assert_instr(vextractf64x2, IMM8 = 3))] |
1514 | #[rustc_legacy_const_generics (2)] |
1515 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1516 | pub fn _mm512_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m128d { |
1517 | unsafe { |
1518 | static_assert_uimm_bits!(IMM8, 2); |
1519 | let b: f64x2 = _mm512_extractf64x2_pd::<IMM8>(a).as_f64x2(); |
        transmute(simd_select_bitmask(k, b, f64x2::ZERO))
1521 | } |
1522 | } |
1523 | |
1524 | /// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores |
1525 | /// the result in dst. |
1526 | /// |
1527 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x8_epi32&ig_expand=2965) |
1528 | #[inline ] |
1529 | #[target_feature (enable = "avx512dq" )] |
1530 | #[rustc_legacy_const_generics (1)] |
1531 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1532 | pub fn _mm512_extracti32x8_epi32<const IMM8: i32>(a: __m512i) -> __m256i { |
1533 | unsafe { |
1534 | static_assert_uimm_bits!(IMM8, 1); |
1535 | let a: i32x16 = a.as_i32x16(); |
1536 | let b: i32x8 = match IMM8 & 1 { |
1537 | 0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]), |
1538 | _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]), |
1539 | }; |
        transmute(b)
1541 | } |
1542 | } |
1543 | |
1544 | /// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores |
1545 | /// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set). |
1546 | /// |
1547 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x8_epi32&ig_expand=2966) |
1548 | #[inline ] |
1549 | #[target_feature (enable = "avx512dq" )] |
1550 | #[cfg_attr (test, assert_instr(vextracti32x8, IMM8 = 1))] |
1551 | #[rustc_legacy_const_generics (3)] |
1552 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1553 | pub fn _mm512_mask_extracti32x8_epi32<const IMM8: i32>( |
1554 | src: __m256i, |
1555 | k: __mmask8, |
1556 | a: __m512i, |
1557 | ) -> __m256i { |
1558 | unsafe { |
1559 | static_assert_uimm_bits!(IMM8, 1); |
1560 | let b: i32x8 = _mm512_extracti32x8_epi32::<IMM8>(a).as_i32x8(); |
        transmute(simd_select_bitmask(k, b, src.as_i32x8()))
1562 | } |
1563 | } |
1564 | |
1565 | /// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores |
1566 | /// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
1567 | /// |
1568 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x8_epi32&ig_expand=2967) |
1569 | #[inline ] |
1570 | #[target_feature (enable = "avx512dq" )] |
1571 | #[cfg_attr (test, assert_instr(vextracti32x8, IMM8 = 1))] |
1572 | #[rustc_legacy_const_generics (2)] |
1573 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1574 | pub fn _mm512_maskz_extracti32x8_epi32<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m256i { |
1575 | unsafe { |
1576 | static_assert_uimm_bits!(IMM8, 1); |
1577 | let b: i32x8 = _mm512_extracti32x8_epi32::<IMM8>(a).as_i32x8(); |
        transmute(simd_select_bitmask(k, b, i32x8::ZERO))
1579 | } |
1580 | } |
1581 | |
1582 | /// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores |
1583 | /// the result in dst. |
1584 | /// |
1585 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti64x2_epi64&ig_expand=2968) |
1586 | #[inline ] |
1587 | #[target_feature (enable = "avx512dq,avx512vl" )] |
1588 | #[rustc_legacy_const_generics (1)] |
1589 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1590 | pub fn _mm256_extracti64x2_epi64<const IMM8: i32>(a: __m256i) -> __m128i { |
1591 | unsafe { |
1592 | static_assert_uimm_bits!(IMM8, 1); |
1593 | let a: i64x4 = a.as_i64x4(); |
1594 | match IMM8 & 1 { |
1595 | 0 => simd_shuffle!(a, a, [0, 1]), |
1596 | _ => simd_shuffle!(a, a, [2, 3]), |
1597 | } |
1598 | } |
1599 | } |
1600 | |
1601 | /// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores |
1602 | /// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set). |
1603 | /// |
1604 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti64x2_epi64&ig_expand=2969) |
1605 | #[inline ] |
1606 | #[target_feature (enable = "avx512dq,avx512vl" )] |
1607 | #[cfg_attr (test, assert_instr(vextracti64x2, IMM8 = 1))] |
1608 | #[rustc_legacy_const_generics (3)] |
1609 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1610 | pub fn _mm256_mask_extracti64x2_epi64<const IMM8: i32>( |
1611 | src: __m128i, |
1612 | k: __mmask8, |
1613 | a: __m256i, |
1614 | ) -> __m128i { |
1615 | unsafe { |
1616 | static_assert_uimm_bits!(IMM8, 1); |
1617 | let b: i64x2 = _mm256_extracti64x2_epi64::<IMM8>(a).as_i64x2(); |
        transmute(simd_select_bitmask(k, b, src.as_i64x2()))
1619 | } |
1620 | } |
1621 | |
1622 | /// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores |
1623 | /// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
1624 | /// |
1625 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti64x2_epi64&ig_expand=2970) |
1626 | #[inline ] |
1627 | #[target_feature (enable = "avx512dq,avx512vl" )] |
1628 | #[cfg_attr (test, assert_instr(vextracti64x2, IMM8 = 1))] |
1629 | #[rustc_legacy_const_generics (2)] |
1630 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1631 | pub fn _mm256_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m128i { |
1632 | unsafe { |
1633 | static_assert_uimm_bits!(IMM8, 1); |
1634 | let b: i64x2 = _mm256_extracti64x2_epi64::<IMM8>(a).as_i64x2(); |
        transmute(simd_select_bitmask(k, b, i64x2::ZERO))
1636 | } |
1637 | } |
1638 | |
1639 | /// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores |
1640 | /// the result in dst. |
1641 | /// |
1642 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x2_epi64&ig_expand=2971) |
1643 | #[inline ] |
1644 | #[target_feature (enable = "avx512dq" )] |
1645 | #[rustc_legacy_const_generics (1)] |
1646 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1647 | pub fn _mm512_extracti64x2_epi64<const IMM8: i32>(a: __m512i) -> __m128i { |
1648 | unsafe { |
1649 | static_assert_uimm_bits!(IMM8, 2); |
1650 | let a: i64x8 = a.as_i64x8(); |
1651 | match IMM8 & 3 { |
1652 | 0 => simd_shuffle!(a, a, [0, 1]), |
1653 | 1 => simd_shuffle!(a, a, [2, 3]), |
1654 | 2 => simd_shuffle!(a, a, [4, 5]), |
1655 | _ => simd_shuffle!(a, a, [6, 7]), |
1656 | } |
1657 | } |
1658 | } |
1659 | |
1660 | /// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores |
1661 | /// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set). |
1662 | /// |
1663 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x2_epi64&ig_expand=2972) |
1664 | #[inline ] |
1665 | #[target_feature (enable = "avx512dq" )] |
1666 | #[cfg_attr (test, assert_instr(vextracti64x2, IMM8 = 3))] |
1667 | #[rustc_legacy_const_generics (3)] |
1668 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1669 | pub fn _mm512_mask_extracti64x2_epi64<const IMM8: i32>( |
1670 | src: __m128i, |
1671 | k: __mmask8, |
1672 | a: __m512i, |
1673 | ) -> __m128i { |
1674 | unsafe { |
1675 | static_assert_uimm_bits!(IMM8, 2); |
1676 | let b: i64x2 = _mm512_extracti64x2_epi64::<IMM8>(a).as_i64x2(); |
        transmute(simd_select_bitmask(k, b, src.as_i64x2()))
1678 | } |
1679 | } |
1680 | |
1681 | /// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores |
1682 | /// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
1683 | /// |
1684 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x2_epi64&ig_expand=2973) |
1685 | #[inline ] |
1686 | #[target_feature (enable = "avx512dq" )] |
1687 | #[cfg_attr (test, assert_instr(vextracti64x2, IMM8 = 3))] |
1688 | #[rustc_legacy_const_generics (2)] |
1689 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1690 | pub fn _mm512_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m128i { |
1691 | unsafe { |
1692 | static_assert_uimm_bits!(IMM8, 2); |
1693 | let b: i64x2 = _mm512_extracti64x2_epi64::<IMM8>(a).as_i64x2(); |
        transmute(simd_select_bitmask(k, b, i64x2::ZERO))
1695 | } |
1696 | } |
1697 | |
1698 | // Insert |
1699 | |
1700 | /// Copy a to dst, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point |
1701 | /// elements) from b into dst at the location specified by IMM8. |
1702 | /// |
1703 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x8&ig_expand=3850) |
1704 | #[inline ] |
1705 | #[target_feature (enable = "avx512dq" )] |
1706 | #[rustc_legacy_const_generics (2)] |
1707 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1708 | pub fn _mm512_insertf32x8<const IMM8: i32>(a: __m512, b: __m256) -> __m512 { |
1709 | unsafe { |
1710 | static_assert_uimm_bits!(IMM8, 1); |
1711 | let b: __m512 = _mm512_castps256_ps512(b); |
1712 | match IMM8 & 1 { |
1713 | 0 => { |
1714 | simd_shuffle!( |
1715 | a, |
1716 | b, |
1717 | [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15] |
1718 | ) |
1719 | } |
1720 | _ => { |
1721 | simd_shuffle!( |
1722 | a, |
1723 | b, |
1724 | [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] |
1725 | ) |
1726 | } |
1727 | } |
1728 | } |
1729 | } |
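
// Illustrative sketch (values assumed): the destination starts as a copy of `a`,
// and IMM8 picks which 256-bit half is overwritten by `b`:
//
//     let a = _mm512_set1_ps(1.0);
//     let b = _mm256_set1_ps(2.0);
//     let r = _mm512_insertf32x8::<1>(a, b);
//     // lanes 0..8 of r come from a (1.0), lanes 8..16 come from b (2.0)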
1730 | |
1731 | /// Copy a to tmp, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point |
1732 | /// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k |
1733 | /// (elements are copied from src if the corresponding bit is not set). |
1734 | /// |
1735 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x8&ig_expand=3851) |
1736 | #[inline ] |
1737 | #[target_feature (enable = "avx512dq" )] |
1738 | #[cfg_attr (test, assert_instr(vinsertf32x8, IMM8 = 1))] |
1739 | #[rustc_legacy_const_generics (4)] |
1740 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1741 | pub fn _mm512_mask_insertf32x8<const IMM8: i32>( |
1742 | src: __m512, |
1743 | k: __mmask16, |
1744 | a: __m512, |
1745 | b: __m256, |
1746 | ) -> __m512 { |
1747 | unsafe { |
1748 | static_assert_uimm_bits!(IMM8, 1); |
1749 | let c: __m512 = _mm512_insertf32x8::<IMM8>(a, b); |
        transmute(simd_select_bitmask(k, c.as_f32x16(), src.as_f32x16()))
1751 | } |
1752 | } |
1753 | |
1754 | /// Copy a to tmp, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point |
1755 | /// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k |
1756 | /// (elements are zeroed out if the corresponding bit is not set). |
1757 | /// |
1758 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x8&ig_expand=3852) |
1759 | #[inline ] |
1760 | #[target_feature (enable = "avx512dq" )] |
1761 | #[cfg_attr (test, assert_instr(vinsertf32x8, IMM8 = 1))] |
1762 | #[rustc_legacy_const_generics (3)] |
1763 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1764 | pub fn _mm512_maskz_insertf32x8<const IMM8: i32>(k: __mmask16, a: __m512, b: __m256) -> __m512 { |
1765 | unsafe { |
1766 | static_assert_uimm_bits!(IMM8, 1); |
1767 | let c: f32x16 = _mm512_insertf32x8::<IMM8>(a, b).as_f32x16(); |
        transmute(simd_select_bitmask(k, c, f32x16::ZERO))
1769 | } |
1770 | } |
1771 | |
1772 | /// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point |
1773 | /// elements) from b into dst at the location specified by IMM8. |
1774 | /// |
1775 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf64x2&ig_expand=3853) |
1776 | #[inline ] |
1777 | #[target_feature (enable = "avx512dq,avx512vl" )] |
1778 | #[rustc_legacy_const_generics (2)] |
1779 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1780 | pub fn _mm256_insertf64x2<const IMM8: i32>(a: __m256d, b: __m128d) -> __m256d { |
1781 | unsafe { |
1782 | static_assert_uimm_bits!(IMM8, 1); |
1783 | let b: __m256d = _mm256_castpd128_pd256(b); |
1784 | match IMM8 & 1 { |
1785 | 0 => simd_shuffle!(a, b, [4, 5, 2, 3]), |
1786 | _ => simd_shuffle!(a, b, [0, 1, 4, 5]), |
1787 | } |
1788 | } |
1789 | } |
1790 | |
1791 | /// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point |
1792 | /// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k |
1793 | /// (elements are copied from src if the corresponding bit is not set). |
1794 | /// |
1795 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf64x2&ig_expand=3854) |
1796 | #[inline ] |
1797 | #[target_feature (enable = "avx512dq,avx512vl" )] |
1798 | #[cfg_attr (test, assert_instr(vinsertf64x2, IMM8 = 1))] |
1799 | #[rustc_legacy_const_generics (4)] |
1800 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1801 | pub fn _mm256_mask_insertf64x2<const IMM8: i32>( |
1802 | src: __m256d, |
1803 | k: __mmask8, |
1804 | a: __m256d, |
1805 | b: __m128d, |
1806 | ) -> __m256d { |
1807 | unsafe { |
1808 | static_assert_uimm_bits!(IMM8, 1); |
1809 | let c: __m256d = _mm256_insertf64x2::<IMM8>(a, b); |
        transmute(simd_select_bitmask(k, c.as_f64x4(), src.as_f64x4()))
1811 | } |
1812 | } |
1813 | |
1814 | /// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point |
1815 | /// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k |
1816 | /// (elements are zeroed out if the corresponding bit is not set). |
1817 | /// |
1818 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf64x2&ig_expand=3855) |
1819 | #[inline ] |
1820 | #[target_feature (enable = "avx512dq,avx512vl" )] |
1821 | #[cfg_attr (test, assert_instr(vinsertf64x2, IMM8 = 1))] |
1822 | #[rustc_legacy_const_generics (3)] |
1823 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1824 | pub fn _mm256_maskz_insertf64x2<const IMM8: i32>(k: __mmask8, a: __m256d, b: __m128d) -> __m256d { |
1825 | unsafe { |
1826 | static_assert_uimm_bits!(IMM8, 1); |
1827 | let c: f64x4 = _mm256_insertf64x2::<IMM8>(a, b).as_f64x4(); |
        transmute(simd_select_bitmask(k, c, f64x4::ZERO))
1829 | } |
1830 | } |
1831 | |
1832 | /// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point |
1833 | /// elements) from b into dst at the location specified by IMM8. |
1834 | /// |
1835 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x2&ig_expand=3856) |
1836 | #[inline ] |
1837 | #[target_feature (enable = "avx512dq" )] |
1838 | #[rustc_legacy_const_generics (2)] |
1839 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1840 | pub fn _mm512_insertf64x2<const IMM8: i32>(a: __m512d, b: __m128d) -> __m512d { |
1841 | unsafe { |
1842 | static_assert_uimm_bits!(IMM8, 2); |
1843 | let b: __m512d = _mm512_castpd128_pd512(b); |
1844 | match IMM8 & 3 { |
1845 | 0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]), |
1846 | 1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]), |
1847 | 2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]), |
1848 | _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]), |
1849 | } |
1850 | } |
1851 | } |
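
// Illustrative sketch (values assumed): the two low bits of IMM8 select which
// 128-bit lane of the copy of `a` is replaced by `b`:
//
//     let a = _mm512_set1_pd(1.0);
//     let b = _mm_setr_pd(8.0, 9.0);
//     let r = _mm512_insertf64x2::<3>(a, b);
//     // r = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 8.0, 9.0]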
1852 | |
1853 | /// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point |
1854 | /// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k |
1855 | /// (elements are copied from src if the corresponding bit is not set). |
1856 | /// |
1857 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x2&ig_expand=3857) |
1858 | #[inline ] |
1859 | #[target_feature (enable = "avx512dq" )] |
1860 | #[cfg_attr (test, assert_instr(vinsertf64x2, IMM8 = 3))] |
1861 | #[rustc_legacy_const_generics (4)] |
1862 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1863 | pub fn _mm512_mask_insertf64x2<const IMM8: i32>( |
1864 | src: __m512d, |
1865 | k: __mmask8, |
1866 | a: __m512d, |
1867 | b: __m128d, |
1868 | ) -> __m512d { |
1869 | unsafe { |
1870 | static_assert_uimm_bits!(IMM8, 2); |
1871 | let c: __m512d = _mm512_insertf64x2::<IMM8>(a, b); |
        transmute(simd_select_bitmask(k, c.as_f64x8(), src.as_f64x8()))
1873 | } |
1874 | } |
1875 | |
1876 | /// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point |
1877 | /// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k |
1878 | /// (elements are zeroed out if the corresponding bit is not set). |
1879 | /// |
1880 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x2&ig_expand=3858) |
1881 | #[inline ] |
1882 | #[target_feature (enable = "avx512dq" )] |
1883 | #[cfg_attr (test, assert_instr(vinsertf64x2, IMM8 = 3))] |
1884 | #[rustc_legacy_const_generics (3)] |
1885 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1886 | pub fn _mm512_maskz_insertf64x2<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m128d) -> __m512d { |
1887 | unsafe { |
1888 | static_assert_uimm_bits!(IMM8, 2); |
1889 | let c: f64x8 = _mm512_insertf64x2::<IMM8>(a, b).as_f64x8(); |
        transmute(simd_select_bitmask(k, c, f64x8::ZERO))
1891 | } |
1892 | } |
1893 | |
1894 | /// Copy a to dst, then insert 256 bits (composed of 8 packed 32-bit integers) from b into dst at the |
1895 | /// location specified by IMM8. |
1896 | /// |
1897 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x8&ig_expand=3869) |
1898 | #[inline ] |
1899 | #[target_feature (enable = "avx512dq" )] |
1900 | #[rustc_legacy_const_generics (2)] |
1901 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1902 | pub fn _mm512_inserti32x8<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i { |
1903 | unsafe { |
1904 | static_assert_uimm_bits!(IMM8, 1); |
1905 | let a: i32x16 = a.as_i32x16(); |
1906 | let b: i32x16 = _mm512_castsi256_si512(b).as_i32x16(); |
1907 | let r: i32x16 = match IMM8 & 1 { |
1908 | 0 => { |
1909 | simd_shuffle!( |
1910 | a, |
1911 | b, |
1912 | [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15] |
1913 | ) |
1914 | } |
1915 | _ => { |
1916 | simd_shuffle!( |
1917 | a, |
1918 | b, |
1919 | [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] |
1920 | ) |
1921 | } |
1922 | }; |
        transmute(r)
1924 | } |
1925 | } |
1926 | |
1927 | /// Copy a to tmp, then insert 256 bits (composed of 8 packed 32-bit integers) from b into tmp at the |
1928 | /// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if |
1929 | /// the corresponding bit is not set). |
1930 | /// |
1931 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x8&ig_expand=3870) |
1932 | #[inline ] |
1933 | #[target_feature (enable = "avx512dq" )] |
1934 | #[cfg_attr (test, assert_instr(vinserti32x8, IMM8 = 1))] |
1935 | #[rustc_legacy_const_generics (4)] |
1936 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1937 | pub fn _mm512_mask_inserti32x8<const IMM8: i32>( |
1938 | src: __m512i, |
1939 | k: __mmask16, |
1940 | a: __m512i, |
1941 | b: __m256i, |
1942 | ) -> __m512i { |
1943 | unsafe { |
1944 | static_assert_uimm_bits!(IMM8, 1); |
1945 | let c: __m512i = _mm512_inserti32x8::<IMM8>(a, b); |
        transmute(simd_select_bitmask(k, c.as_i32x16(), src.as_i32x16()))
1947 | } |
1948 | } |
1949 | |
1950 | /// Copy a to tmp, then insert 256 bits (composed of 8 packed 32-bit integers) from b into tmp at the |
1951 | /// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the |
1952 | /// corresponding bit is not set). |
1953 | /// |
1954 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x8&ig_expand=3871) |
1955 | #[inline ] |
1956 | #[target_feature (enable = "avx512dq" )] |
1957 | #[cfg_attr (test, assert_instr(vinserti32x8, IMM8 = 1))] |
1958 | #[rustc_legacy_const_generics (3)] |
1959 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1960 | pub fn _mm512_maskz_inserti32x8<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m256i) -> __m512i { |
1961 | unsafe { |
1962 | static_assert_uimm_bits!(IMM8, 1); |
1963 | let c: i32x16 = _mm512_inserti32x8::<IMM8>(a, b).as_i32x16(); |
        transmute(simd_select_bitmask(k, c, i32x16::ZERO))
1965 | } |
1966 | } |
1967 | |
1968 | /// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the |
1969 | /// location specified by IMM8. |
1970 | /// |
1971 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti64x2&ig_expand=3872) |
1972 | #[inline ] |
1973 | #[target_feature (enable = "avx512dq,avx512vl" )] |
1974 | #[rustc_legacy_const_generics (2)] |
1975 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1976 | pub fn _mm256_inserti64x2<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i { |
1977 | unsafe { |
1978 | static_assert_uimm_bits!(IMM8, 1); |
1979 | let a: i64x4 = a.as_i64x4(); |
1980 | let b: i64x4 = _mm256_castsi128_si256(b).as_i64x4(); |
1981 | match IMM8 & 1 { |
1982 | 0 => simd_shuffle!(a, b, [4, 5, 2, 3]), |
1983 | _ => simd_shuffle!(a, b, [0, 1, 4, 5]), |
1984 | } |
1985 | } |
1986 | } |
1987 | |
1988 | /// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the |
1989 | /// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if |
1990 | /// the corresponding bit is not set). |
1991 | /// |
1992 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti64x2&ig_expand=3873) |
1993 | #[inline ] |
1994 | #[target_feature (enable = "avx512dq,avx512vl" )] |
1995 | #[cfg_attr (test, assert_instr(vinserti64x2, IMM8 = 1))] |
1996 | #[rustc_legacy_const_generics (4)] |
1997 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
1998 | pub fn _mm256_mask_inserti64x2<const IMM8: i32>( |
1999 | src: __m256i, |
2000 | k: __mmask8, |
2001 | a: __m256i, |
2002 | b: __m128i, |
2003 | ) -> __m256i { |
2004 | unsafe { |
2005 | static_assert_uimm_bits!(IMM8, 1); |
2006 | let c: __m256i = _mm256_inserti64x2::<IMM8>(a, b); |
        transmute(simd_select_bitmask(k, c.as_i64x4(), src.as_i64x4()))
2008 | } |
2009 | } |
2010 | |
2011 | /// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the |
2012 | /// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the |
2013 | /// corresponding bit is not set). |
2014 | /// |
2015 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti64x2&ig_expand=3874) |
2016 | #[inline ] |
2017 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2018 | #[cfg_attr (test, assert_instr(vinserti64x2, IMM8 = 1))] |
2019 | #[rustc_legacy_const_generics (3)] |
2020 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2021 | pub fn _mm256_maskz_inserti64x2<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m128i) -> __m256i { |
2022 | unsafe { |
2023 | static_assert_uimm_bits!(IMM8, 1); |
2024 | let c: i64x4 = _mm256_inserti64x2::<IMM8>(a, b).as_i64x4(); |
        transmute(simd_select_bitmask(k, c, i64x4::ZERO))
2026 | } |
2027 | } |
2028 | |
2029 | /// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the |
2030 | /// location specified by IMM8. |
2031 | /// |
2032 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x2&ig_expand=3875) |
2033 | #[inline ] |
2034 | #[target_feature (enable = "avx512dq" )] |
2035 | #[rustc_legacy_const_generics (2)] |
2036 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2037 | pub fn _mm512_inserti64x2<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i { |
2038 | unsafe { |
2039 | static_assert_uimm_bits!(IMM8, 2); |
2040 | let a: i64x8 = a.as_i64x8(); |
2041 | let b: i64x8 = _mm512_castsi128_si512(b).as_i64x8(); |
2042 | match IMM8 & 3 { |
2043 | 0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]), |
2044 | 1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]), |
2045 | 2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]), |
2046 | _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]), |
2047 | } |
2048 | } |
2049 | } |
2050 | |
2051 | /// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the |
2052 | /// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if |
2053 | /// the corresponding bit is not set). |
2054 | /// |
2055 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x2&ig_expand=3876) |
2056 | #[inline ] |
2057 | #[target_feature (enable = "avx512dq" )] |
2058 | #[cfg_attr (test, assert_instr(vinserti64x2, IMM8 = 3))] |
2059 | #[rustc_legacy_const_generics (4)] |
2060 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2061 | pub fn _mm512_mask_inserti64x2<const IMM8: i32>( |
2062 | src: __m512i, |
2063 | k: __mmask8, |
2064 | a: __m512i, |
2065 | b: __m128i, |
2066 | ) -> __m512i { |
2067 | unsafe { |
2068 | static_assert_uimm_bits!(IMM8, 2); |
2069 | let c: __m512i = _mm512_inserti64x2::<IMM8>(a, b); |
        transmute(simd_select_bitmask(k, c.as_i64x8(), src.as_i64x8()))
2071 | } |
2072 | } |
2073 | |
2074 | /// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the |
2075 | /// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the |
2076 | /// corresponding bit is not set). |
2077 | /// |
2078 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x2&ig_expand=3877) |
2079 | #[inline ] |
2080 | #[target_feature (enable = "avx512dq" )] |
2081 | #[cfg_attr (test, assert_instr(vinserti64x2, IMM8 = 3))] |
2082 | #[rustc_legacy_const_generics (3)] |
2083 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2084 | pub fn _mm512_maskz_inserti64x2<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m128i) -> __m512i { |
2085 | unsafe { |
2086 | static_assert_uimm_bits!(IMM8, 2); |
2087 | let c: i64x8 = _mm512_inserti64x2::<IMM8>(a, b).as_i64x8(); |
        transmute(simd_select_bitmask(k, c, i64x8::ZERO))
2089 | } |
2090 | } |
2091 | |
2092 | // Convert |
2093 | |
2094 | /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
2095 | /// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of: |
2096 | /// |
2097 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2098 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2099 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2100 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2101 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2102 | /// |
2103 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi64_pd&ig_expand=1437) |
2104 | #[inline ] |
2105 | #[target_feature (enable = "avx512dq" )] |
2106 | #[cfg_attr (test, assert_instr(vcvtqq2pd, ROUNDING = 8))] |
2107 | #[rustc_legacy_const_generics (1)] |
2108 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2109 | pub fn _mm512_cvt_roundepi64_pd<const ROUNDING: i32>(a: __m512i) -> __m512d { |
2110 | unsafe { |
2111 | static_assert_rounding!(ROUNDING); |
        transmute(vcvtqq2pd_512(a.as_i64x8(), ROUNDING))
2113 | } |
2114 | } |
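
// Illustrative sketch (values assumed): the rounding mode is a const parameter,
// typically built from the `_MM_FROUND_*` flags listed above:
//
//     let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
//     let r = _mm512_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
//     // r = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]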
2115 | |
2116 | /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
2117 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
2118 | /// not set). Rounding is done according to the ROUNDING parameter, which can be one of: |
2119 | /// |
2120 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2121 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2122 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2123 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2124 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2125 | /// |
2126 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi64_pd&ig_expand=1438) |
2127 | #[inline ] |
2128 | #[target_feature (enable = "avx512dq" )] |
2129 | #[cfg_attr (test, assert_instr(vcvtqq2pd, ROUNDING = 8))] |
2130 | #[rustc_legacy_const_generics (3)] |
2131 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2132 | pub fn _mm512_mask_cvt_roundepi64_pd<const ROUNDING: i32>( |
2133 | src: __m512d, |
2134 | k: __mmask8, |
2135 | a: __m512i, |
2136 | ) -> __m512d { |
2137 | unsafe { |
2138 | static_assert_rounding!(ROUNDING); |
2139 | let b: f64x8 = _mm512_cvt_roundepi64_pd::<ROUNDING>(a).as_f64x8(); |
        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
2141 | } |
2142 | } |
2143 | |
2144 | /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
2145 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
2146 | /// Rounding is done according to the ROUNDING parameter, which can be one of: |
2147 | /// |
2148 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2149 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2150 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2151 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2152 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2153 | /// |
2154 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi64_pd&ig_expand=1439) |
2155 | #[inline ] |
2156 | #[target_feature (enable = "avx512dq" )] |
2157 | #[cfg_attr (test, assert_instr(vcvtqq2pd, ROUNDING = 8))] |
2158 | #[rustc_legacy_const_generics (2)] |
2159 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2160 | pub fn _mm512_maskz_cvt_roundepi64_pd<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m512d { |
2161 | unsafe { |
2162 | static_assert_rounding!(ROUNDING); |
2163 | let b: f64x8 = _mm512_cvt_roundepi64_pd::<ROUNDING>(a).as_f64x8(); |
        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
2165 | } |
2166 | } |
2167 | |
2168 | /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
2169 | /// and store the results in dst. |
2170 | /// |
2171 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_pd&ig_expand=1705) |
2172 | #[inline ] |
2173 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2174 | #[cfg_attr (test, assert_instr(vcvtqq2pd))] |
2175 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2176 | pub fn _mm_cvtepi64_pd(a: __m128i) -> __m128d { |
    unsafe { transmute(vcvtqq2pd_128(a.as_i64x2(), _MM_FROUND_CUR_DIRECTION)) }
2178 | } |
2179 | |
2180 | /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
2181 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
2182 | /// not set). |
2183 | /// |
2184 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_pd&ig_expand=1706) |
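///
/// A minimal, hypothetical sketch (not from Intel's documentation) of the writemask behaviour:
///
/// ```ignore
/// let src = _mm_set1_pd(-1.0);
/// let a = _mm_set_epi64x(9, 7); // element 1 = 9, element 0 = 7
/// let r = _mm_mask_cvtepi64_pd(src, 0b01, a);
/// // element 0 (mask bit set) is 7.0; element 1 (mask bit clear) keeps -1.0 from src
/// ```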
2185 | #[inline ] |
2186 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2187 | #[cfg_attr (test, assert_instr(vcvtqq2pd))] |
2188 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2189 | pub fn _mm_mask_cvtepi64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d { |
2190 | unsafe { |
2191 | let b: f64x2 = _mm_cvtepi64_pd(a).as_f64x2(); |
        transmute(simd_select_bitmask(k, b, src.as_f64x2()))
2193 | } |
2194 | } |
2195 | |
2196 | /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
2197 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
2198 | /// |
2199 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_pd&ig_expand=1707) |
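///
/// A minimal, hypothetical sketch (not from Intel's documentation) of the zeromask behaviour:
///
/// ```ignore
/// let a = _mm_set_epi64x(9, 7);
/// let r = _mm_maskz_cvtepi64_pd(0b10, a);
/// // element 0 (mask bit clear) is 0.0; element 1 (mask bit set) is 9.0
/// ```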
2200 | #[inline ] |
2201 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2202 | #[cfg_attr (test, assert_instr(vcvtqq2pd))] |
2203 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2204 | pub fn _mm_maskz_cvtepi64_pd(k: __mmask8, a: __m128i) -> __m128d { |
2205 | unsafe { |
2206 | let b: f64x2 = _mm_cvtepi64_pd(a).as_f64x2(); |
        transmute(simd_select_bitmask(k, b, f64x2::ZERO))
2208 | } |
2209 | } |
2210 | |
2211 | /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
2212 | /// and store the results in dst. |
2213 | /// |
2214 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_pd&ig_expand=1708) |
2215 | #[inline ] |
2216 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2217 | #[cfg_attr (test, assert_instr(vcvtqq2pd))] |
2218 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2219 | pub fn _mm256_cvtepi64_pd(a: __m256i) -> __m256d { |
    unsafe { transmute(vcvtqq2pd_256(a.as_i64x4(), _MM_FROUND_CUR_DIRECTION)) }
2221 | } |
2222 | |
2223 | /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
2224 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
2225 | /// not set). |
2226 | /// |
2227 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_pd&ig_expand=1709) |
2228 | #[inline ] |
2229 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2230 | #[cfg_attr (test, assert_instr(vcvtqq2pd))] |
2231 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2232 | pub fn _mm256_mask_cvtepi64_pd(src: __m256d, k: __mmask8, a: __m256i) -> __m256d { |
2233 | unsafe { |
2234 | let b: f64x4 = _mm256_cvtepi64_pd(a).as_f64x4(); |
        transmute(simd_select_bitmask(k, b, src.as_f64x4()))
2236 | } |
2237 | } |
2238 | |
2239 | /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
2240 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
2241 | /// |
2242 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_pd&ig_expand=1710) |
2243 | #[inline ] |
2244 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2245 | #[cfg_attr (test, assert_instr(vcvtqq2pd))] |
2246 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2247 | pub fn _mm256_maskz_cvtepi64_pd(k: __mmask8, a: __m256i) -> __m256d { |
2248 | unsafe { |
2249 | let b: f64x4 = _mm256_cvtepi64_pd(a).as_f64x4(); |
        transmute(simd_select_bitmask(k, b, f64x4::ZERO))
2251 | } |
2252 | } |
2253 | |
2254 | /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
2255 | /// and store the results in dst. |
2256 | /// |
2257 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_pd&ig_expand=1711) |
2258 | #[inline ] |
2259 | #[target_feature (enable = "avx512dq" )] |
2260 | #[cfg_attr (test, assert_instr(vcvtqq2pd))] |
2261 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2262 | pub fn _mm512_cvtepi64_pd(a: __m512i) -> __m512d { |
    unsafe { transmute(vcvtqq2pd_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION)) }
2264 | } |
2265 | |
2266 | /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
2267 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
2268 | /// not set). |
2269 | /// |
2270 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_pd&ig_expand=1712) |
2271 | #[inline ] |
2272 | #[target_feature (enable = "avx512dq" )] |
2273 | #[cfg_attr (test, assert_instr(vcvtqq2pd))] |
2274 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2275 | pub fn _mm512_mask_cvtepi64_pd(src: __m512d, k: __mmask8, a: __m512i) -> __m512d { |
2276 | unsafe { |
2277 | let b: f64x8 = _mm512_cvtepi64_pd(a).as_f64x8(); |
        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
2279 | } |
2280 | } |
2281 | |
2282 | /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
2283 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
2284 | /// |
2285 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_pd&ig_expand=1713) |
2286 | #[inline ] |
2287 | #[target_feature (enable = "avx512dq" )] |
2288 | #[cfg_attr (test, assert_instr(vcvtqq2pd))] |
2289 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2290 | pub fn _mm512_maskz_cvtepi64_pd(k: __mmask8, a: __m512i) -> __m512d { |
2291 | unsafe { |
2292 | let b: f64x8 = _mm512_cvtepi64_pd(a).as_f64x8(); |
        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
2294 | } |
2295 | } |
2296 | |
2297 | /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
2298 | /// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of: |
2299 | /// |
2300 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2301 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2302 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2303 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2304 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2305 | /// |
2306 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi64_ps&ig_expand=1443) |
2307 | #[inline ] |
2308 | #[target_feature (enable = "avx512dq" )] |
2309 | #[cfg_attr (test, assert_instr(vcvtqq2ps, ROUNDING = 8))] |
2310 | #[rustc_legacy_const_generics (1)] |
2311 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2312 | pub fn _mm512_cvt_roundepi64_ps<const ROUNDING: i32>(a: __m512i) -> __m256 { |
2313 | unsafe { |
2314 | static_assert_rounding!(ROUNDING); |
        transmute(vcvtqq2ps_512(a.as_i64x8(), ROUNDING))
2316 | } |
2317 | } |
2318 | |
2319 | /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
2320 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
2321 | /// not set). Rounding is done according to the ROUNDING parameter, which can be one of: |
2322 | /// |
2323 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2324 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2325 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2326 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2327 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2328 | /// |
2329 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi64_ps&ig_expand=1444) |
2330 | #[inline ] |
2331 | #[target_feature (enable = "avx512dq" )] |
2332 | #[cfg_attr (test, assert_instr(vcvtqq2ps, ROUNDING = 8))] |
2333 | #[rustc_legacy_const_generics (3)] |
2334 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2335 | pub fn _mm512_mask_cvt_roundepi64_ps<const ROUNDING: i32>( |
2336 | src: __m256, |
2337 | k: __mmask8, |
2338 | a: __m512i, |
2339 | ) -> __m256 { |
2340 | unsafe { |
2341 | static_assert_rounding!(ROUNDING); |
2342 | let b: f32x8 = _mm512_cvt_roundepi64_ps::<ROUNDING>(a).as_f32x8(); |
        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
2344 | } |
2345 | } |
2346 | |
2347 | /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
2348 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
2349 | /// Rounding is done according to the ROUNDING parameter, which can be one of: |
2350 | /// |
2351 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2352 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2353 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2354 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2355 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2356 | /// |
2357 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi64_ps&ig_expand=1445) |
2358 | #[inline ] |
2359 | #[target_feature (enable = "avx512dq" )] |
2360 | #[cfg_attr (test, assert_instr(vcvtqq2ps, ROUNDING = 8))] |
2361 | #[rustc_legacy_const_generics (2)] |
2362 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2363 | pub fn _mm512_maskz_cvt_roundepi64_ps<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m256 { |
2364 | unsafe { |
2365 | static_assert_rounding!(ROUNDING); |
2366 | let b: f32x8 = _mm512_cvt_roundepi64_ps::<ROUNDING>(a).as_f32x8(); |
        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
2368 | } |
2369 | } |
2370 | |
2371 | /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
2372 | /// and store the results in dst. |
2373 | /// |
2374 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_ps&ig_expand=1723) |
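///
/// A minimal, hypothetical sketch (not from Intel's documentation): the two converted values land
/// in the low two `f32` lanes and the upper two lanes of the result are zeroed.
///
/// ```ignore
/// let a = _mm_set_epi64x(-3, 2); // element 1 = -3, element 0 = 2
/// let r = _mm_cvtepi64_ps(a);
/// // r = [2.0, -3.0, 0.0, 0.0] (lowest lane first)
/// ```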
2375 | #[inline ] |
2376 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2377 | #[cfg_attr (test, assert_instr(vcvtqq2ps))] |
2378 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2379 | pub fn _mm_cvtepi64_ps(a: __m128i) -> __m128 { |
    _mm_mask_cvtepi64_ps(_mm_undefined_ps(), 0xff, a)
2381 | } |
2382 | |
2383 | /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
2384 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
2385 | /// not set). |
2386 | /// |
2387 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_ps&ig_expand=1724) |
2388 | #[inline ] |
2389 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2390 | #[cfg_attr (test, assert_instr(vcvtqq2ps))] |
2391 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2392 | pub fn _mm_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 { |
    unsafe { transmute(vcvtqq2ps_128(a.as_i64x2(), src.as_f32x4(), k)) }
2394 | } |
2395 | |
2396 | /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
2397 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
2398 | /// |
2399 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_ps&ig_expand=1725) |
2400 | #[inline ] |
2401 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2402 | #[cfg_attr (test, assert_instr(vcvtqq2ps))] |
2403 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2404 | pub fn _mm_maskz_cvtepi64_ps(k: __mmask8, a: __m128i) -> __m128 { |
    _mm_mask_cvtepi64_ps(_mm_setzero_ps(), k, a)
2406 | } |
2407 | |
2408 | /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
2409 | /// and store the results in dst. |
2410 | /// |
2411 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_ps&ig_expand=1726) |
2412 | #[inline ] |
2413 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2414 | #[cfg_attr (test, assert_instr(vcvtqq2ps))] |
2415 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2416 | pub fn _mm256_cvtepi64_ps(a: __m256i) -> __m128 { |
    unsafe { transmute(vcvtqq2ps_256(a.as_i64x4(), _MM_FROUND_CUR_DIRECTION)) }
2418 | } |
2419 | |
2420 | /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
2421 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
2422 | /// not set). |
2423 | /// |
2424 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_ps&ig_expand=1727) |
2425 | #[inline ] |
2426 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2427 | #[cfg_attr (test, assert_instr(vcvtqq2ps))] |
2428 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2429 | pub fn _mm256_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m256i) -> __m128 { |
2430 | unsafe { |
2431 | let b: f32x4 = _mm256_cvtepi64_ps(a).as_f32x4(); |
        transmute(simd_select_bitmask(k, b, src.as_f32x4()))
2433 | } |
2434 | } |
2435 | |
2436 | /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
2437 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
2438 | /// |
2439 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_ps&ig_expand=1728) |
2440 | #[inline ] |
2441 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2442 | #[cfg_attr (test, assert_instr(vcvtqq2ps))] |
2443 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2444 | pub fn _mm256_maskz_cvtepi64_ps(k: __mmask8, a: __m256i) -> __m128 { |
2445 | unsafe { |
2446 | let b: f32x4 = _mm256_cvtepi64_ps(a).as_f32x4(); |
        transmute(simd_select_bitmask(k, b, f32x4::ZERO))
2448 | } |
2449 | } |
2450 | |
2451 | /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
2452 | /// and store the results in dst. |
2453 | /// |
2454 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_ps&ig_expand=1729) |
2455 | #[inline ] |
2456 | #[target_feature (enable = "avx512dq" )] |
2457 | #[cfg_attr (test, assert_instr(vcvtqq2ps))] |
2458 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2459 | pub fn _mm512_cvtepi64_ps(a: __m512i) -> __m256 { |
    unsafe { transmute(vcvtqq2ps_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION)) }
2461 | } |
2462 | |
2463 | /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
2464 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
2465 | /// not set). |
2466 | /// |
2467 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_ps&ig_expand=1730) |
2468 | #[inline ] |
2469 | #[target_feature (enable = "avx512dq" )] |
2470 | #[cfg_attr (test, assert_instr(vcvtqq2ps))] |
2471 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2472 | pub fn _mm512_mask_cvtepi64_ps(src: __m256, k: __mmask8, a: __m512i) -> __m256 { |
2473 | unsafe { |
2474 | let b: f32x8 = _mm512_cvtepi64_ps(a).as_f32x8(); |
        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
2476 | } |
2477 | } |
2478 | |
2479 | /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
2480 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
2481 | /// |
2482 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_ps&ig_expand=1731) |
2483 | #[inline ] |
2484 | #[target_feature (enable = "avx512dq" )] |
2485 | #[cfg_attr (test, assert_instr(vcvtqq2ps))] |
2486 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2487 | pub fn _mm512_maskz_cvtepi64_ps(k: __mmask8, a: __m512i) -> __m256 { |
2488 | unsafe { |
2489 | let b: f32x8 = _mm512_cvtepi64_ps(a).as_f32x8(); |
        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
2491 | } |
2492 | } |
2493 | |
2494 | /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
2495 | /// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of: |
2496 | /// |
2497 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2498 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2499 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2500 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2501 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2502 | /// |
2503 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu64_pd&ig_expand=1455) |
2504 | #[inline ] |
2505 | #[target_feature (enable = "avx512dq" )] |
2506 | #[cfg_attr (test, assert_instr(vcvtuqq2pd, ROUNDING = 8))] |
2507 | #[rustc_legacy_const_generics (1)] |
2508 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2509 | pub fn _mm512_cvt_roundepu64_pd<const ROUNDING: i32>(a: __m512i) -> __m512d { |
2510 | unsafe { |
2511 | static_assert_rounding!(ROUNDING); |
        transmute(vcvtuqq2pd_512(a.as_u64x8(), ROUNDING))
2513 | } |
2514 | } |
2515 | |
2516 | /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
2517 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
2518 | /// not set). Rounding is done according to the ROUNDING parameter, which can be one of: |
2519 | /// |
2520 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2521 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2522 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2523 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2524 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2525 | /// |
2526 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu64_pd&ig_expand=1456) |
2527 | #[inline ] |
2528 | #[target_feature (enable = "avx512dq" )] |
2529 | #[cfg_attr (test, assert_instr(vcvtuqq2pd, ROUNDING = 8))] |
2530 | #[rustc_legacy_const_generics (3)] |
2531 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2532 | pub fn _mm512_mask_cvt_roundepu64_pd<const ROUNDING: i32>( |
2533 | src: __m512d, |
2534 | k: __mmask8, |
2535 | a: __m512i, |
2536 | ) -> __m512d { |
2537 | unsafe { |
2538 | static_assert_rounding!(ROUNDING); |
2539 | let b: f64x8 = _mm512_cvt_roundepu64_pd::<ROUNDING>(a).as_f64x8(); |
        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
2541 | } |
2542 | } |
2543 | |
2544 | /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
2545 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
2546 | /// Rounding is done according to the ROUNDING parameter, which can be one of: |
2547 | /// |
2548 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2549 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2550 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2551 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2552 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2553 | /// |
2554 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu64_pd&ig_expand=1457) |
2555 | #[inline ] |
2556 | #[target_feature (enable = "avx512dq" )] |
2557 | #[cfg_attr (test, assert_instr(vcvtuqq2pd, ROUNDING = 8))] |
2558 | #[rustc_legacy_const_generics (2)] |
2559 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2560 | pub fn _mm512_maskz_cvt_roundepu64_pd<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m512d { |
2561 | unsafe { |
2562 | static_assert_rounding!(ROUNDING); |
2563 | let b: f64x8 = _mm512_cvt_roundepu64_pd::<ROUNDING>(a).as_f64x8(); |
        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
2565 | } |
2566 | } |
2567 | |
2568 | /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
2569 | /// and store the results in dst. |
2570 | /// |
2571 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu64_pd&ig_expand=1827) |
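///
/// A minimal, hypothetical sketch (not from Intel's documentation): the source bits are treated
/// as unsigned, so an all-ones pattern converts to roughly 1.8e19 rather than to -1.0.
///
/// ```ignore
/// let a = _mm_set_epi64x(-1, 3); // element 1 has all bits set, i.e. u64::MAX
/// let r = _mm_cvtepu64_pd(a);
/// // element 0 is 3.0; element 1 is u64::MAX rounded to 1.8446744073709552e19
/// ```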
2572 | #[inline ] |
2573 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2574 | #[cfg_attr (test, assert_instr(vcvtuqq2pd))] |
2575 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2576 | pub fn _mm_cvtepu64_pd(a: __m128i) -> __m128d { |
    unsafe { transmute(vcvtuqq2pd_128(a.as_u64x2(), _MM_FROUND_CUR_DIRECTION)) }
2578 | } |
2579 | |
2580 | /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
2581 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
2582 | /// not set). |
2583 | /// |
2584 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu64_pd&ig_expand=1828) |
2585 | #[inline ] |
2586 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2587 | #[cfg_attr (test, assert_instr(vcvtuqq2pd))] |
2588 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2589 | pub fn _mm_mask_cvtepu64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d { |
2590 | unsafe { |
2591 | let b: f64x2 = _mm_cvtepu64_pd(a).as_f64x2(); |
        transmute(simd_select_bitmask(k, b, src.as_f64x2()))
2593 | } |
2594 | } |
2595 | |
2596 | /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
2597 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
2598 | /// |
2599 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu64_pd&ig_expand=1829) |
2600 | #[inline ] |
2601 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2602 | #[cfg_attr (test, assert_instr(vcvtuqq2pd))] |
2603 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2604 | pub fn _mm_maskz_cvtepu64_pd(k: __mmask8, a: __m128i) -> __m128d { |
2605 | unsafe { |
2606 | let b: f64x2 = _mm_cvtepu64_pd(a).as_f64x2(); |
        transmute(simd_select_bitmask(k, b, f64x2::ZERO))
2608 | } |
2609 | } |
2610 | |
2611 | /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
2612 | /// and store the results in dst. |
2613 | /// |
2614 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu64_pd&ig_expand=1830) |
2615 | #[inline ] |
2616 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2617 | #[cfg_attr (test, assert_instr(vcvtuqq2pd))] |
2618 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2619 | pub fn _mm256_cvtepu64_pd(a: __m256i) -> __m256d { |
    unsafe { transmute(vcvtuqq2pd_256(a.as_u64x4(), _MM_FROUND_CUR_DIRECTION)) }
2621 | } |
2622 | |
2623 | /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
2624 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
2625 | /// not set). |
2626 | /// |
2627 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu64_pd&ig_expand=1831) |
2628 | #[inline ] |
2629 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2630 | #[cfg_attr (test, assert_instr(vcvtuqq2pd))] |
2631 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2632 | pub fn _mm256_mask_cvtepu64_pd(src: __m256d, k: __mmask8, a: __m256i) -> __m256d { |
2633 | unsafe { |
2634 | let b: f64x4 = _mm256_cvtepu64_pd(a).as_f64x4(); |
        transmute(simd_select_bitmask(k, b, src.as_f64x4()))
2636 | } |
2637 | } |
2638 | |
2639 | /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
2640 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
2641 | /// |
2642 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu64_pd&ig_expand=1832) |
2643 | #[inline ] |
2644 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2645 | #[cfg_attr (test, assert_instr(vcvtuqq2pd))] |
2646 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2647 | pub fn _mm256_maskz_cvtepu64_pd(k: __mmask8, a: __m256i) -> __m256d { |
2648 | unsafe { |
2649 | let b: f64x4 = _mm256_cvtepu64_pd(a).as_f64x4(); |
        transmute(simd_select_bitmask(k, b, f64x4::ZERO))
2651 | } |
2652 | } |
2653 | |
2654 | /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
2655 | /// and store the results in dst. |
2656 | /// |
2657 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu64_pd&ig_expand=1833) |
2658 | #[inline ] |
2659 | #[target_feature (enable = "avx512dq" )] |
2660 | #[cfg_attr (test, assert_instr(vcvtuqq2pd))] |
2661 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2662 | pub fn _mm512_cvtepu64_pd(a: __m512i) -> __m512d { |
    unsafe { transmute(vcvtuqq2pd_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION)) }
2664 | } |
2665 | |
2666 | /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
2667 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
2668 | /// not set). |
2669 | /// |
2670 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu64_pd&ig_expand=1834) |
2671 | #[inline ] |
2672 | #[target_feature (enable = "avx512dq" )] |
2673 | #[cfg_attr (test, assert_instr(vcvtuqq2pd))] |
2674 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2675 | pub fn _mm512_mask_cvtepu64_pd(src: __m512d, k: __mmask8, a: __m512i) -> __m512d { |
2676 | unsafe { |
2677 | let b: f64x8 = _mm512_cvtepu64_pd(a).as_f64x8(); |
        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
2679 | } |
2680 | } |
2681 | |
2682 | /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
2683 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
2684 | /// |
2685 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu64_pd&ig_expand=1835) |
2686 | #[inline ] |
2687 | #[target_feature (enable = "avx512dq" )] |
2688 | #[cfg_attr (test, assert_instr(vcvtuqq2pd))] |
2689 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2690 | pub fn _mm512_maskz_cvtepu64_pd(k: __mmask8, a: __m512i) -> __m512d { |
2691 | unsafe { |
2692 | let b: f64x8 = _mm512_cvtepu64_pd(a).as_f64x8(); |
        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
2694 | } |
2695 | } |
2696 | |
2697 | /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
2698 | /// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of: |
2699 | /// |
2700 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2701 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2702 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2703 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2704 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2705 | /// |
2706 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu64_ps&ig_expand=1461) |
2707 | #[inline ] |
2708 | #[target_feature (enable = "avx512dq" )] |
2709 | #[cfg_attr (test, assert_instr(vcvtuqq2ps, ROUNDING = 8))] |
2710 | #[rustc_legacy_const_generics (1)] |
2711 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2712 | pub fn _mm512_cvt_roundepu64_ps<const ROUNDING: i32>(a: __m512i) -> __m256 { |
2713 | unsafe { |
2714 | static_assert_rounding!(ROUNDING); |
        transmute(vcvtuqq2ps_512(a.as_u64x8(), ROUNDING))
2716 | } |
2717 | } |
2718 | |
2719 | /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
2720 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
2721 | /// not set). Rounding is done according to the ROUNDING parameter, which can be one of: |
2722 | /// |
2723 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2724 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2725 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2726 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2727 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2728 | /// |
2729 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu64_ps&ig_expand=1462) |
2730 | #[inline ] |
2731 | #[target_feature (enable = "avx512dq" )] |
2732 | #[cfg_attr (test, assert_instr(vcvtuqq2ps, ROUNDING = 8))] |
2733 | #[rustc_legacy_const_generics (3)] |
2734 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2735 | pub fn _mm512_mask_cvt_roundepu64_ps<const ROUNDING: i32>( |
2736 | src: __m256, |
2737 | k: __mmask8, |
2738 | a: __m512i, |
2739 | ) -> __m256 { |
2740 | unsafe { |
2741 | static_assert_rounding!(ROUNDING); |
2742 | let b: f32x8 = _mm512_cvt_roundepu64_ps::<ROUNDING>(a).as_f32x8(); |
        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
2744 | } |
2745 | } |
2746 | |
2747 | /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
2748 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
2749 | /// Rounding is done according to the ROUNDING parameter, which can be one of: |
2750 | /// |
2751 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2752 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2753 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2754 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2755 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2756 | /// |
2757 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu64_ps&ig_expand=1463) |
2758 | #[inline ] |
2759 | #[target_feature (enable = "avx512dq" )] |
2760 | #[cfg_attr (test, assert_instr(vcvtuqq2ps, ROUNDING = 8))] |
2761 | #[rustc_legacy_const_generics (2)] |
2762 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2763 | pub fn _mm512_maskz_cvt_roundepu64_ps<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m256 { |
2764 | unsafe { |
2765 | static_assert_rounding!(ROUNDING); |
2766 | let b: f32x8 = _mm512_cvt_roundepu64_ps::<ROUNDING>(a).as_f32x8(); |
        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
2768 | } |
2769 | } |
2770 | |
2771 | /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
2772 | /// and store the results in dst. |
2773 | /// |
2774 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu64_ps&ig_expand=1845) |
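///
/// A minimal, hypothetical sketch (not from Intel's documentation): `f32` has a 24-bit
/// significand, so values above 2^24 may be rounded, and the upper two lanes of the result
/// are zeroed.
///
/// ```ignore
/// let a = _mm_set_epi64x(16_777_217, 1); // 2^24 + 1 has no exact f32
/// let r = _mm_cvtepu64_ps(a);
/// // r = [1.0, 16777216.0, 0.0, 0.0] under the default (round-to-nearest) MXCSR mode
/// ```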
2775 | #[inline ] |
2776 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2777 | #[cfg_attr (test, assert_instr(vcvtuqq2ps))] |
2778 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2779 | pub fn _mm_cvtepu64_ps(a: __m128i) -> __m128 { |
    _mm_mask_cvtepu64_ps(_mm_undefined_ps(), 0xff, a)
2781 | } |
2782 | |
2783 | /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
2784 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
2785 | /// not set). |
2786 | /// |
2787 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu64_ps&ig_expand=1846) |
2788 | #[inline ] |
2789 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2790 | #[cfg_attr (test, assert_instr(vcvtuqq2ps))] |
2791 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2792 | pub fn _mm_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 { |
    unsafe { transmute(vcvtuqq2ps_128(a.as_u64x2(), src.as_f32x4(), k)) }
2794 | } |
2795 | |
2796 | /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
2797 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
2798 | /// |
2799 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu64_ps&ig_expand=1847) |
2800 | #[inline ] |
2801 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2802 | #[cfg_attr (test, assert_instr(vcvtuqq2ps))] |
2803 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2804 | pub fn _mm_maskz_cvtepu64_ps(k: __mmask8, a: __m128i) -> __m128 { |
    _mm_mask_cvtepu64_ps(_mm_setzero_ps(), k, a)
2806 | } |
2807 | |
2808 | /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
2809 | /// and store the results in dst. |
2810 | /// |
2811 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu64_ps&ig_expand=1848) |
2812 | #[inline ] |
2813 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2814 | #[cfg_attr (test, assert_instr(vcvtuqq2ps))] |
2815 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2816 | pub fn _mm256_cvtepu64_ps(a: __m256i) -> __m128 { |
    unsafe { transmute(vcvtuqq2ps_256(a.as_u64x4(), _MM_FROUND_CUR_DIRECTION)) }
2818 | } |
2819 | |
2820 | /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
2821 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
2822 | /// not set). |
2823 | /// |
2824 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu64_ps&ig_expand=1849) |
2825 | #[inline ] |
2826 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2827 | #[cfg_attr (test, assert_instr(vcvtuqq2ps))] |
2828 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2829 | pub fn _mm256_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m256i) -> __m128 { |
2830 | unsafe { |
2831 | let b: f32x4 = _mm256_cvtepu64_ps(a).as_f32x4(); |
        transmute(simd_select_bitmask(k, b, src.as_f32x4()))
2833 | } |
2834 | } |
2835 | |
2836 | /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
2837 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
2838 | /// |
2839 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu64_ps&ig_expand=1850) |
2840 | #[inline ] |
2841 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2842 | #[cfg_attr (test, assert_instr(vcvtuqq2ps))] |
2843 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2844 | pub fn _mm256_maskz_cvtepu64_ps(k: __mmask8, a: __m256i) -> __m128 { |
2845 | unsafe { |
2846 | let b: f32x4 = _mm256_cvtepu64_ps(a).as_f32x4(); |
        transmute(simd_select_bitmask(k, b, f32x4::ZERO))
2848 | } |
2849 | } |
2850 | |
2851 | /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
2852 | /// and store the results in dst. |
2853 | /// |
2854 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu64_ps&ig_expand=1851) |
2855 | #[inline ] |
2856 | #[target_feature (enable = "avx512dq" )] |
2857 | #[cfg_attr (test, assert_instr(vcvtuqq2ps))] |
2858 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2859 | pub fn _mm512_cvtepu64_ps(a: __m512i) -> __m256 { |
    unsafe { transmute(vcvtuqq2ps_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION)) }
2861 | } |
2862 | |
2863 | /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
2864 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
2865 | /// not set). |
2866 | /// |
2867 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu64_ps&ig_expand=1852) |
2868 | #[inline ] |
2869 | #[target_feature (enable = "avx512dq" )] |
2870 | #[cfg_attr (test, assert_instr(vcvtuqq2ps))] |
2871 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2872 | pub fn _mm512_mask_cvtepu64_ps(src: __m256, k: __mmask8, a: __m512i) -> __m256 { |
2873 | unsafe { |
2874 | let b: f32x8 = _mm512_cvtepu64_ps(a).as_f32x8(); |
        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
2876 | } |
2877 | } |
2878 | |
2879 | /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
2880 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
2881 | /// |
2882 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu64_ps&ig_expand=1853) |
2883 | #[inline ] |
2884 | #[target_feature (enable = "avx512dq" )] |
2885 | #[cfg_attr (test, assert_instr(vcvtuqq2ps))] |
2886 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2887 | pub fn _mm512_maskz_cvtepu64_ps(k: __mmask8, a: __m512i) -> __m256 { |
2888 | unsafe { |
2889 | let b: f32x8 = _mm512_cvtepu64_ps(a).as_f32x8(); |
        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
2891 | } |
2892 | } |
2893 | |
2894 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
2895 | /// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of: |
2896 | /// |
2897 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2898 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2899 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2900 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2901 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2902 | /// |
2903 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi64&ig_expand=1472) |
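///
/// A minimal, hypothetical sketch (not from Intel's documentation) showing how the rounding mode
/// changes the result for a value exactly halfway between two integers:
///
/// ```ignore
/// let a = _mm512_set1_pd(2.5);
/// let nearest = _mm512_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
/// let up = _mm512_cvt_roundpd_epi64::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
/// // every lane of `nearest` is 2 (ties round to even), every lane of `up` is 3
/// ```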
2904 | #[inline ] |
2905 | #[target_feature (enable = "avx512dq" )] |
2906 | #[cfg_attr (test, assert_instr(vcvtpd2qq, ROUNDING = 8))] |
2907 | #[rustc_legacy_const_generics (1)] |
2908 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2909 | pub fn _mm512_cvt_roundpd_epi64<const ROUNDING: i32>(a: __m512d) -> __m512i { |
2910 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_cvt_roundpd_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
2912 | } |
2913 | |
2914 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
2915 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
2916 | /// not set). Rounding is done according to the ROUNDING parameter, which can be one of: |
2917 | /// |
2918 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2919 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2920 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2921 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2922 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2923 | /// |
2924 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi64&ig_expand=1473) |
2925 | #[inline ] |
2926 | #[target_feature (enable = "avx512dq" )] |
2927 | #[cfg_attr (test, assert_instr(vcvtpd2qq, ROUNDING = 8))] |
2928 | #[rustc_legacy_const_generics (3)] |
2929 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2930 | pub fn _mm512_mask_cvt_roundpd_epi64<const ROUNDING: i32>( |
2931 | src: __m512i, |
2932 | k: __mmask8, |
2933 | a: __m512d, |
2934 | ) -> __m512i { |
2935 | unsafe { |
2936 | static_assert_rounding!(ROUNDING); |
        transmute(vcvtpd2qq_512(a.as_f64x8(), src.as_i64x8(), k, ROUNDING))
2938 | } |
2939 | } |
2940 | |
2941 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
2942 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
2943 | /// Rounding is done according to the ROUNDING parameter, which can be one of: |
2944 | /// |
2945 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2946 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2947 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2948 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2949 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2950 | /// |
2951 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi64&ig_expand=1474) |
2952 | #[inline ] |
2953 | #[target_feature (enable = "avx512dq" )] |
2954 | #[cfg_attr (test, assert_instr(vcvtpd2qq, ROUNDING = 8))] |
2955 | #[rustc_legacy_const_generics (2)] |
2956 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2957 | pub fn _mm512_maskz_cvt_roundpd_epi64<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512i { |
2958 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_cvt_roundpd_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a)
2960 | } |
2961 | |
2962 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
2963 | /// and store the results in dst. |
2964 | /// |
2965 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epi64&ig_expand=1941) |
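///
/// A minimal, hypothetical sketch (not from Intel's documentation); rounding follows `MXCSR.RC`,
/// which defaults to round-to-nearest-even:
///
/// ```ignore
/// let a = _mm_set_pd(2.6, 1.5); // element 1 = 2.6, element 0 = 1.5
/// let r = _mm_cvtpd_epi64(a);
/// // element 0 is 2 (1.5 ties to even), element 1 is 3
/// ```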
2966 | #[inline ] |
2967 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2968 | #[cfg_attr (test, assert_instr(vcvtpd2qq))] |
2969 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2970 | pub fn _mm_cvtpd_epi64(a: __m128d) -> __m128i { |
    _mm_mask_cvtpd_epi64(_mm_undefined_si128(), 0xff, a)
2972 | } |
2973 | |
2974 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
2975 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
2976 | /// not set). |
2977 | /// |
2978 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi64&ig_expand=1942) |
2979 | #[inline ] |
2980 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2981 | #[cfg_attr (test, assert_instr(vcvtpd2qq))] |
2982 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2983 | pub fn _mm_mask_cvtpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { |
    unsafe { transmute(vcvtpd2qq_128(a.as_f64x2(), src.as_i64x2(), k)) }
2985 | } |
2986 | |
2987 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
2988 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
2989 | /// |
2990 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi64&ig_expand=1943) |
2991 | #[inline ] |
2992 | #[target_feature (enable = "avx512dq,avx512vl" )] |
2993 | #[cfg_attr (test, assert_instr(vcvtpd2qq))] |
2994 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
2995 | pub fn _mm_maskz_cvtpd_epi64(k: __mmask8, a: __m128d) -> __m128i { |
    _mm_mask_cvtpd_epi64(_mm_setzero_si128(), k, a)
2997 | } |
2998 | |
2999 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
3000 | /// and store the results in dst. |
3001 | /// |
3002 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epi64&ig_expand=1944) |
3003 | #[inline ] |
3004 | #[target_feature (enable = "avx512dq,avx512vl" )] |
3005 | #[cfg_attr (test, assert_instr(vcvtpd2qq))] |
3006 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
3007 | pub fn _mm256_cvtpd_epi64(a: __m256d) -> __m256i { |
    _mm256_mask_cvtpd_epi64(_mm256_undefined_si256(), 0xff, a)
3009 | } |
3010 | |
3011 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
3012 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
3013 | /// not set). |
3014 | /// |
3015 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi64&ig_expand=1945) |
3016 | #[inline ] |
3017 | #[target_feature (enable = "avx512dq,avx512vl" )] |
3018 | #[cfg_attr (test, assert_instr(vcvtpd2qq))] |
3019 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
3020 | pub fn _mm256_mask_cvtpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i { |
    unsafe { transmute(vcvtpd2qq_256(a.as_f64x4(), src.as_i64x4(), k)) }
3022 | } |
3023 | |
3024 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
3025 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
3026 | /// |
3027 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi64&ig_expand=1946) |
3028 | #[inline ] |
3029 | #[target_feature (enable = "avx512dq,avx512vl" )] |
3030 | #[cfg_attr (test, assert_instr(vcvtpd2qq))] |
3031 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
3032 | pub fn _mm256_maskz_cvtpd_epi64(k: __mmask8, a: __m256d) -> __m256i { |
    _mm256_mask_cvtpd_epi64(_mm256_setzero_si256(), k, a)
3034 | } |
3035 | |
3036 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
3037 | /// and store the results in dst. |
3038 | /// |
3039 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi64&ig_expand=1947) |
3040 | #[inline ] |
3041 | #[target_feature (enable = "avx512dq" )] |
3042 | #[cfg_attr (test, assert_instr(vcvtpd2qq))] |
3043 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
3044 | pub fn _mm512_cvtpd_epi64(a: __m512d) -> __m512i { |
    _mm512_mask_cvtpd_epi64(_mm512_undefined_epi32(), 0xff, a)
3046 | } |
3047 | |
3048 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
3049 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
3050 | /// not set). |
3051 | /// |
3052 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi64&ig_expand=1948) |
3053 | #[inline ] |
3054 | #[target_feature (enable = "avx512dq" )] |
3055 | #[cfg_attr (test, assert_instr(vcvtpd2qq))] |
3056 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
3057 | pub fn _mm512_mask_cvtpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i { |
3058 | unsafe { |
        transmute(vcvtpd2qq_512(
3060 | a.as_f64x8(), |
3061 | src.as_i64x8(), |
3062 | k, |
3063 | _MM_FROUND_CUR_DIRECTION, |
3064 | )) |
3065 | } |
3066 | } |
3067 | |
3068 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
3069 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
3070 | /// |
3071 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi64&ig_expand=1949) |
3072 | #[inline ] |
3073 | #[target_feature (enable = "avx512dq" )] |
3074 | #[cfg_attr (test, assert_instr(vcvtpd2qq))] |
3075 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
3076 | pub fn _mm512_maskz_cvtpd_epi64(k: __mmask8, a: __m512d) -> __m512i { |
    _mm512_mask_cvtpd_epi64(_mm512_setzero_si512(), k, a)
3078 | } |
3079 | |
3080 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
3081 | /// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of: |
3082 | /// |
3083 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3084 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3085 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3086 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3087 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3088 | /// |
3089 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi64&ig_expand=1514) |
3090 | #[inline ] |
3091 | #[target_feature (enable = "avx512dq" )] |
3092 | #[cfg_attr (test, assert_instr(vcvtps2qq, ROUNDING = 8))] |
3093 | #[rustc_legacy_const_generics (1)] |
3094 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
3095 | pub fn _mm512_cvt_roundps_epi64<const ROUNDING: i32>(a: __m256) -> __m512i { |
3096 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_cvt_roundps_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
3098 | } |
3099 | |
3100 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
3101 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
3102 | /// not set). Rounding is done according to the ROUNDING parameter, which can be one of: |
3103 | /// |
3104 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3105 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3106 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3107 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3108 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3109 | /// |
3110 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi64&ig_expand=1515) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_mask_cvt_roundps_epi64<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m256,
) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtps2qq_512(a.as_f32x8(), src.as_i64x8(), k, ROUNDING))
    }
}
3126 | |
3127 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
3128 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
3129 | /// Rounding is done according to the ROUNDING parameter, which can be one of: |
3130 | /// |
3131 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3132 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3133 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3134 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3135 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3136 | /// |
3137 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi64&ig_expand=1516) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_maskz_cvt_roundps_epi64<const ROUNDING: i32>(k: __mmask8, a: __m256) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundps_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a)
}
3147 | |
3148 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
3149 | /// and store the results in dst. |
3150 | /// |
3151 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epi64&ig_expand=2075) |
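///
/// Illustrative sketch (not from the original source); `avx512dq` and `avx512vl`
/// are assumed, so the block is marked `ignore`:
///
/// ```ignore
/// #[target_feature(enable = "avx512dq,avx512vl")]
/// unsafe fn demo() {
///     // Only the two lowest f32 lanes are converted; the upper two are ignored.
///     let a = _mm_set_ps(9.0, 8.0, 2.0, 1.0);
///     let r = _mm_cvtps_epi64(a);
///     assert_eq!(_mm_cmpeq_epi64_mask(r, _mm_set_epi64x(2, 1)), 0b11);
/// }
/// ```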
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_cvtps_epi64(a: __m128) -> __m128i {
    _mm_mask_cvtps_epi64(_mm_undefined_si128(), 0xff, a)
}
3159 | |
3160 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
3161 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
3162 | /// not set). |
3163 | /// |
3164 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi64&ig_expand=2076) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_cvtps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvtps2qq_128(a.as_f32x4(), src.as_i64x2(), k)) }
}
3172 | |
3173 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
3174 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
3175 | /// |
3176 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi64&ig_expand=2077) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m128i {
    _mm_mask_cvtps_epi64(_mm_setzero_si128(), k, a)
}
3184 | |
3185 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
3186 | /// and store the results in dst. |
3187 | /// |
3188 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epi64&ig_expand=2078) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_cvtps_epi64(a: __m128) -> __m256i {
    _mm256_mask_cvtps_epi64(_mm256_undefined_si256(), 0xff, a)
}
3196 | |
3197 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
3198 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
3199 | /// not set). |
3200 | /// |
3201 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi64&ig_expand=2079) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_cvtps_epi64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
    unsafe { transmute(vcvtps2qq_256(a.as_f32x4(), src.as_i64x4(), k)) }
}
3209 | |
3210 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
3211 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
3212 | /// |
3213 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi64&ig_expand=2080) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m256i {
    _mm256_mask_cvtps_epi64(_mm256_setzero_si256(), k, a)
}
3221 | |
3222 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
3223 | /// and store the results in dst. |
3224 | /// |
3225 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi64&ig_expand=2081) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_cvtps_epi64(a: __m256) -> __m512i {
    _mm512_mask_cvtps_epi64(_mm512_undefined_epi32(), 0xff, a)
}
3233 | |
3234 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
3235 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
3236 | /// not set). |
3237 | /// |
3238 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi64&ig_expand=2082) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_mask_cvtps_epi64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
    unsafe {
        transmute(vcvtps2qq_512(
            a.as_f32x8(),
            src.as_i64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
3253 | |
3254 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
3255 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
3256 | /// |
3257 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi64&ig_expand=2083) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_maskz_cvtps_epi64(k: __mmask8, a: __m256) -> __m512i {
    _mm512_mask_cvtps_epi64(_mm512_setzero_si512(), k, a)
}
3265 | |
3266 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
3267 | /// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of: |
3268 | /// |
3269 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3270 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3271 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3272 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3273 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3274 | /// |
3275 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu64&ig_expand=1478) |
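///
/// Illustrative sketch (not from the original source), assuming `avx512dq` and the
/// unstable `stdarch_x86_avx512` feature:
///
/// ```ignore
/// #[target_feature(enable = "avx512dq")]
/// unsafe fn demo() {
///     let a = _mm512_set1_pd(1.5);
///     // Round to nearest (ties to even): 1.5 -> 2 in every unsigned lane.
///     let r = _mm512_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
///     assert_eq!(_mm512_cmpeq_epu64_mask(r, _mm512_set1_epi64(2)), 0xff);
/// }
/// ```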
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_cvt_roundpd_epu64<const ROUNDING: i32>(a: __m512d) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundpd_epu64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
}
3285 | |
3286 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
3287 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
3288 | /// not set). Rounding is done according to the ROUNDING parameter, which can be one of: |
3289 | /// |
3290 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3291 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3292 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3293 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3294 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3295 | /// |
3296 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu64&ig_expand=1479) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_mask_cvt_roundpd_epu64<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512d,
) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtpd2uqq_512(a.as_f64x8(), src.as_u64x8(), k, ROUNDING))
    }
}
3312 | |
3313 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
3314 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
3315 | /// Rounding is done according to the ROUNDING parameter, which can be one of: |
3316 | /// |
3317 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3318 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3319 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3320 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3321 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3322 | /// |
3323 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu64&ig_expand=1480) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_maskz_cvt_roundpd_epu64<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundpd_epu64::<ROUNDING>(_mm512_setzero_si512(), k, a)
}
3333 | |
3334 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
3335 | /// and store the results in dst. |
3336 | /// |
3337 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu64&ig_expand=1959) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_cvtpd_epu64(a: __m128d) -> __m128i {
    _mm_mask_cvtpd_epu64(_mm_undefined_si128(), 0xff, a)
}
3345 | |
3346 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
3347 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
3348 | /// not set). |
3349 | /// |
3350 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu64&ig_expand=1960) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_cvtpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvtpd2uqq_128(a.as_f64x2(), src.as_u64x2(), k)) }
}
3358 | |
3359 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
3360 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
3361 | /// |
3362 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu64&ig_expand=1961) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_maskz_cvtpd_epu64(k: __mmask8, a: __m128d) -> __m128i {
    _mm_mask_cvtpd_epu64(_mm_setzero_si128(), k, a)
}
3370 | |
3371 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
3372 | /// and store the results in dst. |
3373 | /// |
3374 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu64&ig_expand=1962) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_cvtpd_epu64(a: __m256d) -> __m256i {
    _mm256_mask_cvtpd_epu64(_mm256_undefined_si256(), 0xff, a)
}
3382 | |
3383 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
3384 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
3385 | /// not set). |
3386 | /// |
3387 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu64&ig_expand=1963) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_cvtpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
    unsafe { transmute(vcvtpd2uqq_256(a.as_f64x4(), src.as_u64x4(), k)) }
}
3395 | |
3396 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
3397 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
3398 | /// |
3399 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu64&ig_expand=1964) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_maskz_cvtpd_epu64(k: __mmask8, a: __m256d) -> __m256i {
    _mm256_mask_cvtpd_epu64(_mm256_setzero_si256(), k, a)
}
3407 | |
3408 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
3409 | /// and store the results in dst. |
3410 | /// |
3411 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu64&ig_expand=1965) |
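///
/// Illustrative sketch (not from the original source); note that negative or
/// out-of-range inputs have no representable unsigned result. Assumes `avx512dq`
/// and the unstable `stdarch_x86_avx512` feature:
///
/// ```ignore
/// #[target_feature(enable = "avx512dq")]
/// unsafe fn demo() {
///     let a = _mm512_set1_pd(42.0);
///     let r = _mm512_cvtpd_epu64(a);
///     assert_eq!(_mm512_cmpeq_epu64_mask(r, _mm512_set1_epi64(42)), 0xff);
/// }
/// ```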
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_cvtpd_epu64(a: __m512d) -> __m512i {
    _mm512_mask_cvtpd_epu64(_mm512_undefined_epi32(), 0xff, a)
}
3419 | |
3420 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
3421 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
3422 | /// not set). |
3423 | /// |
3424 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu64&ig_expand=1966) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_mask_cvtpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
    unsafe {
        transmute(vcvtpd2uqq_512(
            a.as_f64x8(),
            src.as_u64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
3439 | |
3440 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
3441 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
3442 | /// |
3443 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu64&ig_expand=1967) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_maskz_cvtpd_epu64(k: __mmask8, a: __m512d) -> __m512i {
    _mm512_mask_cvtpd_epu64(_mm512_setzero_si512(), k, a)
}
3451 | |
3452 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
3453 | /// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of: |
3454 | /// |
3455 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3456 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3457 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3458 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3459 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3460 | /// |
3461 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu64&ig_expand=1520) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_cvt_roundps_epu64<const ROUNDING: i32>(a: __m256) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundps_epu64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
}
3471 | |
3472 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
3473 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
3474 | /// not set). Rounding is done according to the ROUNDING parameter, which can be one of: |
3475 | /// |
3476 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3477 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3478 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3479 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3480 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3481 | /// |
3482 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu64&ig_expand=1521) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_mask_cvt_roundps_epu64<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m256,
) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtps2uqq_512(a.as_f32x8(), src.as_u64x8(), k, ROUNDING))
    }
}
3498 | |
3499 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
3500 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
3501 | /// Rounding is done according to the ROUNDING parameter, which can be one of: |
3502 | /// |
3503 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3504 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3505 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3506 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3507 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3508 | /// |
3509 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu64&ig_expand=1522) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_maskz_cvt_roundps_epu64<const ROUNDING: i32>(k: __mmask8, a: __m256) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundps_epu64::<ROUNDING>(_mm512_setzero_si512(), k, a)
}
3519 | |
3520 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
3521 | /// and store the results in dst. |
3522 | /// |
3523 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu64&ig_expand=2093) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_cvtps_epu64(a: __m128) -> __m128i {
    _mm_mask_cvtps_epu64(_mm_undefined_si128(), 0xff, a)
}
3531 | |
3532 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
3533 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
3534 | /// not set). |
3535 | /// |
3536 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu64&ig_expand=2094) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_cvtps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvtps2uqq_128(a.as_f32x4(), src.as_u64x2(), k)) }
}
3544 | |
3545 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
3546 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
3547 | /// |
3548 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu64&ig_expand=2095) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m128i {
    _mm_mask_cvtps_epu64(_mm_setzero_si128(), k, a)
}
3556 | |
3557 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
3558 | /// and store the results in dst. |
3559 | /// |
3560 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu64&ig_expand=2096) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_cvtps_epu64(a: __m128) -> __m256i {
    _mm256_mask_cvtps_epu64(_mm256_undefined_si256(), 0xff, a)
}
3568 | |
3569 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
3570 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
3571 | /// not set). |
3572 | /// |
3573 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu64&ig_expand=2097) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_cvtps_epu64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
    unsafe { transmute(vcvtps2uqq_256(a.as_f32x4(), src.as_u64x4(), k)) }
}
3581 | |
3582 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
3583 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
3584 | /// |
3585 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu64&ig_expand=2098) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m256i {
    _mm256_mask_cvtps_epu64(_mm256_setzero_si256(), k, a)
}
3593 | |
3594 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
3595 | /// and store the results in dst. |
3596 | /// |
3597 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu64&ig_expand=2099) |
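///
/// Illustrative sketch (not from the original source), assuming `avx512dq` and the
/// unstable `stdarch_x86_avx512` feature:
///
/// ```ignore
/// #[target_feature(enable = "avx512dq")]
/// unsafe fn demo() {
///     // Eight f32 values widen to eight unsigned 64-bit lanes; with the default
///     // MXCSR rounding (round to nearest), 7.25 becomes 7.
///     let r = _mm512_cvtps_epu64(_mm256_set1_ps(7.25));
///     assert_eq!(_mm512_cmpeq_epu64_mask(r, _mm512_set1_epi64(7)), 0xff);
/// }
/// ```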
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_cvtps_epu64(a: __m256) -> __m512i {
    _mm512_mask_cvtps_epu64(_mm512_undefined_epi32(), 0xff, a)
}
3605 | |
3606 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
3607 | /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
3608 | /// not set). |
3609 | /// |
3610 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu64&ig_expand=2100) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_mask_cvtps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
    unsafe {
        transmute(vcvtps2uqq_512(
            a.as_f32x8(),
            src.as_u64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
3625 | |
3626 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
3627 | /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
3628 | /// |
3629 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu64&ig_expand=2101) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_maskz_cvtps_epu64(k: __mmask8, a: __m256) -> __m512i {
    _mm512_mask_cvtps_epu64(_mm512_setzero_si512(), k, a)
}
3637 | |
3638 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
3639 | /// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
3640 | /// to the sae parameter. |
3641 | /// |
3642 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi64&ig_expand=2264) |
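///
/// Illustrative sketch (not from the original source), assuming `avx512dq` and the
/// unstable `stdarch_x86_avx512` feature:
///
/// ```ignore
/// #[target_feature(enable = "avx512dq")]
/// unsafe fn demo() {
///     let a = _mm512_set1_pd(-1.9);
///     // Truncation is always toward zero, so -1.9 becomes -1; passing
///     // _MM_FROUND_NO_EXC suppresses the precision exception.
///     let r = _mm512_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(a);
///     assert_eq!(_mm512_cmpeq_epi64_mask(r, _mm512_set1_epi64(-1)), 0xff);
/// }
/// ```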
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_cvtt_roundpd_epi64<const SAE: i32>(a: __m512d) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundpd_epi64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
}
3652 | |
3653 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
3654 | /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
3655 | /// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. |
3656 | /// |
3657 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi64&ig_expand=2265) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_mask_cvtt_roundpd_epi64<const SAE: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512d,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        transmute(vcvttpd2qq_512(a.as_f64x8(), src.as_i64x8(), k, SAE))
    }
}
3673 | |
3674 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
3675 | /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
3676 | /// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. |
3677 | /// |
3678 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi64&ig_expand=2266) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_maskz_cvtt_roundpd_epi64<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundpd_epi64::<SAE>(_mm512_setzero_si512(), k, a)
}
3688 | |
3689 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
3690 | /// with truncation, and store the result in dst. |
3691 | /// |
3692 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epi64&ig_expand=2329) |
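///
/// Illustrative sketch (not from the original source); `avx512dq` and `avx512vl`
/// are assumed:
///
/// ```ignore
/// #[target_feature(enable = "avx512dq,avx512vl")]
/// unsafe fn demo() {
///     let a = _mm_set_pd(-2.7, 3.9);
///     // Truncation toward zero: 3.9 -> 3 and -2.7 -> -2.
///     let r = _mm_cvttpd_epi64(a);
///     assert_eq!(_mm_cmpeq_epi64_mask(r, _mm_set_epi64x(-2, 3)), 0b11);
/// }
/// ```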
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_cvttpd_epi64(a: __m128d) -> __m128i {
    _mm_mask_cvttpd_epi64(_mm_undefined_si128(), 0xff, a)
}
3700 | |
3701 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
3702 | /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
3703 | /// corresponding bit is not set). |
3704 | /// |
3705 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi64&ig_expand=2330) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_cvttpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2qq_128(a.as_f64x2(), src.as_i64x2(), k)) }
}
3713 | |
3714 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
3715 | /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
3716 | /// bit is not set). |
3717 | /// |
3718 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi64&ig_expand=2331) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_maskz_cvttpd_epi64(k: __mmask8, a: __m128d) -> __m128i {
    _mm_mask_cvttpd_epi64(_mm_setzero_si128(), k, a)
}
3726 | |
3727 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
3728 | /// with truncation, and store the result in dst. |
3729 | /// |
3730 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epi64&ig_expand=2332) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_cvttpd_epi64(a: __m256d) -> __m256i {
    _mm256_mask_cvttpd_epi64(_mm256_undefined_si256(), 0xff, a)
}
3738 | |
3739 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
3740 | /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
3741 | /// corresponding bit is not set). |
3742 | /// |
3743 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi64&ig_expand=2333) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_cvttpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
    unsafe { transmute(vcvttpd2qq_256(a.as_f64x4(), src.as_i64x4(), k)) }
}
3751 | |
3752 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
3753 | /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
3754 | /// bit is not set). |
3755 | /// |
3756 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi64&ig_expand=2334) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_maskz_cvttpd_epi64(k: __mmask8, a: __m256d) -> __m256i {
    _mm256_mask_cvttpd_epi64(_mm256_setzero_si256(), k, a)
}
3764 | |
3765 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
3766 | /// with truncation, and store the result in dst. |
3767 | /// |
3768 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi64&ig_expand=2335) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_cvttpd_epi64(a: __m512d) -> __m512i {
    _mm512_mask_cvttpd_epi64(_mm512_undefined_epi32(), 0xff, a)
}
3776 | |
3777 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
3778 | /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
3779 | /// corresponding bit is not set). |
3780 | /// |
3781 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi64&ig_expand=2336) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_mask_cvttpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
    unsafe {
        transmute(vcvttpd2qq_512(
            a.as_f64x8(),
            src.as_i64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
3796 | |
3797 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
3798 | /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
3799 | /// bit is not set). |
3800 | /// |
3801 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi64&ig_expand=2337) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_maskz_cvttpd_epi64(k: __mmask8, a: __m512d) -> __m512i {
    _mm512_mask_cvttpd_epi64(_mm512_setzero_si512(), k, a)
}
3809 | |
3810 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
3811 | /// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
3812 | /// to the sae parameter. |
3813 | /// |
3814 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi64&ig_expand=2294) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_cvtt_roundps_epi64<const SAE: i32>(a: __m256) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundps_epi64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
}
3824 | |
3825 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
3826 | /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
3827 | /// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. |
3828 | /// |
3829 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi64&ig_expand=2295) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_mask_cvtt_roundps_epi64<const SAE: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m256,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        transmute(vcvttps2qq_512(a.as_f32x8(), src.as_i64x8(), k, SAE))
    }
}
3845 | |
3846 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
3847 | /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
3848 | /// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. |
3849 | /// |
3850 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi64&ig_expand=2296) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_maskz_cvtt_roundps_epi64<const SAE: i32>(k: __mmask8, a: __m256) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundps_epi64::<SAE>(_mm512_setzero_si512(), k, a)
}
3860 | |
3861 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
3862 | /// with truncation, and store the result in dst. |
3863 | /// |
3864 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi64&ig_expand=2420) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_cvttps_epi64(a: __m128) -> __m128i {
    _mm_mask_cvttps_epi64(_mm_undefined_si128(), 0xff, a)
}
3872 | |
3873 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
3874 | /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
3875 | /// corresponding bit is not set). |
3876 | /// |
3877 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi64&ig_expand=2421) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_cvttps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2qq_128(a.as_f32x4(), src.as_i64x2(), k)) }
}
3885 | |
3886 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
3887 | /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
3888 | /// bit is not set). |
3889 | /// |
3890 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi64&ig_expand=2422) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m128i {
    _mm_mask_cvttps_epi64(_mm_setzero_si128(), k, a)
}
3898 | |
3899 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
3900 | /// with truncation, and store the result in dst. |
3901 | /// |
3902 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epi64&ig_expand=2423) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_cvttps_epi64(a: __m128) -> __m256i {
    _mm256_mask_cvttps_epi64(_mm256_undefined_si256(), 0xff, a)
}
3910 | |
3911 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
3912 | /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
3913 | /// corresponding bit is not set). |
3914 | /// |
3915 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi64&ig_expand=2424) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_cvttps_epi64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
    unsafe { transmute(vcvttps2qq_256(a.as_f32x4(), src.as_i64x4(), k)) }
}
3923 | |
3924 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
3925 | /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
3926 | /// bit is not set). |
3927 | /// |
3928 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi64&ig_expand=2425) |
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m256i {
    _mm256_mask_cvttps_epi64(_mm256_setzero_si256(), k, a)
}
3936 | |
3937 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
3938 | /// with truncation, and store the result in dst. |
3939 | /// |
3940 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi64&ig_expand=2426) |
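///
/// Illustrative sketch (not from the original source), assuming `avx512dq` and the
/// unstable `stdarch_x86_avx512` feature:
///
/// ```ignore
/// #[target_feature(enable = "avx512dq")]
/// unsafe fn demo() {
///     // Truncation keeps only the integer part: every 5.99 lane becomes 5.
///     let r = _mm512_cvttps_epi64(_mm256_set1_ps(5.99));
///     assert_eq!(_mm512_cmpeq_epi64_mask(r, _mm512_set1_epi64(5)), 0xff);
/// }
/// ```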
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_cvttps_epi64(a: __m256) -> __m512i {
    _mm512_mask_cvttps_epi64(_mm512_undefined_epi32(), 0xff, a)
}
3948 | |
3949 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
3950 | /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
3951 | /// corresponding bit is not set). |
3952 | /// |
3953 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi64&ig_expand=2427) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_mask_cvttps_epi64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
    unsafe {
        transmute(vcvttps2qq_512(
            a.as_f32x8(),
            src.as_i64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
3968 | |
3969 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
3970 | /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
3971 | /// bit is not set). |
3972 | /// |
3973 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi64&ig_expand=2428) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_maskz_cvttps_epi64(k: __mmask8, a: __m256) -> __m512i {
    _mm512_mask_cvttps_epi64(_mm512_setzero_si512(), k, a)
}
3981 | |
3982 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
3983 | /// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
3984 | /// to the sae parameter. |
3985 | /// |
3986 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu64&ig_expand=1965) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_cvtt_roundpd_epu64<const SAE: i32>(a: __m512d) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundpd_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
}
3996 | |
3997 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
3998 | /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
3999 | /// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. |
4000 | /// |
4001 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu64&ig_expand=1966) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_mask_cvtt_roundpd_epu64<const SAE: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512d,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        transmute(vcvttpd2uqq_512(a.as_f64x8(), src.as_u64x8(), k, SAE))
    }
}
4017 | |
4018 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
4019 | /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
4020 | /// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. |
4021 | /// |
4022 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu64&ig_expand=1967) |
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm512_maskz_cvtt_roundpd_epu64<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundpd_epu64::<SAE>(_mm512_setzero_si512(), k, a)
}
4032 | |
4033 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
4034 | /// with truncation, and store the result in dst. |
4035 | /// |
4036 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu64&ig_expand=2347) |
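///
/// Illustrative sketch (not from the original source); `avx512dq` and `avx512vl`
/// are assumed:
///
/// ```ignore
/// #[target_feature(enable = "avx512dq,avx512vl")]
/// unsafe fn demo() {
///     let a = _mm_set_pd(0.9, 255.5);
///     // Truncation toward zero: 255.5 -> 255 and 0.9 -> 0.
///     let r = _mm_cvttpd_epu64(a);
///     assert_eq!(_mm_cmpeq_epu64_mask(r, _mm_set_epi64x(0, 255)), 0b11);
/// }
/// ```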
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_cvttpd_epu64(a: __m128d) -> __m128i {
    _mm_mask_cvttpd_epu64(_mm_undefined_si128(), 0xff, a)
}
4044 | |
4045 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
4046 | /// with truncation, and store the result in dst using writemask k (elements are copied from src if the corresponding |
4047 | /// bit is not set). |
4048 | /// |
4049 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu64&ig_expand=2348) |
4050 | #[inline ] |
4051 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4052 | #[cfg_attr (test, assert_instr(vcvttpd2uqq))] |
4053 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4054 | pub fn _mm_mask_cvttpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { |
unsafe { transmute(vcvttpd2uqq_128(a.as_f64x2(), src.as_u64x2(), k)) }
4056 | } |
4057 | |
4058 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
4059 | /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
4060 | /// bit is not set). |
4061 | /// |
4062 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu64&ig_expand=2349) |
4063 | #[inline ] |
4064 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4065 | #[cfg_attr (test, assert_instr(vcvttpd2uqq))] |
4066 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4067 | pub fn _mm_maskz_cvttpd_epu64(k: __mmask8, a: __m128d) -> __m128i { |
_mm_mask_cvttpd_epu64(_mm_setzero_si128(), k, a)
4069 | } |
4070 | |
4071 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
4072 | /// with truncation, and store the result in dst. |
4073 | /// |
4074 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu64&ig_expand=2350) |
4075 | #[inline ] |
4076 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4077 | #[cfg_attr (test, assert_instr(vcvttpd2uqq))] |
4078 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4079 | pub fn _mm256_cvttpd_epu64(a: __m256d) -> __m256i { |
_mm256_mask_cvttpd_epu64(_mm256_undefined_si256(), 0xff, a)
4081 | } |
4082 | |
4083 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
4084 | /// with truncation, and store the results in dst using writemask k (elements are copied from src if the corresponding |
4085 | /// bit is not set). |
4086 | /// |
4087 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu64&ig_expand=2351) |
4088 | #[inline ] |
4089 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4090 | #[cfg_attr (test, assert_instr(vcvttpd2uqq))] |
4091 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4092 | pub fn _mm256_mask_cvttpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i { |
unsafe { transmute(vcvttpd2uqq_256(a.as_f64x4(), src.as_u64x4(), k)) }
4094 | } |
4095 | |
4096 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
4097 | /// with truncation, and store the results in dst using zeromask k (elements are zeroed out if the corresponding |
4098 | /// bit is not set). |
4099 | /// |
4100 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu64&ig_expand=2352) |
4101 | #[inline ] |
4102 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4103 | #[cfg_attr (test, assert_instr(vcvttpd2uqq))] |
4104 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4105 | pub fn _mm256_maskz_cvttpd_epu64(k: __mmask8, a: __m256d) -> __m256i { |
_mm256_mask_cvttpd_epu64(_mm256_setzero_si256(), k, a)
4107 | } |
4108 | |
4109 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
4110 | /// with truncation, and store the result in dst. |
4111 | /// |
4112 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu64&ig_expand=2353) |
4113 | #[inline ] |
4114 | #[target_feature (enable = "avx512dq" )] |
4115 | #[cfg_attr (test, assert_instr(vcvttpd2uqq))] |
4116 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4117 | pub fn _mm512_cvttpd_epu64(a: __m512d) -> __m512i { |
_mm512_mask_cvttpd_epu64(_mm512_undefined_epi32(), 0xff, a)
4119 | } |
4120 | |
4121 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
4122 | /// with truncation, and store the result in dst using writemask k (elements are copied from src if the corresponding |
4123 | /// bit is not set). |
4124 | /// |
4125 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu64&ig_expand=2354) |
4126 | #[inline ] |
4127 | #[target_feature (enable = "avx512dq" )] |
4128 | #[cfg_attr (test, assert_instr(vcvttpd2uqq))] |
4129 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4130 | pub fn _mm512_mask_cvttpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i { |
4131 | unsafe { |
transmute(vcvttpd2uqq_512(
4133 | a.as_f64x8(), |
4134 | src.as_u64x8(), |
4135 | k, |
4136 | _MM_FROUND_CUR_DIRECTION, |
4137 | )) |
4138 | } |
4139 | } |
4140 | |
4141 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
4142 | /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
4143 | /// |
4144 | /// |
4145 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu64&ig_expand=2355) |
4146 | #[inline ] |
4147 | #[target_feature (enable = "avx512dq" )] |
4148 | #[cfg_attr (test, assert_instr(vcvttpd2uqq))] |
4149 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4150 | pub fn _mm512_maskz_cvttpd_epu64(k: __mmask8, a: __m512d) -> __m512i { |
_mm512_mask_cvttpd_epu64(_mm512_setzero_si512(), k, a)
4152 | } |
4153 | |
4154 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
4155 | /// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
4156 | /// to the sae parameter. |
4157 | /// |
4158 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu64&ig_expand=2300) |
4159 | #[inline ] |
4160 | #[target_feature (enable = "avx512dq" )] |
4161 | #[cfg_attr (test, assert_instr(vcvttps2uqq, SAE = 8))] |
4162 | #[rustc_legacy_const_generics (1)] |
4163 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4164 | pub fn _mm512_cvtt_roundps_epu64<const SAE: i32>(a: __m256) -> __m512i { |
4165 | static_assert_sae!(SAE); |
_mm512_mask_cvtt_roundps_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
4167 | } |
4168 | |
4169 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
4170 | /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
4171 | /// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. |
4172 | /// |
4173 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu64&ig_expand=2301) |
4174 | #[inline ] |
4175 | #[target_feature (enable = "avx512dq" )] |
4176 | #[cfg_attr (test, assert_instr(vcvttps2uqq, SAE = 8))] |
4177 | #[rustc_legacy_const_generics (3)] |
4178 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4179 | pub fn _mm512_mask_cvtt_roundps_epu64<const SAE: i32>( |
4180 | src: __m512i, |
4181 | k: __mmask8, |
4182 | a: __m256, |
4183 | ) -> __m512i { |
4184 | unsafe { |
4185 | static_assert_sae!(SAE); |
transmute(vcvttps2uqq_512(a.as_f32x8(), src.as_u64x8(), k, SAE))
4187 | } |
4188 | } |
4189 | |
4190 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
4191 | /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
4192 | /// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. |
4193 | /// |
4194 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu64&ig_expand=2302) |
4195 | #[inline ] |
4196 | #[target_feature (enable = "avx512dq" )] |
4197 | #[cfg_attr (test, assert_instr(vcvttps2uqq, SAE = 8))] |
4198 | #[rustc_legacy_const_generics (2)] |
4199 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4200 | pub fn _mm512_maskz_cvtt_roundps_epu64<const SAE: i32>(k: __mmask8, a: __m256) -> __m512i { |
4201 | static_assert_sae!(SAE); |
_mm512_mask_cvtt_roundps_epu64::<SAE>(_mm512_setzero_si512(), k, a)
4203 | } |
4204 | |
4205 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
4206 | /// with truncation, and store the result in dst. |
4207 | /// |
4208 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu64&ig_expand=2438) |
4209 | #[inline ] |
4210 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4211 | #[cfg_attr (test, assert_instr(vcvttps2uqq))] |
4212 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4213 | pub fn _mm_cvttps_epu64(a: __m128) -> __m128i { |
_mm_mask_cvttps_epu64(_mm_undefined_si128(), 0xff, a)
4215 | } |
4216 | |
4217 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
4218 | /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
4219 | /// corresponding bit is not set). |
4220 | /// |
4221 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu64&ig_expand=2439) |
4222 | #[inline ] |
4223 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4224 | #[cfg_attr (test, assert_instr(vcvttps2uqq))] |
4225 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4226 | pub fn _mm_mask_cvttps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m128i { |
unsafe { transmute(vcvttps2uqq_128(a.as_f32x4(), src.as_u64x2(), k)) }
4228 | } |
4229 | |
4230 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
4231 | /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
4232 | /// bit is not set). |
4233 | /// |
4234 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu64&ig_expand=2440) |
4235 | #[inline ] |
4236 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4237 | #[cfg_attr (test, assert_instr(vcvttps2uqq))] |
4238 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4239 | pub fn _mm_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m128i { |
_mm_mask_cvttps_epu64(_mm_setzero_si128(), k, a)
4241 | } |
4242 | |
4243 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
4244 | /// with truncation, and store the result in dst. |
4245 | /// |
4246 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu64&ig_expand=2441) |
4247 | #[inline ] |
4248 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4249 | #[cfg_attr (test, assert_instr(vcvttps2uqq))] |
4250 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4251 | pub fn _mm256_cvttps_epu64(a: __m128) -> __m256i { |
_mm256_mask_cvttps_epu64(_mm256_undefined_si256(), 0xff, a)
4253 | } |
4254 | |
4255 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
4256 | /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
4257 | /// corresponding bit is not set). |
4258 | /// |
4259 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu64&ig_expand=2442) |
4260 | #[inline ] |
4261 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4262 | #[cfg_attr (test, assert_instr(vcvttps2uqq))] |
4263 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4264 | pub fn _mm256_mask_cvttps_epu64(src: __m256i, k: __mmask8, a: __m128) -> __m256i { |
unsafe { transmute(vcvttps2uqq_256(a.as_f32x4(), src.as_u64x4(), k)) }
4266 | } |
4267 | |
4268 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
4269 | /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
4270 | /// bit is not set). |
4271 | /// |
4272 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu64&ig_expand=2443) |
4273 | #[inline ] |
4274 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4275 | #[cfg_attr (test, assert_instr(vcvttps2uqq))] |
4276 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4277 | pub fn _mm256_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m256i { |
_mm256_mask_cvttps_epu64(_mm256_setzero_si256(), k, a)
4279 | } |
4280 | |
4281 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
4282 | /// with truncation, and store the result in dst. |
4283 | /// |
4284 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu64&ig_expand=2444) |
4285 | #[inline ] |
4286 | #[target_feature (enable = "avx512dq" )] |
4287 | #[cfg_attr (test, assert_instr(vcvttps2uqq))] |
4288 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4289 | pub fn _mm512_cvttps_epu64(a: __m256) -> __m512i { |
_mm512_mask_cvttps_epu64(_mm512_undefined_epi32(), 0xff, a)
4291 | } |
4292 | |
4293 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
4294 | /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
4295 | /// corresponding bit is not set). |
4296 | /// |
4297 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu64&ig_expand=2445) |
4298 | #[inline ] |
4299 | #[target_feature (enable = "avx512dq" )] |
4300 | #[cfg_attr (test, assert_instr(vcvttps2uqq))] |
4301 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4302 | pub fn _mm512_mask_cvttps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i { |
4303 | unsafe { |
transmute(vcvttps2uqq_512(
4305 | a.as_f32x8(), |
4306 | src.as_u64x8(), |
4307 | k, |
4308 | _MM_FROUND_CUR_DIRECTION, |
4309 | )) |
4310 | } |
4311 | } |
4312 | |
4313 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
4314 | /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
4315 | /// bit is not set). |
4316 | /// |
4317 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu64&ig_expand=2446) |
4318 | #[inline ] |
4319 | #[target_feature (enable = "avx512dq" )] |
4320 | #[cfg_attr (test, assert_instr(vcvttps2uqq))] |
4321 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4322 | pub fn _mm512_maskz_cvttps_epu64(k: __mmask8, a: __m256) -> __m512i { |
_mm512_mask_cvttps_epu64(_mm512_setzero_si512(), k, a)
4324 | } |
4325 | |
4326 | // Multiply-Low |
4327 | |
4328 | /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
4329 | /// the low 64 bits of the intermediate integers in `dst`. |
4330 | /// |
4331 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi64&ig_expand=4778) |
4332 | #[inline ] |
4333 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4334 | #[cfg_attr (test, assert_instr(vpmullq))] |
4335 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4336 | pub fn _mm_mullo_epi64(a: __m128i, b: __m128i) -> __m128i { |
unsafe { transmute(simd_mul(a.as_i64x2(), b.as_i64x2())) }
4338 | } |
4339 | |
4340 | /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
4341 | /// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from |
4342 | /// `src` if the corresponding bit is not set). |
4343 | /// |
4344 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi64&ig_expand=4776) |
4345 | #[inline ] |
4346 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4347 | #[cfg_attr (test, assert_instr(vpmullq))] |
4348 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4349 | pub fn _mm_mask_mullo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
4350 | unsafe { |
4351 | let b: i64x2 = _mm_mullo_epi64(a, b).as_i64x2(); |
transmute(simd_select_bitmask(k, b, src.as_i64x2()))
4353 | } |
4354 | } |
4355 | |
4356 | /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
4357 | /// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if |
4358 | /// the corresponding bit is not set). |
4359 | /// |
4360 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi64&ig_expand=4777) |
4361 | #[inline ] |
4362 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4363 | #[cfg_attr (test, assert_instr(vpmullq))] |
4364 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4365 | pub fn _mm_maskz_mullo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
4366 | unsafe { |
4367 | let b: i64x2 = _mm_mullo_epi64(a, b).as_i64x2(); |
transmute(simd_select_bitmask(k, b, i64x2::ZERO))
4369 | } |
4370 | } |
4371 | |
4372 | /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
4373 | /// the low 64 bits of the intermediate integers in `dst`. |
4374 | /// |
4375 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi64&ig_expand=4781) |
4376 | #[inline ] |
4377 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4378 | #[cfg_attr (test, assert_instr(vpmullq))] |
4379 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4380 | pub fn _mm256_mullo_epi64(a: __m256i, b: __m256i) -> __m256i { |
unsafe { transmute(simd_mul(a.as_i64x4(), b.as_i64x4())) }
4382 | } |
4383 | |
4384 | /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
4385 | /// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from |
4386 | /// `src` if the corresponding bit is not set). |
4387 | /// |
4388 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi64&ig_expand=4779) |
4389 | #[inline ] |
4390 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4391 | #[cfg_attr (test, assert_instr(vpmullq))] |
4392 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4393 | pub fn _mm256_mask_mullo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { |
4394 | unsafe { |
4395 | let b: i64x4 = _mm256_mullo_epi64(a, b).as_i64x4(); |
transmute(simd_select_bitmask(k, b, src.as_i64x4()))
4397 | } |
4398 | } |
4399 | |
4400 | /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
4401 | /// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if |
4402 | /// the corresponding bit is not set). |
4403 | /// |
4404 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi64&ig_expand=4780) |
4405 | #[inline ] |
4406 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4407 | #[cfg_attr (test, assert_instr(vpmullq))] |
4408 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4409 | pub fn _mm256_maskz_mullo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { |
4410 | unsafe { |
4411 | let b: i64x4 = _mm256_mullo_epi64(a, b).as_i64x4(); |
transmute(simd_select_bitmask(k, b, i64x4::ZERO))
4413 | } |
4414 | } |
4415 | |
4416 | /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
4417 | /// the low 64 bits of the intermediate integers in `dst`. |
4418 | /// |
4419 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi64&ig_expand=4784) |
4420 | #[inline ] |
4421 | #[target_feature (enable = "avx512dq" )] |
4422 | #[cfg_attr (test, assert_instr(vpmullq))] |
4423 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4424 | pub fn _mm512_mullo_epi64(a: __m512i, b: __m512i) -> __m512i { |
unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) }
4426 | } |
4427 | |
4428 | /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
4429 | /// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from |
4430 | /// `src` if the corresponding bit is not set). |
4431 | /// |
4432 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi64&ig_expand=4782) |
4433 | #[inline ] |
4434 | #[target_feature (enable = "avx512dq" )] |
4435 | #[cfg_attr (test, assert_instr(vpmullq))] |
4436 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4437 | pub fn _mm512_mask_mullo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { |
4438 | unsafe { |
4439 | let b: i64x8 = _mm512_mullo_epi64(a, b).as_i64x8(); |
transmute(simd_select_bitmask(k, b, src.as_i64x8()))
4441 | } |
4442 | } |
4443 | |
4444 | /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
4445 | /// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if |
4446 | /// the corresponding bit is not set). |
4447 | /// |
4448 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi64&ig_expand=4783) |
4449 | #[inline ] |
4450 | #[target_feature (enable = "avx512dq" )] |
4451 | #[cfg_attr (test, assert_instr(vpmullq))] |
4452 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4453 | pub fn _mm512_maskz_mullo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { |
4454 | unsafe { |
4455 | let b: i64x8 = _mm512_mullo_epi64(a, b).as_i64x8(); |
transmute(simd_select_bitmask(k, b, i64x8::ZERO))
4457 | } |
4458 | } |
4459 | |
4460 | // Mask Registers |
4461 | |
4462 | /// Convert 8-bit mask a to a 32-bit integer value and store the result in dst. |
4463 | /// |
4464 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask8_u32&ig_expand=1891) |
4465 | #[inline ] |
4466 | #[target_feature (enable = "avx512dq" )] |
4467 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4468 | pub fn _cvtmask8_u32(a: __mmask8) -> u32 { |
4469 | a as u32 |
4470 | } |
4471 | |
4472 | /// Convert 32-bit integer value a to an 8-bit mask and store the result in dst. |
4473 | /// |
4474 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask8&ig_expand=2467) |
4475 | #[inline ] |
4476 | #[target_feature (enable = "avx512dq" )] |
4477 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4478 | pub fn _cvtu32_mask8(a: u32) -> __mmask8 { |
4479 | a as __mmask8 |
4480 | } |
4481 | |
4482 | /// Add 16-bit masks a and b, and store the result in dst. |
4483 | /// |
4484 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask16&ig_expand=3903) |
4485 | #[inline ] |
4486 | #[target_feature (enable = "avx512dq" )] |
4487 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4488 | pub fn _kadd_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { |
4489 | a + b |
4490 | } |
4491 | |
4492 | /// Add 8-bit masks a and b, and store the result in dst. |
4493 | /// |
4494 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask8&ig_expand=3906) |
4495 | #[inline ] |
4496 | #[target_feature (enable = "avx512dq" )] |
4497 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4498 | pub fn _kadd_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { |
4499 | a + b |
4500 | } |
4501 | |
4502 | /// Bitwise AND of 8-bit masks a and b, and store the result in dst. |
4503 | /// |
4504 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask8&ig_expand=3911) |
4505 | #[inline ] |
4506 | #[target_feature (enable = "avx512dq" )] |
4507 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4508 | pub fn _kand_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { |
4509 | a & b |
4510 | } |
4511 | |
4512 | /// Bitwise AND NOT of 8-bit masks a and b, and store the result in dst. |
4513 | /// |
4514 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask8&ig_expand=3916) |
4515 | #[inline ] |
4516 | #[target_feature (enable = "avx512dq" )] |
4517 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4518 | pub fn _kandn_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { |
4519 | _knot_mask8(a) & b |
4520 | } |
4521 | |
4522 | /// Bitwise NOT of 8-bit mask a, and store the result in dst. |
4523 | /// |
4524 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask8&ig_expand=3922) |
4525 | #[inline ] |
4526 | #[target_feature (enable = "avx512dq" )] |
4527 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4528 | pub fn _knot_mask8(a: __mmask8) -> __mmask8 { |
4529 | a ^ 0b11111111 |
4530 | } |
4531 | |
4532 | /// Bitwise OR of 8-bit masks a and b, and store the result in dst. |
4533 | /// |
4534 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask8&ig_expand=3927) |
4535 | #[inline ] |
4536 | #[target_feature (enable = "avx512dq" )] |
4537 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4538 | pub fn _kor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { |
4539 | a | b |
4540 | } |
4541 | |
4542 | /// Bitwise XNOR of 8-bit masks a and b, and store the result in dst. |
4543 | /// |
4544 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask8&ig_expand=3969) |
4545 | #[inline ] |
4546 | #[target_feature (enable = "avx512dq" )] |
4547 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4548 | pub fn _kxnor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { |
4549 | _knot_mask8(_kxor_mask8(a, b)) |
4550 | } |
4551 | |
4552 | /// Bitwise XOR of 8-bit masks a and b, and store the result in dst. |
4553 | /// |
4554 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask8&ig_expand=3974) |
4555 | #[inline ] |
4556 | #[target_feature (enable = "avx512dq" )] |
4557 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4558 | pub fn _kxor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { |
4559 | a ^ b |
4560 | } |
4561 | |
4562 | /// Compute the bitwise OR of 8-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise |
4563 | /// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones. |
4564 | /// |
4565 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask8_u8&ig_expand=3931) |
4566 | #[inline ] |
4567 | #[target_feature (enable = "avx512dq" )] |
4568 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4569 | pub unsafe fn _kortest_mask8_u8(a: __mmask8, b: __mmask8, all_ones: *mut u8) -> u8 { |
4570 | let tmp: u8 = _kor_mask8(a, b); |
4571 | *all_ones = (tmp == 0xff) as u8; |
4572 | (tmp == 0) as u8 |
4573 | } |
4574 | |
4575 | /// Compute the bitwise OR of 8-bit masks a and b. If the result is all ones, store 1 in dst, otherwise |
4576 | /// store 0 in dst. |
4577 | /// |
4578 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask8_u8&ig_expand=3936) |
4579 | #[inline ] |
4580 | #[target_feature (enable = "avx512dq" )] |
4581 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4582 | pub fn _kortestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { |
4583 | (_kor_mask8(a, b) == 0xff) as u8 |
4584 | } |
4585 | |
4586 | /// Compute the bitwise OR of 8-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise |
4587 | /// store 0 in dst. |
4588 | /// |
4589 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask8_u8&ig_expand=3941) |
4590 | #[inline ] |
4591 | #[target_feature (enable = "avx512dq" )] |
4592 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4593 | pub fn _kortestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { |
4594 | (_kor_mask8(a, b) == 0) as u8 |
4595 | } |
4596 | |
4597 | /// Shift 8-bit mask a left by count bits while shifting in zeros, and store the result in dst. |
4598 | /// |
4599 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask8&ig_expand=3945) |
4600 | #[inline ] |
4601 | #[target_feature (enable = "avx512dq" )] |
4602 | #[rustc_legacy_const_generics (1)] |
4603 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4604 | pub fn _kshiftli_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 { |
4605 | a << COUNT |
4606 | } |
4607 | |
4608 | /// Shift 8-bit mask a right by count bits while shifting in zeros, and store the result in dst. |
4609 | /// |
4610 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask8&ig_expand=3949) |
4611 | #[inline ] |
4612 | #[target_feature (enable = "avx512dq" )] |
4613 | #[rustc_legacy_const_generics (1)] |
4614 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4615 | pub fn _kshiftri_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 { |
4616 | a >> COUNT |
4617 | } |
4618 | |
4619 | /// Compute the bitwise AND of 16-bit masks a and b, and if the result is all zeros, store 1 in dst, |
4620 | /// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all |
4621 | /// zeros, store 1 in and_not, otherwise store 0 in and_not. |
4622 | /// |
4623 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask16_u8&ig_expand=3950) |
4624 | #[inline ] |
4625 | #[target_feature (enable = "avx512dq" )] |
4626 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4627 | pub unsafe fn _ktest_mask16_u8(a: __mmask16, b: __mmask16, and_not: *mut u8) -> u8 { |
4628 | *and_not = (_kandn_mask16(a, b) == 0) as u8; |
4629 | (_kand_mask16(a, b) == 0) as u8 |
4630 | } |
4631 | |
4632 | /// Compute the bitwise AND of 8-bit masks a and b, and if the result is all zeros, store 1 in dst, |
4633 | /// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all |
4634 | /// zeros, store 1 in and_not, otherwise store 0 in and_not. |
4635 | /// |
4636 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask8_u8&ig_expand=3953) |
4637 | #[inline ] |
4638 | #[target_feature (enable = "avx512dq" )] |
4639 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4640 | pub unsafe fn _ktest_mask8_u8(a: __mmask8, b: __mmask8, and_not: *mut u8) -> u8 { |
4641 | *and_not = (_kandn_mask8(a, b) == 0) as u8; |
4642 | (_kand_mask8(a, b) == 0) as u8 |
4643 | } |
4644 | |
4645 | /// Compute the bitwise NOT of 16-bit mask a and then AND with 16-bit mask b, if the result is all |
4646 | /// zeros, store 1 in dst, otherwise store 0 in dst. |
4647 | /// |
4648 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask16_u8&ig_expand=3954) |
4649 | #[inline ] |
4650 | #[target_feature (enable = "avx512dq" )] |
4651 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4652 | pub fn _ktestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 { |
4653 | (_kandn_mask16(a, b) == 0) as u8 |
4654 | } |
4655 | |
4656 | /// Compute the bitwise NOT of 8-bit mask a and then AND with 8-bit mask b, if the result is all |
4657 | /// zeros, store 1 in dst, otherwise store 0 in dst. |
4658 | /// |
4659 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask8_u8&ig_expand=3957) |
4660 | #[inline ] |
4661 | #[target_feature (enable = "avx512dq" )] |
4662 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4663 | pub fn _ktestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { |
4664 | (_kandn_mask8(a, b) == 0) as u8 |
4665 | } |
4666 | |
4667 | /// Compute the bitwise AND of 16-bit masks a and b, if the result is all zeros, store 1 in dst, otherwise |
4668 | /// store 0 in dst. |
4669 | /// |
4670 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask16_u8&ig_expand=3958) |
4671 | #[inline ] |
4672 | #[target_feature (enable = "avx512dq" )] |
4673 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4674 | pub fn _ktestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 { |
4675 | (_kand_mask16(a, b) == 0) as u8 |
4676 | } |
4677 | |
4678 | /// Compute the bitwise AND of 8-bit masks a and b, if the result is all zeros, store 1 in dst, otherwise |
4679 | /// store 0 in dst. |
4680 | /// |
4681 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask8_u8&ig_expand=3961) |
4682 | #[inline ] |
4683 | #[target_feature (enable = "avx512dq" )] |
4684 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4685 | pub fn _ktestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { |
4686 | (_kand_mask8(a, b) == 0) as u8 |
4687 | } |
4688 | |
4689 | /// Load 8-bit mask from memory |
4690 | /// |
4691 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask8&ig_expand=3999) |
4692 | #[inline ] |
4693 | #[target_feature (enable = "avx512dq" )] |
4694 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4695 | pub unsafe fn _load_mask8(mem_addr: *const __mmask8) -> __mmask8 { |
4696 | *mem_addr |
4697 | } |
4698 | |
4699 | /// Store 8-bit mask to memory |
4700 | /// |
4701 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask8&ig_expand=6468) |
4702 | #[inline ] |
4703 | #[target_feature (enable = "avx512dq" )] |
4704 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4705 | pub unsafe fn _store_mask8(mem_addr: *mut __mmask8, a: __mmask8) { |
4706 | *mem_addr = a; |
4707 | } |
4708 | |
4709 | /// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit |
4710 | /// integer in a. |
4711 | /// |
4712 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi32_mask&ig_expand=4612) |
4713 | #[inline ] |
4714 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4715 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4716 | pub fn _mm_movepi32_mask(a: __m128i) -> __mmask8 { |
4717 | let zero: __m128i = _mm_setzero_si128(); |
_mm_cmplt_epi32_mask(a, zero)
4719 | } |
4720 | |
4721 | /// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit |
4722 | /// integer in a. |
4723 | /// |
4724 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi32_mask&ig_expand=4613) |
4725 | #[inline ] |
4726 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4727 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4728 | pub fn _mm256_movepi32_mask(a: __m256i) -> __mmask8 { |
4729 | let zero: __m256i = _mm256_setzero_si256(); |
_mm256_cmplt_epi32_mask(a, zero)
4731 | } |
4732 | |
4733 | /// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit |
4734 | /// integer in a. |
4735 | /// |
4736 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi32_mask&ig_expand=4614) |
4737 | #[inline ] |
4738 | #[target_feature (enable = "avx512dq" )] |
4739 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4740 | pub fn _mm512_movepi32_mask(a: __m512i) -> __mmask16 { |
4741 | let zero: __m512i = _mm512_setzero_si512(); |
_mm512_cmplt_epi32_mask(a, zero)
4743 | } |
4744 | |
4745 | /// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit |
4746 | /// integer in a. |
4747 | /// |
4748 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi64_mask&ig_expand=4615) |
4749 | #[inline ] |
4750 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4751 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4752 | pub fn _mm_movepi64_mask(a: __m128i) -> __mmask8 { |
4753 | let zero: __m128i = _mm_setzero_si128(); |
_mm_cmplt_epi64_mask(a, zero)
4755 | } |
4756 | |
4757 | /// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit |
4758 | /// integer in a. |
4759 | /// |
4760 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi64_mask&ig_expand=4616) |
4761 | #[inline ] |
4762 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4763 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4764 | pub fn _mm256_movepi64_mask(a: __m256i) -> __mmask8 { |
4765 | let zero: __m256i = _mm256_setzero_si256(); |
_mm256_cmplt_epi64_mask(a, zero)
4767 | } |
4768 | |
4769 | /// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit |
4770 | /// integer in a. |
4771 | /// |
4772 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi64_mask&ig_expand=4617) |
4773 | #[inline ] |
4774 | #[target_feature (enable = "avx512dq" )] |
4775 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4776 | pub fn _mm512_movepi64_mask(a: __m512i) -> __mmask8 { |
4777 | let zero: __m512i = _mm512_setzero_si512(); |
_mm512_cmplt_epi64_mask(a, zero)
4779 | } |
4780 | |
4781 | /// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding |
4782 | /// bit in k. |
4783 | /// |
4784 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi32&ig_expand=4625) |
4785 | #[inline ] |
4786 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4787 | #[cfg_attr (test, assert_instr(vpmovm2d))] |
4788 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4789 | pub fn _mm_movm_epi32(k: __mmask8) -> __m128i { |
4790 | let ones: __m128i = _mm_set1_epi32(-1); |
_mm_maskz_mov_epi32(k, ones)
4792 | } |
4793 | |
4794 | /// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding |
4795 | /// bit in k. |
4796 | /// |
4797 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi32&ig_expand=4626) |
4798 | #[inline ] |
4799 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4800 | #[cfg_attr (test, assert_instr(vpmovm2d))] |
4801 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4802 | pub fn _mm256_movm_epi32(k: __mmask8) -> __m256i { |
4803 | let ones: __m256i = _mm256_set1_epi32(-1); |
_mm256_maskz_mov_epi32(k, ones)
4805 | } |
4806 | |
4807 | /// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding |
4808 | /// bit in k. |
4809 | /// |
4810 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi32&ig_expand=4627) |
4811 | #[inline ] |
4812 | #[target_feature (enable = "avx512dq" )] |
4813 | #[cfg_attr (test, assert_instr(vpmovm2d))] |
4814 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4815 | pub fn _mm512_movm_epi32(k: __mmask16) -> __m512i { |
4816 | let ones: __m512i = _mm512_set1_epi32(-1); |
_mm512_maskz_mov_epi32(k, ones)
4818 | } |
4819 | |
4820 | /// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding |
4821 | /// bit in k. |
4822 | /// |
4823 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi64&ig_expand=4628) |
4824 | #[inline ] |
4825 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4826 | #[cfg_attr (test, assert_instr(vpmovm2q))] |
4827 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4828 | pub fn _mm_movm_epi64(k: __mmask8) -> __m128i { |
4829 | let ones: __m128i = _mm_set1_epi64x(-1); |
_mm_maskz_mov_epi64(k, ones)
4831 | } |
4832 | |
4833 | /// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding |
4834 | /// bit in k. |
4835 | /// |
4836 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi64&ig_expand=4629) |
4837 | #[inline ] |
4838 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4839 | #[cfg_attr (test, assert_instr(vpmovm2q))] |
4840 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4841 | pub fn _mm256_movm_epi64(k: __mmask8) -> __m256i { |
4842 | let ones: __m256i = _mm256_set1_epi64x(-1); |
_mm256_maskz_mov_epi64(k, ones)
4844 | } |
4845 | |
4846 | /// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding |
4847 | /// bit in k. |
4848 | /// |
4849 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi64&ig_expand=4630) |
4850 | #[inline ] |
4851 | #[target_feature (enable = "avx512dq" )] |
4852 | #[cfg_attr (test, assert_instr(vpmovm2q))] |
4853 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4854 | pub fn _mm512_movm_epi64(k: __mmask8) -> __m512i { |
4855 | let ones: __m512i = _mm512_set1_epi64(-1); |
_mm512_maskz_mov_epi64(k, ones)
4857 | } |
4858 | |
4859 | // Range |
4860 | |
4861 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
4862 | /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst. |
4863 | /// Lower 2 bits of IMM8 specifies the operation control: |
4864 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
4865 | /// Upper 2 bits of IMM8 specifies the sign control: |
4866 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
4867 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
4868 | /// |
4869 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_round_pd&ig_expand=5210) |
4870 | #[inline ] |
4871 | #[target_feature (enable = "avx512dq" )] |
4872 | #[cfg_attr (test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))] |
4873 | #[rustc_legacy_const_generics (2, 3)] |
4874 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4875 | pub fn _mm512_range_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d, b: __m512d) -> __m512d { |
4876 | static_assert_uimm_bits!(IMM8, 4); |
4877 | static_assert_sae!(SAE); |
_mm512_mask_range_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), 0xff, a, b)
4879 | } |
4880 | |
4881 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
4882 | /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using |
4883 | /// writemask k (elements are copied from src to dst if the corresponding mask bit is not set). |
4884 | /// Lower 2 bits of IMM8 specifies the operation control: |
4885 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
4886 | /// Upper 2 bits of IMM8 specifies the sign control: |
4887 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
4888 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
4889 | /// |
4890 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_round_pd&ig_expand=5208) |
4891 | #[inline ] |
4892 | #[target_feature (enable = "avx512dq" )] |
4893 | #[cfg_attr (test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))] |
4894 | #[rustc_legacy_const_generics (4, 5)] |
4895 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4896 | pub fn _mm512_mask_range_round_pd<const IMM8: i32, const SAE: i32>( |
4897 | src: __m512d, |
4898 | k: __mmask8, |
4899 | a: __m512d, |
4900 | b: __m512d, |
4901 | ) -> __m512d { |
4902 | unsafe { |
4903 | static_assert_uimm_bits!(IMM8, 4); |
4904 | static_assert_sae!(SAE); |
transmute(vrangepd_512(
4906 | a.as_f64x8(), |
4907 | b.as_f64x8(), |
4908 | IMM8, |
4909 | src.as_f64x8(), |
4910 | k, |
4911 | SAE, |
4912 | )) |
4913 | } |
4914 | } |
4915 | |
4916 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
4917 | /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using |
4918 | /// zeromask k (elements are zeroed out if the corresponding mask bit is not set). |
4919 | /// Lower 2 bits of IMM8 specifies the operation control: |
4920 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
4921 | /// Upper 2 bits of IMM8 specifies the sign control: |
4922 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
4923 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
4924 | /// |
4925 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_round_pd&ig_expand=5209) |
4926 | #[inline ] |
4927 | #[target_feature (enable = "avx512dq" )] |
4928 | #[cfg_attr (test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))] |
4929 | #[rustc_legacy_const_generics (3, 4)] |
4930 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4931 | pub fn _mm512_maskz_range_round_pd<const IMM8: i32, const SAE: i32>( |
4932 | k: __mmask8, |
4933 | a: __m512d, |
4934 | b: __m512d, |
4935 | ) -> __m512d { |
4936 | static_assert_uimm_bits!(IMM8, 4); |
4937 | static_assert_sae!(SAE); |
_mm512_mask_range_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), k, a, b)
4939 | } |
4940 | |
4941 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
4942 | /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst. |
4943 | /// Lower 2 bits of IMM8 specifies the operation control: |
4944 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
4945 | /// Upper 2 bits of IMM8 specifies the sign control: |
4946 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
4947 | /// |
4948 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_pd&ig_expand=5192) |
4949 | #[inline ] |
4950 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4951 | #[cfg_attr (test, assert_instr(vrangepd, IMM8 = 5))] |
4952 | #[rustc_legacy_const_generics (2)] |
4953 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4954 | pub fn _mm_range_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d { |
4955 | static_assert_uimm_bits!(IMM8, 4); |
_mm_mask_range_pd::<IMM8>(_mm_setzero_pd(), 0xff, a, b)
4957 | } |
4958 | |
4959 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
4960 | /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using |
4961 | /// writemask k (elements are copied from src to dst if the corresponding mask bit is not set). |
4962 | /// Lower 2 bits of IMM8 specifies the operation control: |
4963 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
4964 | /// Upper 2 bits of IMM8 specifies the sign control: |
4965 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
4966 | /// |
4967 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_pd&ig_expand=5190) |
4968 | #[inline ] |
4969 | #[target_feature (enable = "avx512dq,avx512vl" )] |
4970 | #[cfg_attr (test, assert_instr(vrangepd, IMM8 = 5))] |
4971 | #[rustc_legacy_const_generics (4)] |
4972 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
4973 | pub fn _mm_mask_range_pd<const IMM8: i32>( |
4974 | src: __m128d, |
4975 | k: __mmask8, |
4976 | a: __m128d, |
4977 | b: __m128d, |
4978 | ) -> __m128d { |
4979 | unsafe { |
4980 | static_assert_uimm_bits!(IMM8, 4); |
transmute(vrangepd_128(
4982 | a.as_f64x2(), |
4983 | b.as_f64x2(), |
4984 | IMM8, |
4985 | src.as_f64x2(), |
4986 | k, |
4987 | )) |
4988 | } |
4989 | } |
4990 | |
4991 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
4992 | /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using |
4993 | /// zeromask k (elements are zeroed out if the corresponding mask bit is not set). |
4994 | /// Lower 2 bits of IMM8 specifies the operation control: |
4995 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
4996 | /// Upper 2 bits of IMM8 specifies the sign control: |
4997 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
4998 | /// |
4999 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_pd&ig_expand=5191) |
5000 | #[inline ] |
5001 | #[target_feature (enable = "avx512dq,avx512vl" )] |
5002 | #[cfg_attr (test, assert_instr(vrangepd, IMM8 = 5))] |
5003 | #[rustc_legacy_const_generics (3)] |
5004 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5005 | pub fn _mm_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { |
5006 | static_assert_uimm_bits!(IMM8, 4); |
_mm_mask_range_pd::<IMM8>(_mm_setzero_pd(), k, a, b)
5008 | } |
5009 | |
5010 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
5011 | /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst. |
5012 | /// Lower 2 bits of IMM8 specifies the operation control: |
5013 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5014 | /// Upper 2 bits of IMM8 specifies the sign control: |
5015 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5016 | /// |
5017 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_range_pd&ig_expand=5195) |
5018 | #[inline ] |
5019 | #[target_feature (enable = "avx512dq,avx512vl" )] |
5020 | #[cfg_attr (test, assert_instr(vrangepd, IMM8 = 5))] |
5021 | #[rustc_legacy_const_generics (2)] |
5022 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5023 | pub fn _mm256_range_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d { |
5024 | static_assert_uimm_bits!(IMM8, 4); |
_mm256_mask_range_pd::<IMM8>(_mm256_setzero_pd(), 0xff, a, b)
5026 | } |
5027 | |
5028 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
5029 | /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using |
5030 | /// writemask k (elements are copied from src to dst if the corresponding mask bit is not set). |
5031 | /// Lower 2 bits of IMM8 specifies the operation control: |
5032 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5033 | /// Upper 2 bits of IMM8 specifies the sign control: |
5034 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5035 | /// |
5036 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_range_pd&ig_expand=5193) |
5037 | #[inline ] |
5038 | #[target_feature (enable = "avx512dq,avx512vl" )] |
5039 | #[cfg_attr (test, assert_instr(vrangepd, IMM8 = 5))] |
5040 | #[rustc_legacy_const_generics (4)] |
5041 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5042 | pub fn _mm256_mask_range_pd<const IMM8: i32>( |
5043 | src: __m256d, |
5044 | k: __mmask8, |
5045 | a: __m256d, |
5046 | b: __m256d, |
5047 | ) -> __m256d { |
5048 | unsafe { |
5049 | static_assert_uimm_bits!(IMM8, 4); |
transmute(vrangepd_256(
5051 | a.as_f64x4(), |
5052 | b.as_f64x4(), |
5053 | IMM8, |
5054 | src.as_f64x4(), |
5055 | k, |
5056 | )) |
5057 | } |
5058 | } |
5059 | |
5060 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
5061 | /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using |
5062 | /// zeromask k (elements are zeroed out if the corresponding mask bit is not set). |
5063 | /// Lower 2 bits of IMM8 specifies the operation control: |
5064 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5065 | /// Upper 2 bits of IMM8 specifies the sign control: |
5066 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5067 | /// |
5068 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_range_pd&ig_expand=5194) |
5069 | #[inline ] |
5070 | #[target_feature (enable = "avx512dq,avx512vl" )] |
5071 | #[cfg_attr (test, assert_instr(vrangepd, IMM8 = 5))] |
5072 | #[rustc_legacy_const_generics (3)] |
5073 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5074 | pub fn _mm256_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { |
5075 | static_assert_uimm_bits!(IMM8, 4); |
_mm256_mask_range_pd::<IMM8>(_mm256_setzero_pd(), k, a, b)
5077 | } |
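
// Editorial sketch, not part of the original source; the helper name is
// hypothetical. With the writemask k = 0b0101, lanes 0 and 2 of the result come
// from the range operation while lanes 1 and 3 are copied from `src`; the
// zero-mask form above would zero those lanes instead. IMM8 = 0b0000 requests a
// plain min with the sign taken from `a`, per the bit table in the docs.
#[allow(dead_code)]
#[target_feature(enable = "avx512dq,avx512vl")]
unsafe fn _example_masked_range_min_pd(src: __m256d, a: __m256d, b: __m256d) -> __m256d {
    _mm256_mask_range_pd::<0b0000>(src, 0b0101, a, b)
}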
5078 | |
5079 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
5080 | /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst. |
5081 | /// Lower 2 bits of IMM8 specifies the operation control: |
5082 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5083 | /// Upper 2 bits of IMM8 specifies the sign control: |
5084 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5085 | /// |
5086 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_pd&ig_expand=5198) |
5087 | #[inline ] |
5088 | #[target_feature (enable = "avx512dq" )] |
5089 | #[cfg_attr (test, assert_instr(vrangepd, IMM8 = 5))] |
5090 | #[rustc_legacy_const_generics (2)] |
5091 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5092 | pub fn _mm512_range_pd<const IMM8: i32>(a: __m512d, b: __m512d) -> __m512d { |
5093 | static_assert_uimm_bits!(IMM8, 4); |
5094 | _mm512_mask_range_pd::<IMM8>(_mm512_setzero_pd(), 0xff, a, b) |
5095 | } |
5096 | |
5097 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
5098 | /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using |
5099 | /// writemask k (elements are copied from src to dst if the corresponding mask bit is not set). |
5100 | /// Lower 2 bits of IMM8 specifies the operation control: |
5101 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5102 | /// Upper 2 bits of IMM8 specifies the sign control: |
5103 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5104 | /// |
5105 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_pd&ig_expand=5196) |
5106 | #[inline ] |
5107 | #[target_feature (enable = "avx512dq" )] |
5108 | #[cfg_attr (test, assert_instr(vrangepd, IMM8 = 5))] |
5109 | #[rustc_legacy_const_generics (4)] |
5110 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5111 | pub fn _mm512_mask_range_pd<const IMM8: i32>( |
5112 | src: __m512d, |
5113 | k: __mmask8, |
5114 | a: __m512d, |
5115 | b: __m512d, |
5116 | ) -> __m512d { |
5117 | unsafe { |
5118 | static_assert_uimm_bits!(IMM8, 4); |
5119 | transmute(vrangepd_512( |
5120 | a.as_f64x8(), |
5121 | b.as_f64x8(), |
5122 | IMM8, |
5123 | src.as_f64x8(), |
5124 | k, |
5125 | _MM_FROUND_CUR_DIRECTION, |
5126 | )) |
5127 | } |
5128 | } |
5129 | |
5130 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
5131 | /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using |
5132 | /// zeromask k (elements are zeroed out if the corresponding mask bit is not set). |
5133 | /// Lower 2 bits of IMM8 specifies the operation control: |
5134 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5135 | /// Upper 2 bits of IMM8 specifies the sign control: |
5136 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5137 | /// |
5138 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_pd&ig_expand=5197) |
5139 | #[inline ] |
5140 | #[target_feature (enable = "avx512dq" )] |
5141 | #[cfg_attr (test, assert_instr(vrangepd, IMM8 = 5))] |
5142 | #[rustc_legacy_const_generics (3)] |
5143 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5144 | pub fn _mm512_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { |
5145 | static_assert_uimm_bits!(IMM8, 4); |
5146 | _mm512_mask_range_pd::<IMM8>(_mm512_setzero_pd(), k, a, b) |
5147 | } |
5148 | |
5149 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
5150 | /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst. |
5151 | /// Lower 2 bits of IMM8 specifies the operation control: |
5152 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5153 | /// Upper 2 bits of IMM8 specifies the sign control: |
5154 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5155 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
5156 | /// |
5157 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_round_ps&ig_expand=5213) |
5158 | #[inline ] |
5159 | #[target_feature (enable = "avx512dq" )] |
5160 | #[cfg_attr (test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))] |
5161 | #[rustc_legacy_const_generics (2, 3)] |
5162 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5163 | pub fn _mm512_range_round_ps<const IMM8: i32, const SAE: i32>(a: __m512, b: __m512) -> __m512 { |
5164 | static_assert_uimm_bits!(IMM8, 4); |
5165 | static_assert_sae!(SAE); |
5166 | _mm512_mask_range_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), 0xffff, a, b) |
5167 | } |
5168 | |
5169 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
5170 | /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using |
5171 | /// writemask k (elements are copied from src to dst if the corresponding mask bit is not set). |
5172 | /// Lower 2 bits of IMM8 specifies the operation control: |
5173 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5174 | /// Upper 2 bits of IMM8 specifies the sign control: |
5175 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5176 | /// |
5177 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_round_ps&ig_expand=5211) |
5178 | #[inline ] |
5179 | #[target_feature (enable = "avx512dq" )] |
5180 | #[cfg_attr (test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))] |
5181 | #[rustc_legacy_const_generics (4, 5)] |
5182 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5183 | pub fn _mm512_mask_range_round_ps<const IMM8: i32, const SAE: i32>( |
5184 | src: __m512, |
5185 | k: __mmask16, |
5186 | a: __m512, |
5187 | b: __m512, |
5188 | ) -> __m512 { |
5189 | unsafe { |
5190 | static_assert_uimm_bits!(IMM8, 4); |
5191 | static_assert_sae!(SAE); |
5192 | transmute(vrangeps_512( |
5193 | a.as_f32x16(), |
5194 | b.as_f32x16(), |
5195 | IMM8, |
5196 | src.as_f32x16(), |
5197 | k, |
5198 | SAE, |
5199 | )) |
5200 | } |
5201 | } |
5202 | |
5203 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
5204 | /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using |
5205 | /// zeromask k (elements are zeroed out if the corresponding mask bit is not set). |
5206 | /// Lower 2 bits of IMM8 specifies the operation control: |
5207 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5208 | /// Upper 2 bits of IMM8 specifies the sign control: |
5209 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5210 | /// |
5211 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_round_ps&ig_expand=5212) |
5212 | #[inline ] |
5213 | #[target_feature (enable = "avx512dq" )] |
5214 | #[cfg_attr (test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))] |
5215 | #[rustc_legacy_const_generics (3, 4)] |
5216 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5217 | pub fn _mm512_maskz_range_round_ps<const IMM8: i32, const SAE: i32>( |
5218 | k: __mmask16, |
5219 | a: __m512, |
5220 | b: __m512, |
5221 | ) -> __m512 { |
5222 | static_assert_uimm_bits!(IMM8, 4); |
5223 | static_assert_sae!(SAE); |
5224 | _mm512_mask_range_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), k, a, b) |
5225 | } |
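
// Editorial sketch, not part of the original source; the helper name is
// hypothetical. The *_round_* range variants take SAE as a second const
// parameter: _MM_FROUND_NO_EXC suppresses floating-point exception reporting
// for the whole operation, while _MM_FROUND_CUR_DIRECTION leaves exception
// behaviour unchanged. IMM8 = 0b1011 selects absolute max (op = 0b11) with the
// sign bit cleared, per the bit table in the docs.
#[allow(dead_code)]
#[target_feature(enable = "avx512dq")]
unsafe fn _example_range_round_ps_no_exc(a: __m512, b: __m512) -> __m512 {
    _mm512_range_round_ps::<0b1011, { _MM_FROUND_NO_EXC }>(a, b)
}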
5226 | |
5227 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
5228 | /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst. |
5229 | /// Lower 2 bits of IMM8 specifies the operation control: |
5230 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5231 | /// Upper 2 bits of IMM8 specifies the sign control: |
5232 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5233 | /// |
5234 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_ps&ig_expand=5201) |
5235 | #[inline ] |
5236 | #[target_feature (enable = "avx512dq,avx512vl" )] |
5237 | #[cfg_attr (test, assert_instr(vrangeps, IMM8 = 5))] |
5238 | #[rustc_legacy_const_generics (2)] |
5239 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5240 | pub fn _mm_range_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 { |
5241 | static_assert_uimm_bits!(IMM8, 4); |
5242 | _mm_mask_range_ps::<IMM8>(_mm_setzero_ps(), 0xff, a, b) |
5243 | } |
5244 | |
5245 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
5246 | /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using |
5247 | /// writemask k (elements are copied from src to dst if the corresponding mask bit is not set). |
5248 | /// Lower 2 bits of IMM8 specifies the operation control: |
5249 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5250 | /// Upper 2 bits of IMM8 specifies the sign control: |
5251 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5252 | /// |
5253 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_ps&ig_expand=5199) |
5254 | #[inline ] |
5255 | #[target_feature (enable = "avx512dq,avx512vl" )] |
5256 | #[cfg_attr (test, assert_instr(vrangeps, IMM8 = 5))] |
5257 | #[rustc_legacy_const_generics (4)] |
5258 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5259 | pub fn _mm_mask_range_ps<const IMM8: i32>( |
5260 | src: __m128, |
5261 | k: __mmask8, |
5262 | a: __m128, |
5263 | b: __m128, |
5264 | ) -> __m128 { |
5265 | unsafe { |
5266 | static_assert_uimm_bits!(IMM8, 4); |
5267 | transmute(vrangeps_128( |
5268 | a.as_f32x4(), |
5269 | b.as_f32x4(), |
5270 | IMM8, |
5271 | src.as_f32x4(), |
5272 | k, |
5273 | )) |
5274 | } |
5275 | } |
5276 | |
5277 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
5278 | /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using |
5279 | /// zeromask k (elements are zeroed out if the corresponding mask bit is not set). |
5280 | /// Lower 2 bits of IMM8 specifies the operation control: |
5281 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5282 | /// Upper 2 bits of IMM8 specifies the sign control: |
5283 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5284 | /// |
5285 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_ps&ig_expand=5200) |
5286 | #[inline ] |
5287 | #[target_feature (enable = "avx512dq,avx512vl" )] |
5288 | #[cfg_attr (test, assert_instr(vrangeps, IMM8 = 5))] |
5289 | #[rustc_legacy_const_generics (3)] |
5290 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5291 | pub fn _mm_maskz_range_ps<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 { |
5292 | static_assert_uimm_bits!(IMM8, 4); |
5293 | _mm_mask_range_ps::<IMM8>(_mm_setzero_ps(), k, a, b) |
5294 | } |
5295 | |
5296 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
5297 | /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst. |
5298 | /// Lower 2 bits of IMM8 specifies the operation control: |
5299 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5300 | /// Upper 2 bits of IMM8 specifies the sign control: |
5301 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5302 | /// |
5303 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_range_ps&ig_expand=5204) |
5304 | #[inline ] |
5305 | #[target_feature (enable = "avx512dq,avx512vl" )] |
5306 | #[cfg_attr (test, assert_instr(vrangeps, IMM8 = 5))] |
5307 | #[rustc_legacy_const_generics (2)] |
5308 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5309 | pub fn _mm256_range_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 { |
5310 | static_assert_uimm_bits!(IMM8, 4); |
5311 | _mm256_mask_range_ps::<IMM8>(_mm256_setzero_ps(), 0xff, a, b) |
5312 | } |
5313 | |
5314 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
5315 | /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using |
5316 | /// writemask k (elements are copied from src to dst if the corresponding mask bit is not set). |
5317 | /// Lower 2 bits of IMM8 specifies the operation control: |
5318 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5319 | /// Upper 2 bits of IMM8 specifies the sign control: |
5320 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5321 | /// |
5322 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_range_ps&ig_expand=5202) |
5323 | #[inline ] |
5324 | #[target_feature (enable = "avx512dq,avx512vl" )] |
5325 | #[cfg_attr (test, assert_instr(vrangeps, IMM8 = 5))] |
5326 | #[rustc_legacy_const_generics (4)] |
5327 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5328 | pub fn _mm256_mask_range_ps<const IMM8: i32>( |
5329 | src: __m256, |
5330 | k: __mmask8, |
5331 | a: __m256, |
5332 | b: __m256, |
5333 | ) -> __m256 { |
5334 | unsafe { |
5335 | static_assert_uimm_bits!(IMM8, 4); |
5336 | transmute(vrangeps_256( |
5337 | a.as_f32x8(), |
5338 | b.as_f32x8(), |
5339 | IMM8, |
5340 | src.as_f32x8(), |
5341 | k, |
5342 | )) |
5343 | } |
5344 | } |
5345 | |
5346 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
5347 | /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using |
5348 | /// zeromask k (elements are zeroed out if the corresponding mask bit is not set). |
5349 | /// Lower 2 bits of IMM8 specifies the operation control: |
5350 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5351 | /// Upper 2 bits of IMM8 specifies the sign control: |
5352 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5353 | /// |
5354 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_range_ps&ig_expand=5203) |
5355 | #[inline ] |
5356 | #[target_feature (enable = "avx512dq,avx512vl" )] |
5357 | #[cfg_attr (test, assert_instr(vrangeps, IMM8 = 5))] |
5358 | #[rustc_legacy_const_generics (3)] |
5359 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5360 | pub fn _mm256_maskz_range_ps<const IMM8: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 { |
5361 | static_assert_uimm_bits!(IMM8, 4); |
5362 | _mm256_mask_range_ps::<IMM8>(_mm256_setzero_ps(), k, a, b) |
5363 | } |
5364 | |
5365 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
5366 | /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst. |
5367 | /// Lower 2 bits of IMM8 specifies the operation control: |
5368 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5369 | /// Upper 2 bits of IMM8 specifies the sign control: |
5370 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5371 | /// |
5372 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_ps&ig_expand=5207) |
5373 | #[inline ] |
5374 | #[target_feature (enable = "avx512dq" )] |
5375 | #[cfg_attr (test, assert_instr(vrangeps, IMM8 = 5))] |
5376 | #[rustc_legacy_const_generics (2)] |
5377 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5378 | pub fn _mm512_range_ps<const IMM8: i32>(a: __m512, b: __m512) -> __m512 { |
5379 | static_assert_uimm_bits!(IMM8, 4); |
5380 | _mm512_mask_range_ps::<IMM8>(_mm512_setzero_ps(), 0xffff, a, b) |
5381 | } |
5382 | |
5383 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
5384 | /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using |
5385 | /// writemask k (elements are copied from src to dst if the corresponding mask bit is not set). |
5386 | /// Lower 2 bits of IMM8 specifies the operation control: |
5387 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5388 | /// Upper 2 bits of IMM8 specifies the sign control: |
5389 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5390 | /// |
5391 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_ps&ig_expand=5205) |
5392 | #[inline ] |
5393 | #[target_feature (enable = "avx512dq" )] |
5394 | #[cfg_attr (test, assert_instr(vrangeps, IMM8 = 5))] |
5395 | #[rustc_legacy_const_generics (4)] |
5396 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5397 | pub fn _mm512_mask_range_ps<const IMM8: i32>( |
5398 | src: __m512, |
5399 | k: __mmask16, |
5400 | a: __m512, |
5401 | b: __m512, |
5402 | ) -> __m512 { |
5403 | unsafe { |
5404 | static_assert_uimm_bits!(IMM8, 4); |
5405 | transmute(vrangeps_512( |
5406 | a.as_f32x16(), |
5407 | b.as_f32x16(), |
5408 | IMM8, |
5409 | src.as_f32x16(), |
5410 | k, |
5411 | _MM_FROUND_CUR_DIRECTION, |
5412 | )) |
5413 | } |
5414 | } |
5415 | |
5416 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
5417 | /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using |
5418 | /// zeromask k (elements are zeroed out if the corresponding mask bit is not set). |
5419 | /// Lower 2 bits of IMM8 specifies the operation control: |
5420 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5421 | /// Upper 2 bits of IMM8 specifies the sign control: |
5422 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5423 | /// |
5424 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_ps&ig_expand=5206) |
5425 | #[inline ] |
5426 | #[target_feature (enable = "avx512dq" )] |
5427 | #[cfg_attr (test, assert_instr(vrangeps, IMM8 = 5))] |
5428 | #[rustc_legacy_const_generics (3)] |
5429 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5430 | pub fn _mm512_maskz_range_ps<const IMM8: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 { |
5431 | static_assert_uimm_bits!(IMM8, 4); |
5432 | _mm512_mask_range_ps::<IMM8>(_mm512_setzero_ps(), k, a, b) |
5433 | } |
5434 | |
5435 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
5436 | /// double-precision (64-bit) floating-point element in a and b, store the result in the lower element |
5437 | /// of dst, and copy the upper element from a to the upper element of dst. |
5438 | /// Lower 2 bits of IMM8 specifies the operation control: |
5439 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5440 | /// Upper 2 bits of IMM8 specifies the sign control: |
5441 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5442 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
5443 | /// |
5444 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_round_sd&ig_expand=5216) |
5445 | #[inline ] |
5446 | #[target_feature (enable = "avx512dq" )] |
5447 | #[cfg_attr (test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))] |
5448 | #[rustc_legacy_const_generics (2, 3)] |
5449 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5450 | pub fn _mm_range_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d { |
5451 | static_assert_uimm_bits!(IMM8, 4); |
5452 | static_assert_sae!(SAE); |
5453 | _mm_mask_range_round_sd::<IMM8, SAE>(_mm_setzero_pd(), 0xff, a, b) |
5454 | } |
5455 | |
5456 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
5457 | /// double-precision (64-bit) floating-point element in a and b, store the result in the lower element |
5458 | /// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the |
5459 | /// upper element from a to the upper element of dst. |
5460 | /// Lower 2 bits of IMM8 specifies the operation control: |
5461 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5462 | /// Upper 2 bits of IMM8 specifies the sign control: |
5463 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5464 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
5465 | /// |
5466 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_round_sd&ig_expand=5214) |
5467 | #[inline ] |
5468 | #[target_feature (enable = "avx512dq" )] |
5469 | #[cfg_attr (test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))] |
5470 | #[rustc_legacy_const_generics (4, 5)] |
5471 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5472 | pub fn _mm_mask_range_round_sd<const IMM8: i32, const SAE: i32>( |
5473 | src: __m128d, |
5474 | k: __mmask8, |
5475 | a: __m128d, |
5476 | b: __m128d, |
5477 | ) -> __m128d { |
5478 | unsafe { |
5479 | static_assert_uimm_bits!(IMM8, 4); |
5480 | static_assert_sae!(SAE); |
5481 | transmute(vrangesd( |
5482 | a.as_f64x2(), |
5483 | b.as_f64x2(), |
5484 | src.as_f64x2(), |
5485 | k, |
5486 | IMM8, |
5487 | SAE, |
5488 | )) |
5489 | } |
5490 | } |
5491 | |
5492 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
5493 | /// double-precision (64-bit) floating-point element in a and b, store the result in the lower element |
5494 | /// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper |
5495 | /// element from a to the upper element of dst. |
5496 | /// Lower 2 bits of IMM8 specifies the operation control: |
5497 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5498 | /// Upper 2 bits of IMM8 specifies the sign control: |
5499 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5500 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
5501 | /// |
5502 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_round_sd&ig_expand=5215) |
5503 | #[inline ] |
5504 | #[target_feature (enable = "avx512dq" )] |
5505 | #[cfg_attr (test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))] |
5506 | #[rustc_legacy_const_generics (3, 4)] |
5507 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5508 | pub fn _mm_maskz_range_round_sd<const IMM8: i32, const SAE: i32>( |
5509 | k: __mmask8, |
5510 | a: __m128d, |
5511 | b: __m128d, |
5512 | ) -> __m128d { |
5513 | static_assert_uimm_bits!(IMM8, 4); |
5514 | static_assert_sae!(SAE); |
5515 | _mm_mask_range_round_sd::<IMM8, SAE>(_mm_setzero_pd(), k, a, b) |
5516 | } |
5517 | |
5518 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
5519 | /// double-precision (64-bit) floating-point element in a and b, store the result in the lower element |
5520 | /// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the |
5521 | /// upper element from a to the upper element of dst. |
5522 | /// Lower 2 bits of IMM8 specifies the operation control: |
5523 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5524 | /// Upper 2 bits of IMM8 specifies the sign control: |
5525 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5526 | /// |
5527 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_sd&ig_expand=5220) |
5528 | #[inline ] |
5529 | #[target_feature (enable = "avx512dq" )] |
5530 | #[cfg_attr (test, assert_instr(vrangesd, IMM8 = 5))] |
5531 | #[rustc_legacy_const_generics (4)] |
5532 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5533 | pub fn _mm_mask_range_sd<const IMM8: i32>( |
5534 | src: __m128d, |
5535 | k: __mmask8, |
5536 | a: __m128d, |
5537 | b: __m128d, |
5538 | ) -> __m128d { |
5539 | unsafe { |
5540 | static_assert_uimm_bits!(IMM8, 4); |
5541 | transmute(vrangesd( |
5542 | a.as_f64x2(), |
5543 | b.as_f64x2(), |
5544 | src.as_f64x2(), |
5545 | k, |
5546 | IMM8, |
5547 | _MM_FROUND_CUR_DIRECTION, |
5548 | )) |
5549 | } |
5550 | } |
5551 | |
5552 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
5553 | /// double-precision (64-bit) floating-point element in a and b, store the result in the lower element |
5554 | /// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper |
5555 | /// element from a to the upper element of dst. |
5556 | /// Lower 2 bits of IMM8 specifies the operation control: |
5557 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5558 | /// Upper 2 bits of IMM8 specifies the sign control: |
5559 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5560 | /// |
5561 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_sd&ig_expand=5221) |
5562 | #[inline ] |
5563 | #[target_feature (enable = "avx512dq" )] |
5564 | #[cfg_attr (test, assert_instr(vrangesd, IMM8 = 5))] |
5565 | #[rustc_legacy_const_generics (3)] |
5566 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5567 | pub fn _mm_maskz_range_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { |
5568 | static_assert_uimm_bits!(IMM8, 4); |
5569 | _mm_mask_range_sd::<IMM8>(_mm_setzero_pd(), k, a, b) |
5570 | } |
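
// Editorial sketch, not part of the original source; the helper name is
// hypothetical. The scalar `sd` form only computes on element 0: element 1 of
// the result always comes from `a`, and because mask bit 0 is set below,
// element 0 receives the range result (a clear bit would zero it in this
// maskz form). IMM8 = 0b1010 selects absolute min (op = 0b10, sign cleared).
#[allow(dead_code)]
#[target_feature(enable = "avx512dq")]
unsafe fn _example_range_sd_abs_min(a: __m128d, b: __m128d) -> __m128d {
    _mm_maskz_range_sd::<0b1010>(0b1, a, b)
}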
5571 | |
5572 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
5573 | /// single-precision (32-bit) floating-point element in a and b, store the result in the lower element |
5574 | /// of dst, and copy the upper 3 packed elements from a to the upper elements of dst. |
5575 | /// Lower 2 bits of IMM8 specifies the operation control: |
5576 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5577 | /// Upper 2 bits of IMM8 specifies the sign control: |
5578 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5579 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
5580 | /// |
5581 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_round_ss&ig_expand=5219) |
5582 | #[inline ] |
5583 | #[target_feature (enable = "avx512dq" )] |
5584 | #[cfg_attr (test, assert_instr(vrangess, IMM8 = 5, SAE = 8))] |
5585 | #[rustc_legacy_const_generics (2, 3)] |
5586 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5587 | pub fn _mm_range_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 { |
5588 | static_assert_uimm_bits!(IMM8, 4); |
5589 | static_assert_sae!(SAE); |
5590 | _mm_mask_range_round_ss::<IMM8, SAE>(_mm_setzero_ps(), 0xff, a, b) |
5591 | } |
5592 | |
5593 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
5594 | /// single-precision (32-bit) floating-point element in a and b, store the result in the lower element |
5595 | /// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the |
5596 | /// upper 3 packed elements from a to the upper elements of dst. |
5597 | /// Lower 2 bits of IMM8 specifies the operation control: |
5598 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5599 | /// Upper 2 bits of IMM8 specifies the sign control: |
5600 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5601 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
5602 | /// |
5603 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_round_ss&ig_expand=5217) |
5604 | #[inline ] |
5605 | #[target_feature (enable = "avx512dq" )] |
5606 | #[cfg_attr (test, assert_instr(vrangess, IMM8 = 5, SAE = 8))] |
5607 | #[rustc_legacy_const_generics (4, 5)] |
5608 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5609 | pub fn _mm_mask_range_round_ss<const IMM8: i32, const SAE: i32>( |
5610 | src: __m128, |
5611 | k: __mmask8, |
5612 | a: __m128, |
5613 | b: __m128, |
5614 | ) -> __m128 { |
5615 | unsafe { |
5616 | static_assert_uimm_bits!(IMM8, 4); |
5617 | static_assert_sae!(SAE); |
5618 | transmute(vrangess( |
5619 | a.as_f32x4(), |
5620 | b.as_f32x4(), |
5621 | src.as_f32x4(), |
5622 | k, |
5623 | IMM8, |
5624 | SAE, |
5625 | )) |
5626 | } |
5627 | } |
5628 | |
5629 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
5630 | /// single-precision (32-bit) floating-point element in a and b, store the result in the lower element |
5631 | /// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper |
5632 | /// 3 packed elements from a to the upper elements of dst. |
5633 | /// Lower 2 bits of IMM8 specifies the operation control: |
5634 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5635 | /// Upper 2 bits of IMM8 specifies the sign control: |
5636 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5637 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
5638 | /// |
5639 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_round_ss&ig_expand=5218) |
5640 | #[inline ] |
5641 | #[target_feature (enable = "avx512dq" )] |
5642 | #[cfg_attr (test, assert_instr(vrangess, IMM8 = 5, SAE = 8))] |
5643 | #[rustc_legacy_const_generics (3, 4)] |
5644 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5645 | pub fn _mm_maskz_range_round_ss<const IMM8: i32, const SAE: i32>( |
5646 | k: __mmask8, |
5647 | a: __m128, |
5648 | b: __m128, |
5649 | ) -> __m128 { |
5650 | static_assert_uimm_bits!(IMM8, 4); |
5651 | static_assert_sae!(SAE); |
5652 | _mm_mask_range_round_ss::<IMM8, SAE>(_mm_setzero_ps(), k, a, b) |
5653 | } |
5654 | |
5655 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
5656 | /// single-precision (32-bit) floating-point element in a and b, store the result in the lower element |
5657 | /// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the |
5658 | /// upper 3 packed elements from a to the upper elements of dst. |
5659 | /// Lower 2 bits of IMM8 specifies the operation control: |
5660 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5661 | /// Upper 2 bits of IMM8 specifies the sign control: |
5662 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5663 | /// |
5664 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_ss&ig_expand=5222) |
5665 | #[inline ] |
5666 | #[target_feature (enable = "avx512dq" )] |
5667 | #[cfg_attr (test, assert_instr(vrangess, IMM8 = 5))] |
5668 | #[rustc_legacy_const_generics (4)] |
5669 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5670 | pub fn _mm_mask_range_ss<const IMM8: i32>( |
5671 | src: __m128, |
5672 | k: __mmask8, |
5673 | a: __m128, |
5674 | b: __m128, |
5675 | ) -> __m128 { |
5676 | unsafe { |
5677 | static_assert_uimm_bits!(IMM8, 4); |
5678 | transmute(vrangess( |
5679 | a.as_f32x4(), |
5680 | b.as_f32x4(), |
5681 | src.as_f32x4(), |
5682 | k, |
5683 | IMM8, |
5684 | _MM_FROUND_CUR_DIRECTION, |
5685 | )) |
5686 | } |
5687 | } |
5688 | |
5689 | /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
5690 | /// single-precision (32-bit) floating-point element in a and b, store the result in the lower element |
5691 | /// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper |
5692 | /// 3 packed elements from a to the upper elements of dst. |
5693 | /// Lower 2 bits of IMM8 specifies the operation control: |
5694 | /// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. |
5695 | /// Upper 2 bits of IMM8 specifies the sign control: |
5696 | /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
5697 | /// |
5698 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_ss&ig_expand=5223) |
5699 | #[inline ] |
5700 | #[target_feature (enable = "avx512dq" )] |
5701 | #[cfg_attr (test, assert_instr(vrangess, IMM8 = 5))] |
5702 | #[rustc_legacy_const_generics (3)] |
5703 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5704 | pub fn _mm_maskz_range_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 { |
5705 | static_assert_uimm_bits!(IMM8, 4); |
5706 | _mm_mask_range_ss::<IMM8>(_mm_setzero_ps(), k, a, b) |
5707 | } |
5708 | |
5709 | // Reduce // |
5710 | |
5711 | /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
5712 | /// the number of bits specified by imm8, and store the results in dst. |
5713 | /// Rounding is done according to the imm8 parameter, which can be one of: |
5714 | /// |
5715 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
5716 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
5717 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
5718 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
5719 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5720 | /// |
5721 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
5722 | /// |
5723 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_round_pd&ig_expand=5438) |
5724 | #[inline ] |
5725 | #[target_feature (enable = "avx512dq" )] |
5726 | #[cfg_attr (test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))] |
5727 | #[rustc_legacy_const_generics (1, 2)] |
5728 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5729 | pub fn _mm512_reduce_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d { |
5730 | static_assert_uimm_bits!(IMM8, 8); |
5731 | static_assert_sae!(SAE); |
5732 | _mm512_mask_reduce_round_pd::<IMM8, SAE>(_mm512_undefined_pd(), 0xff, a) |
5733 | } |
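
// Editorial sketch, not part of the original source; the helper name is
// hypothetical. For the reduce family the rounding mode is encoded in IMM8
// itself (one of the _MM_FROUND_* values listed above), while the separate SAE
// const parameter only controls exception suppression. Here each element is
// reduced with round-to-nearest and exceptions suppressed.
#[allow(dead_code)]
#[target_feature(enable = "avx512dq")]
unsafe fn _example_reduce_round_pd_no_exc(a: __m512d) -> __m512d {
    _mm512_reduce_round_pd::<{ _MM_FROUND_TO_NEAREST_INT }, { _MM_FROUND_NO_EXC }>(a)
}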
5734 | |
5735 | /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
5736 | /// the number of bits specified by imm8, and store the results in dst using writemask k (elements are |
5737 | /// copied from src to dst if the corresponding mask bit is not set). |
5738 | /// Rounding is done according to the imm8 parameter, which can be one of: |
5739 | /// |
5740 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
5741 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
5742 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
5743 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
5744 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5745 | /// |
5746 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
5747 | /// |
5748 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_round_pd&ig_expand=5436) |
5749 | #[inline ] |
5750 | #[target_feature (enable = "avx512dq" )] |
5751 | #[cfg_attr (test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))] |
5752 | #[rustc_legacy_const_generics (3, 4)] |
5753 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5754 | pub fn _mm512_mask_reduce_round_pd<const IMM8: i32, const SAE: i32>( |
5755 | src: __m512d, |
5756 | k: __mmask8, |
5757 | a: __m512d, |
5758 | ) -> __m512d { |
5759 | unsafe { |
5760 | static_assert_uimm_bits!(IMM8, 8); |
5761 | static_assert_sae!(SAE); |
5762 | transmute(vreducepd_512(a.as_f64x8(), IMM8, src.as_f64x8(), k, SAE)) |
5763 | } |
5764 | } |
5765 | |
5766 | /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
5767 | /// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are |
5768 | /// zeroed out if the corresponding mask bit is not set). |
5769 | /// Rounding is done according to the imm8 parameter, which can be one of: |
5770 | /// |
5771 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
5772 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
5773 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
5774 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
5775 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5776 | /// |
5777 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
5778 | /// |
5779 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_round_pd&ig_expand=5437) |
5780 | #[inline ] |
5781 | #[target_feature (enable = "avx512dq" )] |
5782 | #[cfg_attr (test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))] |
5783 | #[rustc_legacy_const_generics (2, 3)] |
5784 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5785 | pub fn _mm512_maskz_reduce_round_pd<const IMM8: i32, const SAE: i32>( |
5786 | k: __mmask8, |
5787 | a: __m512d, |
5788 | ) -> __m512d { |
5789 | static_assert_uimm_bits!(IMM8, 8); |
5790 | static_assert_sae!(SAE); |
5791 | _mm512_mask_reduce_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), k, a) |
5792 | } |
5793 | |
5794 | /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
5795 | /// the number of bits specified by imm8, and store the results in dst. |
5796 | /// Rounding is done according to the imm8 parameter, which can be one of: |
5797 | /// |
5798 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
5799 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
5800 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
5801 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
5802 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5803 | /// |
5804 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_pd&ig_expand=5411) |
5805 | #[inline ] |
5806 | #[target_feature (enable = "avx512dq,avx512vl" )] |
5807 | #[cfg_attr (test, assert_instr(vreducepd, IMM8 = 0))] |
5808 | #[rustc_legacy_const_generics (1)] |
5809 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5810 | pub fn _mm_reduce_pd<const IMM8: i32>(a: __m128d) -> __m128d { |
5811 | static_assert_uimm_bits!(IMM8, 8); |
5812 | _mm_mask_reduce_pd::<IMM8>(_mm_undefined_pd(), 0xff, a) |
5813 | } |
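
// Editorial sketch, not part of the original source; the helper name is
// hypothetical and the IMM8 layout (fixed-point length in bits [7:4], rounding
// control in the low bits) is an assumption based on Intel's VREDUCEPD
// description. With a fixed-point length of 0 and truncation, the reduction is
// a - trunc(a), i.e. the signed fractional part of each element.
#[allow(dead_code)]
#[target_feature(enable = "avx512dq,avx512vl")]
unsafe fn _example_fractional_part_pd(a: __m128d) -> __m128d {
    _mm_reduce_pd::<{ (0 << 4) | _MM_FROUND_TO_ZERO }>(a)
}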
5814 | |
5815 | /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
5816 | /// the number of bits specified by imm8, and store the results in dst using writemask k (elements are |
5817 | /// copied from src to dst if the corresponding mask bit is not set). |
5818 | /// Rounding is done according to the imm8 parameter, which can be one of: |
5819 | /// |
5820 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
5821 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
5822 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
5823 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
5824 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5825 | /// |
5826 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_pd&ig_expand=5409) |
5827 | #[inline ] |
5828 | #[target_feature (enable = "avx512dq,avx512vl" )] |
5829 | #[cfg_attr (test, assert_instr(vreducepd, IMM8 = 0))] |
5830 | #[rustc_legacy_const_generics (3)] |
5831 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5832 | pub fn _mm_mask_reduce_pd<const IMM8: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { |
5833 | unsafe { |
5834 | static_assert_uimm_bits!(IMM8, 8); |
5835 | transmute(vreducepd_128(a.as_f64x2(), IMM8, src.as_f64x2(), k)) |
5836 | } |
5837 | } |
5838 | |
5839 | /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
5840 | /// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are |
5841 | /// zeroed out if the corresponding mask bit is not set). |
5842 | /// Rounding is done according to the imm8 parameter, which can be one of: |
5843 | /// |
5844 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
5845 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
5846 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
5847 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
5848 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5849 | /// |
5850 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_pd&ig_expand=5410) |
5851 | #[inline ] |
5852 | #[target_feature (enable = "avx512dq,avx512vl" )] |
5853 | #[cfg_attr (test, assert_instr(vreducepd, IMM8 = 0))] |
5854 | #[rustc_legacy_const_generics (2)] |
5855 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5856 | pub fn _mm_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d { |
5857 | static_assert_uimm_bits!(IMM8, 8); |
5858 | _mm_mask_reduce_pd::<IMM8>(_mm_setzero_pd(), k, a) |
5859 | } |
5860 | |
5861 | /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
5862 | /// the number of bits specified by imm8, and store the results in dst. |
5863 | /// Rounding is done according to the imm8 parameter, which can be one of: |
5864 | /// |
5865 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
5866 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
5867 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
5868 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
5869 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5870 | /// |
5871 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_pd&ig_expand=5414) |
5872 | #[inline ] |
5873 | #[target_feature (enable = "avx512dq,avx512vl" )] |
5874 | #[cfg_attr (test, assert_instr(vreducepd, IMM8 = 0))] |
5875 | #[rustc_legacy_const_generics (1)] |
5876 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5877 | pub fn _mm256_reduce_pd<const IMM8: i32>(a: __m256d) -> __m256d { |
5878 | static_assert_uimm_bits!(IMM8, 8); |
5879 | _mm256_mask_reduce_pd::<IMM8>(_mm256_undefined_pd(), 0xff, a) |
5880 | } |
5881 | |
5882 | /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
5883 | /// the number of bits specified by imm8, and store the results in dst using writemask k (elements are |
5884 | /// copied from src to dst if the corresponding mask bit is not set). |
5885 | /// Rounding is done according to the imm8 parameter, which can be one of: |
5886 | /// |
5887 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
5888 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
5889 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
5890 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
5891 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5892 | /// |
5893 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_pd&ig_expand=5412) |
5894 | #[inline ] |
5895 | #[target_feature (enable = "avx512dq,avx512vl" )] |
5896 | #[cfg_attr (test, assert_instr(vreducepd, IMM8 = 0))] |
5897 | #[rustc_legacy_const_generics (3)] |
5898 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5899 | pub fn _mm256_mask_reduce_pd<const IMM8: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { |
5900 | unsafe { |
5901 | static_assert_uimm_bits!(IMM8, 8); |
5902 | transmute(vreducepd_256(a.as_f64x4(), IMM8, src.as_f64x4(), k)) |
5903 | } |
5904 | } |
5905 | |
5906 | /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
5907 | /// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are |
5908 | /// zeroed out if the corresponding mask bit is not set). |
5909 | /// Rounding is done according to the imm8 parameter, which can be one of: |
5910 | /// |
5911 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
5912 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
5913 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
5914 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
5915 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5916 | /// |
5917 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_reduce_pd&ig_expand=5413) |
5918 | #[inline ] |
5919 | #[target_feature (enable = "avx512dq,avx512vl" )] |
5920 | #[cfg_attr (test, assert_instr(vreducepd, IMM8 = 0))] |
5921 | #[rustc_legacy_const_generics (2)] |
5922 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5923 | pub fn _mm256_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d { |
5924 | static_assert_uimm_bits!(IMM8, 8); |
5925 | _mm256_mask_reduce_pd::<IMM8>(_mm256_setzero_pd(), k, a) |
5926 | } |
5927 | |
5928 | /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
5929 | /// the number of bits specified by imm8, and store the results in dst. |
5930 | /// Rounding is done according to the imm8 parameter, which can be one of: |
5931 | /// |
5932 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
5933 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
5934 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
5935 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
5936 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5937 | /// |
5938 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_pd&ig_expand=5417) |
5939 | #[inline ] |
5940 | #[target_feature (enable = "avx512dq" )] |
5941 | #[cfg_attr (test, assert_instr(vreducepd, IMM8 = 0))] |
5942 | #[rustc_legacy_const_generics (1)] |
5943 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5944 | pub fn _mm512_reduce_pd<const IMM8: i32>(a: __m512d) -> __m512d { |
5945 | static_assert_uimm_bits!(IMM8, 8); |
5946 | _mm512_mask_reduce_pd::<IMM8>(_mm512_undefined_pd(), 0xff, a) |
5947 | } |
5948 | |
5949 | /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
5950 | /// the number of bits specified by imm8, and store the results in dst using writemask k (elements are |
5951 | /// copied from src to dst if the corresponding mask bit is not set). |
5952 | /// Rounding is done according to the imm8 parameter, which can be one of: |
5953 | /// |
5954 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
5955 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
5956 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
5957 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
5958 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5959 | /// |
5960 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_pd&ig_expand=5415) |
5961 | #[inline ] |
5962 | #[target_feature (enable = "avx512dq" )] |
5963 | #[cfg_attr (test, assert_instr(vreducepd, IMM8 = 0))] |
5964 | #[rustc_legacy_const_generics (3)] |
5965 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5966 | pub fn _mm512_mask_reduce_pd<const IMM8: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { |
5967 | unsafe { |
5968 | static_assert_uimm_bits!(IMM8, 8); |
5969 | transmute(vreducepd_512( |
5970 | a.as_f64x8(), |
5971 | IMM8, |
5972 | src.as_f64x8(), |
5973 | k, |
5974 | _MM_FROUND_CUR_DIRECTION, |
5975 | )) |
5976 | } |
5977 | } |
5978 | |
5979 | /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
5980 | /// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are |
5981 | /// zeroed out if the corresponding mask bit is not set). |
5982 | /// Rounding is done according to the imm8 parameter, which can be one of: |
5983 | /// |
5984 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
5985 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
5986 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
5987 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
5988 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5989 | /// |
5990 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_pd&ig_expand=5416) |
5991 | #[inline ] |
5992 | #[target_feature (enable = "avx512dq" )] |
5993 | #[cfg_attr (test, assert_instr(vreducepd, IMM8 = 0))] |
5994 | #[rustc_legacy_const_generics (2)] |
5995 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
5996 | pub fn _mm512_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d { |
5997 | static_assert_uimm_bits!(IMM8, 8); |
5998 | _mm512_mask_reduce_pd::<IMM8>(_mm512_setzero_pd(), k, a) |
5999 | } |
6000 | |
6001 | /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
6002 | /// the number of bits specified by imm8, and store the results in dst. |
6003 | /// Rounding is done according to the imm8 parameter, which can be one of: |
6004 | /// |
6005 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
6006 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
6007 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
6008 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
6009 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6010 | /// |
6011 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
6012 | /// |
6013 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_round_ps&ig_expand=5444) |
6014 | #[inline ] |
6015 | #[target_feature (enable = "avx512dq" )] |
6016 | #[cfg_attr (test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))] |
6017 | #[rustc_legacy_const_generics (1, 2)] |
6018 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6019 | pub fn _mm512_reduce_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 { |
6020 | static_assert_uimm_bits!(IMM8, 8); |
6021 | static_assert_sae!(SAE); |
6022 | _mm512_mask_reduce_round_ps::<IMM8, SAE>(_mm512_undefined_ps(), 0xffff, a) |
6023 | } |
6024 | |
6025 | /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
6026 | /// the number of bits specified by imm8, and store the results in dst using writemask k (elements are |
6027 | /// copied from src to dst if the corresponding mask bit is not set). |
6028 | /// Rounding is done according to the imm8 parameter, which can be one of: |
6029 | /// |
6030 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
6031 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
6032 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
6033 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
6034 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6035 | /// |
6036 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
6037 | /// |
6038 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_round_ps&ig_expand=5442) |
6039 | #[inline ] |
6040 | #[target_feature (enable = "avx512dq" )] |
6041 | #[cfg_attr (test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))] |
6042 | #[rustc_legacy_const_generics (3, 4)] |
6043 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6044 | pub fn _mm512_mask_reduce_round_ps<const IMM8: i32, const SAE: i32>( |
6045 | src: __m512, |
6046 | k: __mmask16, |
6047 | a: __m512, |
6048 | ) -> __m512 { |
6049 | unsafe { |
6050 | static_assert_uimm_bits!(IMM8, 8); |
6051 | static_assert_sae!(SAE); |
6052 | transmute(vreduceps_512(a.as_f32x16(), IMM8, src.as_f32x16(), k, SAE)) |
6053 | } |
6054 | } |
6055 | |
6056 | /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
6057 | /// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are |
6058 | /// zeroed out if the corresponding mask bit is not set). |
6059 | /// Rounding is done according to the imm8 parameter, which can be one of: |
6060 | /// |
6061 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
6062 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
6063 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
6064 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
6065 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6066 | /// |
6067 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
6068 | /// |
6069 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_round_ps&ig_expand=5443) |
6070 | #[inline ] |
6071 | #[target_feature (enable = "avx512dq" )] |
6072 | #[cfg_attr (test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))] |
6073 | #[rustc_legacy_const_generics (2, 3)] |
6074 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6075 | pub fn _mm512_maskz_reduce_round_ps<const IMM8: i32, const SAE: i32>( |
6076 | k: __mmask16, |
6077 | a: __m512, |
6078 | ) -> __m512 { |
6079 | static_assert_uimm_bits!(IMM8, 8); |
6080 | static_assert_sae!(SAE); |
6081 | _mm512_mask_reduce_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), k, a) |
6082 | } |
6083 | |
6084 | /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
6085 | /// the number of bits specified by imm8, and store the results in dst. |
6086 | /// Rounding is done according to the imm8 parameter, which can be one of: |
6087 | /// |
6088 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
6089 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
6090 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
6091 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
6092 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6093 | /// |
6094 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_ps&ig_expand=5429) |
6095 | #[inline ] |
6096 | #[target_feature (enable = "avx512dq,avx512vl" )] |
6097 | #[cfg_attr (test, assert_instr(vreduceps, IMM8 = 0))] |
6098 | #[rustc_legacy_const_generics (1)] |
6099 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6100 | pub fn _mm_reduce_ps<const IMM8: i32>(a: __m128) -> __m128 { |
6101 | static_assert_uimm_bits!(IMM8, 8); |
_mm_mask_reduce_ps::<IMM8>(_mm_undefined_ps(), 0xff, a)
6103 | } |
6104 | |
6105 | /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
6106 | /// the number of bits specified by imm8, and store the results in dst using writemask k (elements are |
6107 | /// copied from src to dst if the corresponding mask bit is not set). |
6108 | /// Rounding is done according to the imm8 parameter, which can be one of: |
6109 | /// |
6110 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
6111 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
6112 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
6113 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
6114 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6115 | /// |
6116 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_ps&ig_expand=5427) |
6117 | #[inline ] |
6118 | #[target_feature (enable = "avx512dq,avx512vl" )] |
6119 | #[cfg_attr (test, assert_instr(vreduceps, IMM8 = 0))] |
6120 | #[rustc_legacy_const_generics (3)] |
6121 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6122 | pub fn _mm_mask_reduce_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 { |
6123 | unsafe { |
6124 | static_assert_uimm_bits!(IMM8, 8); |
transmute(vreduceps_128(a.as_f32x4(), IMM8, src.as_f32x4(), k))
6126 | } |
6127 | } |
6128 | |
6129 | /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
6130 | /// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are |
6131 | /// zeroed out if the corresponding mask bit is not set). |
6132 | /// Rounding is done according to the imm8 parameter, which can be one of: |
6133 | /// |
6134 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
6135 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
6136 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
6137 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
6138 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6139 | /// |
6140 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_ps&ig_expand=5428) |
6141 | #[inline ] |
6142 | #[target_feature (enable = "avx512dq,avx512vl" )] |
6143 | #[cfg_attr (test, assert_instr(vreduceps, IMM8 = 0))] |
6144 | #[rustc_legacy_const_generics (2)] |
6145 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6146 | pub fn _mm_maskz_reduce_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 { |
6147 | static_assert_uimm_bits!(IMM8, 8); |
_mm_mask_reduce_ps::<IMM8>(_mm_setzero_ps(), k, a)
6149 | } |
6150 | |
6151 | /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
6152 | /// the number of bits specified by imm8, and store the results in dst. |
6153 | /// Rounding is done according to the imm8 parameter, which can be one of: |
6154 | /// |
6155 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
6156 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
6157 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
6158 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
6159 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6160 | /// |
6161 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_ps&ig_expand=5432) |
6162 | #[inline ] |
6163 | #[target_feature (enable = "avx512dq,avx512vl" )] |
6164 | #[cfg_attr (test, assert_instr(vreduceps, IMM8 = 0))] |
6165 | #[rustc_legacy_const_generics (1)] |
6166 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6167 | pub fn _mm256_reduce_ps<const IMM8: i32>(a: __m256) -> __m256 { |
6168 | static_assert_uimm_bits!(IMM8, 8); |
_mm256_mask_reduce_ps::<IMM8>(_mm256_undefined_ps(), 0xff, a)
6170 | } |
6171 | |
6172 | /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
6173 | /// the number of bits specified by imm8, and store the results in dst using writemask k (elements are |
6174 | /// copied from src to dst if the corresponding mask bit is not set). |
6175 | /// Rounding is done according to the imm8 parameter, which can be one of: |
6176 | /// |
6177 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
6178 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
6179 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
6180 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
6181 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6182 | /// |
6183 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_ps&ig_expand=5430) |
6184 | #[inline ] |
6185 | #[target_feature (enable = "avx512dq,avx512vl" )] |
6186 | #[cfg_attr (test, assert_instr(vreduceps, IMM8 = 0))] |
6187 | #[rustc_legacy_const_generics (3)] |
6188 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6189 | pub fn _mm256_mask_reduce_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 { |
6190 | unsafe { |
6191 | static_assert_uimm_bits!(IMM8, 8); |
transmute(vreduceps_256(a.as_f32x8(), IMM8, src.as_f32x8(), k))
6193 | } |
6194 | } |
6195 | |
6196 | /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
6197 | /// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are |
6198 | /// zeroed out if the corresponding mask bit is not set). |
6199 | /// Rounding is done according to the imm8 parameter, which can be one of: |
6200 | /// |
6201 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
6202 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
6203 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
6204 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
6205 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6206 | /// |
6207 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_reduce_ps&ig_expand=5431) |
6208 | #[inline ] |
6209 | #[target_feature (enable = "avx512dq,avx512vl" )] |
6210 | #[cfg_attr (test, assert_instr(vreduceps, IMM8 = 0))] |
6211 | #[rustc_legacy_const_generics (2)] |
6212 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6213 | pub fn _mm256_maskz_reduce_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 { |
6214 | static_assert_uimm_bits!(IMM8, 8); |
_mm256_mask_reduce_ps::<IMM8>(_mm256_setzero_ps(), k, a)
6216 | } |
6217 | |
6218 | /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
6219 | /// the number of bits specified by imm8, and store the results in dst. |
6220 | /// Rounding is done according to the imm8 parameter, which can be one of: |
6221 | /// |
6222 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
6223 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
6224 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
6225 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
6226 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6227 | /// |
6228 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_ps&ig_expand=5435) |
6229 | #[inline ] |
6230 | #[target_feature (enable = "avx512dq" )] |
6231 | #[cfg_attr (test, assert_instr(vreduceps, IMM8 = 0))] |
6232 | #[rustc_legacy_const_generics (1)] |
6233 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6234 | pub fn _mm512_reduce_ps<const IMM8: i32>(a: __m512) -> __m512 { |
6235 | static_assert_uimm_bits!(IMM8, 8); |
_mm512_mask_reduce_ps::<IMM8>(_mm512_undefined_ps(), 0xffff, a)
6237 | } |
6238 | |
6239 | /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
6240 | /// the number of bits specified by imm8, and store the results in dst using writemask k (elements are |
6241 | /// copied from src to dst if the corresponding mask bit is not set). |
6242 | /// Rounding is done according to the imm8 parameter, which can be one of: |
6243 | /// |
6244 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
6245 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
6246 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
6247 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
6248 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6249 | /// |
6250 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_ps&ig_expand=5433) |
6251 | #[inline ] |
6252 | #[target_feature (enable = "avx512dq" )] |
6253 | #[cfg_attr (test, assert_instr(vreduceps, IMM8 = 0))] |
6254 | #[rustc_legacy_const_generics (3)] |
6255 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6256 | pub fn _mm512_mask_reduce_ps<const IMM8: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 { |
6257 | unsafe { |
6258 | static_assert_uimm_bits!(IMM8, 8); |
transmute(vreduceps_512(
6260 | a.as_f32x16(), |
6261 | IMM8, |
6262 | src.as_f32x16(), |
6263 | k, |
6264 | _MM_FROUND_CUR_DIRECTION, |
6265 | )) |
6266 | } |
6267 | } |
6268 | |
6269 | /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
6270 | /// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are |
6271 | /// zeroed out if the corresponding mask bit is not set). |
6272 | /// Rounding is done according to the imm8 parameter, which can be one of: |
6273 | /// |
6274 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
6275 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
6276 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
6277 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
6278 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6279 | /// |
6280 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_ps&ig_expand=5434) |
6281 | #[inline ] |
6282 | #[target_feature (enable = "avx512dq" )] |
6283 | #[cfg_attr (test, assert_instr(vreduceps, IMM8 = 0))] |
6284 | #[rustc_legacy_const_generics (2)] |
6285 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6286 | pub fn _mm512_maskz_reduce_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 { |
6287 | static_assert_uimm_bits!(IMM8, 8); |
_mm512_mask_reduce_ps::<IMM8>(_mm512_setzero_ps(), k, a)
6289 | } |
6290 | |
6291 | /// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b |
6292 | /// by the number of bits specified by imm8, store the result in the lower element of dst, and copy |
6293 | /// the upper element from a to the upper element of dst. |
6294 | /// Rounding is done according to the imm8 parameter, which can be one of: |
6295 | /// |
6296 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
6297 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
6298 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
6299 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
6300 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6301 | /// |
6302 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
6303 | /// |
6304 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_round_sd&ig_expand=5447) |
6305 | #[inline ] |
6306 | #[target_feature (enable = "avx512dq" )] |
6307 | #[cfg_attr (test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))] |
6308 | #[rustc_legacy_const_generics (2, 3)] |
6309 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6310 | pub fn _mm_reduce_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d { |
6311 | static_assert_uimm_bits!(IMM8, 8); |
6312 | static_assert_sae!(SAE); |
_mm_mask_reduce_round_sd::<IMM8, SAE>(_mm_undefined_pd(), 0xff, a, b)
6314 | } |
6315 | |
6316 | /// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b |
6317 | /// by the number of bits specified by imm8, store the result in the lower element of dst using writemask |
6318 | /// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a |
6319 | /// to the upper element of dst. |
6320 | /// Rounding is done according to the imm8 parameter, which can be one of: |
6321 | /// |
6322 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
6323 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
6324 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
6325 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
6326 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6327 | /// |
6328 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
6329 | /// |
6330 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_round_sd&ig_expand=5445) |
6331 | #[inline ] |
6332 | #[target_feature (enable = "avx512dq" )] |
6333 | #[cfg_attr (test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))] |
6334 | #[rustc_legacy_const_generics (4, 5)] |
6335 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6336 | pub fn _mm_mask_reduce_round_sd<const IMM8: i32, const SAE: i32>( |
6337 | src: __m128d, |
6338 | k: __mmask8, |
6339 | a: __m128d, |
6340 | b: __m128d, |
6341 | ) -> __m128d { |
6342 | unsafe { |
6343 | static_assert_uimm_bits!(IMM8, 8); |
6344 | static_assert_sae!(SAE); |
transmute(vreducesd(
6346 | a.as_f64x2(), |
6347 | b.as_f64x2(), |
6348 | src.as_f64x2(), |
6349 | k, |
6350 | IMM8, |
6351 | SAE, |
6352 | )) |
6353 | } |
6354 | } |
6355 | |
6356 | /// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b |
6357 | /// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask |
6358 | /// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a |
6359 | /// to the upper element of dst. |
6360 | /// Rounding is done according to the imm8 parameter, which can be one of: |
6361 | /// |
6362 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
6363 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
6364 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
6365 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
6366 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6367 | /// |
6368 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
6369 | /// |
6370 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_round_sd&ig_expand=5446) |
6371 | #[inline ] |
6372 | #[target_feature (enable = "avx512dq" )] |
6373 | #[cfg_attr (test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))] |
6374 | #[rustc_legacy_const_generics (3, 4)] |
6375 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6376 | pub fn _mm_maskz_reduce_round_sd<const IMM8: i32, const SAE: i32>( |
6377 | k: __mmask8, |
6378 | a: __m128d, |
6379 | b: __m128d, |
6380 | ) -> __m128d { |
6381 | static_assert_uimm_bits!(IMM8, 8); |
6382 | static_assert_sae!(SAE); |
_mm_mask_reduce_round_sd::<IMM8, SAE>(_mm_setzero_pd(), k, a, b)
6384 | } |
6385 | |
6386 | /// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b |
/// by the number of bits specified by imm8, store the result in the lower element of dst, and
/// copy the upper element from a
6389 | /// to the upper element of dst. |
6390 | /// Rounding is done according to the imm8 parameter, which can be one of: |
6391 | /// |
6392 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
6393 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
6394 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
6395 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
6396 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6397 | /// |
6398 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_sd&ig_expand=5456) |
6399 | #[inline ] |
6400 | #[target_feature (enable = "avx512dq" )] |
6401 | #[cfg_attr (test, assert_instr(vreducesd, IMM8 = 0))] |
6402 | #[rustc_legacy_const_generics (2)] |
6403 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6404 | pub fn _mm_reduce_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d { |
6405 | static_assert_uimm_bits!(IMM8, 8); |
_mm_mask_reduce_sd::<IMM8>(_mm_undefined_pd(), 0xff, a, b)
6407 | } |
6408 | |
6409 | /// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b |
6410 | /// by the number of bits specified by imm8, store the result in the lower element of dst using writemask |
6411 | /// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a |
6412 | /// to the upper element of dst. |
6413 | /// Rounding is done according to the imm8 parameter, which can be one of: |
6414 | /// |
6415 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
6416 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
6417 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
6418 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
6419 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6420 | /// |
6421 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_sd&ig_expand=5454) |
6422 | #[inline ] |
6423 | #[target_feature (enable = "avx512dq" )] |
6424 | #[cfg_attr (test, assert_instr(vreducesd, IMM8 = 0))] |
6425 | #[rustc_legacy_const_generics (4)] |
6426 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6427 | pub fn _mm_mask_reduce_sd<const IMM8: i32>( |
6428 | src: __m128d, |
6429 | k: __mmask8, |
6430 | a: __m128d, |
6431 | b: __m128d, |
6432 | ) -> __m128d { |
6433 | unsafe { |
6434 | static_assert_uimm_bits!(IMM8, 8); |
transmute(vreducesd(
6436 | a.as_f64x2(), |
6437 | b.as_f64x2(), |
6438 | src.as_f64x2(), |
6439 | k, |
6440 | IMM8, |
6441 | _MM_FROUND_CUR_DIRECTION, |
6442 | )) |
6443 | } |
6444 | } |
6445 | |
6446 | /// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b |
6447 | /// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask |
6448 | /// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a |
6449 | /// to the upper element of dst. |
6450 | /// Rounding is done according to the imm8 parameter, which can be one of: |
6451 | /// |
6452 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
6453 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
6454 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
6455 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
6456 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6457 | /// |
6458 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_sd&ig_expand=5455) |
6459 | #[inline ] |
6460 | #[target_feature (enable = "avx512dq" )] |
6461 | #[cfg_attr (test, assert_instr(vreducesd, IMM8 = 0))] |
6462 | #[rustc_legacy_const_generics (3)] |
6463 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6464 | pub fn _mm_maskz_reduce_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { |
6465 | static_assert_uimm_bits!(IMM8, 8); |
_mm_mask_reduce_sd::<IMM8>(_mm_setzero_pd(), k, a, b)
6467 | } |
6468 | |
6469 | /// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b |
6470 | /// by the number of bits specified by imm8, store the result in the lower element of dst, and copy |
/// the upper element from a
6472 | /// to the upper element of dst. |
6473 | /// Rounding is done according to the imm8 parameter, which can be one of: |
6474 | /// |
6475 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
6476 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
6477 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
6478 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
6479 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6480 | /// |
6481 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
6482 | /// |
6483 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_round_ss&ig_expand=5453) |
6484 | #[inline ] |
6485 | #[target_feature (enable = "avx512dq" )] |
6486 | #[cfg_attr (test, assert_instr(vreducess, IMM8 = 0, SAE = 8))] |
6487 | #[rustc_legacy_const_generics (2, 3)] |
6488 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6489 | pub fn _mm_reduce_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 { |
6490 | static_assert_uimm_bits!(IMM8, 8); |
6491 | static_assert_sae!(SAE); |
_mm_mask_reduce_round_ss::<IMM8, SAE>(_mm_undefined_ps(), 0xff, a, b)
6493 | } |
6494 | |
6495 | /// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b |
6496 | /// by the number of bits specified by imm8, store the result in the lower element of dst using writemask |
/// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a
6498 | /// to the upper element of dst. |
6499 | /// Rounding is done according to the imm8 parameter, which can be one of: |
6500 | /// |
6501 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
6502 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
6503 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
6504 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
6505 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6506 | /// |
6507 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
6508 | /// |
6509 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_round_ss&ig_expand=5451) |
6510 | #[inline ] |
6511 | #[target_feature (enable = "avx512dq" )] |
6512 | #[cfg_attr (test, assert_instr(vreducess, IMM8 = 0, SAE = 8))] |
6513 | #[rustc_legacy_const_generics (4, 5)] |
6514 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6515 | pub fn _mm_mask_reduce_round_ss<const IMM8: i32, const SAE: i32>( |
6516 | src: __m128, |
6517 | k: __mmask8, |
6518 | a: __m128, |
6519 | b: __m128, |
6520 | ) -> __m128 { |
6521 | unsafe { |
6522 | static_assert_uimm_bits!(IMM8, 8); |
6523 | static_assert_sae!(SAE); |
transmute(vreducess(
6525 | a.as_f32x4(), |
6526 | b.as_f32x4(), |
6527 | src.as_f32x4(), |
6528 | k, |
6529 | IMM8, |
6530 | SAE, |
6531 | )) |
6532 | } |
6533 | } |
6534 | |
6535 | /// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b |
6536 | /// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask |
/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a
6538 | /// to the upper element of dst. |
6539 | /// Rounding is done according to the imm8 parameter, which can be one of: |
6540 | /// |
6541 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
6542 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
6543 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
6544 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
6545 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6546 | /// |
6547 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
6548 | /// |
6549 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_round_ss&ig_expand=5452) |
6550 | #[inline ] |
6551 | #[target_feature (enable = "avx512dq" )] |
6552 | #[cfg_attr (test, assert_instr(vreducess, IMM8 = 0, SAE = 8))] |
6553 | #[rustc_legacy_const_generics (3, 4)] |
6554 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6555 | pub fn _mm_maskz_reduce_round_ss<const IMM8: i32, const SAE: i32>( |
6556 | k: __mmask8, |
6557 | a: __m128, |
6558 | b: __m128, |
6559 | ) -> __m128 { |
6560 | static_assert_uimm_bits!(IMM8, 8); |
6561 | static_assert_sae!(SAE); |
_mm_mask_reduce_round_ss::<IMM8, SAE>(_mm_setzero_ps(), k, a, b)
6563 | } |
6564 | |
6565 | /// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b |
6566 | /// by the number of bits specified by imm8, store the result in the lower element of dst, and copy |
/// the upper element from a
6568 | /// to the upper element of dst. |
6569 | /// Rounding is done according to the imm8 parameter, which can be one of: |
6570 | /// |
6571 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
6572 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
6573 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
6574 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
6575 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6576 | /// |
6577 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_ss&ig_expand=5462) |
6578 | #[inline ] |
6579 | #[target_feature (enable = "avx512dq" )] |
6580 | #[cfg_attr (test, assert_instr(vreducess, IMM8 = 0))] |
6581 | #[rustc_legacy_const_generics (2)] |
6582 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6583 | pub fn _mm_reduce_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 { |
6584 | static_assert_uimm_bits!(IMM8, 8); |
_mm_mask_reduce_ss::<IMM8>(_mm_undefined_ps(), 0xff, a, b)
6586 | } |
6587 | |
6588 | /// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b |
6589 | /// by the number of bits specified by imm8, store the result in the lower element of dst using writemask |
/// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a
6591 | /// to the upper element of dst. |
6592 | /// Rounding is done according to the imm8 parameter, which can be one of: |
6593 | /// |
6594 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
6595 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
6596 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
6597 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
6598 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6599 | /// |
6600 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_ss&ig_expand=5460) |
6601 | #[inline ] |
6602 | #[target_feature (enable = "avx512dq" )] |
6603 | #[cfg_attr (test, assert_instr(vreducess, IMM8 = 0))] |
6604 | #[rustc_legacy_const_generics (4)] |
6605 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6606 | pub fn _mm_mask_reduce_ss<const IMM8: i32>( |
6607 | src: __m128, |
6608 | k: __mmask8, |
6609 | a: __m128, |
6610 | b: __m128, |
6611 | ) -> __m128 { |
6612 | unsafe { |
6613 | static_assert_uimm_bits!(IMM8, 8); |
transmute(vreducess(
6615 | a.as_f32x4(), |
6616 | b.as_f32x4(), |
6617 | src.as_f32x4(), |
6618 | k, |
6619 | IMM8, |
6620 | _MM_FROUND_CUR_DIRECTION, |
6621 | )) |
6622 | } |
6623 | } |
6624 | |
6625 | /// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b |
6626 | /// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask |
/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a
6628 | /// to the upper element of dst. |
6629 | /// Rounding is done according to the imm8 parameter, which can be one of: |
6630 | /// |
6631 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
6632 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
6633 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
6634 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
6635 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6636 | /// |
6637 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_ss&ig_expand=5461) |
6638 | #[inline ] |
6639 | #[target_feature (enable = "avx512dq" )] |
6640 | #[cfg_attr (test, assert_instr(vreducess, IMM8 = 0))] |
6641 | #[rustc_legacy_const_generics (3)] |
6642 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6643 | pub fn _mm_maskz_reduce_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 { |
6644 | static_assert_uimm_bits!(IMM8, 8); |
_mm_mask_reduce_ss::<IMM8>(_mm_setzero_ps(), k, a, b)
6646 | } |
6647 | |
6648 | // FP-Class |
6649 | |
6650 | /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified |
6651 | /// by imm8, and store the results in mask vector k. |
6652 | /// imm can be a combination of: |
6653 | /// |
6654 | /// - 0x01 // QNaN |
6655 | /// - 0x02 // Positive Zero |
6656 | /// - 0x04 // Negative Zero |
6657 | /// - 0x08 // Positive Infinity |
6658 | /// - 0x10 // Negative Infinity |
6659 | /// - 0x20 // Denormal |
6660 | /// - 0x40 // Negative |
6661 | /// - 0x80 // SNaN |
6662 | /// |
6663 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_pd_mask&ig_expand=3493) |
6664 | #[inline ] |
6665 | #[target_feature (enable = "avx512dq,avx512vl" )] |
6666 | #[cfg_attr (test, assert_instr(vfpclasspd, IMM8 = 0))] |
6667 | #[rustc_legacy_const_generics (1)] |
6668 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6669 | pub fn _mm_fpclass_pd_mask<const IMM8: i32>(a: __m128d) -> __mmask8 { |
6670 | static_assert_uimm_bits!(IMM8, 8); |
_mm_mask_fpclass_pd_mask::<IMM8>(0xff, a)
6672 | } |
6673 | |
6674 | /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified |
6675 | /// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the |
6676 | /// corresponding mask bit is not set). |
6677 | /// imm can be a combination of: |
6678 | /// |
6679 | /// - 0x01 // QNaN |
6680 | /// - 0x02 // Positive Zero |
6681 | /// - 0x04 // Negative Zero |
6682 | /// - 0x08 // Positive Infinity |
6683 | /// - 0x10 // Negative Infinity |
6684 | /// - 0x20 // Denormal |
6685 | /// - 0x40 // Negative |
6686 | /// - 0x80 // SNaN |
6687 | /// |
6688 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_pd_mask&ig_expand=3494) |
6689 | #[inline ] |
6690 | #[target_feature (enable = "avx512dq,avx512vl" )] |
6691 | #[cfg_attr (test, assert_instr(vfpclasspd, IMM8 = 0))] |
6692 | #[rustc_legacy_const_generics (2)] |
6693 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6694 | pub fn _mm_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d) -> __mmask8 { |
6695 | unsafe { |
6696 | static_assert_uimm_bits!(IMM8, 8); |
transmute(vfpclasspd_128(a.as_f64x2(), IMM8, k1))
6698 | } |
6699 | } |
6700 | |
6701 | /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified |
6702 | /// by imm8, and store the results in mask vector k. |
6703 | /// imm can be a combination of: |
6704 | /// |
6705 | /// - 0x01 // QNaN |
6706 | /// - 0x02 // Positive Zero |
6707 | /// - 0x04 // Negative Zero |
6708 | /// - 0x08 // Positive Infinity |
6709 | /// - 0x10 // Negative Infinity |
6710 | /// - 0x20 // Denormal |
6711 | /// - 0x40 // Negative |
6712 | /// - 0x80 // SNaN |
6713 | /// |
6714 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fpclass_pd_mask&ig_expand=3495) |
6715 | #[inline ] |
6716 | #[target_feature (enable = "avx512dq,avx512vl" )] |
6717 | #[cfg_attr (test, assert_instr(vfpclasspd, IMM8 = 0))] |
6718 | #[rustc_legacy_const_generics (1)] |
6719 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6720 | pub fn _mm256_fpclass_pd_mask<const IMM8: i32>(a: __m256d) -> __mmask8 { |
6721 | static_assert_uimm_bits!(IMM8, 8); |
_mm256_mask_fpclass_pd_mask::<IMM8>(0xff, a)
6723 | } |
6724 | |
6725 | /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified |
6726 | /// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the |
6727 | /// corresponding mask bit is not set). |
6728 | /// imm can be a combination of: |
6729 | /// |
6730 | /// - 0x01 // QNaN |
6731 | /// - 0x02 // Positive Zero |
6732 | /// - 0x04 // Negative Zero |
6733 | /// - 0x08 // Positive Infinity |
6734 | /// - 0x10 // Negative Infinity |
6735 | /// - 0x20 // Denormal |
6736 | /// - 0x40 // Negative |
6737 | /// - 0x80 // SNaN |
6738 | /// |
6739 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fpclass_pd_mask&ig_expand=3496) |
6740 | #[inline ] |
6741 | #[target_feature (enable = "avx512dq,avx512vl" )] |
6742 | #[cfg_attr (test, assert_instr(vfpclasspd, IMM8 = 0))] |
6743 | #[rustc_legacy_const_generics (2)] |
6744 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6745 | pub fn _mm256_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d) -> __mmask8 { |
6746 | unsafe { |
6747 | static_assert_uimm_bits!(IMM8, 8); |
transmute(vfpclasspd_256(a.as_f64x4(), IMM8, k1))
6749 | } |
6750 | } |
6751 | |
6752 | /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified |
6753 | /// by imm8, and store the results in mask vector k. |
6754 | /// imm can be a combination of: |
6755 | /// |
6756 | /// - 0x01 // QNaN |
6757 | /// - 0x02 // Positive Zero |
6758 | /// - 0x04 // Negative Zero |
6759 | /// - 0x08 // Positive Infinity |
6760 | /// - 0x10 // Negative Infinity |
6761 | /// - 0x20 // Denormal |
6762 | /// - 0x40 // Negative |
6763 | /// - 0x80 // SNaN |
6764 | /// |
6765 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fpclass_pd_mask&ig_expand=3497) |
6766 | #[inline ] |
6767 | #[target_feature (enable = "avx512dq" )] |
6768 | #[cfg_attr (test, assert_instr(vfpclasspd, IMM8 = 0))] |
6769 | #[rustc_legacy_const_generics (1)] |
6770 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6771 | pub fn _mm512_fpclass_pd_mask<const IMM8: i32>(a: __m512d) -> __mmask8 { |
6772 | static_assert_uimm_bits!(IMM8, 8); |
_mm512_mask_fpclass_pd_mask::<IMM8>(0xff, a)
6774 | } |
6775 | |
6776 | /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified |
6777 | /// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the |
6778 | /// corresponding mask bit is not set). |
6779 | /// imm can be a combination of: |
6780 | /// |
6781 | /// - 0x01 // QNaN |
6782 | /// - 0x02 // Positive Zero |
6783 | /// - 0x04 // Negative Zero |
6784 | /// - 0x08 // Positive Infinity |
6785 | /// - 0x10 // Negative Infinity |
6786 | /// - 0x20 // Denormal |
6787 | /// - 0x40 // Negative |
6788 | /// - 0x80 // SNaN |
6789 | /// |
6790 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fpclass_pd_mask&ig_expand=3498) |
6791 | #[inline ] |
6792 | #[target_feature (enable = "avx512dq" )] |
6793 | #[cfg_attr (test, assert_instr(vfpclasspd, IMM8 = 0))] |
6794 | #[rustc_legacy_const_generics (2)] |
6795 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6796 | pub fn _mm512_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d) -> __mmask8 { |
6797 | unsafe { |
6798 | static_assert_uimm_bits!(IMM8, 8); |
transmute(vfpclasspd_512(a.as_f64x8(), IMM8, k1))
6800 | } |
6801 | } |
6802 | |
6803 | /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified |
6804 | /// by imm8, and store the results in mask vector k. |
6805 | /// imm can be a combination of: |
6806 | /// |
6807 | /// - 0x01 // QNaN |
6808 | /// - 0x02 // Positive Zero |
6809 | /// - 0x04 // Negative Zero |
6810 | /// - 0x08 // Positive Infinity |
6811 | /// - 0x10 // Negative Infinity |
6812 | /// - 0x20 // Denormal |
6813 | /// - 0x40 // Negative |
6814 | /// - 0x80 // SNaN |
6815 | /// |
6816 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_ps_mask&ig_expand=3505) |
6817 | #[inline ] |
6818 | #[target_feature (enable = "avx512dq,avx512vl" )] |
6819 | #[cfg_attr (test, assert_instr(vfpclassps, IMM8 = 0))] |
6820 | #[rustc_legacy_const_generics (1)] |
6821 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6822 | pub fn _mm_fpclass_ps_mask<const IMM8: i32>(a: __m128) -> __mmask8 { |
6823 | static_assert_uimm_bits!(IMM8, 8); |
_mm_mask_fpclass_ps_mask::<IMM8>(0xff, a)
6825 | } |
6826 | |
6827 | /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified |
6828 | /// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the |
6829 | /// corresponding mask bit is not set). |
6830 | /// imm can be a combination of: |
6831 | /// |
6832 | /// - 0x01 // QNaN |
6833 | /// - 0x02 // Positive Zero |
6834 | /// - 0x04 // Negative Zero |
6835 | /// - 0x08 // Positive Infinity |
6836 | /// - 0x10 // Negative Infinity |
6837 | /// - 0x20 // Denormal |
6838 | /// - 0x40 // Negative |
6839 | /// - 0x80 // SNaN |
6840 | /// |
6841 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_ps_mask&ig_expand=3506) |
6842 | #[inline ] |
6843 | #[target_feature (enable = "avx512dq,avx512vl" )] |
6844 | #[cfg_attr (test, assert_instr(vfpclassps, IMM8 = 0))] |
6845 | #[rustc_legacy_const_generics (2)] |
6846 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6847 | pub fn _mm_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128) -> __mmask8 { |
6848 | unsafe { |
6849 | static_assert_uimm_bits!(IMM8, 8); |
transmute(vfpclassps_128(a.as_f32x4(), IMM8, k1))
6851 | } |
6852 | } |
6853 | |
6854 | /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified |
6855 | /// by imm8, and store the results in mask vector k. |
6856 | /// imm can be a combination of: |
6857 | /// |
6858 | /// - 0x01 // QNaN |
6859 | /// - 0x02 // Positive Zero |
6860 | /// - 0x04 // Negative Zero |
6861 | /// - 0x08 // Positive Infinity |
6862 | /// - 0x10 // Negative Infinity |
6863 | /// - 0x20 // Denormal |
6864 | /// - 0x40 // Negative |
6865 | /// - 0x80 // SNaN |
6866 | /// |
6867 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fpclass_ps_mask&ig_expand=3507) |
6868 | #[inline ] |
6869 | #[target_feature (enable = "avx512dq,avx512vl" )] |
6870 | #[cfg_attr (test, assert_instr(vfpclassps, IMM8 = 0))] |
6871 | #[rustc_legacy_const_generics (1)] |
6872 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6873 | pub fn _mm256_fpclass_ps_mask<const IMM8: i32>(a: __m256) -> __mmask8 { |
6874 | static_assert_uimm_bits!(IMM8, 8); |
_mm256_mask_fpclass_ps_mask::<IMM8>(0xff, a)
6876 | } |
6877 | |
6878 | /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified |
6879 | /// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the |
6880 | /// corresponding mask bit is not set). |
6881 | /// imm can be a combination of: |
6882 | /// |
6883 | /// - 0x01 // QNaN |
6884 | /// - 0x02 // Positive Zero |
6885 | /// - 0x04 // Negative Zero |
6886 | /// - 0x08 // Positive Infinity |
6887 | /// - 0x10 // Negative Infinity |
6888 | /// - 0x20 // Denormal |
6889 | /// - 0x40 // Negative |
6890 | /// - 0x80 // SNaN |
6891 | /// |
6892 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fpclass_ps_mask&ig_expand=3508) |
6893 | #[inline ] |
6894 | #[target_feature (enable = "avx512dq,avx512vl" )] |
6895 | #[cfg_attr (test, assert_instr(vfpclassps, IMM8 = 0))] |
6896 | #[rustc_legacy_const_generics (2)] |
6897 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6898 | pub fn _mm256_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256) -> __mmask8 { |
6899 | unsafe { |
6900 | static_assert_uimm_bits!(IMM8, 8); |
transmute(vfpclassps_256(a.as_f32x8(), IMM8, k1))
6902 | } |
6903 | } |
6904 | |
6905 | /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified |
6906 | /// by imm8, and store the results in mask vector k. |
6907 | /// imm can be a combination of: |
6908 | /// |
6909 | /// - 0x01 // QNaN |
6910 | /// - 0x02 // Positive Zero |
6911 | /// - 0x04 // Negative Zero |
6912 | /// - 0x08 // Positive Infinity |
6913 | /// - 0x10 // Negative Infinity |
6914 | /// - 0x20 // Denormal |
6915 | /// - 0x40 // Negative |
6916 | /// - 0x80 // SNaN |
6917 | /// |
6918 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fpclass_ps_mask&ig_expand=3509) |
6919 | #[inline ] |
6920 | #[target_feature (enable = "avx512dq" )] |
6921 | #[cfg_attr (test, assert_instr(vfpclassps, IMM8 = 0))] |
6922 | #[rustc_legacy_const_generics (1)] |
6923 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6924 | pub fn _mm512_fpclass_ps_mask<const IMM8: i32>(a: __m512) -> __mmask16 { |
6925 | static_assert_uimm_bits!(IMM8, 8); |
_mm512_mask_fpclass_ps_mask::<IMM8>(0xffff, a)
6927 | } |
6928 | |
6929 | /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified |
6930 | /// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the |
6931 | /// corresponding mask bit is not set). |
6932 | /// imm can be a combination of: |
6933 | /// |
6934 | /// - 0x01 // QNaN |
6935 | /// - 0x02 // Positive Zero |
6936 | /// - 0x04 // Negative Zero |
6937 | /// - 0x08 // Positive Infinity |
6938 | /// - 0x10 // Negative Infinity |
6939 | /// - 0x20 // Denormal |
6940 | /// - 0x40 // Negative |
6941 | /// - 0x80 // SNaN |
6942 | /// |
6943 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fpclass_ps_mask&ig_expand=3510) |
6944 | #[inline ] |
6945 | #[target_feature (enable = "avx512dq" )] |
6946 | #[cfg_attr (test, assert_instr(vfpclassps, IMM8 = 0))] |
6947 | #[rustc_legacy_const_generics (2)] |
6948 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6949 | pub fn _mm512_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512) -> __mmask16 { |
6950 | unsafe { |
6951 | static_assert_uimm_bits!(IMM8, 8); |
transmute(vfpclassps_512(a.as_f32x16(), IMM8, k1))
6953 | } |
6954 | } |
6955 | |
6956 | /// Test the lower double-precision (64-bit) floating-point element in a for special categories specified |
6957 | /// by imm8, and store the results in mask vector k. |
6958 | /// imm can be a combination of: |
6959 | /// |
6960 | /// - 0x01 // QNaN |
6961 | /// - 0x02 // Positive Zero |
6962 | /// - 0x04 // Negative Zero |
6963 | /// - 0x08 // Positive Infinity |
6964 | /// - 0x10 // Negative Infinity |
6965 | /// - 0x20 // Denormal |
6966 | /// - 0x40 // Negative |
6967 | /// - 0x80 // SNaN |
6968 | /// |
6969 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_sd_mask&ig_expand=3511) |
6970 | #[inline ] |
6971 | #[target_feature (enable = "avx512dq" )] |
6972 | #[cfg_attr (test, assert_instr(vfpclasssd, IMM8 = 0))] |
6973 | #[rustc_legacy_const_generics (1)] |
6974 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
6975 | pub fn _mm_fpclass_sd_mask<const IMM8: i32>(a: __m128d) -> __mmask8 { |
6976 | static_assert_uimm_bits!(IMM8, 8); |
_mm_mask_fpclass_sd_mask::<IMM8>(0xff, a)
6978 | } |
6979 | |
6980 | /// Test the lower double-precision (64-bit) floating-point element in a for special categories specified |
6981 | /// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the |
6982 | /// corresponding mask bit is not set). |
6983 | /// imm can be a combination of: |
6984 | /// |
6985 | /// - 0x01 // QNaN |
6986 | /// - 0x02 // Positive Zero |
6987 | /// - 0x04 // Negative Zero |
6988 | /// - 0x08 // Positive Infinity |
6989 | /// - 0x10 // Negative Infinity |
6990 | /// - 0x20 // Denormal |
6991 | /// - 0x40 // Negative |
6992 | /// - 0x80 // SNaN |
6993 | /// |
6994 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_sd_mask&ig_expand=3512) |
6995 | #[inline ] |
6996 | #[target_feature (enable = "avx512dq" )] |
6997 | #[cfg_attr (test, assert_instr(vfpclasssd, IMM8 = 0))] |
6998 | #[rustc_legacy_const_generics (2)] |
6999 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
7000 | pub fn _mm_mask_fpclass_sd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d) -> __mmask8 { |
7001 | unsafe { |
7002 | static_assert_uimm_bits!(IMM8, 8); |
vfpclasssd(a.as_f64x2(), IMM8, k1)
7004 | } |
7005 | } |
7006 | |
7007 | /// Test the lower single-precision (32-bit) floating-point element in a for special categories specified |
7008 | /// by imm8, and store the results in mask vector k. |
7009 | /// imm can be a combination of: |
7010 | /// |
7011 | /// - 0x01 // QNaN |
7012 | /// - 0x02 // Positive Zero |
7013 | /// - 0x04 // Negative Zero |
7014 | /// - 0x08 // Positive Infinity |
7015 | /// - 0x10 // Negative Infinity |
7016 | /// - 0x20 // Denormal |
7017 | /// - 0x40 // Negative |
7018 | /// - 0x80 // SNaN |
7019 | /// |
7020 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_ss_mask&ig_expand=3515) |
7021 | #[inline ] |
7022 | #[target_feature (enable = "avx512dq" )] |
7023 | #[cfg_attr (test, assert_instr(vfpclassss, IMM8 = 0))] |
7024 | #[rustc_legacy_const_generics (1)] |
7025 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
7026 | pub fn _mm_fpclass_ss_mask<const IMM8: i32>(a: __m128) -> __mmask8 { |
7027 | static_assert_uimm_bits!(IMM8, 8); |
_mm_mask_fpclass_ss_mask::<IMM8>(0xff, a)
7029 | } |
7030 | |
7031 | /// Test the lower single-precision (32-bit) floating-point element in a for special categories specified |
7032 | /// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the |
7033 | /// corresponding mask bit is not set). |
7034 | /// imm can be a combination of: |
7035 | /// |
7036 | /// - 0x01 // QNaN |
7037 | /// - 0x02 // Positive Zero |
7038 | /// - 0x04 // Negative Zero |
7039 | /// - 0x08 // Positive Infinity |
7040 | /// - 0x10 // Negative Infinity |
7041 | /// - 0x20 // Denormal |
7042 | /// - 0x40 // Negative |
7043 | /// - 0x80 // SNaN |
7044 | /// |
7045 | /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_ss_mask&ig_expand=3516) |
7046 | #[inline ] |
7047 | #[target_feature (enable = "avx512dq" )] |
7048 | #[cfg_attr (test, assert_instr(vfpclassss, IMM8 = 0))] |
7049 | #[rustc_legacy_const_generics (2)] |
7050 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
7051 | pub fn _mm_mask_fpclass_ss_mask<const IMM8: i32>(k1: __mmask8, a: __m128) -> __mmask8 { |
7052 | unsafe { |
7053 | static_assert_uimm_bits!(IMM8, 8); |
vfpclassss(a.as_f32x4(), IMM8, k1)
7055 | } |
7056 | } |
7057 | |
7058 | #[allow (improper_ctypes)] |
7059 | unsafe extern "C" { |
7060 | #[link_name = "llvm.x86.avx512.sitofp.round.v2f64.v2i64" ] |
7061 | unsafefn vcvtqq2pd_128(a: i64x2, rounding: i32) -> f64x2; |
7062 | #[link_name = "llvm.x86.avx512.sitofp.round.v4f64.v4i64" ] |
7063 | unsafefn vcvtqq2pd_256(a: i64x4, rounding: i32) -> f64x4; |
7064 | #[link_name = "llvm.x86.avx512.sitofp.round.v8f64.v8i64" ] |
7065 | unsafefn vcvtqq2pd_512(a: i64x8, rounding: i32) -> f64x8; |
7066 | |
7067 | #[link_name = "llvm.x86.avx512.mask.cvtqq2ps.128" ] |
7068 | unsafefn vcvtqq2ps_128(a: i64x2, src: f32x4, k: __mmask8) -> f32x4; |
7069 | #[link_name = "llvm.x86.avx512.sitofp.round.v4f32.v4i64" ] |
7070 | unsafefn vcvtqq2ps_256(a: i64x4, rounding: i32) -> f32x4; |
7071 | #[link_name = "llvm.x86.avx512.sitofp.round.v8f32.v8i64" ] |
7072 | unsafefn vcvtqq2ps_512(a: i64x8, rounding: i32) -> f32x8; |
7073 | |
7074 | #[link_name = "llvm.x86.avx512.uitofp.round.v2f64.v2u64" ] |
7075 | unsafefn vcvtuqq2pd_128(a: u64x2, rounding: i32) -> f64x2; |
7076 | #[link_name = "llvm.x86.avx512.uitofp.round.v4f64.v4u64" ] |
7077 | unsafefn vcvtuqq2pd_256(a: u64x4, rounding: i32) -> f64x4; |
7078 | #[link_name = "llvm.x86.avx512.uitofp.round.v8f64.v8u64" ] |
7079 | unsafefn vcvtuqq2pd_512(a: u64x8, rounding: i32) -> f64x8; |
7080 | |
7081 | #[link_name = "llvm.x86.avx512.mask.cvtuqq2ps.128" ] |
7082 | unsafefn vcvtuqq2ps_128(a: u64x2, src: f32x4, k: __mmask8) -> f32x4; |
7083 | #[link_name = "llvm.x86.avx512.uitofp.round.v4f32.v4u64" ] |
7084 | unsafefn vcvtuqq2ps_256(a: u64x4, rounding: i32) -> f32x4; |
7085 | #[link_name = "llvm.x86.avx512.uitofp.round.v8f32.v8u64" ] |
7086 | unsafefn vcvtuqq2ps_512(a: u64x8, rounding: i32) -> f32x8; |
7087 | |
7088 | #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.128" ] |
7089 | unsafefn vcvtpd2qq_128(a: f64x2, src: i64x2, k: __mmask8) -> i64x2; |
7090 | #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.256" ] |
7091 | unsafefn vcvtpd2qq_256(a: f64x4, src: i64x4, k: __mmask8) -> i64x4; |
7092 | #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.512" ] |
7093 | unsafefn vcvtpd2qq_512(a: f64x8, src: i64x8, k: __mmask8, rounding: i32) -> i64x8; |
7094 | |
7095 | #[link_name = "llvm.x86.avx512.mask.cvtps2qq.128" ] |
7096 | unsafefn vcvtps2qq_128(a: f32x4, src: i64x2, k: __mmask8) -> i64x2; |
7097 | #[link_name = "llvm.x86.avx512.mask.cvtps2qq.256" ] |
7098 | unsafefn vcvtps2qq_256(a: f32x4, src: i64x4, k: __mmask8) -> i64x4; |
7099 | #[link_name = "llvm.x86.avx512.mask.cvtps2qq.512" ] |
7100 | unsafefn vcvtps2qq_512(a: f32x8, src: i64x8, k: __mmask8, rounding: i32) -> i64x8; |
7101 | |
7102 | #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.128" ] |
7103 | unsafefn vcvtpd2uqq_128(a: f64x2, src: u64x2, k: __mmask8) -> u64x2; |
7104 | #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.256" ] |
7105 | unsafefn vcvtpd2uqq_256(a: f64x4, src: u64x4, k: __mmask8) -> u64x4; |
7106 | #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.512" ] |
7107 | unsafefn vcvtpd2uqq_512(a: f64x8, src: u64x8, k: __mmask8, rounding: i32) -> u64x8; |
7108 | |
7109 | #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.128" ] |
7110 | unsafefn vcvtps2uqq_128(a: f32x4, src: u64x2, k: __mmask8) -> u64x2; |
7111 | #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.256" ] |
7112 | unsafefn vcvtps2uqq_256(a: f32x4, src: u64x4, k: __mmask8) -> u64x4; |
7113 | #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.512" ] |
7114 | unsafefn vcvtps2uqq_512(a: f32x8, src: u64x8, k: __mmask8, rounding: i32) -> u64x8; |
7115 | |
7116 | #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.128" ] |
7117 | unsafefn vcvttpd2qq_128(a: f64x2, src: i64x2, k: __mmask8) -> i64x2; |
7118 | #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.256" ] |
7119 | unsafefn vcvttpd2qq_256(a: f64x4, src: i64x4, k: __mmask8) -> i64x4; |
7120 | #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.512" ] |
7121 | unsafefn vcvttpd2qq_512(a: f64x8, src: i64x8, k: __mmask8, sae: i32) -> i64x8; |
7122 | |
7123 | #[link_name = "llvm.x86.avx512.mask.cvttps2qq.128" ] |
7124 | unsafefn vcvttps2qq_128(a: f32x4, src: i64x2, k: __mmask8) -> i64x2; |
7125 | #[link_name = "llvm.x86.avx512.mask.cvttps2qq.256" ] |
7126 | unsafefn vcvttps2qq_256(a: f32x4, src: i64x4, k: __mmask8) -> i64x4; |
7127 | #[link_name = "llvm.x86.avx512.mask.cvttps2qq.512" ] |
7128 | unsafefn vcvttps2qq_512(a: f32x8, src: i64x8, k: __mmask8, sae: i32) -> i64x8; |
7129 | |
7130 | #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.128" ] |
7131 | unsafefn vcvttpd2uqq_128(a: f64x2, src: u64x2, k: __mmask8) -> u64x2; |
7132 | #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.256" ] |
7133 | unsafefn vcvttpd2uqq_256(a: f64x4, src: u64x4, k: __mmask8) -> u64x4; |
7134 | #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.512" ] |
7135 | unsafefn vcvttpd2uqq_512(a: f64x8, src: u64x8, k: __mmask8, sae: i32) -> u64x8; |
7136 | |
7137 | #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.128" ] |
7138 | unsafefn vcvttps2uqq_128(a: f32x4, src: u64x2, k: __mmask8) -> u64x2; |
7139 | #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.256" ] |
7140 | unsafefn vcvttps2uqq_256(a: f32x4, src: u64x4, k: __mmask8) -> u64x4; |
7141 | #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.512" ] |
7142 | unsafefn vcvttps2uqq_512(a: f32x8, src: u64x8, k: __mmask8, sae: i32) -> u64x8; |
7143 | |
7144 | #[link_name = "llvm.x86.avx512.mask.range.pd.128" ] |
7145 | unsafefn vrangepd_128(a: f64x2, b: f64x2, imm8: i32, src: f64x2, k: __mmask8) -> f64x2; |
7146 | #[link_name = "llvm.x86.avx512.mask.range.pd.256" ] |
7147 | unsafefn vrangepd_256(a: f64x4, b: f64x4, imm8: i32, src: f64x4, k: __mmask8) -> f64x4; |
7148 | #[link_name = "llvm.x86.avx512.mask.range.pd.512" ] |
7149 | unsafefn vrangepd_512(a: f64x8, b: f64x8, imm8: i32, src: f64x8, k: __mmask8, sae: i32) -> f64x8; |
7150 | |
7151 | #[link_name = "llvm.x86.avx512.mask.range.ps.128" ] |
7152 | unsafefn vrangeps_128(a: f32x4, b: f32x4, imm8: i32, src: f32x4, k: __mmask8) -> f32x4; |
7153 | #[link_name = "llvm.x86.avx512.mask.range.ps.256" ] |
7154 | unsafefn vrangeps_256(a: f32x8, b: f32x8, imm8: i32, src: f32x8, k: __mmask8) -> f32x8; |
7155 | #[link_name = "llvm.x86.avx512.mask.range.ps.512" ] |
7156 | unsafefn vrangeps_512(a: f32x16, b: f32x16, imm8: i32, src: f32x16, k: __mmask16, sae: i32) |
7157 | -> f32x16; |
7158 | |
7159 | #[link_name = "llvm.x86.avx512.mask.range.sd" ] |
7160 | unsafefn vrangesd(a: f64x2, b: f64x2, src: f64x2, k: __mmask8, imm8: i32, sae: i32) -> f64x2; |
7161 | #[link_name = "llvm.x86.avx512.mask.range.ss" ] |
7162 | unsafefn vrangess(a: f32x4, b: f32x4, src: f32x4, k: __mmask8, imm8: i32, sae: i32) -> f32x4; |
7163 | |
7164 | #[link_name = "llvm.x86.avx512.mask.reduce.pd.128" ] |
7165 | unsafefn vreducepd_128(a: f64x2, imm8: i32, src: f64x2, k: __mmask8) -> f64x2; |
7166 | #[link_name = "llvm.x86.avx512.mask.reduce.pd.256" ] |
7167 | unsafefn vreducepd_256(a: f64x4, imm8: i32, src: f64x4, k: __mmask8) -> f64x4; |
7168 | #[link_name = "llvm.x86.avx512.mask.reduce.pd.512" ] |
7169 | unsafefn vreducepd_512(a: f64x8, imm8: i32, src: f64x8, k: __mmask8, sae: i32) -> f64x8; |
7170 | |
7171 | #[link_name = "llvm.x86.avx512.mask.reduce.ps.128" ] |
7172 | unsafefn vreduceps_128(a: f32x4, imm8: i32, src: f32x4, k: __mmask8) -> f32x4; |
7173 | #[link_name = "llvm.x86.avx512.mask.reduce.ps.256" ] |
7174 | unsafefn vreduceps_256(a: f32x8, imm8: i32, src: f32x8, k: __mmask8) -> f32x8; |
7175 | #[link_name = "llvm.x86.avx512.mask.reduce.ps.512" ] |
7176 | unsafefn vreduceps_512(a: f32x16, imm8: i32, src: f32x16, k: __mmask16, sae: i32) -> f32x16; |
7177 | |
7178 | #[link_name = "llvm.x86.avx512.mask.reduce.sd" ] |
7179 | unsafefn vreducesd(a: f64x2, b: f64x2, src: f64x2, k: __mmask8, imm8: i32, sae: i32) -> f64x2; |
7180 | #[link_name = "llvm.x86.avx512.mask.reduce.ss" ] |
7181 | unsafefn vreducess(a: f32x4, b: f32x4, src: f32x4, k: __mmask8, imm8: i32, sae: i32) -> f32x4; |
7182 | |
7183 | #[link_name = "llvm.x86.avx512.mask.fpclass.pd.128" ] |
7184 | unsafefn vfpclasspd_128(a: f64x2, imm8: i32, k: __mmask8) -> __mmask8; |
7185 | #[link_name = "llvm.x86.avx512.mask.fpclass.pd.256" ] |
7186 | unsafefn vfpclasspd_256(a: f64x4, imm8: i32, k: __mmask8) -> __mmask8; |
7187 | #[link_name = "llvm.x86.avx512.mask.fpclass.pd.512" ] |
7188 | unsafefn vfpclasspd_512(a: f64x8, imm8: i32, k: __mmask8) -> __mmask8; |
7189 | |
7190 | #[link_name = "llvm.x86.avx512.mask.fpclass.ps.128" ] |
7191 | unsafefn vfpclassps_128(a: f32x4, imm8: i32, k: __mmask8) -> __mmask8; |
7192 | #[link_name = "llvm.x86.avx512.mask.fpclass.ps.256" ] |
7193 | unsafefn vfpclassps_256(a: f32x8, imm8: i32, k: __mmask8) -> __mmask8; |
7194 | #[link_name = "llvm.x86.avx512.mask.fpclass.ps.512" ] |
7195 | unsafefn vfpclassps_512(a: f32x16, imm8: i32, k: __mmask16) -> __mmask16; |
7196 | |
7197 | #[link_name = "llvm.x86.avx512.mask.fpclass.sd" ] |
7198 | unsafefn vfpclasssd(a: f64x2, imm8: i32, k: __mmask8) -> __mmask8; |
7199 | #[link_name = "llvm.x86.avx512.mask.fpclass.ss" ] |
7200 | unsafefn vfpclassss(a: f32x4, imm8: i32, k: __mmask8) -> __mmask8; |
7201 | } |
7202 | |
7203 | #[cfg (test)] |
7204 | mod tests { |
7205 | use super::*; |
7206 | |
7207 | use stdarch_test::simd_test; |
7208 | |
7209 | use crate::core_arch::x86::*; |
7210 | use crate::mem::transmute; |
7211 | |
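// Bit-pattern operands for the bitwise-logic tests below: OPRND1/OPRND2 are floats whose raw
// bits are 0x3333… and 0x5555…, and AND/ANDN/OR/XOR hold the bit patterns expected from the
// corresponding bitwise combination (e.g. 0x3333… & 0x5555… == 0x1111…, !0x3333… & 0x5555… ==
// 0x4444…).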
7212 | const OPRND1_64: f64 = unsafe { transmute(0x3333333333333333_u64) }; |
7213 | const OPRND2_64: f64 = unsafe { transmute(0x5555555555555555_u64) }; |
7214 | |
7215 | const AND_64: f64 = unsafe { transmute(0x1111111111111111_u64) }; |
7216 | const ANDN_64: f64 = unsafe { transmute(0x4444444444444444_u64) }; |
7217 | const OR_64: f64 = unsafe { transmute(0x7777777777777777_u64) }; |
7218 | const XOR_64: f64 = unsafe { transmute(0x6666666666666666_u64) }; |
7219 | |
7220 | const OPRND1_32: f32 = unsafe { transmute(0x33333333_u32) }; |
7221 | const OPRND2_32: f32 = unsafe { transmute(0x55555555_u32) }; |
7222 | |
7223 | const AND_32: f32 = unsafe { transmute(0x11111111_u32) }; |
7224 | const ANDN_32: f32 = unsafe { transmute(0x44444444_u32) }; |
7225 | const OR_32: f32 = unsafe { transmute(0x77777777_u32) }; |
7226 | const XOR_32: f32 = unsafe { transmute(0x66666666_u32) }; |
7227 | |
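// Note on lane order: the `_mm*_set_*` constructors list elements from the highest lane down
// to lane 0, while mask bit i governs lane i. With mask 0b01, for example, only the lowest
// lane (the last `set` argument) takes the computed result; the rest come from `src` or are
// zeroed.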
7228 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7229 | unsafe fn test_mm_mask_and_pd() { |
7230 | let a = _mm_set1_pd(OPRND1_64); |
7231 | let b = _mm_set1_pd(OPRND2_64); |
7232 | let src = _mm_set_pd(1., 2.); |
7233 | let r = _mm_mask_and_pd(src, 0b01, a, b); |
7234 | let e = _mm_set_pd(1., AND_64); |
7235 | assert_eq_m128d(r, e); |
7236 | } |
7237 | |
7238 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7239 | unsafe fn test_mm_maskz_and_pd() { |
7240 | let a = _mm_set1_pd(OPRND1_64); |
7241 | let b = _mm_set1_pd(OPRND2_64); |
7242 | let r = _mm_maskz_and_pd(0b01, a, b); |
7243 | let e = _mm_set_pd(0.0, AND_64); |
7244 | assert_eq_m128d(r, e); |
7245 | } |
7246 | |
7247 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7248 | unsafe fn test_mm256_mask_and_pd() { |
7249 | let a = _mm256_set1_pd(OPRND1_64); |
7250 | let b = _mm256_set1_pd(OPRND2_64); |
7251 | let src = _mm256_set_pd(1., 2., 3., 4.); |
7252 | let r = _mm256_mask_and_pd(src, 0b0101, a, b); |
7253 | let e = _mm256_set_pd(1., AND_64, 3., AND_64); |
7254 | assert_eq_m256d(r, e); |
7255 | } |
7256 | |
7257 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7258 | unsafe fn test_mm256_maskz_and_pd() { |
7259 | let a = _mm256_set1_pd(OPRND1_64); |
7260 | let b = _mm256_set1_pd(OPRND2_64); |
7261 | let r = _mm256_maskz_and_pd(0b0101, a, b); |
7262 | let e = _mm256_set_pd(0.0, AND_64, 0.0, AND_64); |
7263 | assert_eq_m256d(r, e); |
7264 | } |
7265 | |
7266 | #[simd_test(enable = "avx512dq" )] |
7267 | unsafe fn test_mm512_and_pd() { |
7268 | let a = _mm512_set1_pd(OPRND1_64); |
7269 | let b = _mm512_set1_pd(OPRND2_64); |
7270 | let r = _mm512_and_pd(a, b); |
7271 | let e = _mm512_set1_pd(AND_64); |
7272 | assert_eq_m512d(r, e); |
7273 | } |
7274 | |
7275 | #[simd_test(enable = "avx512dq" )] |
7276 | unsafe fn test_mm512_mask_and_pd() { |
7277 | let a = _mm512_set1_pd(OPRND1_64); |
7278 | let b = _mm512_set1_pd(OPRND2_64); |
7279 | let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
7280 | let r = _mm512_mask_and_pd(src, 0b01010101, a, b); |
7281 | let e = _mm512_set_pd(1., AND_64, 3., AND_64, 5., AND_64, 7., AND_64); |
7282 | assert_eq_m512d(r, e); |
7283 | } |
7284 | |
7285 | #[simd_test(enable = "avx512dq" )] |
7286 | unsafe fn test_mm512_maskz_and_pd() { |
7287 | let a = _mm512_set1_pd(OPRND1_64); |
7288 | let b = _mm512_set1_pd(OPRND2_64); |
7289 | let r = _mm512_maskz_and_pd(0b01010101, a, b); |
7290 | let e = _mm512_set_pd(0.0, AND_64, 0.0, AND_64, 0.0, AND_64, 0.0, AND_64); |
7291 | assert_eq_m512d(r, e); |
7292 | } |
7293 | |
7294 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7295 | unsafe fn test_mm_mask_and_ps() { |
7296 | let a = _mm_set1_ps(OPRND1_32); |
7297 | let b = _mm_set1_ps(OPRND2_32); |
7298 | let src = _mm_set_ps(1., 2., 3., 4.); |
7299 | let r = _mm_mask_and_ps(src, 0b0101, a, b); |
7300 | let e = _mm_set_ps(1., AND_32, 3., AND_32); |
7301 | assert_eq_m128(r, e); |
7302 | } |
7303 | |
7304 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7305 | unsafe fn test_mm_maskz_and_ps() { |
7306 | let a = _mm_set1_ps(OPRND1_32); |
7307 | let b = _mm_set1_ps(OPRND2_32); |
7308 | let r = _mm_maskz_and_ps(0b0101, a, b); |
7309 | let e = _mm_set_ps(0.0, AND_32, 0.0, AND_32); |
7310 | assert_eq_m128(r, e); |
7311 | } |
7312 | |
7313 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7314 | unsafe fn test_mm256_mask_and_ps() { |
7315 | let a = _mm256_set1_ps(OPRND1_32); |
7316 | let b = _mm256_set1_ps(OPRND2_32); |
7317 | let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
7318 | let r = _mm256_mask_and_ps(src, 0b01010101, a, b); |
7319 | let e = _mm256_set_ps(1., AND_32, 3., AND_32, 5., AND_32, 7., AND_32); |
7320 | assert_eq_m256(r, e); |
7321 | } |
7322 | |
7323 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7324 | unsafe fn test_mm256_maskz_and_ps() { |
7325 | let a = _mm256_set1_ps(OPRND1_32); |
7326 | let b = _mm256_set1_ps(OPRND2_32); |
7327 | let r = _mm256_maskz_and_ps(0b01010101, a, b); |
7328 | let e = _mm256_set_ps(0.0, AND_32, 0.0, AND_32, 0.0, AND_32, 0.0, AND_32); |
7329 | assert_eq_m256(r, e); |
7330 | } |
7331 | |
7332 | #[simd_test(enable = "avx512dq" )] |
7333 | unsafe fn test_mm512_and_ps() { |
7334 | let a = _mm512_set1_ps(OPRND1_32); |
7335 | let b = _mm512_set1_ps(OPRND2_32); |
7336 | let r = _mm512_and_ps(a, b); |
7337 | let e = _mm512_set1_ps(AND_32); |
7338 | assert_eq_m512(r, e); |
7339 | } |
7340 | |
7341 | #[simd_test(enable = "avx512dq" )] |
7342 | unsafe fn test_mm512_mask_and_ps() { |
7343 | let a = _mm512_set1_ps(OPRND1_32); |
7344 | let b = _mm512_set1_ps(OPRND2_32); |
7345 | let src = _mm512_set_ps( |
7346 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
7347 | ); |
7348 | let r = _mm512_mask_and_ps(src, 0b0101010101010101, a, b); |
7349 | let e = _mm512_set_ps( |
7350 | 1., AND_32, 3., AND_32, 5., AND_32, 7., AND_32, 9., AND_32, 11., AND_32, 13., AND_32, |
7351 | 15., AND_32, |
7352 | ); |
7353 | assert_eq_m512(r, e); |
7354 | } |
7355 | |
7356 | #[simd_test(enable = "avx512dq" )] |
7357 | unsafe fn test_mm512_maskz_and_ps() { |
7358 | let a = _mm512_set1_ps(OPRND1_32); |
7359 | let b = _mm512_set1_ps(OPRND2_32); |
7360 | let r = _mm512_maskz_and_ps(0b0101010101010101, a, b); |
7361 | let e = _mm512_set_ps( |
7362 | 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., |
7363 | AND_32, |
7364 | ); |
7365 | assert_eq_m512(r, e); |
7366 | } |
7367 | |
7368 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7369 | unsafe fn test_mm_mask_andnot_pd() { |
7370 | let a = _mm_set1_pd(OPRND1_64); |
7371 | let b = _mm_set1_pd(OPRND2_64); |
7372 | let src = _mm_set_pd(1., 2.); |
7373 | let r = _mm_mask_andnot_pd(src, 0b01, a, b); |
7374 | let e = _mm_set_pd(1., ANDN_64); |
7375 | assert_eq_m128d(r, e); |
7376 | } |
7377 | |
7378 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7379 | unsafe fn test_mm_maskz_andnot_pd() { |
7380 | let a = _mm_set1_pd(OPRND1_64); |
7381 | let b = _mm_set1_pd(OPRND2_64); |
7382 | let r = _mm_maskz_andnot_pd(0b01, a, b); |
7383 | let e = _mm_set_pd(0.0, ANDN_64); |
7384 | assert_eq_m128d(r, e); |
7385 | } |
7386 | |
7387 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7388 | unsafe fn test_mm256_mask_andnot_pd() { |
7389 | let a = _mm256_set1_pd(OPRND1_64); |
7390 | let b = _mm256_set1_pd(OPRND2_64); |
7391 | let src = _mm256_set_pd(1., 2., 3., 4.); |
7392 | let r = _mm256_mask_andnot_pd(src, 0b0101, a, b); |
7393 | let e = _mm256_set_pd(1., ANDN_64, 3., ANDN_64); |
7394 | assert_eq_m256d(r, e); |
7395 | } |
7396 | |
7397 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7398 | unsafe fn test_mm256_maskz_andnot_pd() { |
7399 | let a = _mm256_set1_pd(OPRND1_64); |
7400 | let b = _mm256_set1_pd(OPRND2_64); |
7401 | let r = _mm256_maskz_andnot_pd(0b0101, a, b); |
7402 | let e = _mm256_set_pd(0.0, ANDN_64, 0.0, ANDN_64); |
7403 | assert_eq_m256d(r, e); |
7404 | } |
7405 | |
7406 | #[simd_test(enable = "avx512dq" )] |
7407 | unsafe fn test_mm512_andnot_pd() { |
7408 | let a = _mm512_set1_pd(OPRND1_64); |
7409 | let b = _mm512_set1_pd(OPRND2_64); |
7410 | let r = _mm512_andnot_pd(a, b); |
7411 | let e = _mm512_set1_pd(ANDN_64); |
7412 | assert_eq_m512d(r, e); |
7413 | } |
7414 | |
7415 | #[simd_test(enable = "avx512dq" )] |
7416 | unsafe fn test_mm512_mask_andnot_pd() { |
7417 | let a = _mm512_set1_pd(OPRND1_64); |
7418 | let b = _mm512_set1_pd(OPRND2_64); |
7419 | let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
7420 | let r = _mm512_mask_andnot_pd(src, 0b01010101, a, b); |
7421 | let e = _mm512_set_pd(1., ANDN_64, 3., ANDN_64, 5., ANDN_64, 7., ANDN_64); |
7422 | assert_eq_m512d(r, e); |
7423 | } |
7424 | |
7425 | #[simd_test(enable = "avx512dq" )] |
7426 | unsafe fn test_mm512_maskz_andnot_pd() { |
7427 | let a = _mm512_set1_pd(OPRND1_64); |
7428 | let b = _mm512_set1_pd(OPRND2_64); |
7429 | let r = _mm512_maskz_andnot_pd(0b01010101, a, b); |
7430 | let e = _mm512_set_pd(0.0, ANDN_64, 0.0, ANDN_64, 0.0, ANDN_64, 0.0, ANDN_64); |
7431 | assert_eq_m512d(r, e); |
7432 | } |
7433 | |
7434 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7435 | unsafe fn test_mm_mask_andnot_ps() { |
7436 | let a = _mm_set1_ps(OPRND1_32); |
7437 | let b = _mm_set1_ps(OPRND2_32); |
7438 | let src = _mm_set_ps(1., 2., 3., 4.); |
7439 | let r = _mm_mask_andnot_ps(src, 0b0101, a, b); |
7440 | let e = _mm_set_ps(1., ANDN_32, 3., ANDN_32); |
7441 | assert_eq_m128(r, e); |
7442 | } |
7443 | |
7444 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7445 | unsafe fn test_mm_maskz_andnot_ps() { |
7446 | let a = _mm_set1_ps(OPRND1_32); |
7447 | let b = _mm_set1_ps(OPRND2_32); |
7448 | let r = _mm_maskz_andnot_ps(0b0101, a, b); |
7449 | let e = _mm_set_ps(0.0, ANDN_32, 0.0, ANDN_32); |
7450 | assert_eq_m128(r, e); |
7451 | } |
7452 | |
7453 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7454 | unsafe fn test_mm256_mask_andnot_ps() { |
7455 | let a = _mm256_set1_ps(OPRND1_32); |
7456 | let b = _mm256_set1_ps(OPRND2_32); |
7457 | let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
7458 | let r = _mm256_mask_andnot_ps(src, 0b01010101, a, b); |
7459 | let e = _mm256_set_ps(1., ANDN_32, 3., ANDN_32, 5., ANDN_32, 7., ANDN_32); |
7460 | assert_eq_m256(r, e); |
7461 | } |
7462 | |
7463 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7464 | unsafe fn test_mm256_maskz_andnot_ps() { |
7465 | let a = _mm256_set1_ps(OPRND1_32); |
7466 | let b = _mm256_set1_ps(OPRND2_32); |
7467 | let r = _mm256_maskz_andnot_ps(0b01010101, a, b); |
7468 | let e = _mm256_set_ps(0.0, ANDN_32, 0.0, ANDN_32, 0.0, ANDN_32, 0.0, ANDN_32); |
7469 | assert_eq_m256(r, e); |
7470 | } |
7471 | |
7472 | #[simd_test(enable = "avx512dq" )] |
7473 | unsafe fn test_mm512_andnot_ps() { |
7474 | let a = _mm512_set1_ps(OPRND1_32); |
7475 | let b = _mm512_set1_ps(OPRND2_32); |
7476 | let r = _mm512_andnot_ps(a, b); |
7477 | let e = _mm512_set1_ps(ANDN_32); |
7478 | assert_eq_m512(r, e); |
7479 | } |
7480 | |
7481 | #[simd_test(enable = "avx512dq" )] |
7482 | unsafe fn test_mm512_mask_andnot_ps() { |
7483 | let a = _mm512_set1_ps(OPRND1_32); |
7484 | let b = _mm512_set1_ps(OPRND2_32); |
7485 | let src = _mm512_set_ps( |
7486 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
7487 | ); |
7488 | let r = _mm512_mask_andnot_ps(src, 0b0101010101010101, a, b); |
7489 | let e = _mm512_set_ps( |
7490 | 1., ANDN_32, 3., ANDN_32, 5., ANDN_32, 7., ANDN_32, 9., ANDN_32, 11., ANDN_32, 13., |
7491 | ANDN_32, 15., ANDN_32, |
7492 | ); |
7493 | assert_eq_m512(r, e); |
7494 | } |
7495 | |
7496 | #[simd_test(enable = "avx512dq" )] |
7497 | unsafe fn test_mm512_maskz_andnot_ps() { |
7498 | let a = _mm512_set1_ps(OPRND1_32); |
7499 | let b = _mm512_set1_ps(OPRND2_32); |
7500 | let r = _mm512_maskz_andnot_ps(0b0101010101010101, a, b); |
7501 | let e = _mm512_set_ps( |
7502 | 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., |
7503 | ANDN_32, 0., ANDN_32, |
7504 | ); |
7505 | assert_eq_m512(r, e); |
7506 | } |
7507 | |
7508 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7509 | unsafe fn test_mm_mask_or_pd() { |
7510 | let a = _mm_set1_pd(OPRND1_64); |
7511 | let b = _mm_set1_pd(OPRND2_64); |
7512 | let src = _mm_set_pd(1., 2.); |
7513 | let r = _mm_mask_or_pd(src, 0b01, a, b); |
7514 | let e = _mm_set_pd(1., OR_64); |
7515 | assert_eq_m128d(r, e); |
7516 | } |
7517 | |
7518 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7519 | unsafe fn test_mm_maskz_or_pd() { |
7520 | let a = _mm_set1_pd(OPRND1_64); |
7521 | let b = _mm_set1_pd(OPRND2_64); |
7522 | let r = _mm_maskz_or_pd(0b01, a, b); |
7523 | let e = _mm_set_pd(0.0, OR_64); |
7524 | assert_eq_m128d(r, e); |
7525 | } |
7526 | |
7527 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7528 | unsafe fn test_mm256_mask_or_pd() { |
7529 | let a = _mm256_set1_pd(OPRND1_64); |
7530 | let b = _mm256_set1_pd(OPRND2_64); |
7531 | let src = _mm256_set_pd(1., 2., 3., 4.); |
7532 | let r = _mm256_mask_or_pd(src, 0b0101, a, b); |
7533 | let e = _mm256_set_pd(1., OR_64, 3., OR_64); |
7534 | assert_eq_m256d(r, e); |
7535 | } |
7536 | |
7537 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7538 | unsafe fn test_mm256_maskz_or_pd() { |
7539 | let a = _mm256_set1_pd(OPRND1_64); |
7540 | let b = _mm256_set1_pd(OPRND2_64); |
7541 | let r = _mm256_maskz_or_pd(0b0101, a, b); |
7542 | let e = _mm256_set_pd(0.0, OR_64, 0.0, OR_64); |
7543 | assert_eq_m256d(r, e); |
7544 | } |
7545 | |
7546 | #[simd_test(enable = "avx512dq" )] |
7547 | unsafe fn test_mm512_or_pd() { |
7548 | let a = _mm512_set1_pd(OPRND1_64); |
7549 | let b = _mm512_set1_pd(OPRND2_64); |
7550 | let r = _mm512_or_pd(a, b); |
7551 | let e = _mm512_set1_pd(OR_64); |
7552 | assert_eq_m512d(r, e); |
7553 | } |
7554 | |
7555 | #[simd_test(enable = "avx512dq" )] |
7556 | unsafe fn test_mm512_mask_or_pd() { |
7557 | let a = _mm512_set1_pd(OPRND1_64); |
7558 | let b = _mm512_set1_pd(OPRND2_64); |
7559 | let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
7560 | let r = _mm512_mask_or_pd(src, 0b01010101, a, b); |
7561 | let e = _mm512_set_pd(1., OR_64, 3., OR_64, 5., OR_64, 7., OR_64); |
7562 | assert_eq_m512d(r, e); |
7563 | } |
7564 | |
7565 | #[simd_test(enable = "avx512dq" )] |
7566 | unsafe fn test_mm512_maskz_or_pd() { |
7567 | let a = _mm512_set1_pd(OPRND1_64); |
7568 | let b = _mm512_set1_pd(OPRND2_64); |
7569 | let r = _mm512_maskz_or_pd(0b01010101, a, b); |
7570 | let e = _mm512_set_pd(0.0, OR_64, 0.0, OR_64, 0.0, OR_64, 0.0, OR_64); |
7571 | assert_eq_m512d(r, e); |
7572 | } |
7573 | |
7574 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7575 | unsafe fn test_mm_mask_or_ps() { |
7576 | let a = _mm_set1_ps(OPRND1_32); |
7577 | let b = _mm_set1_ps(OPRND2_32); |
7578 | let src = _mm_set_ps(1., 2., 3., 4.); |
7579 | let r = _mm_mask_or_ps(src, 0b0101, a, b); |
7580 | let e = _mm_set_ps(1., OR_32, 3., OR_32); |
7581 | assert_eq_m128(r, e); |
7582 | } |
7583 | |
7584 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7585 | unsafe fn test_mm_maskz_or_ps() { |
7586 | let a = _mm_set1_ps(OPRND1_32); |
7587 | let b = _mm_set1_ps(OPRND2_32); |
7588 | let r = _mm_maskz_or_ps(0b0101, a, b); |
7589 | let e = _mm_set_ps(0.0, OR_32, 0.0, OR_32); |
7590 | assert_eq_m128(r, e); |
7591 | } |
7592 | |
7593 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7594 | unsafe fn test_mm256_mask_or_ps() { |
7595 | let a = _mm256_set1_ps(OPRND1_32); |
7596 | let b = _mm256_set1_ps(OPRND2_32); |
7597 | let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
7598 | let r = _mm256_mask_or_ps(src, 0b01010101, a, b); |
7599 | let e = _mm256_set_ps(1., OR_32, 3., OR_32, 5., OR_32, 7., OR_32); |
7600 | assert_eq_m256(r, e); |
7601 | } |
7602 | |
7603 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7604 | unsafe fn test_mm256_maskz_or_ps() { |
7605 | let a = _mm256_set1_ps(OPRND1_32); |
7606 | let b = _mm256_set1_ps(OPRND2_32); |
7607 | let r = _mm256_maskz_or_ps(0b01010101, a, b); |
7608 | let e = _mm256_set_ps(0.0, OR_32, 0.0, OR_32, 0.0, OR_32, 0.0, OR_32); |
7609 | assert_eq_m256(r, e); |
7610 | } |
7611 | |
7612 | #[simd_test(enable = "avx512dq" )] |
7613 | unsafe fn test_mm512_or_ps() { |
7614 | let a = _mm512_set1_ps(OPRND1_32); |
7615 | let b = _mm512_set1_ps(OPRND2_32); |
7616 | let r = _mm512_or_ps(a, b); |
7617 | let e = _mm512_set1_ps(OR_32); |
7618 | assert_eq_m512(r, e); |
7619 | } |
7620 | |
7621 | #[simd_test(enable = "avx512dq" )] |
7622 | unsafe fn test_mm512_mask_or_ps() { |
7623 | let a = _mm512_set1_ps(OPRND1_32); |
7624 | let b = _mm512_set1_ps(OPRND2_32); |
7625 | let src = _mm512_set_ps( |
7626 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
7627 | ); |
7628 | let r = _mm512_mask_or_ps(src, 0b0101010101010101, a, b); |
7629 | let e = _mm512_set_ps( |
7630 | 1., OR_32, 3., OR_32, 5., OR_32, 7., OR_32, 9., OR_32, 11., OR_32, 13., OR_32, 15., |
7631 | OR_32, |
7632 | ); |
7633 | assert_eq_m512(r, e); |
7634 | } |
7635 | |
7636 | #[simd_test(enable = "avx512dq" )] |
7637 | unsafe fn test_mm512_maskz_or_ps() { |
7638 | let a = _mm512_set1_ps(OPRND1_32); |
7639 | let b = _mm512_set1_ps(OPRND2_32); |
7640 | let r = _mm512_maskz_or_ps(0b0101010101010101, a, b); |
7641 | let e = _mm512_set_ps( |
7642 | 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, |
7643 | ); |
7644 | assert_eq_m512(r, e); |
7645 | } |
7646 | |
7647 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7648 | unsafe fn test_mm_mask_xor_pd() { |
7649 | let a = _mm_set1_pd(OPRND1_64); |
7650 | let b = _mm_set1_pd(OPRND2_64); |
7651 | let src = _mm_set_pd(1., 2.); |
7652 | let r = _mm_mask_xor_pd(src, 0b01, a, b); |
7653 | let e = _mm_set_pd(1., XOR_64); |
7654 | assert_eq_m128d(r, e); |
7655 | } |
7656 | |
7657 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7658 | unsafe fn test_mm_maskz_xor_pd() { |
7659 | let a = _mm_set1_pd(OPRND1_64); |
7660 | let b = _mm_set1_pd(OPRND2_64); |
7661 | let r = _mm_maskz_xor_pd(0b01, a, b); |
7662 | let e = _mm_set_pd(0.0, XOR_64); |
7663 | assert_eq_m128d(r, e); |
7664 | } |
7665 | |
7666 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7667 | unsafe fn test_mm256_mask_xor_pd() { |
7668 | let a = _mm256_set1_pd(OPRND1_64); |
7669 | let b = _mm256_set1_pd(OPRND2_64); |
7670 | let src = _mm256_set_pd(1., 2., 3., 4.); |
7671 | let r = _mm256_mask_xor_pd(src, 0b0101, a, b); |
7672 | let e = _mm256_set_pd(1., XOR_64, 3., XOR_64); |
7673 | assert_eq_m256d(r, e); |
7674 | } |
7675 | |
7676 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7677 | unsafe fn test_mm256_maskz_xor_pd() { |
7678 | let a = _mm256_set1_pd(OPRND1_64); |
7679 | let b = _mm256_set1_pd(OPRND2_64); |
7680 | let r = _mm256_maskz_xor_pd(0b0101, a, b); |
7681 | let e = _mm256_set_pd(0.0, XOR_64, 0.0, XOR_64); |
7682 | assert_eq_m256d(r, e); |
7683 | } |
7684 | |
7685 | #[simd_test(enable = "avx512dq" )] |
7686 | unsafe fn test_mm512_xor_pd() { |
7687 | let a = _mm512_set1_pd(OPRND1_64); |
7688 | let b = _mm512_set1_pd(OPRND2_64); |
7689 | let r = _mm512_xor_pd(a, b); |
7690 | let e = _mm512_set1_pd(XOR_64); |
7691 | assert_eq_m512d(r, e); |
7692 | } |
7693 | |
7694 | #[simd_test(enable = "avx512dq" )] |
7695 | unsafe fn test_mm512_mask_xor_pd() { |
7696 | let a = _mm512_set1_pd(OPRND1_64); |
7697 | let b = _mm512_set1_pd(OPRND2_64); |
7698 | let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
7699 | let r = _mm512_mask_xor_pd(src, 0b01010101, a, b); |
7700 | let e = _mm512_set_pd(1., XOR_64, 3., XOR_64, 5., XOR_64, 7., XOR_64); |
7701 | assert_eq_m512d(r, e); |
7702 | } |
7703 | |
7704 | #[simd_test(enable = "avx512dq" )] |
7705 | unsafe fn test_mm512_maskz_xor_pd() { |
7706 | let a = _mm512_set1_pd(OPRND1_64); |
7707 | let b = _mm512_set1_pd(OPRND2_64); |
7708 | let r = _mm512_maskz_xor_pd(0b01010101, a, b); |
7709 | let e = _mm512_set_pd(0.0, XOR_64, 0.0, XOR_64, 0.0, XOR_64, 0.0, XOR_64); |
7710 | assert_eq_m512d(r, e); |
7711 | } |
7712 | |
7713 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7714 | unsafe fn test_mm_mask_xor_ps() { |
7715 | let a = _mm_set1_ps(OPRND1_32); |
7716 | let b = _mm_set1_ps(OPRND2_32); |
7717 | let src = _mm_set_ps(1., 2., 3., 4.); |
7718 | let r = _mm_mask_xor_ps(src, 0b0101, a, b); |
7719 | let e = _mm_set_ps(1., XOR_32, 3., XOR_32); |
7720 | assert_eq_m128(r, e); |
7721 | } |
7722 | |
7723 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7724 | unsafe fn test_mm_maskz_xor_ps() { |
7725 | let a = _mm_set1_ps(OPRND1_32); |
7726 | let b = _mm_set1_ps(OPRND2_32); |
7727 | let r = _mm_maskz_xor_ps(0b0101, a, b); |
7728 | let e = _mm_set_ps(0.0, XOR_32, 0.0, XOR_32); |
7729 | assert_eq_m128(r, e); |
7730 | } |
7731 | |
7732 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7733 | unsafe fn test_mm256_mask_xor_ps() { |
7734 | let a = _mm256_set1_ps(OPRND1_32); |
7735 | let b = _mm256_set1_ps(OPRND2_32); |
7736 | let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
7737 | let r = _mm256_mask_xor_ps(src, 0b01010101, a, b); |
7738 | let e = _mm256_set_ps(1., XOR_32, 3., XOR_32, 5., XOR_32, 7., XOR_32); |
7739 | assert_eq_m256(r, e); |
7740 | } |
7741 | |
7742 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7743 | unsafe fn test_mm256_maskz_xor_ps() { |
7744 | let a = _mm256_set1_ps(OPRND1_32); |
7745 | let b = _mm256_set1_ps(OPRND2_32); |
7746 | let r = _mm256_maskz_xor_ps(0b01010101, a, b); |
7747 | let e = _mm256_set_ps(0.0, XOR_32, 0.0, XOR_32, 0.0, XOR_32, 0.0, XOR_32); |
7748 | assert_eq_m256(r, e); |
7749 | } |
7750 | |
7751 | #[simd_test(enable = "avx512dq" )] |
7752 | unsafe fn test_mm512_xor_ps() { |
7753 | let a = _mm512_set1_ps(OPRND1_32); |
7754 | let b = _mm512_set1_ps(OPRND2_32); |
7755 | let r = _mm512_xor_ps(a, b); |
7756 | let e = _mm512_set1_ps(XOR_32); |
7757 | assert_eq_m512(r, e); |
7758 | } |
7759 | |
7760 | #[simd_test(enable = "avx512dq" )] |
7761 | unsafe fn test_mm512_mask_xor_ps() { |
7762 | let a = _mm512_set1_ps(OPRND1_32); |
7763 | let b = _mm512_set1_ps(OPRND2_32); |
7764 | let src = _mm512_set_ps( |
7765 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
7766 | ); |
7767 | let r = _mm512_mask_xor_ps(src, 0b0101010101010101, a, b); |
7768 | let e = _mm512_set_ps( |
7769 | 1., XOR_32, 3., XOR_32, 5., XOR_32, 7., XOR_32, 9., XOR_32, 11., XOR_32, 13., XOR_32, |
7770 | 15., XOR_32, |
7771 | ); |
7772 | assert_eq_m512(r, e); |
7773 | } |
7774 | |
7775 | #[simd_test(enable = "avx512dq" )] |
7776 | unsafe fn test_mm512_maskz_xor_ps() { |
7777 | let a = _mm512_set1_ps(OPRND1_32); |
7778 | let b = _mm512_set1_ps(OPRND2_32); |
7779 | let r = _mm512_maskz_xor_ps(0b0101010101010101, a, b); |
7780 | let e = _mm512_set_ps( |
7781 | 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., |
7782 | XOR_32, |
7783 | ); |
7784 | assert_eq_m512(r, e); |
7785 | } |
7786 | |
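// The `broadcast_f32x2` (and, further below, `broadcast_i32x2`) intrinsics repeat the two
// lowest lanes of `a` across the destination; with `a = set(1, 2, 3, 4)` those lanes hold 3
// and 4, hence the repeating `3, 4` pattern in the expected vectors.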
7787 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7788 | unsafe fn test_mm256_broadcast_f32x2() { |
7789 | let a = _mm_set_ps(1., 2., 3., 4.); |
7790 | let r = _mm256_broadcast_f32x2(a); |
7791 | let e = _mm256_set_ps(3., 4., 3., 4., 3., 4., 3., 4.); |
7792 | assert_eq_m256(r, e); |
7793 | } |
7794 | |
7795 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7796 | unsafe fn test_mm256_mask_broadcast_f32x2() { |
7797 | let a = _mm_set_ps(1., 2., 3., 4.); |
7798 | let b = _mm256_set_ps(5., 6., 7., 8., 9., 10., 11., 12.); |
7799 | let r = _mm256_mask_broadcast_f32x2(b, 0b01101001, a); |
7800 | let e = _mm256_set_ps(5., 4., 3., 8., 3., 10., 11., 4.); |
7801 | assert_eq_m256(r, e); |
7802 | } |
7803 | |
7804 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7805 | unsafe fn test_mm256_maskz_broadcast_f32x2() { |
7806 | let a = _mm_set_ps(1., 2., 3., 4.); |
7807 | let r = _mm256_maskz_broadcast_f32x2(0b01101001, a); |
7808 | let e = _mm256_set_ps(0., 4., 3., 0., 3., 0., 0., 4.); |
7809 | assert_eq_m256(r, e); |
7810 | } |
7811 | |
7812 | #[simd_test(enable = "avx512dq" )] |
7813 | unsafe fn test_mm512_broadcast_f32x2() { |
7814 | let a = _mm_set_ps(1., 2., 3., 4.); |
7815 | let r = _mm512_broadcast_f32x2(a); |
7816 | let e = _mm512_set_ps( |
7817 | 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., |
7818 | ); |
7819 | assert_eq_m512(r, e); |
7820 | } |
7821 | |
7822 | #[simd_test(enable = "avx512dq" )] |
7823 | unsafe fn test_mm512_mask_broadcast_f32x2() { |
7824 | let a = _mm_set_ps(1., 2., 3., 4.); |
7825 | let b = _mm512_set_ps( |
7826 | 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., |
7827 | ); |
7828 | let r = _mm512_mask_broadcast_f32x2(b, 0b0110100100111100, a); |
7829 | let e = _mm512_set_ps( |
7830 | 5., 4., 3., 8., 3., 10., 11., 4., 13., 14., 3., 4., 3., 4., 19., 20., |
7831 | ); |
7832 | assert_eq_m512(r, e); |
7833 | } |
7834 | |
7835 | #[simd_test(enable = "avx512dq" )] |
7836 | unsafe fn test_mm512_maskz_broadcast_f32x2() { |
7837 | let a = _mm_set_ps(1., 2., 3., 4.); |
7838 | let r = _mm512_maskz_broadcast_f32x2(0b0110100100111100, a); |
7839 | let e = _mm512_set_ps( |
7840 | 0., 4., 3., 0., 3., 0., 0., 4., 0., 0., 3., 4., 3., 4., 0., 0., |
7841 | ); |
7842 | assert_eq_m512(r, e); |
7843 | } |
7844 | |
7845 | #[simd_test(enable = "avx512dq" )] |
7846 | unsafe fn test_mm512_broadcast_f32x8() { |
7847 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
7848 | let r = _mm512_broadcast_f32x8(a); |
7849 | let e = _mm512_set_ps( |
7850 | 1., 2., 3., 4., 5., 6., 7., 8., 1., 2., 3., 4., 5., 6., 7., 8., |
7851 | ); |
7852 | assert_eq_m512(r, e); |
7853 | } |
7854 | |
7855 | #[simd_test(enable = "avx512dq" )] |
7856 | unsafe fn test_mm512_mask_broadcast_f32x8() { |
7857 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
7858 | let b = _mm512_set_ps( |
7859 | 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., |
7860 | ); |
7861 | let r = _mm512_mask_broadcast_f32x8(b, 0b0110100100111100, a); |
7862 | let e = _mm512_set_ps( |
7863 | 9., 2., 3., 12., 5., 14., 15., 8., 17., 18., 3., 4., 5., 6., 23., 24., |
7864 | ); |
7865 | assert_eq_m512(r, e); |
7866 | } |
7867 | |
7868 | #[simd_test(enable = "avx512dq" )] |
7869 | unsafe fn test_mm512_maskz_broadcast_f32x8() { |
7870 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
7871 | let r = _mm512_maskz_broadcast_f32x8(0b0110100100111100, a); |
7872 | let e = _mm512_set_ps( |
7873 | 0., 2., 3., 0., 5., 0., 0., 8., 0., 0., 3., 4., 5., 6., 0., 0., |
7874 | ); |
7875 | assert_eq_m512(r, e); |
7876 | } |
7877 | |
7878 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7879 | unsafe fn test_mm256_broadcast_f64x2() { |
7880 | let a = _mm_set_pd(1., 2.); |
7881 | let r = _mm256_broadcast_f64x2(a); |
7882 | let e = _mm256_set_pd(1., 2., 1., 2.); |
7883 | assert_eq_m256d(r, e); |
7884 | } |
7885 | |
7886 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7887 | unsafe fn test_mm256_mask_broadcast_f64x2() { |
7888 | let a = _mm_set_pd(1., 2.); |
7889 | let b = _mm256_set_pd(3., 4., 5., 6.); |
7890 | let r = _mm256_mask_broadcast_f64x2(b, 0b0110, a); |
7891 | let e = _mm256_set_pd(3., 2., 1., 6.); |
7892 | assert_eq_m256d(r, e); |
7893 | } |
7894 | |
7895 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7896 | unsafe fn test_mm256_maskz_broadcast_f64x2() { |
7897 | let a = _mm_set_pd(1., 2.); |
7898 | let r = _mm256_maskz_broadcast_f64x2(0b0110, a); |
7899 | let e = _mm256_set_pd(0., 2., 1., 0.); |
7900 | assert_eq_m256d(r, e); |
7901 | } |
7902 | |
7903 | #[simd_test(enable = "avx512dq" )] |
7904 | unsafe fn test_mm512_broadcast_f64x2() { |
7905 | let a = _mm_set_pd(1., 2.); |
7906 | let r = _mm512_broadcast_f64x2(a); |
7907 | let e = _mm512_set_pd(1., 2., 1., 2., 1., 2., 1., 2.); |
7908 | assert_eq_m512d(r, e); |
7909 | } |
7910 | |
7911 | #[simd_test(enable = "avx512dq" )] |
7912 | unsafe fn test_mm512_mask_broadcast_f64x2() { |
7913 | let a = _mm_set_pd(1., 2.); |
7914 | let b = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.); |
7915 | let r = _mm512_mask_broadcast_f64x2(b, 0b01101001, a); |
7916 | let e = _mm512_set_pd(3., 2., 1., 6., 1., 8., 9., 2.); |
7917 | assert_eq_m512d(r, e); |
7918 | } |
7919 | |
7920 | #[simd_test(enable = "avx512dq" )] |
7921 | unsafe fn test_mm512_maskz_broadcast_f64x2() { |
7922 | let a = _mm_set_pd(1., 2.); |
7923 | let r = _mm512_maskz_broadcast_f64x2(0b01101001, a); |
7924 | let e = _mm512_set_pd(0., 2., 1., 0., 1., 0., 0., 2.); |
7925 | assert_eq_m512d(r, e); |
7926 | } |
7927 | |
7928 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7929 | unsafe fn test_mm_broadcast_i32x2() { |
7930 | let a = _mm_set_epi32(1, 2, 3, 4); |
7931 | let r = _mm_broadcast_i32x2(a); |
7932 | let e = _mm_set_epi32(3, 4, 3, 4); |
7933 | assert_eq_m128i(r, e); |
7934 | } |
7935 | |
7936 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7937 | unsafe fn test_mm_mask_broadcast_i32x2() { |
7938 | let a = _mm_set_epi32(1, 2, 3, 4); |
7939 | let b = _mm_set_epi32(5, 6, 7, 8); |
7940 | let r = _mm_mask_broadcast_i32x2(b, 0b0110, a); |
7941 | let e = _mm_set_epi32(5, 4, 3, 8); |
7942 | assert_eq_m128i(r, e); |
7943 | } |
7944 | |
7945 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7946 | unsafe fn test_mm_maskz_broadcast_i32x2() { |
7947 | let a = _mm_set_epi32(1, 2, 3, 4); |
7948 | let r = _mm_maskz_broadcast_i32x2(0b0110, a); |
7949 | let e = _mm_set_epi32(0, 4, 3, 0); |
7950 | assert_eq_m128i(r, e); |
7951 | } |
7952 | |
7953 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7954 | unsafe fn test_mm256_broadcast_i32x2() { |
7955 | let a = _mm_set_epi32(1, 2, 3, 4); |
7956 | let r = _mm256_broadcast_i32x2(a); |
7957 | let e = _mm256_set_epi32(3, 4, 3, 4, 3, 4, 3, 4); |
7958 | assert_eq_m256i(r, e); |
7959 | } |
7960 | |
7961 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7962 | unsafe fn test_mm256_mask_broadcast_i32x2() { |
7963 | let a = _mm_set_epi32(1, 2, 3, 4); |
7964 | let b = _mm256_set_epi32(5, 6, 7, 8, 9, 10, 11, 12); |
7965 | let r = _mm256_mask_broadcast_i32x2(b, 0b01101001, a); |
7966 | let e = _mm256_set_epi32(5, 4, 3, 8, 3, 10, 11, 4); |
7967 | assert_eq_m256i(r, e); |
7968 | } |
7969 | |
7970 | #[simd_test(enable = "avx512dq,avx512vl" )] |
7971 | unsafe fn test_mm256_maskz_broadcast_i32x2() { |
7972 | let a = _mm_set_epi32(1, 2, 3, 4); |
7973 | let r = _mm256_maskz_broadcast_i32x2(0b01101001, a); |
7974 | let e = _mm256_set_epi32(0, 4, 3, 0, 3, 0, 0, 4); |
7975 | assert_eq_m256i(r, e); |
7976 | } |
7977 | |
7978 | #[simd_test(enable = "avx512dq" )] |
7979 | unsafe fn test_mm512_broadcast_i32x2() { |
7980 | let a = _mm_set_epi32(1, 2, 3, 4); |
7981 | let r = _mm512_broadcast_i32x2(a); |
7982 | let e = _mm512_set_epi32(3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4); |
7983 | assert_eq_m512i(r, e); |
7984 | } |
7985 | |
7986 | #[simd_test(enable = "avx512dq" )] |
7987 | unsafe fn test_mm512_mask_broadcast_i32x2() { |
7988 | let a = _mm_set_epi32(1, 2, 3, 4); |
7989 | let b = _mm512_set_epi32(5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20); |
7990 | let r = _mm512_mask_broadcast_i32x2(b, 0b0110100100111100, a); |
7991 | let e = _mm512_set_epi32(5, 4, 3, 8, 3, 10, 11, 4, 13, 14, 3, 4, 3, 4, 19, 20); |
7992 | assert_eq_m512i(r, e); |
7993 | } |
7994 | |
7995 | #[simd_test(enable = "avx512dq" )] |
7996 | unsafe fn test_mm512_maskz_broadcast_i32x2() { |
7997 | let a = _mm_set_epi32(1, 2, 3, 4); |
7998 | let r = _mm512_maskz_broadcast_i32x2(0b0110100100111100, a); |
7999 | let e = _mm512_set_epi32(0, 4, 3, 0, 3, 0, 0, 4, 0, 0, 3, 4, 3, 4, 0, 0); |
8000 | assert_eq_m512i(r, e); |
8001 | } |
8002 | |
8003 | #[simd_test(enable = "avx512dq" )] |
8004 | unsafe fn test_mm512_broadcast_i32x8() { |
8005 | let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
8006 | let r = _mm512_broadcast_i32x8(a); |
8007 | let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); |
8008 | assert_eq_m512i(r, e); |
8009 | } |
8010 | |
8011 | #[simd_test(enable = "avx512dq" )] |
8012 | unsafe fn test_mm512_mask_broadcast_i32x8() { |
8013 | let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
8014 | let b = _mm512_set_epi32( |
8015 | 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
8016 | ); |
8017 | let r = _mm512_mask_broadcast_i32x8(b, 0b0110100100111100, a); |
8018 | let e = _mm512_set_epi32(9, 2, 3, 12, 5, 14, 15, 8, 17, 18, 3, 4, 5, 6, 23, 24); |
8019 | assert_eq_m512i(r, e); |
8020 | } |
8021 | |
8022 | #[simd_test(enable = "avx512dq" )] |
8023 | unsafe fn test_mm512_maskz_broadcast_i32x8() { |
8024 | let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
8025 | let r = _mm512_maskz_broadcast_i32x8(0b0110100100111100, a); |
8026 | let e = _mm512_set_epi32(0, 2, 3, 0, 5, 0, 0, 8, 0, 0, 3, 4, 5, 6, 0, 0); |
8027 | assert_eq_m512i(r, e); |
8028 | } |
8029 | |
8030 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8031 | unsafe fn test_mm256_broadcast_i64x2() { |
8032 | let a = _mm_set_epi64x(1, 2); |
8033 | let r = _mm256_broadcast_i64x2(a); |
8034 | let e = _mm256_set_epi64x(1, 2, 1, 2); |
8035 | assert_eq_m256i(r, e); |
8036 | } |
8037 | |
8038 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8039 | unsafe fn test_mm256_mask_broadcast_i64x2() { |
8040 | let a = _mm_set_epi64x(1, 2); |
8041 | let b = _mm256_set_epi64x(3, 4, 5, 6); |
8042 | let r = _mm256_mask_broadcast_i64x2(b, 0b0110, a); |
8043 | let e = _mm256_set_epi64x(3, 2, 1, 6); |
8044 | assert_eq_m256i(r, e); |
8045 | } |
8046 | |
8047 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8048 | unsafe fn test_mm256_maskz_broadcast_i64x2() { |
8049 | let a = _mm_set_epi64x(1, 2); |
8050 | let r = _mm256_maskz_broadcast_i64x2(0b0110, a); |
8051 | let e = _mm256_set_epi64x(0, 2, 1, 0); |
8052 | assert_eq_m256i(r, e); |
8053 | } |
8054 | |
8055 | #[simd_test(enable = "avx512dq" )] |
8056 | unsafe fn test_mm512_broadcast_i64x2() { |
8057 | let a = _mm_set_epi64x(1, 2); |
8058 | let r = _mm512_broadcast_i64x2(a); |
8059 | let e = _mm512_set_epi64(1, 2, 1, 2, 1, 2, 1, 2); |
8060 | assert_eq_m512i(r, e); |
8061 | } |
8062 | |
8063 | #[simd_test(enable = "avx512dq" )] |
8064 | unsafe fn test_mm512_mask_broadcast_i64x2() { |
8065 | let a = _mm_set_epi64x(1, 2); |
8066 | let b = _mm512_set_epi64(3, 4, 5, 6, 7, 8, 9, 10); |
8067 | let r = _mm512_mask_broadcast_i64x2(b, 0b01101001, a); |
8068 | let e = _mm512_set_epi64(3, 2, 1, 6, 1, 8, 9, 2); |
8069 | assert_eq_m512i(r, e); |
8070 | } |
8071 | |
8072 | #[simd_test(enable = "avx512dq" )] |
8073 | unsafe fn test_mm512_maskz_broadcast_i64x2() { |
8074 | let a = _mm_set_epi64x(1, 2); |
8075 | let r = _mm512_maskz_broadcast_i64x2(0b01101001, a); |
8076 | let e = _mm512_set_epi64(0, 2, 1, 0, 1, 0, 0, 2); |
8077 | assert_eq_m512i(r, e); |
8078 | } |
8079 | |
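// The extract intrinsics return the 256-bit (x8) or 128-bit (x2) lane selected by the const
// index, counted from the lowest lane; e.g. `::<1>` on a 512-bit vector yields its upper
// 256-bit half.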
8080 | #[simd_test(enable = "avx512dq" )] |
8081 | unsafe fn test_mm512_extractf32x8_ps() { |
8082 | let a = _mm512_set_ps( |
8083 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
8084 | ); |
8085 | let r = _mm512_extractf32x8_ps::<1>(a); |
8086 | let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
8087 | assert_eq_m256(r, e); |
8088 | } |
8089 | |
8090 | #[simd_test(enable = "avx512dq" )] |
8091 | unsafe fn test_mm512_mask_extractf32x8_ps() { |
8092 | let a = _mm512_set_ps( |
8093 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
8094 | ); |
8095 | let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.); |
8096 | let r = _mm512_mask_extractf32x8_ps::<1>(b, 0b01101001, a); |
8097 | let e = _mm256_set_ps(17., 2., 3., 20., 5., 22., 23., 8.); |
8098 | assert_eq_m256(r, e); |
8099 | } |
8100 | |
8101 | #[simd_test(enable = "avx512dq" )] |
8102 | unsafe fn test_mm512_maskz_extractf32x8_ps() { |
8103 | let a = _mm512_set_ps( |
8104 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
8105 | ); |
8106 | let r = _mm512_maskz_extractf32x8_ps::<1>(0b01101001, a); |
8107 | let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.); |
8108 | assert_eq_m256(r, e); |
8109 | } |
8110 | |
8111 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8112 | unsafe fn test_mm256_extractf64x2_pd() { |
8113 | let a = _mm256_set_pd(1., 2., 3., 4.); |
8114 | let r = _mm256_extractf64x2_pd::<1>(a); |
8115 | let e = _mm_set_pd(1., 2.); |
8116 | assert_eq_m128d(r, e); |
8117 | } |
8118 | |
8119 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8120 | unsafe fn test_mm256_mask_extractf64x2_pd() { |
8121 | let a = _mm256_set_pd(1., 2., 3., 4.); |
8122 | let b = _mm_set_pd(5., 6.); |
8123 | let r = _mm256_mask_extractf64x2_pd::<1>(b, 0b01, a); |
8124 | let e = _mm_set_pd(5., 2.); |
8125 | assert_eq_m128d(r, e); |
8126 | } |
8127 | |
8128 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8129 | unsafe fn test_mm256_maskz_extractf64x2_pd() { |
8130 | let a = _mm256_set_pd(1., 2., 3., 4.); |
8131 | let r = _mm256_maskz_extractf64x2_pd::<1>(0b01, a); |
8132 | let e = _mm_set_pd(0., 2.); |
8133 | assert_eq_m128d(r, e); |
8134 | } |
8135 | |
8136 | #[simd_test(enable = "avx512dq" )] |
8137 | unsafe fn test_mm512_extractf64x2_pd() { |
8138 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
8139 | let r = _mm512_extractf64x2_pd::<2>(a); |
8140 | let e = _mm_set_pd(3., 4.); |
8141 | assert_eq_m128d(r, e); |
8142 | } |
8143 | |
8144 | #[simd_test(enable = "avx512dq" )] |
8145 | unsafe fn test_mm512_mask_extractf64x2_pd() { |
8146 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
8147 | let b = _mm_set_pd(9., 10.); |
8148 | let r = _mm512_mask_extractf64x2_pd::<2>(b, 0b01, a); |
8149 | let e = _mm_set_pd(9., 4.); |
8150 | assert_eq_m128d(r, e); |
8151 | } |
8152 | |
8153 | #[simd_test(enable = "avx512dq" )] |
8154 | unsafe fn test_mm512_maskz_extractf64x2_pd() { |
8155 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
8156 | let r = _mm512_maskz_extractf64x2_pd::<2>(0b01, a); |
8157 | let e = _mm_set_pd(0., 4.); |
8158 | assert_eq_m128d(r, e); |
8159 | } |
8160 | |
8161 | #[simd_test(enable = "avx512dq" )] |
8162 | unsafe fn test_mm512_extracti32x8_epi32() { |
8163 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
8164 | let r = _mm512_extracti32x8_epi32::<1>(a); |
8165 | let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
8166 | assert_eq_m256i(r, e); |
8167 | } |
8168 | |
8169 | #[simd_test(enable = "avx512dq" )] |
8170 | unsafe fn test_mm512_mask_extracti32x8_epi32() { |
8171 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
8172 | let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24); |
8173 | let r = _mm512_mask_extracti32x8_epi32::<1>(b, 0b01101001, a); |
8174 | let e = _mm256_set_epi32(17, 2, 3, 20, 5, 22, 23, 8); |
8175 | assert_eq_m256i(r, e); |
8176 | } |
8177 | |
8178 | #[simd_test(enable = "avx512dq" )] |
8179 | unsafe fn test_mm512_maskz_extracti32x8_epi32() { |
8180 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
8181 | let r = _mm512_maskz_extracti32x8_epi32::<1>(0b01101001, a); |
8182 | let e = _mm256_set_epi32(0, 2, 3, 0, 5, 0, 0, 8); |
8183 | assert_eq_m256i(r, e); |
8184 | } |
8185 | |
8186 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8187 | unsafe fn test_mm256_extracti64x2_epi64() { |
8188 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
8189 | let r = _mm256_extracti64x2_epi64::<1>(a); |
8190 | let e = _mm_set_epi64x(1, 2); |
8191 | assert_eq_m128i(r, e); |
8192 | } |
8193 | |
8194 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8195 | unsafe fn test_mm256_mask_extracti64x2_epi64() { |
8196 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
8197 | let b = _mm_set_epi64x(5, 6); |
8198 | let r = _mm256_mask_extracti64x2_epi64::<1>(b, 0b01, a); |
8199 | let e = _mm_set_epi64x(5, 2); |
8200 | assert_eq_m128i(r, e); |
8201 | } |
8202 | |
8203 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8204 | unsafe fn test_mm256_maskz_extracti64x2_epi64() { |
8205 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
8206 | let r = _mm256_maskz_extracti64x2_epi64::<1>(0b01, a); |
8207 | let e = _mm_set_epi64x(0, 2); |
8208 | assert_eq_m128i(r, e); |
8209 | } |
8210 | |
8211 | #[simd_test(enable = "avx512dq" )] |
8212 | unsafe fn test_mm512_extracti64x2_epi64() { |
8213 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8214 | let r = _mm512_extracti64x2_epi64::<2>(a); |
8215 | let e = _mm_set_epi64x(3, 4); |
8216 | assert_eq_m128i(r, e); |
8217 | } |
8218 | |
8219 | #[simd_test(enable = "avx512dq" )] |
8220 | unsafe fn test_mm512_mask_extracti64x2_epi64() { |
8221 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8222 | let b = _mm_set_epi64x(9, 10); |
8223 | let r = _mm512_mask_extracti64x2_epi64::<2>(b, 0b01, a); |
8224 | let e = _mm_set_epi64x(9, 4); |
8225 | assert_eq_m128i(r, e); |
8226 | } |
8227 | |
8228 | #[simd_test(enable = "avx512dq" )] |
8229 | unsafe fn test_mm512_maskz_extracti64x2_epi64() { |
8230 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8231 | let r = _mm512_maskz_extracti64x2_epi64::<2>(0b01, a); |
8232 | let e = _mm_set_epi64x(0, 4); |
8233 | assert_eq_m128i(r, e); |
8234 | } |
8235 | |
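// The insert intrinsics overwrite the lane selected by the const index with `b`, leaving the
// remaining lanes of `a` unchanged; the masked variants then blend the result with `src` (or
// zero) element-wise.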
8236 | #[simd_test(enable = "avx512dq" )] |
8237 | unsafe fn test_mm512_insertf32x8() { |
8238 | let a = _mm512_set_ps( |
8239 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
8240 | ); |
8241 | let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.); |
8242 | let r = _mm512_insertf32x8::<1>(a, b); |
8243 | let e = _mm512_set_ps( |
8244 | 17., 18., 19., 20., 21., 22., 23., 24., 9., 10., 11., 12., 13., 14., 15., 16., |
8245 | ); |
8246 | assert_eq_m512(r, e); |
8247 | } |
8248 | |
8249 | #[simd_test(enable = "avx512dq" )] |
8250 | unsafe fn test_mm512_mask_insertf32x8() { |
8251 | let a = _mm512_set_ps( |
8252 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
8253 | ); |
8254 | let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.); |
8255 | let src = _mm512_set_ps( |
8256 | 25., 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., |
8257 | ); |
8258 | let r = _mm512_mask_insertf32x8::<1>(src, 0b0110100100111100, a, b); |
8259 | let e = _mm512_set_ps( |
8260 | 25., 18., 19., 28., 21., 30., 31., 24., 33., 34., 11., 12., 13., 14., 39., 40., |
8261 | ); |
8262 | assert_eq_m512(r, e); |
8263 | } |
8264 | |
8265 | #[simd_test(enable = "avx512dq" )] |
8266 | unsafe fn test_mm512_maskz_insertf32x8() { |
8267 | let a = _mm512_set_ps( |
8268 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
8269 | ); |
8270 | let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.); |
8271 | let r = _mm512_maskz_insertf32x8::<1>(0b0110100100111100, a, b); |
8272 | let e = _mm512_set_ps( |
8273 | 0., 18., 19., 0., 21., 0., 0., 24., 0., 0., 11., 12., 13., 14., 0., 0., |
8274 | ); |
8275 | assert_eq_m512(r, e); |
8276 | } |
8277 | |
8278 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8279 | unsafe fn test_mm256_insertf64x2() { |
8280 | let a = _mm256_set_pd(1., 2., 3., 4.); |
8281 | let b = _mm_set_pd(5., 6.); |
8282 | let r = _mm256_insertf64x2::<1>(a, b); |
8283 | let e = _mm256_set_pd(5., 6., 3., 4.); |
8284 | assert_eq_m256d(r, e); |
8285 | } |
8286 | |
8287 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8288 | unsafe fn test_mm256_mask_insertf64x2() { |
8289 | let a = _mm256_set_pd(1., 2., 3., 4.); |
8290 | let b = _mm_set_pd(5., 6.); |
8291 | let src = _mm256_set_pd(7., 8., 9., 10.); |
8292 | let r = _mm256_mask_insertf64x2::<1>(src, 0b0110, a, b); |
8293 | let e = _mm256_set_pd(7., 6., 3., 10.); |
8294 | assert_eq_m256d(r, e); |
8295 | } |
8296 | |
8297 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8298 | unsafe fn test_mm256_maskz_insertf64x2() { |
8299 | let a = _mm256_set_pd(1., 2., 3., 4.); |
8300 | let b = _mm_set_pd(5., 6.); |
8301 | let r = _mm256_maskz_insertf64x2::<1>(0b0110, a, b); |
8302 | let e = _mm256_set_pd(0., 6., 3., 0.); |
8303 | assert_eq_m256d(r, e); |
8304 | } |
8305 | |
8306 | #[simd_test(enable = "avx512dq" )] |
8307 | unsafe fn test_mm512_insertf64x2() { |
8308 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
8309 | let b = _mm_set_pd(9., 10.); |
8310 | let r = _mm512_insertf64x2::<2>(a, b); |
8311 | let e = _mm512_set_pd(1., 2., 9., 10., 5., 6., 7., 8.); |
8312 | assert_eq_m512d(r, e); |
8313 | } |
8314 | |
8315 | #[simd_test(enable = "avx512dq" )] |
8316 | unsafe fn test_mm512_mask_insertf64x2() { |
8317 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
8318 | let b = _mm_set_pd(9., 10.); |
8319 | let src = _mm512_set_pd(11., 12., 13., 14., 15., 16., 17., 18.); |
8320 | let r = _mm512_mask_insertf64x2::<2>(src, 0b01101001, a, b); |
8321 | let e = _mm512_set_pd(11., 2., 9., 14., 5., 16., 17., 8.); |
8322 | assert_eq_m512d(r, e); |
8323 | } |
8324 | |
8325 | #[simd_test(enable = "avx512dq" )] |
8326 | unsafe fn test_mm512_maskz_insertf64x2() { |
8327 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
8328 | let b = _mm_set_pd(9., 10.); |
8329 | let r = _mm512_maskz_insertf64x2::<2>(0b01101001, a, b); |
8330 | let e = _mm512_set_pd(0., 2., 9., 0., 5., 0., 0., 8.); |
8331 | assert_eq_m512d(r, e); |
8332 | } |
8333 | |
8334 | #[simd_test(enable = "avx512dq" )] |
8335 | unsafe fn test_mm512_inserti32x8() { |
8336 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
8337 | let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24); |
8338 | let r = _mm512_inserti32x8::<1>(a, b); |
8339 | let e = _mm512_set_epi32( |
8340 | 17, 18, 19, 20, 21, 22, 23, 24, 9, 10, 11, 12, 13, 14, 15, 16, |
8341 | ); |
8342 | assert_eq_m512i(r, e); |
8343 | } |
8344 | |
8345 | #[simd_test(enable = "avx512dq" )] |
8346 | unsafe fn test_mm512_mask_inserti32x8() { |
8347 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
8348 | let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24); |
8349 | let src = _mm512_set_epi32( |
8350 | 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, |
8351 | ); |
8352 | let r = _mm512_mask_inserti32x8::<1>(src, 0b0110100100111100, a, b); |
8353 | let e = _mm512_set_epi32( |
8354 | 25, 18, 19, 28, 21, 30, 31, 24, 33, 34, 11, 12, 13, 14, 39, 40, |
8355 | ); |
8356 | assert_eq_m512i(r, e); |
8357 | } |
8358 | |
8359 | #[simd_test(enable = "avx512dq" )] |
8360 | unsafe fn test_mm512_maskz_inserti32x8() { |
8361 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
8362 | let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24); |
8363 | let r = _mm512_maskz_inserti32x8::<1>(0b0110100100111100, a, b); |
8364 | let e = _mm512_set_epi32(0, 18, 19, 0, 21, 0, 0, 24, 0, 0, 11, 12, 13, 14, 0, 0); |
8365 | assert_eq_m512i(r, e); |
8366 | } |
8367 | |
8368 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8369 | unsafe fn test_mm256_inserti64x2() { |
8370 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
8371 | let b = _mm_set_epi64x(5, 6); |
8372 | let r = _mm256_inserti64x2::<1>(a, b); |
8373 | let e = _mm256_set_epi64x(5, 6, 3, 4); |
8374 | assert_eq_m256i(r, e); |
8375 | } |
8376 | |
8377 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8378 | unsafe fn test_mm256_mask_inserti64x2() { |
8379 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
8380 | let b = _mm_set_epi64x(5, 6); |
8381 | let src = _mm256_set_epi64x(7, 8, 9, 10); |
8382 | let r = _mm256_mask_inserti64x2::<1>(src, 0b0110, a, b); |
8383 | let e = _mm256_set_epi64x(7, 6, 3, 10); |
8384 | assert_eq_m256i(r, e); |
8385 | } |
8386 | |
8387 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8388 | unsafe fn test_mm256_maskz_inserti64x2() { |
8389 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
8390 | let b = _mm_set_epi64x(5, 6); |
8391 | let r = _mm256_maskz_inserti64x2::<1>(0b0110, a, b); |
8392 | let e = _mm256_set_epi64x(0, 6, 3, 0); |
8393 | assert_eq_m256i(r, e); |
8394 | } |
8395 | |
8396 | #[simd_test(enable = "avx512dq" )] |
8397 | unsafe fn test_mm512_inserti64x2() { |
8398 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8399 | let b = _mm_set_epi64x(9, 10); |
8400 | let r = _mm512_inserti64x2::<2>(a, b); |
8401 | let e = _mm512_set_epi64(1, 2, 9, 10, 5, 6, 7, 8); |
8402 | assert_eq_m512i(r, e); |
8403 | } |
8404 | |
8405 | #[simd_test(enable = "avx512dq" )] |
8406 | unsafe fn test_mm512_mask_inserti64x2() { |
8407 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8408 | let b = _mm_set_epi64x(9, 10); |
8409 | let src = _mm512_set_epi64(11, 12, 13, 14, 15, 16, 17, 18); |
8410 | let r = _mm512_mask_inserti64x2::<2>(src, 0b01101001, a, b); |
8411 | let e = _mm512_set_epi64(11, 2, 9, 14, 5, 16, 17, 8); |
8412 | assert_eq_m512i(r, e); |
8413 | } |
8414 | |
8415 | #[simd_test(enable = "avx512dq" )] |
8416 | unsafe fn test_mm512_maskz_inserti64x2() { |
8417 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8418 | let b = _mm_set_epi64x(9, 10); |
8419 | let r = _mm512_maskz_inserti64x2::<2>(0b01101001, a, b); |
8420 | let e = _mm512_set_epi64(0, 2, 9, 0, 5, 0, 0, 8); |
8421 | assert_eq_m512i(r, e); |
8422 | } |
8423 | |
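// `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC` selects round-to-nearest-even and
// suppresses floating-point exceptions (SAE) in the explicitly rounded conversions below.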
8424 | #[simd_test(enable = "avx512dq" )] |
8425 | unsafe fn test_mm512_cvt_roundepi64_pd() { |
8426 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8427 | let r = _mm512_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
8428 | let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
8429 | assert_eq_m512d(r, e); |
8430 | } |
8431 | |
8432 | #[simd_test(enable = "avx512dq" )] |
8433 | unsafe fn test_mm512_mask_cvt_roundepi64_pd() { |
8434 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8435 | let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.); |
8436 | let r = _mm512_mask_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
8437 | b, 0b01101001, a, |
8438 | ); |
8439 | let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.); |
8440 | assert_eq_m512d(r, e); |
8441 | } |
8442 | |
8443 | #[simd_test(enable = "avx512dq" )] |
8444 | unsafe fn test_mm512_maskz_cvt_roundepi64_pd() { |
8445 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8446 | let r = _mm512_maskz_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
8447 | 0b01101001, a, |
8448 | ); |
8449 | let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.); |
8450 | assert_eq_m512d(r, e); |
8451 | } |
8452 | |
8453 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8454 | unsafe fn test_mm_cvtepi64_pd() { |
8455 | let a = _mm_set_epi64x(1, 2); |
8456 | let r = _mm_cvtepi64_pd(a); |
8457 | let e = _mm_set_pd(1., 2.); |
8458 | assert_eq_m128d(r, e); |
8459 | } |
8460 | |
8461 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8462 | unsafe fn test_mm_mask_cvtepi64_pd() { |
8463 | let a = _mm_set_epi64x(1, 2); |
8464 | let b = _mm_set_pd(3., 4.); |
8465 | let r = _mm_mask_cvtepi64_pd(b, 0b01, a); |
8466 | let e = _mm_set_pd(3., 2.); |
8467 | assert_eq_m128d(r, e); |
8468 | } |
8469 | |
8470 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8471 | unsafe fn test_mm_maskz_cvtepi64_pd() { |
8472 | let a = _mm_set_epi64x(1, 2); |
8473 | let r = _mm_maskz_cvtepi64_pd(0b01, a); |
8474 | let e = _mm_set_pd(0., 2.); |
8475 | assert_eq_m128d(r, e); |
8476 | } |
8477 | |
8478 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8479 | unsafe fn test_mm256_cvtepi64_pd() { |
8480 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
8481 | let r = _mm256_cvtepi64_pd(a); |
8482 | let e = _mm256_set_pd(1., 2., 3., 4.); |
8483 | assert_eq_m256d(r, e); |
8484 | } |
8485 | |
8486 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8487 | unsafe fn test_mm256_mask_cvtepi64_pd() { |
8488 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
8489 | let b = _mm256_set_pd(5., 6., 7., 8.); |
8490 | let r = _mm256_mask_cvtepi64_pd(b, 0b0110, a); |
8491 | let e = _mm256_set_pd(5., 2., 3., 8.); |
8492 | assert_eq_m256d(r, e); |
8493 | } |
8494 | |
8495 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8496 | unsafe fn test_mm256_maskz_cvtepi64_pd() { |
8497 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
8498 | let r = _mm256_maskz_cvtepi64_pd(0b0110, a); |
8499 | let e = _mm256_set_pd(0., 2., 3., 0.); |
8500 | assert_eq_m256d(r, e); |
8501 | } |
8502 | |
8503 | #[simd_test(enable = "avx512dq" )] |
8504 | unsafe fn test_mm512_cvtepi64_pd() { |
8505 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8506 | let r = _mm512_cvtepi64_pd(a); |
8507 | let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
8508 | assert_eq_m512d(r, e); |
8509 | } |
8510 | |
8511 | #[simd_test(enable = "avx512dq" )] |
8512 | unsafe fn test_mm512_mask_cvtepi64_pd() { |
8513 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8514 | let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.); |
8515 | let r = _mm512_mask_cvtepi64_pd(b, 0b01101001, a); |
8516 | let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.); |
8517 | assert_eq_m512d(r, e); |
8518 | } |
8519 | |
8520 | #[simd_test(enable = "avx512dq" )] |
8521 | unsafe fn test_mm512_maskz_cvtepi64_pd() { |
8522 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8523 | let r = _mm512_maskz_cvtepi64_pd(0b01101001, a); |
8524 | let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.); |
8525 | assert_eq_m512d(r, e); |
8526 | } |
8527 | |
8528 | #[simd_test(enable = "avx512dq" )] |
8529 | unsafe fn test_mm512_cvt_roundepi64_ps() { |
8530 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8531 | let r = _mm512_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
8532 | let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
8533 | assert_eq_m256(r, e); |
8534 | } |
8535 | |
8536 | #[simd_test(enable = "avx512dq" )] |
8537 | unsafe fn test_mm512_mask_cvt_roundepi64_ps() { |
8538 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8539 | let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.); |
8540 | let r = _mm512_mask_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
8541 | b, 0b01101001, a, |
8542 | ); |
8543 | let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.); |
8544 | assert_eq_m256(r, e); |
8545 | } |
8546 | |
8547 | #[simd_test(enable = "avx512dq" )] |
8548 | unsafe fn test_mm512_maskz_cvt_roundepi64_ps() { |
8549 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8550 | let r = _mm512_maskz_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
8551 | 0b01101001, a, |
8552 | ); |
8553 | let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.); |
8554 | assert_eq_m256(r, e); |
8555 | } |
8556 | |
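// The 128-bit i64 -> f32 conversions fill only the two lowest f32 lanes and zero the upper
// two, which is why the expected vectors below start with `0., 0.`.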
8557 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8558 | unsafe fn test_mm_cvtepi64_ps() { |
8559 | let a = _mm_set_epi64x(1, 2); |
8560 | let r = _mm_cvtepi64_ps(a); |
8561 | let e = _mm_set_ps(0., 0., 1., 2.); |
8562 | assert_eq_m128(r, e); |
8563 | } |
8564 | |
8565 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8566 | unsafe fn test_mm_mask_cvtepi64_ps() { |
8567 | let a = _mm_set_epi64x(1, 2); |
8568 | let b = _mm_set_ps(3., 4., 5., 6.); |
8569 | let r = _mm_mask_cvtepi64_ps(b, 0b01, a); |
8570 | let e = _mm_set_ps(0., 0., 5., 2.); |
8571 | assert_eq_m128(r, e); |
8572 | } |
8573 | |
8574 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8575 | unsafe fn test_mm_maskz_cvtepi64_ps() { |
8576 | let a = _mm_set_epi64x(1, 2); |
8577 | let r = _mm_maskz_cvtepi64_ps(0b01, a); |
8578 | let e = _mm_set_ps(0., 0., 0., 2.); |
8579 | assert_eq_m128(r, e); |
8580 | } |
8581 | |
8582 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8583 | unsafe fn test_mm256_cvtepi64_ps() { |
8584 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
8585 | let r = _mm256_cvtepi64_ps(a); |
8586 | let e = _mm_set_ps(1., 2., 3., 4.); |
8587 | assert_eq_m128(r, e); |
8588 | } |
8589 | |
8590 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8591 | unsafe fn test_mm256_mask_cvtepi64_ps() { |
8592 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
8593 | let b = _mm_set_ps(5., 6., 7., 8.); |
8594 | let r = _mm256_mask_cvtepi64_ps(b, 0b0110, a); |
8595 | let e = _mm_set_ps(5., 2., 3., 8.); |
8596 | assert_eq_m128(r, e); |
8597 | } |
8598 | |
8599 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8600 | unsafe fn test_mm256_maskz_cvtepi64_ps() { |
8601 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
8602 | let r = _mm256_maskz_cvtepi64_ps(0b0110, a); |
8603 | let e = _mm_set_ps(0., 2., 3., 0.); |
8604 | assert_eq_m128(r, e); |
8605 | } |
8606 | |
8607 | #[simd_test(enable = "avx512dq" )] |
8608 | unsafe fn test_mm512_cvtepi64_ps() { |
8609 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8610 | let r = _mm512_cvtepi64_ps(a); |
8611 | let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
8612 | assert_eq_m256(r, e); |
8613 | } |
8614 | |
8615 | #[simd_test(enable = "avx512dq" )] |
8616 | unsafe fn test_mm512_mask_cvtepi64_ps() { |
8617 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8618 | let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.); |
8619 | let r = _mm512_mask_cvtepi64_ps(b, 0b01101001, a); |
8620 | let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.); |
8621 | assert_eq_m256(r, e); |
8622 | } |
8623 | |
8624 | #[simd_test(enable = "avx512dq" )] |
8625 | unsafe fn test_mm512_maskz_cvtepi64_ps() { |
8626 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8627 | let r = _mm512_maskz_cvtepi64_ps(0b01101001, a); |
8628 | let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.); |
8629 | assert_eq_m256(r, e); |
8630 | } |
8631 | |
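// Conversions from packed unsigned 64-bit integers to packed f64, with and without an
// explicit rounding mode.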
8632 | #[simd_test(enable = "avx512dq" )] |
8633 | unsafe fn test_mm512_cvt_roundepu64_pd() { |
8634 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8635 | let r = _mm512_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
8636 | let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
8637 | assert_eq_m512d(r, e); |
8638 | } |
8639 | |
8640 | #[simd_test(enable = "avx512dq" )] |
8641 | unsafe fn test_mm512_mask_cvt_roundepu64_pd() { |
8642 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8643 | let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.); |
8644 | let r = _mm512_mask_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
8645 | b, 0b01101001, a, |
8646 | ); |
8647 | let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.); |
8648 | assert_eq_m512d(r, e); |
8649 | } |
8650 | |
8651 | #[simd_test(enable = "avx512dq" )] |
8652 | unsafe fn test_mm512_maskz_cvt_roundepu64_pd() { |
8653 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8654 | let r = _mm512_maskz_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
8655 | 0b01101001, a, |
8656 | ); |
8657 | let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.); |
8658 | assert_eq_m512d(r, e); |
8659 | } |
8660 | |
8661 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8662 | unsafe fn test_mm_cvtepu64_pd() { |
8663 | let a = _mm_set_epi64x(1, 2); |
8664 | let r = _mm_cvtepu64_pd(a); |
8665 | let e = _mm_set_pd(1., 2.); |
8666 | assert_eq_m128d(r, e); |
8667 | } |
8668 | |
8669 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8670 | unsafe fn test_mm_mask_cvtepu64_pd() { |
8671 | let a = _mm_set_epi64x(1, 2); |
8672 | let b = _mm_set_pd(3., 4.); |
8673 | let r = _mm_mask_cvtepu64_pd(b, 0b01, a); |
8674 | let e = _mm_set_pd(3., 2.); |
8675 | assert_eq_m128d(r, e); |
8676 | } |
8677 | |
8678 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8679 | unsafe fn test_mm_maskz_cvtepu64_pd() { |
8680 | let a = _mm_set_epi64x(1, 2); |
8681 | let r = _mm_maskz_cvtepu64_pd(0b01, a); |
8682 | let e = _mm_set_pd(0., 2.); |
8683 | assert_eq_m128d(r, e); |
8684 | } |
8685 | |
8686 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8687 | unsafe fn test_mm256_cvtepu64_pd() { |
8688 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
8689 | let r = _mm256_cvtepu64_pd(a); |
8690 | let e = _mm256_set_pd(1., 2., 3., 4.); |
8691 | assert_eq_m256d(r, e); |
8692 | } |
8693 | |
8694 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8695 | unsafe fn test_mm256_mask_cvtepu64_pd() { |
8696 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
8697 | let b = _mm256_set_pd(5., 6., 7., 8.); |
8698 | let r = _mm256_mask_cvtepu64_pd(b, 0b0110, a); |
8699 | let e = _mm256_set_pd(5., 2., 3., 8.); |
8700 | assert_eq_m256d(r, e); |
8701 | } |
8702 | |
8703 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8704 | unsafe fn test_mm256_maskz_cvtepu64_pd() { |
8705 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
8706 | let r = _mm256_maskz_cvtepu64_pd(0b0110, a); |
8707 | let e = _mm256_set_pd(0., 2., 3., 0.); |
8708 | assert_eq_m256d(r, e); |
8709 | } |
8710 | |
8711 | #[simd_test(enable = "avx512dq" )] |
8712 | unsafe fn test_mm512_cvtepu64_pd() { |
8713 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8714 | let r = _mm512_cvtepu64_pd(a); |
8715 | let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
8716 | assert_eq_m512d(r, e); |
8717 | } |
8718 | |
8719 | #[simd_test(enable = "avx512dq" )] |
8720 | unsafe fn test_mm512_mask_cvtepu64_pd() { |
8721 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8722 | let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.); |
8723 | let r = _mm512_mask_cvtepu64_pd(b, 0b01101001, a); |
8724 | let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.); |
8725 | assert_eq_m512d(r, e); |
8726 | } |
8727 | |
8728 | #[simd_test(enable = "avx512dq" )] |
8729 | unsafe fn test_mm512_maskz_cvtepu64_pd() { |
8730 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8731 | let r = _mm512_maskz_cvtepu64_pd(0b01101001, a); |
8732 | let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.); |
8733 | assert_eq_m512d(r, e); |
8734 | } |
8735 | |
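// Conversions from packed unsigned 64-bit integers to packed f32. The 128-bit source forms
// fill only the low two lanes of the `__m128` result and zero the upper lanes.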
8736 | #[simd_test(enable = "avx512dq" )] |
8737 | unsafe fn test_mm512_cvt_roundepu64_ps() { |
8738 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8739 | let r = _mm512_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
8740 | let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
8741 | assert_eq_m256(r, e); |
8742 | } |
8743 | |
8744 | #[simd_test(enable = "avx512dq" )] |
8745 | unsafe fn test_mm512_mask_cvt_roundepu64_ps() { |
8746 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8747 | let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.); |
8748 | let r = _mm512_mask_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
8749 | b, 0b01101001, a, |
8750 | ); |
8751 | let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.); |
8752 | assert_eq_m256(r, e); |
8753 | } |
8754 | |
8755 | #[simd_test(enable = "avx512dq" )] |
8756 | unsafe fn test_mm512_maskz_cvt_roundepu64_ps() { |
8757 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8758 | let r = _mm512_maskz_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
8759 | 0b01101001, a, |
8760 | ); |
8761 | let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.); |
8762 | assert_eq_m256(r, e); |
8763 | } |
8764 | |
8765 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8766 | unsafe fn test_mm_cvtepu64_ps() { |
8767 | let a = _mm_set_epi64x(1, 2); |
8768 | let r = _mm_cvtepu64_ps(a); |
8769 | let e = _mm_set_ps(0., 0., 1., 2.); |
8770 | assert_eq_m128(r, e); |
8771 | } |
8772 | |
8773 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8774 | unsafe fn test_mm_mask_cvtepu64_ps() { |
8775 | let a = _mm_set_epi64x(1, 2); |
8776 | let b = _mm_set_ps(3., 4., 5., 6.); |
8777 | let r = _mm_mask_cvtepu64_ps(b, 0b01, a); |
8778 | let e = _mm_set_ps(0., 0., 5., 2.); |
8779 | assert_eq_m128(r, e); |
8780 | } |
8781 | |
8782 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8783 | unsafe fn test_mm_maskz_cvtepu64_ps() { |
8784 | let a = _mm_set_epi64x(1, 2); |
8785 | let r = _mm_maskz_cvtepu64_ps(0b01, a); |
8786 | let e = _mm_set_ps(0., 0., 0., 2.); |
8787 | assert_eq_m128(r, e); |
8788 | } |
8789 | |
8790 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8791 | unsafe fn test_mm256_cvtepu64_ps() { |
8792 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
8793 | let r = _mm256_cvtepu64_ps(a); |
8794 | let e = _mm_set_ps(1., 2., 3., 4.); |
8795 | assert_eq_m128(r, e); |
8796 | } |
8797 | |
8798 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8799 | unsafe fn test_mm256_mask_cvtepu64_ps() { |
8800 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
8801 | let b = _mm_set_ps(5., 6., 7., 8.); |
8802 | let r = _mm256_mask_cvtepu64_ps(b, 0b0110, a); |
8803 | let e = _mm_set_ps(5., 2., 3., 8.); |
8804 | assert_eq_m128(r, e); |
8805 | } |
8806 | |
8807 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8808 | unsafe fn test_mm256_maskz_cvtepu64_ps() { |
8809 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
8810 | let r = _mm256_maskz_cvtepu64_ps(0b0110, a); |
8811 | let e = _mm_set_ps(0., 2., 3., 0.); |
8812 | assert_eq_m128(r, e); |
8813 | } |
8814 | |
8815 | #[simd_test(enable = "avx512dq" )] |
8816 | unsafe fn test_mm512_cvtepu64_ps() { |
8817 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8818 | let r = _mm512_cvtepu64_ps(a); |
8819 | let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
8820 | assert_eq_m256(r, e); |
8821 | } |
8822 | |
8823 | #[simd_test(enable = "avx512dq" )] |
8824 | unsafe fn test_mm512_mask_cvtepu64_ps() { |
8825 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8826 | let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.); |
8827 | let r = _mm512_mask_cvtepu64_ps(b, 0b01101001, a); |
8828 | let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.); |
8829 | assert_eq_m256(r, e); |
8830 | } |
8831 | |
8832 | #[simd_test(enable = "avx512dq" )] |
8833 | unsafe fn test_mm512_maskz_cvtepu64_ps() { |
8834 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8835 | let r = _mm512_maskz_cvtepu64_ps(0b01101001, a); |
8836 | let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.); |
8837 | assert_eq_m256(r, e); |
8838 | } |
8839 | |
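// Conversions from packed f64 to packed signed 64-bit integers, first with an explicit
// rounding mode, then with the plain forms.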
8840 | #[simd_test(enable = "avx512dq" )] |
8841 | unsafe fn test_mm512_cvt_roundpd_epi64() { |
8842 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
8843 | let r = _mm512_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
8844 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8845 | assert_eq_m512i(r, e); |
8846 | } |
8847 | |
8848 | #[simd_test(enable = "avx512dq" )] |
8849 | unsafe fn test_mm512_mask_cvt_roundpd_epi64() { |
8850 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
8851 | let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
8852 | let r = _mm512_mask_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
8853 | b, 0b01101001, a, |
8854 | ); |
8855 | let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
8856 | assert_eq_m512i(r, e); |
8857 | } |
8858 | |
8859 | #[simd_test(enable = "avx512dq" )] |
8860 | unsafe fn test_mm512_maskz_cvt_roundpd_epi64() { |
8861 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
8862 | let r = _mm512_maskz_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
8863 | 0b01101001, a, |
8864 | ); |
8865 | let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
8866 | assert_eq_m512i(r, e); |
8867 | } |
8868 | |
8869 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8870 | unsafe fn test_mm_cvtpd_epi64() { |
8871 | let a = _mm_set_pd(1., 2.); |
8872 | let r = _mm_cvtpd_epi64(a); |
8873 | let e = _mm_set_epi64x(1, 2); |
8874 | assert_eq_m128i(r, e); |
8875 | } |
8876 | |
8877 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8878 | unsafe fn test_mm_mask_cvtpd_epi64() { |
8879 | let a = _mm_set_pd(1., 2.); |
8880 | let b = _mm_set_epi64x(3, 4); |
8881 | let r = _mm_mask_cvtpd_epi64(b, 0b01, a); |
8882 | let e = _mm_set_epi64x(3, 2); |
8883 | assert_eq_m128i(r, e); |
8884 | } |
8885 | |
8886 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8887 | unsafe fn test_mm_maskz_cvtpd_epi64() { |
8888 | let a = _mm_set_pd(1., 2.); |
8889 | let r = _mm_maskz_cvtpd_epi64(0b01, a); |
8890 | let e = _mm_set_epi64x(0, 2); |
8891 | assert_eq_m128i(r, e); |
8892 | } |
8893 | |
8894 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8895 | unsafe fn test_mm256_cvtpd_epi64() { |
8896 | let a = _mm256_set_pd(1., 2., 3., 4.); |
8897 | let r = _mm256_cvtpd_epi64(a); |
8898 | let e = _mm256_set_epi64x(1, 2, 3, 4); |
8899 | assert_eq_m256i(r, e); |
8900 | } |
8901 | |
8902 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8903 | unsafe fn test_mm256_mask_cvtpd_epi64() { |
8904 | let a = _mm256_set_pd(1., 2., 3., 4.); |
8905 | let b = _mm256_set_epi64x(5, 6, 7, 8); |
8906 | let r = _mm256_mask_cvtpd_epi64(b, 0b0110, a); |
8907 | let e = _mm256_set_epi64x(5, 2, 3, 8); |
8908 | assert_eq_m256i(r, e); |
8909 | } |
8910 | |
8911 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8912 | unsafe fn test_mm256_maskz_cvtpd_epi64() { |
8913 | let a = _mm256_set_pd(1., 2., 3., 4.); |
8914 | let r = _mm256_maskz_cvtpd_epi64(0b0110, a); |
8915 | let e = _mm256_set_epi64x(0, 2, 3, 0); |
8916 | assert_eq_m256i(r, e); |
8917 | } |
8918 | |
8919 | #[simd_test(enable = "avx512dq" )] |
8920 | unsafe fn test_mm512_cvtpd_epi64() { |
8921 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
8922 | let r = _mm512_cvtpd_epi64(a); |
8923 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8924 | assert_eq_m512i(r, e); |
8925 | } |
8926 | |
8927 | #[simd_test(enable = "avx512dq" )] |
8928 | unsafe fn test_mm512_mask_cvtpd_epi64() { |
8929 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
8930 | let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
8931 | let r = _mm512_mask_cvtpd_epi64(b, 0b01101001, a); |
8932 | let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
8933 | assert_eq_m512i(r, e); |
8934 | } |
8935 | |
8936 | #[simd_test(enable = "avx512dq" )] |
8937 | unsafe fn test_mm512_maskz_cvtpd_epi64() { |
8938 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
8939 | let r = _mm512_maskz_cvtpd_epi64(0b01101001, a); |
8940 | let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
8941 | assert_eq_m512i(r, e); |
8942 | } |
8943 | |
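// Conversions from packed f32 to packed signed 64-bit integers; the 512-bit forms take a
// `__m256` source because each f32 lane widens to a 64-bit lane.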
8944 | #[simd_test(enable = "avx512dq" )] |
8945 | unsafe fn test_mm512_cvt_roundps_epi64() { |
8946 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
8947 | let r = _mm512_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
8948 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
8949 | assert_eq_m512i(r, e); |
8950 | } |
8951 | |
8952 | #[simd_test(enable = "avx512dq" )] |
8953 | unsafe fn test_mm512_mask_cvt_roundps_epi64() { |
8954 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
8955 | let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
8956 | let r = _mm512_mask_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
8957 | b, 0b01101001, a, |
8958 | ); |
8959 | let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
8960 | assert_eq_m512i(r, e); |
8961 | } |
8962 | |
8963 | #[simd_test(enable = "avx512dq" )] |
8964 | unsafe fn test_mm512_maskz_cvt_roundps_epi64() { |
8965 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
8966 | let r = _mm512_maskz_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
8967 | 0b01101001, a, |
8968 | ); |
8969 | let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
8970 | assert_eq_m512i(r, e); |
8971 | } |
8972 | |
8973 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8974 | unsafe fn test_mm_cvtps_epi64() { |
8975 | let a = _mm_set_ps(1., 2., 3., 4.); |
8976 | let r = _mm_cvtps_epi64(a); |
8977 | let e = _mm_set_epi64x(3, 4); |
8978 | assert_eq_m128i(r, e); |
8979 | } |
8980 | |
8981 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8982 | unsafe fn test_mm_mask_cvtps_epi64() { |
8983 | let a = _mm_set_ps(1., 2., 3., 4.); |
8984 | let b = _mm_set_epi64x(5, 6); |
8985 | let r = _mm_mask_cvtps_epi64(b, 0b01, a); |
8986 | let e = _mm_set_epi64x(5, 4); |
8987 | assert_eq_m128i(r, e); |
8988 | } |
8989 | |
8990 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8991 | unsafe fn test_mm_maskz_cvtps_epi64() { |
8992 | let a = _mm_set_ps(1., 2., 3., 4.); |
8993 | let r = _mm_maskz_cvtps_epi64(0b01, a); |
8994 | let e = _mm_set_epi64x(0, 4); |
8995 | assert_eq_m128i(r, e); |
8996 | } |
8997 | |
8998 | #[simd_test(enable = "avx512dq,avx512vl" )] |
8999 | unsafe fn test_mm256_cvtps_epi64() { |
9000 | let a = _mm_set_ps(1., 2., 3., 4.); |
9001 | let r = _mm256_cvtps_epi64(a); |
9002 | let e = _mm256_set_epi64x(1, 2, 3, 4); |
9003 | assert_eq_m256i(r, e); |
9004 | } |
9005 | |
9006 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9007 | unsafe fn test_mm256_mask_cvtps_epi64() { |
9008 | let a = _mm_set_ps(1., 2., 3., 4.); |
9009 | let b = _mm256_set_epi64x(5, 6, 7, 8); |
9010 | let r = _mm256_mask_cvtps_epi64(b, 0b0110, a); |
9011 | let e = _mm256_set_epi64x(5, 2, 3, 8); |
9012 | assert_eq_m256i(r, e); |
9013 | } |
9014 | |
9015 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9016 | unsafe fn test_mm256_maskz_cvtps_epi64() { |
9017 | let a = _mm_set_ps(1., 2., 3., 4.); |
9018 | let r = _mm256_maskz_cvtps_epi64(0b0110, a); |
9019 | let e = _mm256_set_epi64x(0, 2, 3, 0); |
9020 | assert_eq_m256i(r, e); |
9021 | } |
9022 | |
9023 | #[simd_test(enable = "avx512dq" )] |
9024 | unsafe fn test_mm512_cvtps_epi64() { |
9025 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
9026 | let r = _mm512_cvtps_epi64(a); |
9027 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
9028 | assert_eq_m512i(r, e); |
9029 | } |
9030 | |
9031 | #[simd_test(enable = "avx512dq" )] |
9032 | unsafe fn test_mm512_mask_cvtps_epi64() { |
9033 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
9034 | let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
9035 | let r = _mm512_mask_cvtps_epi64(b, 0b01101001, a); |
9036 | let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
9037 | assert_eq_m512i(r, e); |
9038 | } |
9039 | |
9040 | #[simd_test(enable = "avx512dq" )] |
9041 | unsafe fn test_mm512_maskz_cvtps_epi64() { |
9042 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
9043 | let r = _mm512_maskz_cvtps_epi64(0b01101001, a); |
9044 | let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
9045 | assert_eq_m512i(r, e); |
9046 | } |
9047 | |
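// Conversions from packed f64 to packed unsigned 64-bit integers.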
9048 | #[simd_test(enable = "avx512dq" )] |
9049 | unsafe fn test_mm512_cvt_roundpd_epu64() { |
9050 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
9051 | let r = _mm512_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
9052 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
9053 | assert_eq_m512i(r, e); |
9054 | } |
9055 | |
9056 | #[simd_test(enable = "avx512dq" )] |
9057 | unsafe fn test_mm512_mask_cvt_roundpd_epu64() { |
9058 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
9059 | let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
9060 | let r = _mm512_mask_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
9061 | b, 0b01101001, a, |
9062 | ); |
9063 | let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
9064 | assert_eq_m512i(r, e); |
9065 | } |
9066 | |
9067 | #[simd_test(enable = "avx512dq" )] |
9068 | unsafe fn test_mm512_maskz_cvt_roundpd_epu64() { |
9069 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
9070 | let r = _mm512_maskz_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
9071 | 0b01101001, a, |
9072 | ); |
9073 | let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
9074 | assert_eq_m512i(r, e); |
9075 | } |
9076 | |
9077 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9078 | unsafe fn test_mm_cvtpd_epu64() { |
9079 | let a = _mm_set_pd(1., 2.); |
9080 | let r = _mm_cvtpd_epu64(a); |
9081 | let e = _mm_set_epi64x(1, 2); |
9082 | assert_eq_m128i(r, e); |
9083 | } |
9084 | |
9085 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9086 | unsafe fn test_mm_mask_cvtpd_epu64() { |
9087 | let a = _mm_set_pd(1., 2.); |
9088 | let b = _mm_set_epi64x(3, 4); |
9089 | let r = _mm_mask_cvtpd_epu64(b, 0b01, a); |
9090 | let e = _mm_set_epi64x(3, 2); |
9091 | assert_eq_m128i(r, e); |
9092 | } |
9093 | |
9094 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9095 | unsafe fn test_mm_maskz_cvtpd_epu64() { |
9096 | let a = _mm_set_pd(1., 2.); |
9097 | let r = _mm_maskz_cvtpd_epu64(0b01, a); |
9098 | let e = _mm_set_epi64x(0, 2); |
9099 | assert_eq_m128i(r, e); |
9100 | } |
9101 | |
9102 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9103 | unsafe fn test_mm256_cvtpd_epu64() { |
9104 | let a = _mm256_set_pd(1., 2., 3., 4.); |
9105 | let r = _mm256_cvtpd_epu64(a); |
9106 | let e = _mm256_set_epi64x(1, 2, 3, 4); |
9107 | assert_eq_m256i(r, e); |
9108 | } |
9109 | |
9110 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9111 | unsafe fn test_mm256_mask_cvtpd_epu64() { |
9112 | let a = _mm256_set_pd(1., 2., 3., 4.); |
9113 | let b = _mm256_set_epi64x(5, 6, 7, 8); |
9114 | let r = _mm256_mask_cvtpd_epu64(b, 0b0110, a); |
9115 | let e = _mm256_set_epi64x(5, 2, 3, 8); |
9116 | assert_eq_m256i(r, e); |
9117 | } |
9118 | |
9119 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9120 | unsafe fn test_mm256_maskz_cvtpd_epu64() { |
9121 | let a = _mm256_set_pd(1., 2., 3., 4.); |
9122 | let r = _mm256_maskz_cvtpd_epu64(0b0110, a); |
9123 | let e = _mm256_set_epi64x(0, 2, 3, 0); |
9124 | assert_eq_m256i(r, e); |
9125 | } |
9126 | |
9127 | #[simd_test(enable = "avx512dq" )] |
9128 | unsafe fn test_mm512_cvtpd_epu64() { |
9129 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
9130 | let r = _mm512_cvtpd_epu64(a); |
9131 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
9132 | assert_eq_m512i(r, e); |
9133 | } |
9134 | |
9135 | #[simd_test(enable = "avx512dq" )] |
9136 | unsafe fn test_mm512_mask_cvtpd_epu64() { |
9137 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
9138 | let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
9139 | let r = _mm512_mask_cvtpd_epu64(b, 0b01101001, a); |
9140 | let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
9141 | assert_eq_m512i(r, e); |
9142 | } |
9143 | |
9144 | #[simd_test(enable = "avx512dq" )] |
9145 | unsafe fn test_mm512_maskz_cvtpd_epu64() { |
9146 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
9147 | let r = _mm512_maskz_cvtpd_epu64(0b01101001, a); |
9148 | let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
9149 | assert_eq_m512i(r, e); |
9150 | } |
9151 | |
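// Conversions from packed f32 to packed unsigned 64-bit integers.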
9152 | #[simd_test(enable = "avx512dq" )] |
9153 | unsafe fn test_mm512_cvt_roundps_epu64() { |
9154 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
9155 | let r = _mm512_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
9156 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
9157 | assert_eq_m512i(r, e); |
9158 | } |
9159 | |
9160 | #[simd_test(enable = "avx512dq" )] |
9161 | unsafe fn test_mm512_mask_cvt_roundps_epu64() { |
9162 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
9163 | let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
9164 | let r = _mm512_mask_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
9165 | b, 0b01101001, a, |
9166 | ); |
9167 | let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
9168 | assert_eq_m512i(r, e); |
9169 | } |
9170 | |
9171 | #[simd_test(enable = "avx512dq" )] |
9172 | unsafe fn test_mm512_maskz_cvt_roundps_epu64() { |
9173 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
9174 | let r = _mm512_maskz_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
9175 | 0b01101001, a, |
9176 | ); |
9177 | let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
9178 | assert_eq_m512i(r, e); |
9179 | } |
9180 | |
9181 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9182 | unsafe fn test_mm_cvtps_epu64() { |
9183 | let a = _mm_set_ps(1., 2., 3., 4.); |
9184 | let r = _mm_cvtps_epu64(a); |
9185 | let e = _mm_set_epi64x(3, 4); |
9186 | assert_eq_m128i(r, e); |
9187 | } |
9188 | |
9189 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9190 | unsafe fn test_mm_mask_cvtps_epu64() { |
9191 | let a = _mm_set_ps(1., 2., 3., 4.); |
9192 | let b = _mm_set_epi64x(5, 6); |
9193 | let r = _mm_mask_cvtps_epu64(b, 0b01, a); |
9194 | let e = _mm_set_epi64x(5, 4); |
9195 | assert_eq_m128i(r, e); |
9196 | } |
9197 | |
9198 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9199 | unsafe fn test_mm_maskz_cvtps_epu64() { |
9200 | let a = _mm_set_ps(1., 2., 3., 4.); |
9201 | let r = _mm_maskz_cvtps_epu64(0b01, a); |
9202 | let e = _mm_set_epi64x(0, 4); |
9203 | assert_eq_m128i(r, e); |
9204 | } |
9205 | |
9206 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9207 | unsafe fn test_mm256_cvtps_epu64() { |
9208 | let a = _mm_set_ps(1., 2., 3., 4.); |
9209 | let r = _mm256_cvtps_epu64(a); |
9210 | let e = _mm256_set_epi64x(1, 2, 3, 4); |
9211 | assert_eq_m256i(r, e); |
9212 | } |
9213 | |
9214 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9215 | unsafe fn test_mm256_mask_cvtps_epu64() { |
9216 | let a = _mm_set_ps(1., 2., 3., 4.); |
9217 | let b = _mm256_set_epi64x(5, 6, 7, 8); |
9218 | let r = _mm256_mask_cvtps_epu64(b, 0b0110, a); |
9219 | let e = _mm256_set_epi64x(5, 2, 3, 8); |
9220 | assert_eq_m256i(r, e); |
9221 | } |
9222 | |
9223 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9224 | unsafe fn test_mm256_maskz_cvtps_epu64() { |
9225 | let a = _mm_set_ps(1., 2., 3., 4.); |
9226 | let r = _mm256_maskz_cvtps_epu64(0b0110, a); |
9227 | let e = _mm256_set_epi64x(0, 2, 3, 0); |
9228 | assert_eq_m256i(r, e); |
9229 | } |
9230 | |
9231 | #[simd_test(enable = "avx512dq" )] |
9232 | unsafe fn test_mm512_cvtps_epu64() { |
9233 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
9234 | let r = _mm512_cvtps_epu64(a); |
9235 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
9236 | assert_eq_m512i(r, e); |
9237 | } |
9238 | |
9239 | #[simd_test(enable = "avx512dq" )] |
9240 | unsafe fn test_mm512_mask_cvtps_epu64() { |
9241 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
9242 | let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
9243 | let r = _mm512_mask_cvtps_epu64(b, 0b01101001, a); |
9244 | let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
9245 | assert_eq_m512i(r, e); |
9246 | } |
9247 | |
9248 | #[simd_test(enable = "avx512dq" )] |
9249 | unsafe fn test_mm512_maskz_cvtps_epu64() { |
9250 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
9251 | let r = _mm512_maskz_cvtps_epu64(0b01101001, a); |
9252 | let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
9253 | assert_eq_m512i(r, e); |
9254 | } |
9255 | |
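// Truncating (round-toward-zero) conversions from packed f64 to packed signed 64-bit integers.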
9256 | #[simd_test(enable = "avx512dq" )] |
9257 | unsafe fn test_mm512_cvtt_roundpd_epi64() { |
9258 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
9259 | let r = _mm512_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(a); |
9260 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
9261 | assert_eq_m512i(r, e); |
9262 | } |
9263 | |
9264 | #[simd_test(enable = "avx512dq" )] |
9265 | unsafe fn test_mm512_mask_cvtt_roundpd_epi64() { |
9266 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
9267 | let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
9268 | let r = _mm512_mask_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a); |
9269 | let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
9270 | assert_eq_m512i(r, e); |
9271 | } |
9272 | |
9273 | #[simd_test(enable = "avx512dq" )] |
9274 | unsafe fn test_mm512_maskz_cvtt_roundpd_epi64() { |
9275 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
9276 | let r = _mm512_maskz_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(0b01101001, a); |
9277 | let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
9278 | assert_eq_m512i(r, e); |
9279 | } |
9280 | |
9281 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9282 | unsafe fn test_mm_cvttpd_epi64() { |
9283 | let a = _mm_set_pd(1., 2.); |
9284 | let r = _mm_cvttpd_epi64(a); |
9285 | let e = _mm_set_epi64x(1, 2); |
9286 | assert_eq_m128i(r, e); |
9287 | } |
9288 | |
9289 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9290 | unsafe fn test_mm_mask_cvttpd_epi64() { |
9291 | let a = _mm_set_pd(1., 2.); |
9292 | let b = _mm_set_epi64x(3, 4); |
9293 | let r = _mm_mask_cvttpd_epi64(b, 0b01, a); |
9294 | let e = _mm_set_epi64x(3, 2); |
9295 | assert_eq_m128i(r, e); |
9296 | } |
9297 | |
9298 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9299 | unsafe fn test_mm_maskz_cvttpd_epi64() { |
9300 | let a = _mm_set_pd(1., 2.); |
9301 | let r = _mm_maskz_cvttpd_epi64(0b01, a); |
9302 | let e = _mm_set_epi64x(0, 2); |
9303 | assert_eq_m128i(r, e); |
9304 | } |
9305 | |
9306 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9307 | unsafe fn test_mm256_cvttpd_epi64() { |
9308 | let a = _mm256_set_pd(1., 2., 3., 4.); |
9309 | let r = _mm256_cvttpd_epi64(a); |
9310 | let e = _mm256_set_epi64x(1, 2, 3, 4); |
9311 | assert_eq_m256i(r, e); |
9312 | } |
9313 | |
9314 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9315 | unsafe fn test_mm256_mask_cvttpd_epi64() { |
9316 | let a = _mm256_set_pd(1., 2., 3., 4.); |
9317 | let b = _mm256_set_epi64x(5, 6, 7, 8); |
9318 | let r = _mm256_mask_cvttpd_epi64(b, 0b0110, a); |
9319 | let e = _mm256_set_epi64x(5, 2, 3, 8); |
9320 | assert_eq_m256i(r, e); |
9321 | } |
9322 | |
9323 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9324 | unsafe fn test_mm256_maskz_cvttpd_epi64() { |
9325 | let a = _mm256_set_pd(1., 2., 3., 4.); |
9326 | let r = _mm256_maskz_cvttpd_epi64(0b0110, a); |
9327 | let e = _mm256_set_epi64x(0, 2, 3, 0); |
9328 | assert_eq_m256i(r, e); |
9329 | } |
9330 | |
9331 | #[simd_test(enable = "avx512dq" )] |
9332 | unsafe fn test_mm512_cvttpd_epi64() { |
9333 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
9334 | let r = _mm512_cvttpd_epi64(a); |
9335 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
9336 | assert_eq_m512i(r, e); |
9337 | } |
9338 | |
9339 | #[simd_test(enable = "avx512dq" )] |
9340 | unsafe fn test_mm512_mask_cvttpd_epi64() { |
9341 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
9342 | let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
9343 | let r = _mm512_mask_cvttpd_epi64(b, 0b01101001, a); |
9344 | let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
9345 | assert_eq_m512i(r, e); |
9346 | } |
9347 | |
9348 | #[simd_test(enable = "avx512dq" )] |
9349 | unsafe fn test_mm512_maskz_cvttpd_epi64() { |
9350 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
9351 | let r = _mm512_maskz_cvttpd_epi64(0b01101001, a); |
9352 | let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
9353 | assert_eq_m512i(r, e); |
9354 | } |
9355 | |
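// Truncating conversions from packed f32 to packed signed 64-bit integers.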
9356 | #[simd_test(enable = "avx512dq" )] |
9357 | unsafe fn test_mm512_cvtt_roundps_epi64() { |
9358 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
9359 | let r = _mm512_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(a); |
9360 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
9361 | assert_eq_m512i(r, e); |
9362 | } |
9363 | |
9364 | #[simd_test(enable = "avx512dq" )] |
9365 | unsafe fn test_mm512_mask_cvtt_roundps_epi64() { |
9366 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
9367 | let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
9368 | let r = _mm512_mask_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a); |
9369 | let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
9370 | assert_eq_m512i(r, e); |
9371 | } |
9372 | |
9373 | #[simd_test(enable = "avx512dq" )] |
9374 | unsafe fn test_mm512_maskz_cvtt_roundps_epi64() { |
9375 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
9376 | let r = _mm512_maskz_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(0b01101001, a); |
9377 | let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
9378 | assert_eq_m512i(r, e); |
9379 | } |
9380 | |
9381 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9382 | unsafe fn test_mm_cvttps_epi64() { |
9383 | let a = _mm_set_ps(1., 2., 3., 4.); |
9384 | let r = _mm_cvttps_epi64(a); |
9385 | let e = _mm_set_epi64x(3, 4); |
9386 | assert_eq_m128i(r, e); |
9387 | } |
9388 | |
9389 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9390 | unsafe fn test_mm_mask_cvttps_epi64() { |
9391 | let a = _mm_set_ps(1., 2., 3., 4.); |
9392 | let b = _mm_set_epi64x(5, 6); |
9393 | let r = _mm_mask_cvttps_epi64(b, 0b01, a); |
9394 | let e = _mm_set_epi64x(5, 4); |
9395 | assert_eq_m128i(r, e); |
9396 | } |
9397 | |
9398 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9399 | unsafe fn test_mm_maskz_cvttps_epi64() { |
9400 | let a = _mm_set_ps(1., 2., 3., 4.); |
9401 | let r = _mm_maskz_cvttps_epi64(0b01, a); |
9402 | let e = _mm_set_epi64x(0, 4); |
9403 | assert_eq_m128i(r, e); |
9404 | } |
9405 | |
9406 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9407 | unsafe fn test_mm256_cvttps_epi64() { |
9408 | let a = _mm_set_ps(1., 2., 3., 4.); |
9409 | let r = _mm256_cvttps_epi64(a); |
9410 | let e = _mm256_set_epi64x(1, 2, 3, 4); |
9411 | assert_eq_m256i(r, e); |
9412 | } |
9413 | |
9414 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9415 | unsafe fn test_mm256_mask_cvttps_epi64() { |
9416 | let a = _mm_set_ps(1., 2., 3., 4.); |
9417 | let b = _mm256_set_epi64x(5, 6, 7, 8); |
9418 | let r = _mm256_mask_cvttps_epi64(b, 0b0110, a); |
9419 | let e = _mm256_set_epi64x(5, 2, 3, 8); |
9420 | assert_eq_m256i(r, e); |
9421 | } |
9422 | |
9423 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9424 | unsafe fn test_mm256_maskz_cvttps_epi64() { |
9425 | let a = _mm_set_ps(1., 2., 3., 4.); |
9426 | let r = _mm256_maskz_cvttps_epi64(0b0110, a); |
9427 | let e = _mm256_set_epi64x(0, 2, 3, 0); |
9428 | assert_eq_m256i(r, e); |
9429 | } |
9430 | |
9431 | #[simd_test(enable = "avx512dq" )] |
9432 | unsafe fn test_mm512_cvttps_epi64() { |
9433 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
9434 | let r = _mm512_cvttps_epi64(a); |
9435 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
9436 | assert_eq_m512i(r, e); |
9437 | } |
9438 | |
9439 | #[simd_test(enable = "avx512dq" )] |
9440 | unsafe fn test_mm512_mask_cvttps_epi64() { |
9441 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
9442 | let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
9443 | let r = _mm512_mask_cvttps_epi64(b, 0b01101001, a); |
9444 | let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
9445 | assert_eq_m512i(r, e); |
9446 | } |
9447 | |
9448 | #[simd_test(enable = "avx512dq" )] |
9449 | unsafe fn test_mm512_maskz_cvttps_epi64() { |
9450 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
9451 | let r = _mm512_maskz_cvttps_epi64(0b01101001, a); |
9452 | let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
9453 | assert_eq_m512i(r, e); |
9454 | } |
9455 | |
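// Truncating conversions from packed f64 to packed unsigned 64-bit integers.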
9456 | #[simd_test(enable = "avx512dq" )] |
9457 | unsafe fn test_mm512_cvtt_roundpd_epu64() { |
9458 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
9459 | let r = _mm512_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(a); |
9460 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
9461 | assert_eq_m512i(r, e); |
9462 | } |
9463 | |
9464 | #[simd_test(enable = "avx512dq" )] |
9465 | unsafe fn test_mm512_mask_cvtt_roundpd_epu64() { |
9466 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
9467 | let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
9468 | let r = _mm512_mask_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a); |
9469 | let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
9470 | assert_eq_m512i(r, e); |
9471 | } |
9472 | |
9473 | #[simd_test(enable = "avx512dq" )] |
9474 | unsafe fn test_mm512_maskz_cvtt_roundpd_epu64() { |
9475 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
9476 | let r = _mm512_maskz_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(0b01101001, a); |
9477 | let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
9478 | assert_eq_m512i(r, e); |
9479 | } |
9480 | |
9481 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9482 | unsafe fn test_mm_cvttpd_epu64() { |
9483 | let a = _mm_set_pd(1., 2.); |
9484 | let r = _mm_cvttpd_epu64(a); |
9485 | let e = _mm_set_epi64x(1, 2); |
9486 | assert_eq_m128i(r, e); |
9487 | } |
9488 | |
9489 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9490 | unsafe fn test_mm_mask_cvttpd_epu64() { |
9491 | let a = _mm_set_pd(1., 2.); |
9492 | let b = _mm_set_epi64x(3, 4); |
9493 | let r = _mm_mask_cvttpd_epu64(b, 0b01, a); |
9494 | let e = _mm_set_epi64x(3, 2); |
9495 | assert_eq_m128i(r, e); |
9496 | } |
9497 | |
9498 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9499 | unsafe fn test_mm_maskz_cvttpd_epu64() { |
9500 | let a = _mm_set_pd(1., 2.); |
9501 | let r = _mm_maskz_cvttpd_epu64(0b01, a); |
9502 | let e = _mm_set_epi64x(0, 2); |
9503 | assert_eq_m128i(r, e); |
9504 | } |
9505 | |
9506 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9507 | unsafe fn test_mm256_cvttpd_epu64() { |
9508 | let a = _mm256_set_pd(1., 2., 3., 4.); |
9509 | let r = _mm256_cvttpd_epu64(a); |
9510 | let e = _mm256_set_epi64x(1, 2, 3, 4); |
9511 | assert_eq_m256i(r, e); |
9512 | } |
9513 | |
9514 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9515 | unsafe fn test_mm256_mask_cvttpd_epu64() { |
9516 | let a = _mm256_set_pd(1., 2., 3., 4.); |
9517 | let b = _mm256_set_epi64x(5, 6, 7, 8); |
9518 | let r = _mm256_mask_cvttpd_epu64(b, 0b0110, a); |
9519 | let e = _mm256_set_epi64x(5, 2, 3, 8); |
9520 | assert_eq_m256i(r, e); |
9521 | } |
9522 | |
9523 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9524 | unsafe fn test_mm256_maskz_cvttpd_epu64() { |
9525 | let a = _mm256_set_pd(1., 2., 3., 4.); |
9526 | let r = _mm256_maskz_cvttpd_epu64(0b0110, a); |
9527 | let e = _mm256_set_epi64x(0, 2, 3, 0); |
9528 | assert_eq_m256i(r, e); |
9529 | } |
9530 | |
9531 | #[simd_test(enable = "avx512dq" )] |
9532 | unsafe fn test_mm512_cvttpd_epu64() { |
9533 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
9534 | let r = _mm512_cvttpd_epu64(a); |
9535 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
9536 | assert_eq_m512i(r, e); |
9537 | } |
9538 | |
9539 | #[simd_test(enable = "avx512dq" )] |
9540 | unsafe fn test_mm512_mask_cvttpd_epu64() { |
9541 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
9542 | let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
9543 | let r = _mm512_mask_cvttpd_epu64(b, 0b01101001, a); |
9544 | let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
9545 | assert_eq_m512i(r, e); |
9546 | } |
9547 | |
9548 | #[simd_test(enable = "avx512dq" )] |
9549 | unsafe fn test_mm512_maskz_cvttpd_epu64() { |
9550 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
9551 | let r = _mm512_maskz_cvttpd_epu64(0b01101001, a); |
9552 | let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
9553 | assert_eq_m512i(r, e); |
9554 | } |
9555 | |
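// Truncating conversions from packed f32 to packed unsigned 64-bit integers.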
9556 | #[simd_test(enable = "avx512dq" )] |
9557 | unsafe fn test_mm512_cvtt_roundps_epu64() { |
9558 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
9559 | let r = _mm512_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(a); |
9560 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
9561 | assert_eq_m512i(r, e); |
9562 | } |
9563 | |
9564 | #[simd_test(enable = "avx512dq" )] |
9565 | unsafe fn test_mm512_mask_cvtt_roundps_epu64() { |
9566 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
9567 | let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
9568 | let r = _mm512_mask_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a); |
9569 | let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
9570 | assert_eq_m512i(r, e); |
9571 | } |
9572 | |
9573 | #[simd_test(enable = "avx512dq" )] |
9574 | unsafe fn test_mm512_maskz_cvtt_roundps_epu64() { |
9575 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
9576 | let r = _mm512_maskz_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(0b01101001, a); |
9577 | let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
9578 | assert_eq_m512i(r, e); |
9579 | } |
9580 | |
9581 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9582 | unsafe fn test_mm_cvttps_epu64() { |
9583 | let a = _mm_set_ps(1., 2., 3., 4.); |
9584 | let r = _mm_cvttps_epu64(a); |
9585 | let e = _mm_set_epi64x(3, 4); |
9586 | assert_eq_m128i(r, e); |
9587 | } |
9588 | |
9589 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9590 | unsafe fn test_mm_mask_cvttps_epu64() { |
9591 | let a = _mm_set_ps(1., 2., 3., 4.); |
9592 | let b = _mm_set_epi64x(5, 6); |
9593 | let r = _mm_mask_cvttps_epu64(b, 0b01, a); |
9594 | let e = _mm_set_epi64x(5, 4); |
9595 | assert_eq_m128i(r, e); |
9596 | } |
9597 | |
9598 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9599 | unsafe fn test_mm_maskz_cvttps_epu64() { |
9600 | let a = _mm_set_ps(1., 2., 3., 4.); |
9601 | let r = _mm_maskz_cvttps_epu64(0b01, a); |
9602 | let e = _mm_set_epi64x(0, 4); |
9603 | assert_eq_m128i(r, e); |
9604 | } |
9605 | |
9606 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9607 | unsafe fn test_mm256_cvttps_epu64() { |
9608 | let a = _mm_set_ps(1., 2., 3., 4.); |
9609 | let r = _mm256_cvttps_epu64(a); |
9610 | let e = _mm256_set_epi64x(1, 2, 3, 4); |
9611 | assert_eq_m256i(r, e); |
9612 | } |
9613 | |
9614 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9615 | unsafe fn test_mm256_mask_cvttps_epu64() { |
9616 | let a = _mm_set_ps(1., 2., 3., 4.); |
9617 | let b = _mm256_set_epi64x(5, 6, 7, 8); |
9618 | let r = _mm256_mask_cvttps_epu64(b, 0b0110, a); |
9619 | let e = _mm256_set_epi64x(5, 2, 3, 8); |
9620 | assert_eq_m256i(r, e); |
9621 | } |
9622 | |
9623 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9624 | unsafe fn test_mm256_maskz_cvttps_epu64() { |
9625 | let a = _mm_set_ps(1., 2., 3., 4.); |
9626 | let r = _mm256_maskz_cvttps_epu64(0b0110, a); |
9627 | let e = _mm256_set_epi64x(0, 2, 3, 0); |
9628 | assert_eq_m256i(r, e); |
9629 | } |
9630 | |
9631 | #[simd_test(enable = "avx512dq" )] |
9632 | unsafe fn test_mm512_cvttps_epu64() { |
9633 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
9634 | let r = _mm512_cvttps_epu64(a); |
9635 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
9636 | assert_eq_m512i(r, e); |
9637 | } |
9638 | |
9639 | #[simd_test(enable = "avx512dq" )] |
9640 | unsafe fn test_mm512_mask_cvttps_epu64() { |
9641 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
9642 | let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
9643 | let r = _mm512_mask_cvttps_epu64(b, 0b01101001, a); |
9644 | let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
9645 | assert_eq_m512i(r, e); |
9646 | } |
9647 | |
9648 | #[simd_test(enable = "avx512dq" )] |
9649 | unsafe fn test_mm512_maskz_cvttps_epu64() { |
9650 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
9651 | let r = _mm512_maskz_cvttps_epu64(0b01101001, a); |
9652 | let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
9653 | assert_eq_m512i(r, e); |
9654 | } |
9655 | |
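// 64-bit element-wise multiplies that keep only the low 64 bits of each product.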
9656 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9657 | unsafe fn test_mm_mullo_epi64() { |
9658 | let a = _mm_set_epi64x(1, 2); |
9659 | let b = _mm_set_epi64x(3, 4); |
9660 | let r = _mm_mullo_epi64(a, b); |
9661 | let e = _mm_set_epi64x(3, 8); |
9662 | assert_eq_m128i(r, e); |
9663 | } |
9664 | |
9665 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9666 | unsafe fn test_mm_mask_mullo_epi64() { |
9667 | let a = _mm_set_epi64x(1, 2); |
9668 | let b = _mm_set_epi64x(3, 4); |
9669 | let c = _mm_set_epi64x(5, 6); |
9670 | let r = _mm_mask_mullo_epi64(c, 0b01, a, b); |
9671 | let e = _mm_set_epi64x(5, 8); |
9672 | assert_eq_m128i(r, e); |
9673 | } |
9674 | |
9675 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9676 | unsafe fn test_mm_maskz_mullo_epi64() { |
9677 | let a = _mm_set_epi64x(1, 2); |
9678 | let b = _mm_set_epi64x(3, 4); |
9679 | let r = _mm_maskz_mullo_epi64(0b01, a, b); |
9680 | let e = _mm_set_epi64x(0, 8); |
9681 | assert_eq_m128i(r, e); |
9682 | } |
9683 | |
9684 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9685 | unsafe fn test_mm256_mullo_epi64() { |
9686 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
9687 | let b = _mm256_set_epi64x(5, 6, 7, 8); |
9688 | let r = _mm256_mullo_epi64(a, b); |
9689 | let e = _mm256_set_epi64x(5, 12, 21, 32); |
9690 | assert_eq_m256i(r, e); |
9691 | } |
9692 | |
9693 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9694 | unsafe fn test_mm256_mask_mullo_epi64() { |
9695 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
9696 | let b = _mm256_set_epi64x(5, 6, 7, 8); |
9697 | let c = _mm256_set_epi64x(9, 10, 11, 12); |
9698 | let r = _mm256_mask_mullo_epi64(c, 0b0110, a, b); |
9699 | let e = _mm256_set_epi64x(9, 12, 21, 12); |
9700 | assert_eq_m256i(r, e); |
9701 | } |
9702 | |
9703 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9704 | unsafe fn test_mm256_maskz_mullo_epi64() { |
9705 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
9706 | let b = _mm256_set_epi64x(5, 6, 7, 8); |
9707 | let r = _mm256_maskz_mullo_epi64(0b0110, a, b); |
9708 | let e = _mm256_set_epi64x(0, 12, 21, 0); |
9709 | assert_eq_m256i(r, e); |
9710 | } |
9711 | |
9712 | #[simd_test(enable = "avx512dq" )] |
9713 | unsafe fn test_mm512_mullo_epi64() { |
9714 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
9715 | let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
9716 | let r = _mm512_mullo_epi64(a, b); |
9717 | let e = _mm512_set_epi64(9, 20, 33, 48, 65, 84, 105, 128); |
9718 | assert_eq_m512i(r, e); |
9719 | } |
9720 | |
9721 | #[simd_test(enable = "avx512dq" )] |
9722 | unsafe fn test_mm512_mask_mullo_epi64() { |
9723 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
9724 | let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
9725 | let c = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24); |
9726 | let r = _mm512_mask_mullo_epi64(c, 0b01101001, a, b); |
9727 | let e = _mm512_set_epi64(17, 20, 33, 20, 65, 22, 23, 128); |
9728 | assert_eq_m512i(r, e); |
9729 | } |
9730 | |
9731 | #[simd_test(enable = "avx512dq" )] |
9732 | unsafe fn test_mm512_maskz_mullo_epi64() { |
9733 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
9734 | let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
9735 | let r = _mm512_maskz_mullo_epi64(0b01101001, a, b); |
9736 | let e = _mm512_set_epi64(0, 20, 33, 0, 65, 0, 0, 128); |
9737 | assert_eq_m512i(r, e); |
9738 | } |
9739 | |
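// Mask-register helpers: conversion to/from `u32`, addition, bitwise logic, shifts, tests,
// and loads/stores of `__mmask8`/`__mmask16` values.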
9740 | #[simd_test(enable = "avx512dq" )] |
9741 | unsafe fn test_cvtmask8_u32() { |
9742 | let a: __mmask8 = 0b01101001; |
9743 | let r = _cvtmask8_u32(a); |
9744 | let e: u32 = 0b01101001; |
9745 | assert_eq!(r, e); |
9746 | } |
9747 | |
9748 | #[simd_test(enable = "avx512dq" )] |
9749 | unsafe fn test_cvtu32_mask8() { |
9750 | let a: u32 = 0b01101001; |
9751 | let r = _cvtu32_mask8(a); |
9752 | let e: __mmask8 = 0b01101001; |
9753 | assert_eq!(r, e); |
9754 | } |
9755 | |
9756 | #[simd_test(enable = "avx512dq" )] |
9757 | unsafe fn test_kadd_mask16() { |
9758 | let a: __mmask16 = 27549; |
9759 | let b: __mmask16 = 23434; |
9760 | let r = _kadd_mask16(a, b); |
9761 | let e: __mmask16 = 50983; |
9762 | assert_eq!(r, e); |
9763 | } |
9764 | |
9765 | #[simd_test(enable = "avx512dq" )] |
9766 | unsafe fn test_kadd_mask8() { |
9767 | let a: __mmask8 = 98; |
9768 | let b: __mmask8 = 117; |
9769 | let r = _kadd_mask8(a, b); |
9770 | let e: __mmask8 = 215; |
9771 | assert_eq!(r, e); |
9772 | } |
9773 | |
9774 | #[simd_test(enable = "avx512dq" )] |
9775 | unsafe fn test_kand_mask8() { |
9776 | let a: __mmask8 = 0b01101001; |
9777 | let b: __mmask8 = 0b10110011; |
9778 | let r = _kand_mask8(a, b); |
9779 | let e: __mmask8 = 0b00100001; |
9780 | assert_eq!(r, e); |
9781 | } |
9782 | |
9783 | #[simd_test(enable = "avx512dq" )] |
9784 | unsafe fn test_kandn_mask8() { |
9785 | let a: __mmask8 = 0b01101001; |
9786 | let b: __mmask8 = 0b10110011; |
9787 | let r = _kandn_mask8(a, b); |
9788 | let e: __mmask8 = 0b10010010; |
9789 | assert_eq!(r, e); |
9790 | } |
9791 | |
9792 | #[simd_test(enable = "avx512dq" )] |
9793 | unsafe fn test_knot_mask8() { |
9794 | let a: __mmask8 = 0b01101001; |
9795 | let r = _knot_mask8(a); |
9796 | let e: __mmask8 = 0b10010110; |
9797 | assert_eq!(r, e); |
9798 | } |
9799 | |
9800 | #[simd_test(enable = "avx512dq" )] |
9801 | unsafe fn test_kor_mask8() { |
9802 | let a: __mmask8 = 0b01101001; |
9803 | let b: __mmask8 = 0b10110011; |
9804 | let r = _kor_mask8(a, b); |
9805 | let e: __mmask8 = 0b11111011; |
9806 | assert_eq!(r, e); |
9807 | } |
9808 | |
9809 | #[simd_test(enable = "avx512dq" )] |
9810 | unsafe fn test_kxnor_mask8() { |
9811 | let a: __mmask8 = 0b01101001; |
9812 | let b: __mmask8 = 0b10110011; |
9813 | let r = _kxnor_mask8(a, b); |
9814 | let e: __mmask8 = 0b00100101; |
9815 | assert_eq!(r, e); |
9816 | } |
9817 | |
9818 | #[simd_test(enable = "avx512dq" )] |
9819 | unsafe fn test_kxor_mask8() { |
9820 | let a: __mmask8 = 0b01101001; |
9821 | let b: __mmask8 = 0b10110011; |
9822 | let r = _kxor_mask8(a, b); |
9823 | let e: __mmask8 = 0b11011010; |
9824 | assert_eq!(r, e); |
9825 | } |
9826 | |
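// `_kortest*` ORs the two masks: the "z" result is 1 iff the OR is all zeros, and the "c"
// result is 1 iff it is all ones (here 0b01101001 | 0b10110110 == 0xFF).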
9827 | #[simd_test(enable = "avx512dq" )] |
9828 | unsafe fn test_kortest_mask8_u8() { |
9829 | let a: __mmask8 = 0b01101001; |
9830 | let b: __mmask8 = 0b10110110; |
9831 | let mut all_ones: u8 = 0; |
9832 | let r = _kortest_mask8_u8(a, b, &mut all_ones); |
9833 | assert_eq!(r, 0); |
9834 | assert_eq!(all_ones, 1); |
9835 | } |
9836 | |
9837 | #[simd_test(enable = "avx512dq" )] |
9838 | unsafe fn test_kortestc_mask8_u8() { |
9839 | let a: __mmask8 = 0b01101001; |
9840 | let b: __mmask8 = 0b10110110; |
9841 | let r = _kortestc_mask8_u8(a, b); |
9842 | assert_eq!(r, 1); |
9843 | } |
9844 | |
9845 | #[simd_test(enable = "avx512dq" )] |
9846 | unsafe fn test_kortestz_mask8_u8() { |
9847 | let a: __mmask8 = 0b01101001; |
9848 | let b: __mmask8 = 0b10110110; |
9849 | let r = _kortestz_mask8_u8(a, b); |
9850 | assert_eq!(r, 0); |
9851 | } |
9852 | |
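// Mask shifts by an immediate; bits shifted past the 8-bit mask width are dropped.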
9853 | #[simd_test(enable = "avx512dq" )] |
9854 | unsafe fn test_kshiftli_mask8() { |
9855 | let a: __mmask8 = 0b01101001; |
9856 | let r = _kshiftli_mask8::<3>(a); |
9857 | let e: __mmask8 = 0b01001000; |
9858 | assert_eq!(r, e); |
9859 | } |
9860 | |
9861 | #[simd_test(enable = "avx512dq" )] |
9862 | unsafe fn test_kshiftri_mask8() { |
9863 | let a: __mmask8 = 0b01101001; |
9864 | let r = _kshiftri_mask8::<3>(a); |
9865 | let e: __mmask8 = 0b00001101; |
9866 | assert_eq!(r, e); |
9867 | } |
9868 | |
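// `_ktest*` reports on `a & b` (the "z" result is 1 iff it is zero) and on `!a & b` (the "c"
// result is 1 iff that is zero).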
9869 | #[simd_test(enable = "avx512dq" )] |
9870 | unsafe fn test_ktest_mask8_u8() { |
9871 | let a: __mmask8 = 0b01101001; |
9872 | let b: __mmask8 = 0b10010110; |
9873 | let mut and_not: u8 = 0; |
9874 | let r = _ktest_mask8_u8(a, b, &mut and_not); |
9875 | assert_eq!(r, 1); |
9876 | assert_eq!(and_not, 0); |
9877 | } |
9878 | |
9879 | #[simd_test(enable = "avx512dq" )] |
9880 | unsafe fn test_ktestc_mask8_u8() { |
9881 | let a: __mmask8 = 0b01101001; |
9882 | let b: __mmask8 = 0b10010110; |
9883 | let r = _ktestc_mask8_u8(a, b); |
9884 | assert_eq!(r, 0); |
9885 | } |
9886 | |
9887 | #[simd_test(enable = "avx512dq" )] |
9888 | unsafe fn test_ktestz_mask8_u8() { |
9889 | let a: __mmask8 = 0b01101001; |
9890 | let b: __mmask8 = 0b10010110; |
9891 | let r = _ktestz_mask8_u8(a, b); |
9892 | assert_eq!(r, 1); |
9893 | } |
9894 | |
9895 | #[simd_test(enable = "avx512dq" )] |
9896 | unsafe fn test_ktest_mask16_u8() { |
9897 | let a: __mmask16 = 0b0110100100111100; |
9898 | let b: __mmask16 = 0b1001011011000011; |
9899 | let mut and_not: u8 = 0; |
9900 | let r = _ktest_mask16_u8(a, b, &mut and_not); |
9901 | assert_eq!(r, 1); |
9902 | assert_eq!(and_not, 0); |
9903 | } |
9904 | |
9905 | #[simd_test(enable = "avx512dq" )] |
9906 | unsafe fn test_ktestc_mask16_u8() { |
9907 | let a: __mmask16 = 0b0110100100111100; |
9908 | let b: __mmask16 = 0b1001011011000011; |
9909 | let r = _ktestc_mask16_u8(a, b); |
9910 | assert_eq!(r, 0); |
9911 | } |
9912 | |
9913 | #[simd_test(enable = "avx512dq" )] |
9914 | unsafe fn test_ktestz_mask16_u8() { |
9915 | let a: __mmask16 = 0b0110100100111100; |
9916 | let b: __mmask16 = 0b1001011011000011; |
9917 | let r = _ktestz_mask16_u8(a, b); |
9918 | assert_eq!(r, 1); |
9919 | } |
9920 | |
9921 | #[simd_test(enable = "avx512dq" )] |
9922 | unsafe fn test_load_mask8() { |
9923 | let a: __mmask8 = 0b01101001; |
9924 | let r = _load_mask8(&a); |
9925 | let e: __mmask8 = 0b01101001; |
9926 | assert_eq!(r, e); |
9927 | } |
9928 | |
9929 | #[simd_test(enable = "avx512dq" )] |
9930 | unsafe fn test_store_mask8() { |
9931 | let a: __mmask8 = 0b01101001; |
9932 | let mut r = 0; |
9933 | _store_mask8(&mut r, a); |
9934 | let e: __mmask8 = 0b01101001; |
9935 | assert_eq!(r, e); |
9936 | } |
9937 | |
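// Sign-bit extraction from vector elements into a mask (`movepi*_mask`) and expansion of a
// mask into all-ones / all-zeros elements (`movm_epi*`).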
9938 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9939 | unsafe fn test_mm_movepi32_mask() { |
9940 | let a = _mm_set_epi32(0, -2, -3, 4); |
9941 | let r = _mm_movepi32_mask(a); |
9942 | let e = 0b0110; |
9943 | assert_eq!(r, e); |
9944 | } |
9945 | |
9946 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9947 | unsafe fn test_mm256_movepi32_mask() { |
9948 | let a = _mm256_set_epi32(0, -2, -3, 4, -5, 6, 7, -8); |
9949 | let r = _mm256_movepi32_mask(a); |
9950 | let e = 0b01101001; |
9951 | assert_eq!(r, e); |
9952 | } |
9953 | |
9954 | #[simd_test(enable = "avx512dq" )] |
9955 | unsafe fn test_mm512_movepi32_mask() { |
9956 | let a = _mm512_set_epi32( |
9957 | 0, -2, -3, 4, -5, 6, 7, -8, 9, 10, -11, -12, -13, -14, 15, 16, |
9958 | ); |
9959 | let r = _mm512_movepi32_mask(a); |
9960 | let e = 0b0110100100111100; |
9961 | assert_eq!(r, e); |
9962 | } |
9963 | |
9964 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9965 | unsafe fn test_mm_movepi64_mask() { |
9966 | let a = _mm_set_epi64x(0, -2); |
9967 | let r = _mm_movepi64_mask(a); |
9968 | let e = 0b01; |
9969 | assert_eq!(r, e); |
9970 | } |
9971 | |
9972 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9973 | unsafe fn test_mm256_movepi64_mask() { |
9974 | let a = _mm256_set_epi64x(0, -2, -3, 4); |
9975 | let r = _mm256_movepi64_mask(a); |
9976 | let e = 0b0110; |
9977 | assert_eq!(r, e); |
9978 | } |
9979 | |
9980 | #[simd_test(enable = "avx512dq" )] |
9981 | unsafe fn test_mm512_movepi64_mask() { |
9982 | let a = _mm512_set_epi64(0, -2, -3, 4, -5, 6, 7, -8); |
9983 | let r = _mm512_movepi64_mask(a); |
9984 | let e = 0b01101001; |
9985 | assert_eq!(r, e); |
9986 | } |
9987 | |
9988 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9989 | unsafe fn test_mm_movm_epi32() { |
9990 | let a = 0b0110; |
9991 | let r = _mm_movm_epi32(a); |
9992 | let e = _mm_set_epi32(0, -1, -1, 0); |
9993 | assert_eq_m128i(r, e); |
9994 | } |
9995 | |
9996 | #[simd_test(enable = "avx512dq,avx512vl" )] |
9997 | unsafe fn test_mm256_movm_epi32() { |
9998 | let a = 0b01101001; |
9999 | let r = _mm256_movm_epi32(a); |
10000 | let e = _mm256_set_epi32(0, -1, -1, 0, -1, 0, 0, -1); |
10001 | assert_eq_m256i(r, e); |
10002 | } |
10003 | |
10004 | #[simd_test(enable = "avx512dq" )] |
10005 | unsafe fn test_mm512_movm_epi32() { |
10006 | let a = 0b0110100100111100; |
10007 | let r = _mm512_movm_epi32(a); |
10008 | let e = _mm512_set_epi32(0, -1, -1, 0, -1, 0, 0, -1, 0, 0, -1, -1, -1, -1, 0, 0); |
10009 | assert_eq_m512i(r, e); |
10010 | } |
10011 | |
10012 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10013 | unsafe fn test_mm_movm_epi64() { |
10014 | let a = 0b01; |
10015 | let r = _mm_movm_epi64(a); |
10016 | let e = _mm_set_epi64x(0, -1); |
10017 | assert_eq_m128i(r, e); |
10018 | } |
10019 | |
10020 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10021 | unsafe fn test_mm256_movm_epi64() { |
10022 | let a = 0b0110; |
10023 | let r = _mm256_movm_epi64(a); |
10024 | let e = _mm256_set_epi64x(0, -1, -1, 0); |
10025 | assert_eq_m256i(r, e); |
10026 | } |
10027 | |
10028 | #[simd_test(enable = "avx512dq" )] |
10029 | unsafe fn test_mm512_movm_epi64() { |
10030 | let a = 0b01101001; |
10031 | let r = _mm512_movm_epi64(a); |
10032 | let e = _mm512_set_epi64(0, -1, -1, 0, -1, 0, 0, -1); |
10033 | assert_eq_m512i(r, e); |
10034 | } |
10035 | |
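// The `range` tests use imm8 = 0b0101, which for these inputs yields the per-element maximum
// of `a` and `b`.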
10036 | #[simd_test(enable = "avx512dq" )] |
10037 | unsafe fn test_mm512_range_round_pd() { |
10038 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
10039 | let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.); |
10040 | let r = _mm512_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(a, b); |
10041 | let e = _mm512_set_pd(2., 2., 4., 4., 6., 6., 8., 8.); |
10042 | assert_eq_m512d(r, e); |
10043 | } |
10044 | |
10045 | #[simd_test(enable = "avx512dq" )] |
10046 | unsafe fn test_mm512_mask_range_round_pd() { |
10047 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
10048 | let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.); |
10049 | let c = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.); |
10050 | let r = _mm512_mask_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(c, 0b01101001, a, b); |
10051 | let e = _mm512_set_pd(9., 2., 4., 12., 6., 14., 15., 8.); |
10052 | assert_eq_m512d(r, e); |
10053 | } |
10054 | |
10055 | #[simd_test(enable = "avx512dq" )] |
10056 | unsafe fn test_mm512_maskz_range_round_pd() { |
10057 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
10058 | let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.); |
10059 | let r = _mm512_maskz_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(0b01101001, a, b); |
10060 | let e = _mm512_set_pd(0., 2., 4., 0., 6., 0., 0., 8.); |
10061 | assert_eq_m512d(r, e); |
10062 | } |
10063 | |
10064 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10065 | unsafe fn test_mm_range_pd() { |
10066 | let a = _mm_set_pd(1., 2.); |
10067 | let b = _mm_set_pd(2., 1.); |
10068 | let r = _mm_range_pd::<0b0101>(a, b); |
10069 | let e = _mm_set_pd(2., 2.); |
10070 | assert_eq_m128d(r, e); |
10071 | } |
10072 | |
10073 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10074 | unsafe fn test_mm_mask_range_pd() { |
10075 | let a = _mm_set_pd(1., 2.); |
10076 | let b = _mm_set_pd(2., 1.); |
10077 | let c = _mm_set_pd(3., 4.); |
10078 | let r = _mm_mask_range_pd::<0b0101>(c, 0b01, a, b); |
10079 | let e = _mm_set_pd(3., 2.); |
10080 | assert_eq_m128d(r, e); |
10081 | } |
10082 | |
10083 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10084 | unsafe fn test_mm_maskz_range_pd() { |
10085 | let a = _mm_set_pd(1., 2.); |
10086 | let b = _mm_set_pd(2., 1.); |
10087 | let r = _mm_maskz_range_pd::<0b0101>(0b01, a, b); |
10088 | let e = _mm_set_pd(0., 2.); |
10089 | assert_eq_m128d(r, e); |
10090 | } |
10091 | |
10092 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10093 | unsafe fn test_mm256_range_pd() { |
10094 | let a = _mm256_set_pd(1., 2., 3., 4.); |
10095 | let b = _mm256_set_pd(2., 1., 4., 3.); |
10096 | let r = _mm256_range_pd::<0b0101>(a, b); |
10097 | let e = _mm256_set_pd(2., 2., 4., 4.); |
10098 | assert_eq_m256d(r, e); |
10099 | } |
10100 | |
10101 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10102 | unsafe fn test_mm256_mask_range_pd() { |
10103 | let a = _mm256_set_pd(1., 2., 3., 4.); |
10104 | let b = _mm256_set_pd(2., 1., 4., 3.); |
10105 | let c = _mm256_set_pd(5., 6., 7., 8.); |
10106 | let r = _mm256_mask_range_pd::<0b0101>(c, 0b0110, a, b); |
10107 | let e = _mm256_set_pd(5., 2., 4., 8.); |
10108 | assert_eq_m256d(r, e); |
10109 | } |
10110 | |
10111 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10112 | unsafe fn test_mm256_maskz_range_pd() { |
10113 | let a = _mm256_set_pd(1., 2., 3., 4.); |
10114 | let b = _mm256_set_pd(2., 1., 4., 3.); |
10115 | let r = _mm256_maskz_range_pd::<0b0101>(0b0110, a, b); |
10116 | let e = _mm256_set_pd(0., 2., 4., 0.); |
10117 | assert_eq_m256d(r, e); |
10118 | } |
10119 | |
10120 | #[simd_test(enable = "avx512dq" )] |
10121 | unsafe fn test_mm512_range_pd() { |
10122 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
10123 | let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.); |
10124 | let r = _mm512_range_pd::<0b0101>(a, b); |
10125 | let e = _mm512_set_pd(2., 2., 4., 4., 6., 6., 8., 8.); |
10126 | assert_eq_m512d(r, e); |
10127 | } |
10128 | |
10129 | #[simd_test(enable = "avx512dq" )] |
10130 | unsafe fn test_mm512_mask_range_pd() { |
10131 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
10132 | let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.); |
10133 | let c = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.); |
10134 | let r = _mm512_mask_range_pd::<0b0101>(c, 0b01101001, a, b); |
10135 | let e = _mm512_set_pd(9., 2., 4., 12., 6., 14., 15., 8.); |
10136 | assert_eq_m512d(r, e); |
10137 | } |
10138 | |
10139 | #[simd_test(enable = "avx512dq" )] |
10140 | unsafe fn test_mm512_maskz_range_pd() { |
10141 | let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
10142 | let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.); |
10143 | let r = _mm512_maskz_range_pd::<0b0101>(0b01101001, a, b); |
10144 | let e = _mm512_set_pd(0., 2., 4., 0., 6., 0., 0., 8.); |
10145 | assert_eq_m512d(r, e); |
10146 | } |
10147 | |
10148 | #[simd_test(enable = "avx512dq" )] |
10149 | unsafe fn test_mm512_range_round_ps() { |
10150 | let a = _mm512_set_ps( |
10151 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
10152 | ); |
10153 | let b = _mm512_set_ps( |
10154 | 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15., |
10155 | ); |
10156 | let r = _mm512_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(a, b); |
10157 | let e = _mm512_set_ps( |
10158 | 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16., |
10159 | ); |
10160 | assert_eq_m512(r, e); |
10161 | } |
10162 | |
10163 | #[simd_test(enable = "avx512dq" )] |
10164 | unsafe fn test_mm512_mask_range_round_ps() { |
10165 | let a = _mm512_set_ps( |
10166 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
10167 | ); |
10168 | let b = _mm512_set_ps( |
10169 | 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15., |
10170 | ); |
10171 | let c = _mm512_set_ps( |
10172 | 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., |
10173 | ); |
10174 | let r = |
10175 | _mm512_mask_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0110100100111100, a, b); |
10176 | let e = _mm512_set_ps( |
10177 | 17., 2., 4., 20., 6., 22., 23., 8., 25., 26., 12., 12., 14., 14., 31., 32., |
10178 | ); |
10179 | assert_eq_m512(r, e); |
10180 | } |
10181 | |
10182 | #[simd_test(enable = "avx512dq" )] |
10183 | unsafe fn test_mm512_maskz_range_round_ps() { |
10184 | let a = _mm512_set_ps( |
10185 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
10186 | ); |
10187 | let b = _mm512_set_ps( |
10188 | 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15., |
10189 | ); |
10190 | let r = _mm512_maskz_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(0b0110100100111100, a, b); |
10191 | let e = _mm512_set_ps( |
10192 | 0., 2., 4., 0., 6., 0., 0., 8., 0., 0., 12., 12., 14., 14., 0., 0., |
10193 | ); |
10194 | assert_eq_m512(r, e); |
10195 | } |
10196 | |
10197 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10198 | unsafe fn test_mm_range_ps() { |
10199 | let a = _mm_set_ps(1., 2., 3., 4.); |
10200 | let b = _mm_set_ps(2., 1., 4., 3.); |
10201 | let r = _mm_range_ps::<0b0101>(a, b); |
10202 | let e = _mm_set_ps(2., 2., 4., 4.); |
10203 | assert_eq_m128(r, e); |
10204 | } |
10205 | |
10206 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10207 | unsafe fn test_mm_mask_range_ps() { |
10208 | let a = _mm_set_ps(1., 2., 3., 4.); |
10209 | let b = _mm_set_ps(2., 1., 4., 3.); |
10210 | let c = _mm_set_ps(5., 6., 7., 8.); |
10211 | let r = _mm_mask_range_ps::<0b0101>(c, 0b0110, a, b); |
10212 | let e = _mm_set_ps(5., 2., 4., 8.); |
10213 | assert_eq_m128(r, e); |
10214 | } |
10215 | |
10216 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10217 | unsafe fn test_mm_maskz_range_ps() { |
10218 | let a = _mm_set_ps(1., 2., 3., 4.); |
10219 | let b = _mm_set_ps(2., 1., 4., 3.); |
10220 | let r = _mm_maskz_range_ps::<0b0101>(0b0110, a, b); |
10221 | let e = _mm_set_ps(0., 2., 4., 0.); |
10222 | assert_eq_m128(r, e); |
10223 | } |
10224 | |
10225 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10226 | unsafe fn test_mm256_range_ps() { |
10227 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
10228 | let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.); |
10229 | let r = _mm256_range_ps::<0b0101>(a, b); |
10230 | let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.); |
10231 | assert_eq_m256(r, e); |
10232 | } |
10233 | |
10234 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10235 | unsafe fn test_mm256_mask_range_ps() { |
10236 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
10237 | let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.); |
10238 | let c = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.); |
10239 | let r = _mm256_mask_range_ps::<0b0101>(c, 0b01101001, a, b); |
10240 | let e = _mm256_set_ps(9., 2., 4., 12., 6., 14., 15., 8.); |
10241 | assert_eq_m256(r, e); |
10242 | } |
10243 | |
10244 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10245 | unsafe fn test_mm256_maskz_range_ps() { |
10246 | let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
10247 | let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.); |
10248 | let r = _mm256_maskz_range_ps::<0b0101>(0b01101001, a, b); |
10249 | let e = _mm256_set_ps(0., 2., 4., 0., 6., 0., 0., 8.); |
10250 | assert_eq_m256(r, e); |
10251 | } |
10252 | |
10253 | #[simd_test(enable = "avx512dq" )] |
10254 | unsafe fn test_mm512_range_ps() { |
10255 | let a = _mm512_set_ps( |
10256 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
10257 | ); |
10258 | let b = _mm512_set_ps( |
10259 | 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15., |
10260 | ); |
10261 | let r = _mm512_range_ps::<0b0101>(a, b); |
10262 | let e = _mm512_set_ps( |
10263 | 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16., |
10264 | ); |
10265 | assert_eq_m512(r, e); |
10266 | } |
10267 | |
10268 | #[simd_test(enable = "avx512dq" )] |
10269 | unsafe fn test_mm512_mask_range_ps() { |
10270 | let a = _mm512_set_ps( |
10271 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
10272 | ); |
10273 | let b = _mm512_set_ps( |
10274 | 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15., |
10275 | ); |
10276 | let c = _mm512_set_ps( |
10277 | 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., |
10278 | ); |
10279 | let r = _mm512_mask_range_ps::<0b0101>(c, 0b0110100100111100, a, b); |
10280 | let e = _mm512_set_ps( |
10281 | 17., 2., 4., 20., 6., 22., 23., 8., 25., 26., 12., 12., 14., 14., 31., 32., |
10282 | ); |
10283 | assert_eq_m512(r, e); |
10284 | } |
10285 | |
10286 | #[simd_test(enable = "avx512dq" )] |
10287 | unsafe fn test_mm512_maskz_range_ps() { |
10288 | let a = _mm512_set_ps( |
10289 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
10290 | ); |
10291 | let b = _mm512_set_ps( |
10292 | 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15., |
10293 | ); |
10294 | let r = _mm512_maskz_range_ps::<0b0101>(0b0110100100111100, a, b); |
10295 | let e = _mm512_set_ps( |
10296 | 0., 2., 4., 0., 6., 0., 0., 8., 0., 0., 12., 12., 14., 14., 0., 0., |
10297 | ); |
10298 | assert_eq_m512(r, e); |
10299 | } |
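// The scalar `_sd`/`_ss` range forms operate on element 0 only and copy the upper
// element(s) of the result from `a`; with the writemask clear, element 0 comes from the
// `src` operand (`c`) or is zeroed. That is why the expected values below always keep
// `a`'s upper lanes untouched.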
10300 | |
10301 | #[simd_test(enable = "avx512dq" )] |
10302 | unsafe fn test_mm_range_round_sd() { |
10303 | let a = _mm_set_sd(1.); |
10304 | let b = _mm_set_sd(2.); |
10305 | let r = _mm_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(a, b); |
10306 | let e = _mm_set_sd(2.); |
10307 | assert_eq_m128d(r, e); |
10308 | } |
10309 | |
10310 | #[simd_test(enable = "avx512dq" )] |
10311 | unsafe fn test_mm_mask_range_round_sd() { |
10312 | let a = _mm_set_sd(1.); |
10313 | let b = _mm_set_sd(2.); |
10314 | let c = _mm_set_sd(3.); |
10315 | let r = _mm_mask_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0, a, b); |
10316 | let e = _mm_set_sd(3.); |
10317 | assert_eq_m128d(r, e); |
10318 | } |
10319 | |
10320 | #[simd_test(enable = "avx512dq" )] |
10321 | unsafe fn test_mm_maskz_range_round_sd() { |
10322 | let a = _mm_set_sd(1.); |
10323 | let b = _mm_set_sd(2.); |
10324 | let r = _mm_maskz_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(0b0, a, b); |
10325 | let e = _mm_set_sd(0.); |
10326 | assert_eq_m128d(r, e); |
10327 | } |
10328 | |
10329 | #[simd_test(enable = "avx512dq" )] |
10330 | unsafe fn test_mm_mask_range_sd() { |
10331 | let a = _mm_set_sd(1.); |
10332 | let b = _mm_set_sd(2.); |
10333 | let c = _mm_set_sd(3.); |
10334 | let r = _mm_mask_range_sd::<0b0101>(c, 0b0, a, b); |
10335 | let e = _mm_set_sd(3.); |
10336 | assert_eq_m128d(r, e); |
10337 | } |
10338 | |
10339 | #[simd_test(enable = "avx512dq" )] |
10340 | unsafe fn test_mm_maskz_range_sd() { |
10341 | let a = _mm_set_sd(1.); |
10342 | let b = _mm_set_sd(2.); |
10343 | let r = _mm_maskz_range_sd::<0b0101>(0b0, a, b); |
10344 | let e = _mm_set_sd(0.); |
10345 | assert_eq_m128d(r, e); |
10346 | } |
10347 | |
10348 | #[simd_test(enable = "avx512dq" )] |
10349 | unsafe fn test_mm_range_round_ss() { |
10350 | let a = _mm_set_ss(1.); |
10351 | let b = _mm_set_ss(2.); |
10352 | let r = _mm_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(a, b); |
10353 | let e = _mm_set_ss(2.); |
10354 | assert_eq_m128(r, e); |
10355 | } |
10356 | |
10357 | #[simd_test(enable = "avx512dq" )] |
10358 | unsafe fn test_mm_mask_range_round_ss() { |
10359 | let a = _mm_set_ss(1.); |
10360 | let b = _mm_set_ss(2.); |
10361 | let c = _mm_set_ss(3.); |
10362 | let r = _mm_mask_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0, a, b); |
10363 | let e = _mm_set_ss(3.); |
10364 | assert_eq_m128(r, e); |
10365 | } |
10366 | |
10367 | #[simd_test(enable = "avx512dq" )] |
10368 | unsafe fn test_mm_maskz_range_round_ss() { |
10369 | let a = _mm_set_ss(1.); |
10370 | let b = _mm_set_ss(2.); |
10371 | let r = _mm_maskz_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(0b0, a, b); |
10372 | let e = _mm_set_ss(0.); |
10373 | assert_eq_m128(r, e); |
10374 | } |
10375 | |
10376 | #[simd_test(enable = "avx512dq" )] |
10377 | unsafe fn test_mm_mask_range_ss() { |
10378 | let a = _mm_set_ss(1.); |
10379 | let b = _mm_set_ss(2.); |
10380 | let c = _mm_set_ss(3.); |
10381 | let r = _mm_mask_range_ss::<0b0101>(c, 0b0, a, b); |
10382 | let e = _mm_set_ss(3.); |
10383 | assert_eq_m128(r, e); |
10384 | } |
10385 | |
10386 | #[simd_test(enable = "avx512dq" )] |
10387 | unsafe fn test_mm_maskz_range_ss() { |
10388 | let a = _mm_set_ss(1.); |
10389 | let b = _mm_set_ss(2.); |
10390 | let r = _mm_maskz_range_ss::<0b0101>(0b0, a, b); |
10391 | let e = _mm_set_ss(0.); |
10392 | assert_eq_m128(r, e); |
10393 | } |
10394 | |
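// For the reduce tests, the const IMM8 is `16 | _MM_FROUND_TO_ZERO`: the upper nibble
// (16 = 1 << 4) asks VREDUCE* to keep M = 1 fraction bit and the low bits select the
// rounding mode. The result is roughly `a - round(a * 2^M) / 2^M`, i.e. what is left of
// `a` after snapping it toward zero to a multiple of 0.5. For example
// `reduce(0.75) = 0.75 - trunc(0.75 * 2.0) / 2.0 = 0.75 - 0.5 = 0.25`, while
// `reduce(0.50) = 0.0`, which produces the alternating 0.25/0.0 pattern expected below.
// (The braces around the constant are needed because it is a const-generic expression.)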
10395 | #[simd_test(enable = "avx512dq" )] |
10396 | unsafe fn test_mm512_reduce_round_pd() { |
10397 | let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
10398 | let r = _mm512_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a); |
10399 | let e = _mm512_set_pd(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.); |
10400 | assert_eq_m512d(r, e); |
10401 | } |
10402 | |
10403 | #[simd_test(enable = "avx512dq" )] |
10404 | unsafe fn test_mm512_mask_reduce_round_pd() { |
10405 | let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
10406 | let src = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.); |
10407 | let r = _mm512_mask_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
10408 | src, 0b01101001, a, |
10409 | ); |
10410 | let e = _mm512_set_pd(3., 0., 0.25, 6., 0.25, 8., 9., 0.); |
10411 | assert_eq_m512d(r, e); |
10412 | } |
10413 | |
10414 | #[simd_test(enable = "avx512dq" )] |
10415 | unsafe fn test_mm512_maskz_reduce_round_pd() { |
10416 | let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
10417 | let r = _mm512_maskz_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
10418 | 0b01101001, a, |
10419 | ); |
10420 | let e = _mm512_set_pd(0., 0., 0.25, 0., 0.25, 0., 0., 0.); |
10421 | assert_eq_m512d(r, e); |
10422 | } |
10423 | |
10424 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10425 | unsafe fn test_mm_reduce_pd() { |
10426 | let a = _mm_set_pd(0.25, 0.50); |
10427 | let r = _mm_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
10428 | let e = _mm_set_pd(0.25, 0.); |
10429 | assert_eq_m128d(r, e); |
10430 | } |
10431 | |
10432 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10433 | unsafe fn test_mm_mask_reduce_pd() { |
10434 | let a = _mm_set_pd(0.25, 0.50); |
10435 | let src = _mm_set_pd(3., 4.); |
10436 | let r = _mm_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01, a); |
10437 | let e = _mm_set_pd(3., 0.); |
10438 | assert_eq_m128d(r, e); |
10439 | } |
10440 | |
10441 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10442 | unsafe fn test_mm_maskz_reduce_pd() { |
10443 | let a = _mm_set_pd(0.25, 0.50); |
10444 | let r = _mm_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01, a); |
10445 | let e = _mm_set_pd(0., 0.); |
10446 | assert_eq_m128d(r, e); |
10447 | } |
10448 | |
10449 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10450 | unsafe fn test_mm256_reduce_pd() { |
10451 | let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0); |
10452 | let r = _mm256_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
10453 | let e = _mm256_set_pd(0.25, 0., 0.25, 0.); |
10454 | assert_eq_m256d(r, e); |
10455 | } |
10456 | |
10457 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10458 | unsafe fn test_mm256_mask_reduce_pd() { |
10459 | let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0); |
10460 | let src = _mm256_set_pd(3., 4., 5., 6.); |
10461 | let r = _mm256_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110, a); |
10462 | let e = _mm256_set_pd(3., 0., 0.25, 6.); |
10463 | assert_eq_m256d(r, e); |
10464 | } |
10465 | |
10466 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10467 | unsafe fn test_mm256_maskz_reduce_pd() { |
10468 | let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0); |
10469 | let r = _mm256_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110, a); |
10470 | let e = _mm256_set_pd(0., 0., 0.25, 0.); |
10471 | assert_eq_m256d(r, e); |
10472 | } |
10473 | |
10474 | #[simd_test(enable = "avx512dq" )] |
10475 | unsafe fn test_mm512_reduce_pd() { |
10476 | let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
10477 | let r = _mm512_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
10478 | let e = _mm512_set_pd(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.); |
10479 | assert_eq_m512d(r, e); |
10480 | } |
10481 | |
10482 | #[simd_test(enable = "avx512dq" )] |
10483 | unsafe fn test_mm512_mask_reduce_pd() { |
10484 | let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
10485 | let src = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.); |
10486 | let r = _mm512_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01101001, a); |
10487 | let e = _mm512_set_pd(3., 0., 0.25, 6., 0.25, 8., 9., 0.); |
10488 | assert_eq_m512d(r, e); |
10489 | } |
10490 | |
10491 | #[simd_test(enable = "avx512dq" )] |
10492 | unsafe fn test_mm512_maskz_reduce_pd() { |
10493 | let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
10494 | let r = _mm512_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01101001, a); |
10495 | let e = _mm512_set_pd(0., 0., 0.25, 0., 0.25, 0., 0., 0.); |
10496 | assert_eq_m512d(r, e); |
10497 | } |
10498 | |
10499 | #[simd_test(enable = "avx512dq" )] |
10500 | unsafe fn test_mm512_reduce_round_ps() { |
10501 | let a = _mm512_set_ps( |
10502 | 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75, |
10503 | 4.0, |
10504 | ); |
10505 | let r = _mm512_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a); |
10506 | let e = _mm512_set_ps( |
10507 | 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., |
10508 | ); |
10509 | assert_eq_m512(r, e); |
10510 | } |
10511 | |
10512 | #[simd_test(enable = "avx512dq" )] |
10513 | unsafe fn test_mm512_mask_reduce_round_ps() { |
10514 | let a = _mm512_set_ps( |
10515 | 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75, |
10516 | 4.0, |
10517 | ); |
10518 | let src = _mm512_set_ps( |
10519 | 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., |
10520 | ); |
10521 | let r = _mm512_mask_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
10522 | src, |
10523 | 0b0110100100111100, |
10524 | a, |
10525 | ); |
10526 | let e = _mm512_set_ps( |
10527 | 5., 0., 0.25, 8., 0.25, 10., 11., 0., 13., 14., 0.25, 0., 0.25, 0., 19., 20., |
10528 | ); |
10529 | assert_eq_m512(r, e); |
10530 | } |
10531 | |
10532 | #[simd_test(enable = "avx512dq" )] |
10533 | unsafe fn test_mm512_maskz_reduce_round_ps() { |
10534 | let a = _mm512_set_ps( |
10535 | 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75, |
10536 | 4.0, |
10537 | ); |
10538 | let r = _mm512_maskz_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
10539 | 0b0110100100111100, |
10540 | a, |
10541 | ); |
10542 | let e = _mm512_set_ps( |
10543 | 0., 0., 0.25, 0., 0.25, 0., 0., 0., 0., 0., 0.25, 0., 0.25, 0., 0., 0., |
10544 | ); |
10545 | assert_eq_m512(r, e); |
10546 | } |
10547 | |
10548 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10549 | unsafe fn test_mm_reduce_ps() { |
10550 | let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0); |
10551 | let r = _mm_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
10552 | let e = _mm_set_ps(0.25, 0., 0.25, 0.); |
10553 | assert_eq_m128(r, e); |
10554 | } |
10555 | |
10556 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10557 | unsafe fn test_mm_mask_reduce_ps() { |
10558 | let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0); |
10559 | let src = _mm_set_ps(2., 3., 4., 5.); |
10560 | let r = _mm_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110, a); |
10561 | let e = _mm_set_ps(2., 0., 0.25, 5.); |
10562 | assert_eq_m128(r, e); |
10563 | } |
10564 | |
10565 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10566 | unsafe fn test_mm_maskz_reduce_ps() { |
10567 | let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0); |
10568 | let r = _mm_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110, a); |
10569 | let e = _mm_set_ps(0., 0., 0.25, 0.); |
10570 | assert_eq_m128(r, e); |
10571 | } |
10572 | |
10573 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10574 | unsafe fn test_mm256_reduce_ps() { |
10575 | let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
10576 | let r = _mm256_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
10577 | let e = _mm256_set_ps(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.); |
10578 | assert_eq_m256(r, e); |
10579 | } |
10580 | |
10581 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10582 | unsafe fn test_mm256_mask_reduce_ps() { |
10583 | let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
10584 | let src = _mm256_set_ps(3., 4., 5., 6., 7., 8., 9., 10.); |
10585 | let r = _mm256_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01101001, a); |
10586 | let e = _mm256_set_ps(3., 0., 0.25, 6., 0.25, 8., 9., 0.); |
10587 | assert_eq_m256(r, e); |
10588 | } |
10589 | |
10590 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10591 | unsafe fn test_mm256_maskz_reduce_ps() { |
10592 | let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
10593 | let r = _mm256_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01101001, a); |
10594 | let e = _mm256_set_ps(0., 0., 0.25, 0., 0.25, 0., 0., 0.); |
10595 | assert_eq_m256(r, e); |
10596 | } |
10597 | |
10598 | #[simd_test(enable = "avx512dq" )] |
10599 | unsafe fn test_mm512_reduce_ps() { |
10600 | let a = _mm512_set_ps( |
10601 | 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75, |
10602 | 4.0, |
10603 | ); |
10604 | let r = _mm512_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
10605 | let e = _mm512_set_ps( |
10606 | 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., |
10607 | ); |
10608 | assert_eq_m512(r, e); |
10609 | } |
10610 | |
10611 | #[simd_test(enable = "avx512dq" )] |
10612 | unsafe fn test_mm512_mask_reduce_ps() { |
10613 | let a = _mm512_set_ps( |
10614 | 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75, |
10615 | 4.0, |
10616 | ); |
10617 | let src = _mm512_set_ps( |
10618 | 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., |
10619 | ); |
10620 | let r = _mm512_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110100100111100, a); |
10621 | let e = _mm512_set_ps( |
10622 | 5., 0., 0.25, 8., 0.25, 10., 11., 0., 13., 14., 0.25, 0., 0.25, 0., 19., 20., |
10623 | ); |
10624 | assert_eq_m512(r, e); |
10625 | } |
10626 | |
10627 | #[simd_test(enable = "avx512dq" )] |
10628 | unsafe fn test_mm512_maskz_reduce_ps() { |
10629 | let a = _mm512_set_ps( |
10630 | 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75, |
10631 | 4.0, |
10632 | ); |
10633 | let r = _mm512_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110100100111100, a); |
10634 | let e = _mm512_set_ps( |
10635 | 0., 0., 0.25, 0., 0.25, 0., 0., 0., 0., 0., 0.25, 0., 0.25, 0., 0., 0., |
10636 | ); |
10637 | assert_eq_m512(r, e); |
10638 | } |
10639 | |
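// As with the scalar range intrinsics above, the `_sd`/`_ss` reduce forms reduce only
// element 0 of `b` and copy the upper element(s) from `a`, so the upper lanes of the
// expected vectors always come from `a`.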
10640 | #[simd_test(enable = "avx512dq" )] |
10641 | unsafe fn test_mm_reduce_round_sd() { |
10642 | let a = _mm_set_pd(1., 2.); |
10643 | let b = _mm_set_sd(0.25); |
10644 | let r = _mm_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b); |
10645 | let e = _mm_set_pd(1., 0.25); |
10646 | assert_eq_m128d(r, e); |
10647 | } |
10648 | |
10649 | #[simd_test(enable = "avx512dq" )] |
10650 | unsafe fn test_mm_mask_reduce_round_sd() { |
10651 | let a = _mm_set_pd(1., 2.); |
10652 | let b = _mm_set_sd(0.25); |
10653 | let c = _mm_set_pd(3., 4.); |
10654 | let r = _mm_mask_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
10655 | c, 0b0, a, b, |
10656 | ); |
10657 | let e = _mm_set_pd(1., 4.); |
10658 | assert_eq_m128d(r, e); |
10659 | } |
10660 | |
10661 | #[simd_test(enable = "avx512dq" )] |
10662 | unsafe fn test_mm_maskz_reduce_round_sd() { |
10663 | let a = _mm_set_pd(1., 2.); |
10664 | let b = _mm_set_sd(0.25); |
10665 | let r = |
10666 | _mm_maskz_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0b0, a, b); |
10667 | let e = _mm_set_pd(1., 0.); |
10668 | assert_eq_m128d(r, e); |
10669 | } |
10670 | |
10671 | #[simd_test(enable = "avx512dq" )] |
10672 | unsafe fn test_mm_reduce_sd() { |
10673 | let a = _mm_set_pd(1., 2.); |
10674 | let b = _mm_set_sd(0.25); |
10675 | let r = _mm_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b); |
10676 | let e = _mm_set_pd(1., 0.25); |
10677 | assert_eq_m128d(r, e); |
10678 | } |
10679 | |
10680 | #[simd_test(enable = "avx512dq" )] |
10681 | unsafe fn test_mm_mask_reduce_sd() { |
10682 | let a = _mm_set_pd(1., 2.); |
10683 | let b = _mm_set_sd(0.25); |
10684 | let c = _mm_set_pd(3., 4.); |
10685 | let r = _mm_mask_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(c, 0b0, a, b); |
10686 | let e = _mm_set_pd(1., 4.); |
10687 | assert_eq_m128d(r, e); |
10688 | } |
10689 | |
10690 | #[simd_test(enable = "avx512dq" )] |
10691 | unsafe fn test_mm_maskz_reduce_sd() { |
10692 | let a = _mm_set_pd(1., 2.); |
10693 | let b = _mm_set_sd(0.25); |
10694 | let r = _mm_maskz_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0, a, b); |
10695 | let e = _mm_set_pd(1., 0.); |
10696 | assert_eq_m128d(r, e); |
10697 | } |
10698 | |
10699 | #[simd_test(enable = "avx512dq" )] |
10700 | unsafe fn test_mm_reduce_round_ss() { |
10701 | let a = _mm_set_ps(1., 2., 3., 4.); |
10702 | let b = _mm_set_ss(0.25); |
10703 | let r = _mm_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b); |
10704 | let e = _mm_set_ps(1., 2., 3., 0.25); |
10705 | assert_eq_m128(r, e); |
10706 | } |
10707 | |
10708 | #[simd_test(enable = "avx512dq" )] |
10709 | unsafe fn test_mm_mask_reduce_round_ss() { |
10710 | let a = _mm_set_ps(1., 2., 3., 4.); |
10711 | let b = _mm_set_ss(0.25); |
10712 | let c = _mm_set_ps(5., 6., 7., 8.); |
10713 | let r = _mm_mask_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
10714 | c, 0b0, a, b, |
10715 | ); |
10716 | let e = _mm_set_ps(1., 2., 3., 8.); |
10717 | assert_eq_m128(r, e); |
10718 | } |
10719 | |
10720 | #[simd_test(enable = "avx512dq" )] |
10721 | unsafe fn test_mm_maskz_reduce_round_ss() { |
10722 | let a = _mm_set_ps(1., 2., 3., 4.); |
10723 | let b = _mm_set_ss(0.25); |
10724 | let r = |
10725 | _mm_maskz_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0b0, a, b); |
10726 | let e = _mm_set_ps(1., 2., 3., 0.); |
10727 | assert_eq_m128(r, e); |
10728 | } |
10729 | |
10730 | #[simd_test(enable = "avx512dq" )] |
10731 | unsafe fn test_mm_reduce_ss() { |
10732 | let a = _mm_set_ps(1., 2., 3., 4.); |
10733 | let b = _mm_set_ss(0.25); |
10734 | let r = _mm_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b); |
10735 | let e = _mm_set_ps(1., 2., 3., 0.25); |
10736 | assert_eq_m128(r, e); |
10737 | } |
10738 | |
10739 | #[simd_test(enable = "avx512dq" )] |
10740 | unsafe fn test_mm_mask_reduce_ss() { |
10741 | let a = _mm_set_ps(1., 2., 3., 4.); |
10742 | let b = _mm_set_ss(0.25); |
10743 | let c = _mm_set_ps(5., 6., 7., 8.); |
10744 | let r = _mm_mask_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(c, 0b0, a, b); |
10745 | let e = _mm_set_ps(1., 2., 3., 8.); |
10746 | assert_eq_m128(r, e); |
10747 | } |
10748 | |
10749 | #[simd_test(enable = "avx512dq" )] |
10750 | unsafe fn test_mm_maskz_reduce_ss() { |
10751 | let a = _mm_set_ps(1., 2., 3., 4.); |
10752 | let b = _mm_set_ss(0.25); |
10753 | let r = _mm_maskz_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0, a, b); |
10754 | let e = _mm_set_ps(1., 2., 3., 0.); |
10755 | assert_eq_m128(r, e); |
10756 | } |
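// The fpclass tests all use IMM8 = 0x18. Per Intel's VFPCLASS* category encoding, bit 3
// (0x08) matches positive infinity and bit 4 (0x10) matches negative infinity, so 0x18
// flags a lane iff it holds an infinity of either sign; NaNs, zeros and denormals are
// covered by other imm8 bits and are deliberately not matched here. The `_mask_` variants
// simply AND the classification result with the incoming mask `k`.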
10757 | |
10758 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10759 | unsafe fn test_mm_fpclass_pd_mask() { |
10760 | let a = _mm_set_pd(1., f64::INFINITY); |
10761 | let r = _mm_fpclass_pd_mask::<0x18>(a); |
10762 | let e = 0b01; |
10763 | assert_eq!(r, e); |
10764 | } |
10765 | |
10766 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10767 | unsafe fn test_mm_mask_fpclass_pd_mask() { |
10768 | let a = _mm_set_pd(1., f64::INFINITY); |
10769 | let r = _mm_mask_fpclass_pd_mask::<0x18>(0b10, a); |
10770 | let e = 0b00; |
10771 | assert_eq!(r, e); |
10772 | } |
10773 | |
10774 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10775 | unsafe fn test_mm256_fpclass_pd_mask() { |
10776 | let a = _mm256_set_pd(1., f64::INFINITY, f64::NEG_INFINITY, 0.0); |
10777 | let r = _mm256_fpclass_pd_mask::<0x18>(a); |
10778 | let e = 0b0110; |
10779 | assert_eq!(r, e); |
10780 | } |
10781 | |
10782 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10783 | unsafe fn test_mm256_mask_fpclass_pd_mask() { |
10784 | let a = _mm256_set_pd(1., f64::INFINITY, f64::NEG_INFINITY, 0.0); |
10785 | let r = _mm256_mask_fpclass_pd_mask::<0x18>(0b1010, a); |
10786 | let e = 0b0010; |
10787 | assert_eq!(r, e); |
10788 | } |
10789 | |
10790 | #[simd_test(enable = "avx512dq" )] |
10791 | unsafe fn test_mm512_fpclass_pd_mask() { |
10792 | let a = _mm512_set_pd( |
10793 | 1., |
10794 | f64::INFINITY, |
10795 | f64::NEG_INFINITY, |
10796 | 0.0, |
10797 | -0.0, |
10798 | -2.0, |
10799 | f64::NAN, |
10800 | 1.0e-308, |
10801 | ); |
10802 | let r = _mm512_fpclass_pd_mask::<0x18>(a); |
10803 | let e = 0b01100000; |
10804 | assert_eq!(r, e); |
10805 | } |
10806 | |
10807 | #[simd_test(enable = "avx512dq" )] |
10808 | unsafe fn test_mm512_mask_fpclass_pd_mask() { |
10809 | let a = _mm512_set_pd( |
10810 | 1., |
10811 | f64::INFINITY, |
10812 | f64::NEG_INFINITY, |
10813 | 0.0, |
10814 | -0.0, |
10815 | -2.0, |
10816 | f64::NAN, |
10817 | 1.0e-308, |
10818 | ); |
10819 | let r = _mm512_mask_fpclass_pd_mask::<0x18>(0b10101010, a); |
10820 | let e = 0b00100000; |
10821 | assert_eq!(r, e); |
10822 | } |
10823 | |
10824 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10825 | unsafe fn test_mm_fpclass_ps_mask() { |
10826 | let a = _mm_set_ps(1., f32::INFINITY, f32::NEG_INFINITY, 0.0); |
10827 | let r = _mm_fpclass_ps_mask::<0x18>(a); |
10828 | let e = 0b0110; |
10829 | assert_eq!(r, e); |
10830 | } |
10831 | |
10832 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10833 | unsafe fn test_mm_mask_fpclass_ps_mask() { |
10834 | let a = _mm_set_ps(1., f32::INFINITY, f32::NEG_INFINITY, 0.0); |
10835 | let r = _mm_mask_fpclass_ps_mask::<0x18>(0b1010, a); |
10836 | let e = 0b0010; |
10837 | assert_eq!(r, e); |
10838 | } |
10839 | |
10840 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10841 | unsafe fn test_mm256_fpclass_ps_mask() { |
10842 | let a = _mm256_set_ps( |
10843 | 1., |
10844 | f32::INFINITY, |
10845 | f32::NEG_INFINITY, |
10846 | 0.0, |
10847 | -0.0, |
10848 | -2.0, |
10849 | f32::NAN, |
10850 | 1.0e-38, |
10851 | ); |
10852 | let r = _mm256_fpclass_ps_mask::<0x18>(a); |
10853 | let e = 0b01100000; |
10854 | assert_eq!(r, e); |
10855 | } |
10856 | |
10857 | #[simd_test(enable = "avx512dq,avx512vl" )] |
10858 | unsafe fn test_mm256_mask_fpclass_ps_mask() { |
10859 | let a = _mm256_set_ps( |
10860 | 1., |
10861 | f32::INFINITY, |
10862 | f32::NEG_INFINITY, |
10863 | 0.0, |
10864 | -0.0, |
10865 | -2.0, |
10866 | f32::NAN, |
10867 | 1.0e-38, |
10868 | ); |
10869 | let r = _mm256_mask_fpclass_ps_mask::<0x18>(0b10101010, a); |
10870 | let e = 0b00100000; |
10871 | assert_eq!(r, e); |
10872 | } |
10873 | |
10874 | #[simd_test(enable = "avx512dq" )] |
10875 | unsafe fn test_mm512_fpclass_ps_mask() { |
10876 | let a = _mm512_set_ps( |
10877 | 1., |
10878 | f32::INFINITY, |
10879 | f32::NEG_INFINITY, |
10880 | 0.0, |
10881 | -0.0, |
10882 | -2.0, |
10883 | f32::NAN, |
10884 | 1.0e-38, |
10885 | -1., |
10886 | f32::NEG_INFINITY, |
10887 | f32::INFINITY, |
10888 | -0.0, |
10889 | 0.0, |
10890 | 2.0, |
10891 | f32::NAN, |
10892 | -1.0e-38, |
10893 | ); |
10894 | let r = _mm512_fpclass_ps_mask::<0x18>(a); |
10895 | let e = 0b0110000001100000; |
10896 | assert_eq!(r, e); |
10897 | } |
10898 | |
10899 | #[simd_test(enable = "avx512dq" )] |
10900 | unsafe fn test_mm512_mask_fpclass_ps_mask() { |
10901 | let a = _mm512_set_ps( |
10902 | 1., |
10903 | f32::INFINITY, |
10904 | f32::NEG_INFINITY, |
10905 | 0.0, |
10906 | -0.0, |
10907 | -2.0, |
10908 | f32::NAN, |
10909 | 1.0e-38, |
10910 | -1., |
10911 | f32::NEG_INFINITY, |
10912 | f32::INFINITY, |
10913 | -0.0, |
10914 | 0.0, |
10915 | 2.0, |
10916 | f32::NAN, |
10917 | -1.0e-38, |
10918 | ); |
10919 | let r = _mm512_mask_fpclass_ps_mask::<0x18>(0b1010101010101010, a); |
10920 | let e = 0b0010000000100000; |
10921 | assert_eq!(r, e); |
10922 | } |
10923 | |
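// The scalar `_sd`/`_ss` fpclass forms classify only element 0, so the result mask is
// either 0b0 or 0b1 (and is additionally gated by `k` in the `_mask_` variants).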
10924 | #[simd_test(enable = "avx512dq" )] |
10925 | unsafe fn test_mm_fpclass_sd_mask() { |
10926 | let a = _mm_set_pd(1., f64::INFINITY); |
10927 | let r = _mm_fpclass_sd_mask::<0x18>(a); |
10928 | let e = 0b1; |
10929 | assert_eq!(r, e); |
10930 | } |
10931 | |
10932 | #[simd_test(enable = "avx512dq" )] |
10933 | unsafe fn test_mm_mask_fpclass_sd_mask() { |
10934 | let a = _mm_set_sd(f64::INFINITY); |
10935 | let r = _mm_mask_fpclass_sd_mask::<0x18>(0b0, a); |
10936 | let e = 0b0; |
10937 | assert_eq!(r, e); |
10938 | } |
10939 | |
10940 | #[simd_test(enable = "avx512dq" )] |
10941 | unsafe fn test_mm_fpclass_ss_mask() { |
10942 | let a = _mm_set_ss(f32::INFINITY); |
10943 | let r = _mm_fpclass_ss_mask::<0x18>(a); |
10944 | let e = 0b1; |
10945 | assert_eq!(r, e); |
10946 | } |
10947 | |
10948 | #[simd_test(enable = "avx512dq" )] |
10949 | unsafe fn test_mm_mask_fpclass_ss_mask() { |
10950 | let a = _mm_set_ss(f32::INFINITY); |
10951 | let r = _mm_mask_fpclass_ss_mask::<0x18>(0b0, a); |
10952 | let e = 0b0; |
10953 | assert_eq!(r, e); |
10954 | } |
10955 | } |
10956 | |