1 | use crate::core_arch::{simd::*, x86::*}; |
2 | use crate::intrinsics::simd::*; |
3 | |
4 | #[cfg (test)] |
5 | use stdarch_test::assert_instr; |
6 | |
7 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. |
8 | /// |
9 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssd_epi32&expand=2219) |
10 | #[inline ] |
11 | #[target_feature (enable = "avx512vnni" )] |
12 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
13 | #[cfg_attr (test, assert_instr(vpdpwssd))] |
14 | pub fn _mm512_dpwssd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i { |
15 | unsafe { transmute(src:vpdpwssd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) } |
16 | } |
17 | |
18 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
19 | /// |
20 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssd_epi32&expand=2220) |
21 | #[inline ] |
22 | #[target_feature (enable = "avx512vnni" )] |
23 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
24 | #[cfg_attr (test, assert_instr(vpdpwssd))] |
25 | pub fn _mm512_mask_dpwssd_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { |
26 | unsafe { |
27 | let r: i32x16 = _mm512_dpwssd_epi32(src, a, b).as_i32x16(); |
28 | transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x16())) |
29 | } |
30 | } |
31 | |
32 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
33 | /// |
34 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssd_epi32&expand=2221) |
35 | #[inline ] |
36 | #[target_feature (enable = "avx512vnni" )] |
37 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
38 | #[cfg_attr (test, assert_instr(vpdpwssd))] |
39 | pub fn _mm512_maskz_dpwssd_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i { |
40 | unsafe { |
41 | let r: i32x16 = _mm512_dpwssd_epi32(src, a, b).as_i32x16(); |
42 | transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x16::ZERO)) |
43 | } |
44 | } |
45 | |
46 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. |
47 | /// |
48 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssd_avx_epi32&expand=2713) |
49 | #[inline ] |
50 | #[target_feature (enable = "avxvnni" )] |
51 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
52 | #[cfg_attr ( |
53 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
54 | assert_instr(vpdpwssd) |
55 | )] |
56 | pub fn _mm256_dpwssd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { |
57 | unsafe { transmute(src:vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } |
58 | } |
59 | |
60 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. |
61 | /// |
62 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssd_epi32&expand=2216) |
63 | #[inline ] |
64 | #[target_feature (enable = "avx512vnni,avx512vl" )] |
65 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
66 | #[cfg_attr (test, assert_instr(vpdpwssd))] |
67 | pub fn _mm256_dpwssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { |
68 | unsafe { transmute(src:vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } |
69 | } |
70 | |
71 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
72 | /// |
73 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssd_epi32&expand=2217) |
74 | #[inline ] |
75 | #[target_feature (enable = "avx512vnni,avx512vl" )] |
76 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
77 | #[cfg_attr (test, assert_instr(vpdpwssd))] |
78 | pub fn _mm256_mask_dpwssd_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { |
79 | unsafe { |
80 | let r: i32x8 = _mm256_dpwssd_epi32(src, a, b).as_i32x8(); |
81 | transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x8())) |
82 | } |
83 | } |
84 | |
85 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
86 | /// |
87 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssd_epi32&expand=2218) |
88 | #[inline ] |
89 | #[target_feature (enable = "avx512vnni,avx512vl" )] |
90 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
91 | #[cfg_attr (test, assert_instr(vpdpwssd))] |
92 | pub fn _mm256_maskz_dpwssd_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i { |
93 | unsafe { |
94 | let r: i32x8 = _mm256_dpwssd_epi32(src, a, b).as_i32x8(); |
95 | transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x8::ZERO)) |
96 | } |
97 | } |
98 | |
99 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. |
100 | /// |
101 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssd_avx_epi32&expand=2712) |
102 | #[inline ] |
103 | #[target_feature (enable = "avxvnni" )] |
104 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
105 | #[cfg_attr ( |
106 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
107 | assert_instr(vpdpwssd) |
108 | )] |
109 | pub fn _mm_dpwssd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { |
110 | unsafe { transmute(src:vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } |
111 | } |
112 | |
113 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. |
114 | /// |
115 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssd_epi32&expand=2213) |
116 | #[inline ] |
117 | #[target_feature (enable = "avx512vnni,avx512vl" )] |
118 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
119 | #[cfg_attr (test, assert_instr(vpdpwssd))] |
120 | pub fn _mm_dpwssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { |
121 | unsafe { transmute(src:vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } |
122 | } |
123 | |
124 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
125 | /// |
126 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssd_epi32&expand=2214) |
127 | #[inline ] |
128 | #[target_feature (enable = "avx512vnni,avx512vl" )] |
129 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
130 | #[cfg_attr (test, assert_instr(vpdpwssd))] |
131 | pub fn _mm_mask_dpwssd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
132 | unsafe { |
133 | let r: i32x4 = _mm_dpwssd_epi32(src, a, b).as_i32x4(); |
134 | transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x4())) |
135 | } |
136 | } |
137 | |
138 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
139 | /// |
140 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssd_epi32&expand=2215) |
141 | #[inline ] |
142 | #[target_feature (enable = "avx512vnni,avx512vl" )] |
143 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
144 | #[cfg_attr (test, assert_instr(vpdpwssd))] |
145 | pub fn _mm_maskz_dpwssd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i { |
146 | unsafe { |
147 | let r: i32x4 = _mm_dpwssd_epi32(src, a, b).as_i32x4(); |
148 | transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x4::ZERO)) |
149 | } |
150 | } |
151 | |
152 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. |
153 | /// |
154 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssds_epi32&expand=2228) |
155 | #[inline ] |
156 | #[target_feature (enable = "avx512vnni" )] |
157 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
158 | #[cfg_attr (test, assert_instr(vpdpwssds))] |
159 | pub fn _mm512_dpwssds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i { |
160 | unsafe { transmute(src:vpdpwssds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) } |
161 | } |
162 | |
163 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
164 | /// |
165 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssds_epi32&expand=2229) |
166 | #[inline ] |
167 | #[target_feature (enable = "avx512vnni" )] |
168 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
169 | #[cfg_attr (test, assert_instr(vpdpwssds))] |
170 | pub fn _mm512_mask_dpwssds_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { |
171 | unsafe { |
172 | let r: i32x16 = _mm512_dpwssds_epi32(src, a, b).as_i32x16(); |
173 | transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x16())) |
174 | } |
175 | } |
176 | |
177 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
178 | /// |
179 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssds_epi32&expand=2230) |
180 | #[inline ] |
181 | #[target_feature (enable = "avx512vnni" )] |
182 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
183 | #[cfg_attr (test, assert_instr(vpdpwssds))] |
184 | pub fn _mm512_maskz_dpwssds_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i { |
185 | unsafe { |
186 | let r: i32x16 = _mm512_dpwssds_epi32(src, a, b).as_i32x16(); |
187 | transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x16::ZERO)) |
188 | } |
189 | } |
190 | |
191 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. |
192 | /// |
193 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssds_avx_epi32&expand=2726) |
194 | #[inline ] |
195 | #[target_feature (enable = "avxvnni" )] |
196 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
197 | #[cfg_attr ( |
198 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
199 | assert_instr(vpdpwssds) |
200 | )] |
201 | pub fn _mm256_dpwssds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { |
202 | unsafe { transmute(src:vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } |
203 | } |
204 | |
205 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. |
206 | /// |
207 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssds_epi32&expand=2225) |
208 | #[inline ] |
209 | #[target_feature (enable = "avx512vnni,avx512vl" )] |
210 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
211 | #[cfg_attr (test, assert_instr(vpdpwssds))] |
212 | pub fn _mm256_dpwssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { |
213 | unsafe { transmute(src:vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } |
214 | } |
215 | |
216 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
217 | /// |
218 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssds_epi32&expand=2226) |
219 | #[inline ] |
220 | #[target_feature (enable = "avx512vnni,avx512vl" )] |
221 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
222 | #[cfg_attr (test, assert_instr(vpdpwssds))] |
223 | pub fn _mm256_mask_dpwssds_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { |
224 | unsafe { |
225 | let r: i32x8 = _mm256_dpwssds_epi32(src, a, b).as_i32x8(); |
226 | transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x8())) |
227 | } |
228 | } |
229 | |
230 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
231 | /// |
232 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssds_epi32&expand=2227) |
233 | #[inline ] |
234 | #[target_feature (enable = "avx512vnni,avx512vl" )] |
235 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
236 | #[cfg_attr (test, assert_instr(vpdpwssds))] |
237 | pub fn _mm256_maskz_dpwssds_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i { |
238 | unsafe { |
239 | let r: i32x8 = _mm256_dpwssds_epi32(src, a, b).as_i32x8(); |
240 | transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x8::ZERO)) |
241 | } |
242 | } |
243 | |
244 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. |
245 | /// |
246 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssds_avx_epi32&expand=2725) |
247 | #[inline ] |
248 | #[target_feature (enable = "avxvnni" )] |
249 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
250 | #[cfg_attr ( |
251 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
252 | assert_instr(vpdpwssds) |
253 | )] |
254 | pub fn _mm_dpwssds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { |
255 | unsafe { transmute(src:vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } |
256 | } |
257 | |
258 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. |
259 | /// |
260 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssds_epi32&expand=2222) |
261 | #[inline ] |
262 | #[target_feature (enable = "avx512vnni,avx512vl" )] |
263 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
264 | #[cfg_attr (test, assert_instr(vpdpwssds))] |
265 | pub fn _mm_dpwssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { |
266 | unsafe { transmute(src:vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } |
267 | } |
268 | |
269 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
270 | /// |
271 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssds_epi32&expand=2223) |
272 | #[inline ] |
273 | #[target_feature (enable = "avx512vnni,avx512vl" )] |
274 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
275 | #[cfg_attr (test, assert_instr(vpdpwssds))] |
276 | pub fn _mm_mask_dpwssds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
277 | unsafe { |
278 | let r: i32x4 = _mm_dpwssds_epi32(src, a, b).as_i32x4(); |
279 | transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x4())) |
280 | } |
281 | } |
282 | |
283 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
284 | /// |
285 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssds_epi32&expand=2224) |
286 | #[inline ] |
287 | #[target_feature (enable = "avx512vnni,avx512vl" )] |
288 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
289 | #[cfg_attr (test, assert_instr(vpdpwssds))] |
290 | pub fn _mm_maskz_dpwssds_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i { |
291 | unsafe { |
292 | let r: i32x4 = _mm_dpwssds_epi32(src, a, b).as_i32x4(); |
293 | transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x4::ZERO)) |
294 | } |
295 | } |
296 | |
297 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. |
298 | /// |
299 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusd_epi32&expand=2201) |
300 | #[inline ] |
301 | #[target_feature (enable = "avx512vnni" )] |
302 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
303 | #[cfg_attr (test, assert_instr(vpdpbusd))] |
304 | pub fn _mm512_dpbusd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i { |
305 | unsafe { transmute(src:vpdpbusd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) } |
306 | } |
307 | |
308 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
309 | /// |
310 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusd_epi32&expand=2202) |
311 | #[inline ] |
312 | #[target_feature (enable = "avx512vnni" )] |
313 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
314 | #[cfg_attr (test, assert_instr(vpdpbusd))] |
315 | pub fn _mm512_mask_dpbusd_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { |
316 | unsafe { |
317 | let r: i32x16 = _mm512_dpbusd_epi32(src, a, b).as_i32x16(); |
318 | transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x16())) |
319 | } |
320 | } |
321 | |
322 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
323 | /// |
324 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusd_epi32&expand=2203) |
325 | #[inline ] |
326 | #[target_feature (enable = "avx512vnni" )] |
327 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
328 | #[cfg_attr (test, assert_instr(vpdpbusd))] |
329 | pub fn _mm512_maskz_dpbusd_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i { |
330 | unsafe { |
331 | let r: i32x16 = _mm512_dpbusd_epi32(src, a, b).as_i32x16(); |
332 | transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x16::ZERO)) |
333 | } |
334 | } |
335 | |
336 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. |
337 | /// |
338 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusd_avx_epi32&expand=2683) |
339 | #[inline ] |
340 | #[target_feature (enable = "avxvnni" )] |
341 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
342 | #[cfg_attr ( |
343 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
344 | assert_instr(vpdpbusd) |
345 | )] |
346 | pub fn _mm256_dpbusd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { |
347 | unsafe { transmute(src:vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } |
348 | } |
349 | |
350 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. |
351 | /// |
352 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusd_epi32&expand=2198) |
353 | #[inline ] |
354 | #[target_feature (enable = "avx512vnni,avx512vl" )] |
355 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
356 | #[cfg_attr (test, assert_instr(vpdpbusd))] |
357 | pub fn _mm256_dpbusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { |
358 | unsafe { transmute(src:vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } |
359 | } |
360 | |
361 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
362 | /// |
363 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusd_epi32&expand=2199) |
364 | #[inline ] |
365 | #[target_feature (enable = "avx512vnni,avx512vl" )] |
366 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
367 | #[cfg_attr (test, assert_instr(vpdpbusd))] |
368 | pub fn _mm256_mask_dpbusd_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { |
369 | unsafe { |
370 | let r: i32x8 = _mm256_dpbusd_epi32(src, a, b).as_i32x8(); |
371 | transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x8())) |
372 | } |
373 | } |
374 | |
375 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
376 | /// |
377 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusd_epi32&expand=2200) |
378 | #[inline ] |
379 | #[target_feature (enable = "avx512vnni,avx512vl" )] |
380 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
381 | #[cfg_attr (test, assert_instr(vpdpbusd))] |
382 | pub fn _mm256_maskz_dpbusd_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i { |
383 | unsafe { |
384 | let r: i32x8 = _mm256_dpbusd_epi32(src, a, b).as_i32x8(); |
385 | transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x8::ZERO)) |
386 | } |
387 | } |
388 | |
389 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. |
390 | /// |
391 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusd_avx_epi32&expand=2682) |
392 | #[inline ] |
393 | #[target_feature (enable = "avxvnni" )] |
394 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
395 | #[cfg_attr ( |
396 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
397 | assert_instr(vpdpbusd) |
398 | )] |
399 | pub fn _mm_dpbusd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { |
400 | unsafe { transmute(src:vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } |
401 | } |
402 | |
403 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. |
404 | /// |
405 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusd_epi32&expand=2195) |
406 | #[inline ] |
407 | #[target_feature (enable = "avx512vnni,avx512vl" )] |
408 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
409 | #[cfg_attr (test, assert_instr(vpdpbusd))] |
410 | pub fn _mm_dpbusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { |
411 | unsafe { transmute(src:vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } |
412 | } |
413 | |
414 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
415 | /// |
416 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusd_epi32&expand=2196) |
417 | #[inline ] |
418 | #[target_feature (enable = "avx512vnni,avx512vl" )] |
419 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
420 | #[cfg_attr (test, assert_instr(vpdpbusd))] |
421 | pub fn _mm_mask_dpbusd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
422 | unsafe { |
423 | let r: i32x4 = _mm_dpbusd_epi32(src, a, b).as_i32x4(); |
424 | transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x4())) |
425 | } |
426 | } |
427 | |
428 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
429 | /// |
430 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusd_epi32&expand=2197) |
431 | #[inline ] |
432 | #[target_feature (enable = "avx512vnni,avx512vl" )] |
433 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
434 | #[cfg_attr (test, assert_instr(vpdpbusd))] |
435 | pub fn _mm_maskz_dpbusd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i { |
436 | unsafe { |
437 | let r: i32x4 = _mm_dpbusd_epi32(src, a, b).as_i32x4(); |
438 | transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x4::ZERO)) |
439 | } |
440 | } |
441 | |
442 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. |
443 | /// |
444 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusds_epi32&expand=2210) |
445 | #[inline ] |
446 | #[target_feature (enable = "avx512vnni" )] |
447 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
448 | #[cfg_attr (test, assert_instr(vpdpbusds))] |
449 | pub fn _mm512_dpbusds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i { |
450 | unsafe { transmute(src:vpdpbusds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) } |
451 | } |
452 | |
453 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
454 | /// |
455 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusds_epi32&expand=2211) |
456 | #[inline ] |
457 | #[target_feature (enable = "avx512vnni" )] |
458 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
459 | #[cfg_attr (test, assert_instr(vpdpbusds))] |
460 | pub fn _mm512_mask_dpbusds_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { |
461 | unsafe { |
462 | let r: i32x16 = _mm512_dpbusds_epi32(src, a, b).as_i32x16(); |
463 | transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x16())) |
464 | } |
465 | } |
466 | |
467 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
468 | /// |
469 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusds_epi32&expand=2212) |
470 | #[inline ] |
471 | #[target_feature (enable = "avx512vnni" )] |
472 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
473 | #[cfg_attr (test, assert_instr(vpdpbusds))] |
474 | pub fn _mm512_maskz_dpbusds_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i { |
475 | unsafe { |
476 | let r: i32x16 = _mm512_dpbusds_epi32(src, a, b).as_i32x16(); |
477 | transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x16::ZERO)) |
478 | } |
479 | } |
480 | |
481 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. |
482 | /// |
483 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusds_avx_epi32&expand=2696) |
484 | #[inline ] |
485 | #[target_feature (enable = "avxvnni" )] |
486 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
487 | #[cfg_attr ( |
488 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
489 | assert_instr(vpdpbusds) |
490 | )] |
491 | pub fn _mm256_dpbusds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { |
492 | unsafe { transmute(src:vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } |
493 | } |
494 | |
495 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. |
496 | /// |
497 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusds_epi32&expand=2207) |
498 | #[inline ] |
499 | #[target_feature (enable = "avx512vnni,avx512vl" )] |
500 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
501 | #[cfg_attr (test, assert_instr(vpdpbusds))] |
502 | pub fn _mm256_dpbusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { |
503 | unsafe { transmute(src:vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } |
504 | } |
505 | |
506 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
507 | /// |
508 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusds_epi32&expand=2208) |
509 | #[inline ] |
510 | #[target_feature (enable = "avx512vnni,avx512vl" )] |
511 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
512 | #[cfg_attr (test, assert_instr(vpdpbusds))] |
513 | pub fn _mm256_mask_dpbusds_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { |
514 | unsafe { |
515 | let r: i32x8 = _mm256_dpbusds_epi32(src, a, b).as_i32x8(); |
516 | transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x8())) |
517 | } |
518 | } |
519 | |
520 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
521 | /// |
522 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusds_epi32&expand=2209) |
523 | #[inline ] |
524 | #[target_feature (enable = "avx512vnni,avx512vl" )] |
525 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
526 | #[cfg_attr (test, assert_instr(vpdpbusds))] |
527 | pub fn _mm256_maskz_dpbusds_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i { |
528 | unsafe { |
529 | let r: i32x8 = _mm256_dpbusds_epi32(src, a, b).as_i32x8(); |
530 | transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x8::ZERO)) |
531 | } |
532 | } |
533 | |
534 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. |
535 | /// |
536 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusds_avx_epi32&expand=2695) |
537 | #[inline ] |
538 | #[target_feature (enable = "avxvnni" )] |
539 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
540 | #[cfg_attr ( |
541 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
542 | assert_instr(vpdpbusds) |
543 | )] |
544 | pub fn _mm_dpbusds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { |
545 | unsafe { transmute(src:vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } |
546 | } |
547 | |
548 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. |
549 | /// |
550 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusds_epi32&expand=2204) |
551 | #[inline ] |
552 | #[target_feature (enable = "avx512vnni,avx512vl" )] |
553 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
554 | #[cfg_attr (test, assert_instr(vpdpbusds))] |
555 | pub fn _mm_dpbusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { |
556 | unsafe { transmute(src:vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } |
557 | } |
558 | |
559 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
560 | /// |
561 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusds_epi32&expand=2205) |
562 | #[inline ] |
563 | #[target_feature (enable = "avx512vnni,avx512vl" )] |
564 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
565 | #[cfg_attr (test, assert_instr(vpdpbusds))] |
566 | pub fn _mm_mask_dpbusds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
567 | unsafe { |
568 | let r: i32x4 = _mm_dpbusds_epi32(src, a, b).as_i32x4(); |
569 | transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x4())) |
570 | } |
571 | } |
572 | |
573 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
574 | /// |
575 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusds_epi32&expand=2206) |
576 | #[inline ] |
577 | #[target_feature (enable = "avx512vnni,avx512vl" )] |
578 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
579 | #[cfg_attr (test, assert_instr(vpdpbusds))] |
580 | pub fn _mm_maskz_dpbusds_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i { |
581 | unsafe { |
582 | let r: i32x4 = _mm_dpbusds_epi32(src, a, b).as_i32x4(); |
583 | transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x4::ZERO)) |
584 | } |
585 | } |
586 | |
587 | /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit |
588 | /// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding |
589 | /// 32-bit integer in src, and store the packed 32-bit results in dst. |
590 | /// |
591 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbssd_epi32&expand=2674) |
592 | #[inline ] |
593 | #[target_feature (enable = "avxvnniint8" )] |
594 | #[cfg_attr ( |
595 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
596 | assert_instr(vpdpbssd) |
597 | )] |
598 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
599 | pub fn _mm_dpbssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { |
600 | unsafe { transmute(src:vpdpbssd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } |
601 | } |
602 | |
603 | /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit |
604 | /// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding |
605 | /// 32-bit integer in src, and store the packed 32-bit results in dst. |
606 | /// |
607 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbssd_epi32&expand=2675) |
608 | #[inline ] |
609 | #[target_feature (enable = "avxvnniint8" )] |
610 | #[cfg_attr ( |
611 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
612 | assert_instr(vpdpbssd) |
613 | )] |
614 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
615 | pub fn _mm256_dpbssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { |
616 | unsafe { transmute(src:vpdpbssd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } |
617 | } |
618 | |
619 | /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit |
620 | /// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding |
621 | /// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst. |
622 | /// |
623 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbssds_epi32&expand=2676) |
624 | #[inline ] |
625 | #[target_feature (enable = "avxvnniint8" )] |
626 | #[cfg_attr ( |
627 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
628 | assert_instr(vpdpbssds) |
629 | )] |
630 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
631 | pub fn _mm_dpbssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { |
632 | unsafe { transmute(src:vpdpbssds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } |
633 | } |
634 | |
635 | /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit |
636 | /// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding |
637 | /// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst. |
638 | /// |
639 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbssds_epi32&expand=2677) |
640 | #[inline ] |
641 | #[target_feature (enable = "avxvnniint8" )] |
642 | #[cfg_attr ( |
643 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
644 | assert_instr(vpdpbssds) |
645 | )] |
646 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
647 | pub fn _mm256_dpbssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { |
648 | unsafe { transmute(src:vpdpbssds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } |
649 | } |
650 | |
651 | /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit |
652 | /// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding |
653 | /// 32-bit integer in src, and store the packed 32-bit results in dst. |
654 | /// |
655 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbsud_epi32&expand=2678) |
656 | #[inline ] |
657 | #[target_feature (enable = "avxvnniint8" )] |
658 | #[cfg_attr ( |
659 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
660 | assert_instr(vpdpbsud) |
661 | )] |
662 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
663 | pub fn _mm_dpbsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { |
664 | unsafe { transmute(src:vpdpbsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } |
665 | } |
666 | |
667 | /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit |
668 | /// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding |
669 | /// 32-bit integer in src, and store the packed 32-bit results in dst. |
670 | /// |
671 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbsud_epi32&expand=2679) |
672 | #[inline ] |
673 | #[target_feature (enable = "avxvnniint8" )] |
674 | #[cfg_attr ( |
675 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
676 | assert_instr(vpdpbsud) |
677 | )] |
678 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
679 | pub fn _mm256_dpbsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { |
680 | unsafe { transmute(src:vpdpbsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } |
681 | } |
682 | |
683 | /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit |
684 | /// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding |
685 | /// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst. |
686 | /// |
687 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbsuds_epi32&expand=2680) |
688 | #[inline ] |
689 | #[target_feature (enable = "avxvnniint8" )] |
690 | #[cfg_attr ( |
691 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
692 | assert_instr(vpdpbsuds) |
693 | )] |
694 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
695 | pub fn _mm_dpbsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { |
696 | unsafe { transmute(src:vpdpbsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } |
697 | } |
698 | |
699 | /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit |
700 | /// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding |
701 | /// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst. |
702 | /// |
703 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbsuds_epi32&expand=2681) |
704 | #[inline ] |
705 | #[target_feature (enable = "avxvnniint8" )] |
706 | #[cfg_attr ( |
707 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
708 | assert_instr(vpdpbsuds) |
709 | )] |
710 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
711 | pub fn _mm256_dpbsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { |
712 | unsafe { transmute(src:vpdpbsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } |
713 | } |
714 | |
715 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit |
716 | /// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding |
717 | /// 32-bit integer in src, and store the packed 32-bit results in dst. |
718 | /// |
719 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbuud_epi32&expand=2708) |
720 | #[inline ] |
721 | #[target_feature (enable = "avxvnniint8" )] |
722 | #[cfg_attr ( |
723 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
724 | assert_instr(vpdpbuud) |
725 | )] |
726 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
727 | pub fn _mm_dpbuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { |
728 | unsafe { transmute(src:vpdpbuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } |
729 | } |
730 | |
731 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit |
732 | /// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding |
733 | /// 32-bit integer in src, and store the packed 32-bit results in dst. |
734 | /// |
735 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbuud_epi32&expand=2709) |
736 | #[inline ] |
737 | #[target_feature (enable = "avxvnniint8" )] |
738 | #[cfg_attr ( |
739 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
740 | assert_instr(vpdpbuud) |
741 | )] |
742 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
743 | pub fn _mm256_dpbuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { |
744 | unsafe { transmute(src:vpdpbuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } |
745 | } |
746 | |
747 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit |
748 | /// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding |
749 | /// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst. |
750 | /// |
751 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbuuds_epi32&expand=2710) |
752 | #[inline ] |
753 | #[target_feature (enable = "avxvnniint8" )] |
754 | #[cfg_attr ( |
755 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
756 | assert_instr(vpdpbuuds) |
757 | )] |
758 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
759 | pub fn _mm_dpbuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { |
760 | unsafe { transmute(src:vpdpbuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } |
761 | } |
762 | |
763 | /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit |
764 | /// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding |
765 | /// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst. |
766 | /// |
767 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbuuds_epi32&expand=2711) |
768 | #[inline ] |
769 | #[target_feature (enable = "avxvnniint8" )] |
770 | #[cfg_attr ( |
771 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
772 | assert_instr(vpdpbuuds) |
773 | )] |
774 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
775 | pub fn _mm256_dpbuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { |
776 | unsafe { transmute(src:vpdpbuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } |
777 | } |
778 | |
779 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit |
780 | /// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding |
781 | /// 32-bit integer in src, and store the packed 32-bit results in dst. |
782 | /// |
783 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwsud_epi32&expand=2738) |
784 | #[inline ] |
785 | #[target_feature (enable = "avxvnniint16" )] |
786 | #[cfg_attr ( |
787 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
788 | assert_instr(vpdpwsud) |
789 | )] |
790 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
791 | pub fn _mm_dpwsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { |
792 | unsafe { transmute(src:vpdpwsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } |
793 | } |
794 | |
795 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit |
796 | /// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding |
797 | /// 32-bit integer in src, and store the packed 32-bit results in dst. |
798 | /// |
799 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwsud_epi32&expand=2739) |
800 | #[inline ] |
801 | #[target_feature (enable = "avxvnniint16" )] |
802 | #[cfg_attr ( |
803 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
804 | assert_instr(vpdpwsud) |
805 | )] |
806 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
807 | pub fn _mm256_dpwsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { |
808 | unsafe { transmute(src:vpdpwsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } |
809 | } |
810 | |
811 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit |
812 | /// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding |
813 | /// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst. |
814 | /// |
815 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwsuds_epi32&expand=2740) |
816 | #[inline ] |
817 | #[target_feature (enable = "avxvnniint16" )] |
818 | #[cfg_attr ( |
819 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
820 | assert_instr(vpdpwsuds) |
821 | )] |
822 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
823 | pub fn _mm_dpwsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { |
824 | unsafe { transmute(src:vpdpwsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } |
825 | } |
826 | |
827 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit |
828 | /// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding |
829 | /// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst. |
830 | /// |
831 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwsuds_epi32&expand=2741) |
832 | #[inline ] |
833 | #[target_feature (enable = "avxvnniint16" )] |
834 | #[cfg_attr ( |
835 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
836 | assert_instr(vpdpwsuds) |
837 | )] |
838 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
839 | pub fn _mm256_dpwsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { |
840 | unsafe { transmute(src:vpdpwsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } |
841 | } |
842 | |
843 | /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit |
844 | /// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding |
845 | /// 32-bit integer in src, and store the packed 32-bit results in dst. |
846 | /// |
847 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwusd_epi32&expand=2742) |
848 | #[inline ] |
849 | #[target_feature (enable = "avxvnniint16" )] |
850 | #[cfg_attr ( |
851 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
852 | assert_instr(vpdpwusd) |
853 | )] |
854 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
855 | pub fn _mm_dpwusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { |
856 | unsafe { transmute(src:vpdpwusd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } |
857 | } |
858 | |
859 | /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit |
860 | /// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding |
861 | /// 32-bit integer in src, and store the packed 32-bit results in dst. |
862 | /// |
863 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwusd_epi32&expand=2743) |
864 | #[inline ] |
865 | #[target_feature (enable = "avxvnniint16" )] |
866 | #[cfg_attr ( |
867 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
868 | assert_instr(vpdpwusd) |
869 | )] |
870 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
871 | pub fn _mm256_dpwusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { |
872 | unsafe { transmute(src:vpdpwusd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } |
873 | } |
874 | |
875 | /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit |
876 | /// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding |
877 | /// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst. |
878 | /// |
879 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwusds_epi32&expand=2744) |
880 | #[inline ] |
881 | #[target_feature (enable = "avxvnniint16" )] |
882 | #[cfg_attr ( |
883 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
884 | assert_instr(vpdpwusds) |
885 | )] |
886 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
887 | pub fn _mm_dpwusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { |
888 | unsafe { transmute(src:vpdpwusds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } |
889 | } |
890 | |
891 | /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit |
892 | /// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding |
893 | /// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst. |
894 | /// |
895 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwusds_epi32&expand=2745) |
896 | #[inline ] |
897 | #[target_feature (enable = "avxvnniint16" )] |
898 | #[cfg_attr ( |
899 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
900 | assert_instr(vpdpwusds) |
901 | )] |
902 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
903 | pub fn _mm256_dpwusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { |
904 | unsafe { transmute(src:vpdpwusds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } |
905 | } |
906 | |
907 | /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit |
908 | /// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding |
909 | /// 32-bit integer in src, and store the packed 32-bit results in dst. |
910 | /// |
911 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwuud_epi32&expand=2746) |
912 | #[inline ] |
913 | #[target_feature (enable = "avxvnniint16" )] |
914 | #[cfg_attr ( |
915 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
916 | assert_instr(vpdpwuud) |
917 | )] |
918 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
919 | pub fn _mm_dpwuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { |
920 | unsafe { transmute(src:vpdpwuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } |
921 | } |
922 | |
923 | /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit |
924 | /// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding |
925 | /// 32-bit integer in src, and store the packed 32-bit results in dst. |
926 | /// |
927 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwuud_epi32&expand=2747) |
928 | #[inline ] |
929 | #[target_feature (enable = "avxvnniint16" )] |
930 | #[cfg_attr ( |
931 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
932 | assert_instr(vpdpwuud) |
933 | )] |
934 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
935 | pub fn _mm256_dpwuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { |
936 | unsafe { transmute(src:vpdpwuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } |
937 | } |
938 | |
939 | /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit |
940 | /// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding |
941 | /// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst. |
942 | /// |
943 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwuuds_epi32&expand=2748) |
944 | #[inline ] |
945 | #[target_feature (enable = "avxvnniint16" )] |
946 | #[cfg_attr ( |
947 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
948 | assert_instr(vpdpwuuds) |
949 | )] |
950 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
951 | pub fn _mm_dpwuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { |
952 | unsafe { transmute(src:vpdpwuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } |
953 | } |
954 | |
955 | /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit |
956 | /// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding |
957 | /// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst. |
958 | /// |
959 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwuuds_epi32&expand=2749) |
960 | #[inline ] |
961 | #[target_feature (enable = "avxvnniint16" )] |
962 | #[cfg_attr ( |
963 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
964 | assert_instr(vpdpwuuds) |
965 | )] |
966 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
967 | pub fn _mm256_dpwuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { |
968 | unsafe { transmute(src:vpdpwuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } |
969 | } |
970 | |
971 | #[allow (improper_ctypes)] |
972 | unsafe extern "C" { |
973 | #[link_name = "llvm.x86.avx512.vpdpwssd.512" ] |
974 | unsafefn vpdpwssd(src: i32x16, a: i32x16, b: i32x16) -> i32x16; |
975 | #[link_name = "llvm.x86.avx512.vpdpwssd.256" ] |
976 | unsafefn vpdpwssd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; |
977 | #[link_name = "llvm.x86.avx512.vpdpwssd.128" ] |
978 | unsafefn vpdpwssd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; |
979 | |
980 | #[link_name = "llvm.x86.avx512.vpdpwssds.512" ] |
981 | unsafefn vpdpwssds(src: i32x16, a: i32x16, b: i32x16) -> i32x16; |
982 | #[link_name = "llvm.x86.avx512.vpdpwssds.256" ] |
983 | unsafefn vpdpwssds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; |
984 | #[link_name = "llvm.x86.avx512.vpdpwssds.128" ] |
985 | unsafefn vpdpwssds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; |
986 | |
987 | #[link_name = "llvm.x86.avx512.vpdpbusd.512" ] |
988 | unsafefn vpdpbusd(src: i32x16, a: i32x16, b: i32x16) -> i32x16; |
989 | #[link_name = "llvm.x86.avx512.vpdpbusd.256" ] |
990 | unsafefn vpdpbusd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; |
991 | #[link_name = "llvm.x86.avx512.vpdpbusd.128" ] |
992 | unsafefn vpdpbusd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; |
993 | |
994 | #[link_name = "llvm.x86.avx512.vpdpbusds.512" ] |
995 | unsafefn vpdpbusds(src: i32x16, a: i32x16, b: i32x16) -> i32x16; |
996 | #[link_name = "llvm.x86.avx512.vpdpbusds.256" ] |
997 | unsafefn vpdpbusds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; |
998 | #[link_name = "llvm.x86.avx512.vpdpbusds.128" ] |
999 | unsafefn vpdpbusds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; |
1000 | |
1001 | #[link_name = "llvm.x86.avx2.vpdpbssd.128" ] |
1002 | unsafefn vpdpbssd_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; |
1003 | #[link_name = "llvm.x86.avx2.vpdpbssd.256" ] |
1004 | unsafefn vpdpbssd_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; |
1005 | |
1006 | #[link_name = "llvm.x86.avx2.vpdpbssds.128" ] |
1007 | unsafefn vpdpbssds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; |
1008 | #[link_name = "llvm.x86.avx2.vpdpbssds.256" ] |
1009 | unsafefn vpdpbssds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; |
1010 | |
1011 | #[link_name = "llvm.x86.avx2.vpdpbsud.128" ] |
1012 | unsafefn vpdpbsud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; |
1013 | #[link_name = "llvm.x86.avx2.vpdpbsud.256" ] |
1014 | unsafefn vpdpbsud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; |
1015 | |
1016 | #[link_name = "llvm.x86.avx2.vpdpbsuds.128" ] |
1017 | unsafefn vpdpbsuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; |
1018 | #[link_name = "llvm.x86.avx2.vpdpbsuds.256" ] |
1019 | unsafefn vpdpbsuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; |
1020 | |
1021 | #[link_name = "llvm.x86.avx2.vpdpbuud.128" ] |
1022 | unsafefn vpdpbuud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; |
1023 | #[link_name = "llvm.x86.avx2.vpdpbuud.256" ] |
1024 | unsafefn vpdpbuud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; |
1025 | |
1026 | #[link_name = "llvm.x86.avx2.vpdpbuuds.128" ] |
1027 | unsafefn vpdpbuuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; |
1028 | #[link_name = "llvm.x86.avx2.vpdpbuuds.256" ] |
1029 | unsafefn vpdpbuuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; |
1030 | |
1031 | #[link_name = "llvm.x86.avx2.vpdpwsud.128" ] |
1032 | unsafefn vpdpwsud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; |
1033 | #[link_name = "llvm.x86.avx2.vpdpwsud.256" ] |
1034 | unsafefn vpdpwsud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; |
1035 | |
1036 | #[link_name = "llvm.x86.avx2.vpdpwsuds.128" ] |
1037 | unsafefn vpdpwsuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; |
1038 | #[link_name = "llvm.x86.avx2.vpdpwsuds.256" ] |
1039 | unsafefn vpdpwsuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; |
1040 | |
1041 | #[link_name = "llvm.x86.avx2.vpdpwusd.128" ] |
1042 | unsafefn vpdpwusd_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; |
1043 | #[link_name = "llvm.x86.avx2.vpdpwusd.256" ] |
1044 | unsafefn vpdpwusd_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; |
1045 | |
1046 | #[link_name = "llvm.x86.avx2.vpdpwusds.128" ] |
1047 | unsafefn vpdpwusds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; |
1048 | #[link_name = "llvm.x86.avx2.vpdpwusds.256" ] |
1049 | unsafefn vpdpwusds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; |
1050 | |
1051 | #[link_name = "llvm.x86.avx2.vpdpwuud.128" ] |
1052 | unsafefn vpdpwuud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; |
1053 | #[link_name = "llvm.x86.avx2.vpdpwuud.256" ] |
1054 | unsafefn vpdpwuud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; |
1055 | |
1056 | #[link_name = "llvm.x86.avx2.vpdpwuuds.128" ] |
1057 | unsafefn vpdpwuuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4; |
1058 | #[link_name = "llvm.x86.avx2.vpdpwuuds.256" ] |
1059 | unsafefn vpdpwuuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8; |
1060 | } |
1061 | |
1062 | #[cfg (test)] |
1063 | mod tests { |
1064 | |
1065 | use crate::core_arch::x86::*; |
1066 | use stdarch_test::simd_test; |
1067 | |
// dpwssd tests: each i32 lane of `a` and `b` holds two i16 words equal to 1,
// so the dot product contributes 1*1 + 1*1 = 2, plus src (1) => 3 per lane.
#[simd_test(enable = "avx512vnni")]
unsafe fn test_mm512_dpwssd_epi32() {
    let src = _mm512_set1_epi32(1);
    let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm512_dpwssd_epi32(src, a, b);
    let e = _mm512_set1_epi32(3);
    assert_eq_m512i(r, e);
}

// All-zero mask keeps src; all-ones mask takes the computed result.
#[simd_test(enable = "avx512vnni")]
unsafe fn test_mm512_mask_dpwssd_epi32() {
    let src = _mm512_set1_epi32(1);
    let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm512_mask_dpwssd_epi32(src, 0b00000000_00000000, a, b);
    assert_eq_m512i(r, src);
    let r = _mm512_mask_dpwssd_epi32(src, 0b11111111_11111111, a, b);
    let e = _mm512_set1_epi32(3);
    assert_eq_m512i(r, e);
}

// Zero mask zeroes lanes instead of copying src.
#[simd_test(enable = "avx512vnni")]
unsafe fn test_mm512_maskz_dpwssd_epi32() {
    let src = _mm512_set1_epi32(1);
    let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm512_maskz_dpwssd_epi32(0b00000000_00000000, src, a, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_dpwssd_epi32(0b11111111_11111111, src, a, b);
    let e = _mm512_set1_epi32(3);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avxvnni")]
unsafe fn test_mm256_dpwssd_avx_epi32() {
    let src = _mm256_set1_epi32(1);
    let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm256_dpwssd_avx_epi32(src, a, b);
    let e = _mm256_set1_epi32(3);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512vnni,avx512vl")]
unsafe fn test_mm256_dpwssd_epi32() {
    let src = _mm256_set1_epi32(1);
    let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm256_dpwssd_epi32(src, a, b);
    let e = _mm256_set1_epi32(3);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512vnni,avx512vl")]
unsafe fn test_mm256_mask_dpwssd_epi32() {
    let src = _mm256_set1_epi32(1);
    let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm256_mask_dpwssd_epi32(src, 0b00000000, a, b);
    assert_eq_m256i(r, src);
    let r = _mm256_mask_dpwssd_epi32(src, 0b11111111, a, b);
    let e = _mm256_set1_epi32(3);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512vnni,avx512vl")]
unsafe fn test_mm256_maskz_dpwssd_epi32() {
    let src = _mm256_set1_epi32(1);
    let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm256_maskz_dpwssd_epi32(0b00000000, src, a, b);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_dpwssd_epi32(0b11111111, src, a, b);
    let e = _mm256_set1_epi32(3);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avxvnni")]
unsafe fn test_mm_dpwssd_avx_epi32() {
    let src = _mm_set1_epi32(1);
    let a = _mm_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm_dpwssd_avx_epi32(src, a, b);
    let e = _mm_set1_epi32(3);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512vnni,avx512vl")]
unsafe fn test_mm_dpwssd_epi32() {
    let src = _mm_set1_epi32(1);
    let a = _mm_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm_dpwssd_epi32(src, a, b);
    let e = _mm_set1_epi32(3);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512vnni,avx512vl")]
unsafe fn test_mm_mask_dpwssd_epi32() {
    let src = _mm_set1_epi32(1);
    let a = _mm_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm_mask_dpwssd_epi32(src, 0b00000000, a, b);
    assert_eq_m128i(r, src);
    let r = _mm_mask_dpwssd_epi32(src, 0b00001111, a, b);
    let e = _mm_set1_epi32(3);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512vnni,avx512vl")]
unsafe fn test_mm_maskz_dpwssd_epi32() {
    let src = _mm_set1_epi32(1);
    let a = _mm_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm_maskz_dpwssd_epi32(0b00000000, src, a, b);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_dpwssd_epi32(0b00001111, src, a, b);
    let e = _mm_set1_epi32(3);
    assert_eq_m128i(r, e);
}
1189 | |
// dpwssds tests: saturating variant; inputs are far from i32 limits so the
// expected values match the non-saturating case (1*1 + 1*1 + 1 = 3 per lane).
#[simd_test(enable = "avx512vnni")]
unsafe fn test_mm512_dpwssds_epi32() {
    let src = _mm512_set1_epi32(1);
    let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm512_dpwssds_epi32(src, a, b);
    let e = _mm512_set1_epi32(3);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512vnni")]
unsafe fn test_mm512_mask_dpwssds_epi32() {
    let src = _mm512_set1_epi32(1);
    let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm512_mask_dpwssds_epi32(src, 0b00000000_00000000, a, b);
    assert_eq_m512i(r, src);
    let r = _mm512_mask_dpwssds_epi32(src, 0b11111111_11111111, a, b);
    let e = _mm512_set1_epi32(3);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512vnni")]
unsafe fn test_mm512_maskz_dpwssds_epi32() {
    let src = _mm512_set1_epi32(1);
    let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm512_maskz_dpwssds_epi32(0b00000000_00000000, src, a, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_dpwssds_epi32(0b11111111_11111111, src, a, b);
    let e = _mm512_set1_epi32(3);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avxvnni")]
unsafe fn test_mm256_dpwssds_avx_epi32() {
    let src = _mm256_set1_epi32(1);
    let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm256_dpwssds_avx_epi32(src, a, b);
    let e = _mm256_set1_epi32(3);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512vnni,avx512vl")]
unsafe fn test_mm256_dpwssds_epi32() {
    let src = _mm256_set1_epi32(1);
    let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm256_dpwssds_epi32(src, a, b);
    let e = _mm256_set1_epi32(3);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512vnni,avx512vl")]
unsafe fn test_mm256_mask_dpwssds_epi32() {
    let src = _mm256_set1_epi32(1);
    let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm256_mask_dpwssds_epi32(src, 0b00000000, a, b);
    assert_eq_m256i(r, src);
    let r = _mm256_mask_dpwssds_epi32(src, 0b11111111, a, b);
    let e = _mm256_set1_epi32(3);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512vnni,avx512vl")]
unsafe fn test_mm256_maskz_dpwssds_epi32() {
    let src = _mm256_set1_epi32(1);
    let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm256_maskz_dpwssds_epi32(0b00000000, src, a, b);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_dpwssds_epi32(0b11111111, src, a, b);
    let e = _mm256_set1_epi32(3);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avxvnni")]
unsafe fn test_mm_dpwssds_avx_epi32() {
    let src = _mm_set1_epi32(1);
    let a = _mm_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm_dpwssds_avx_epi32(src, a, b);
    let e = _mm_set1_epi32(3);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512vnni,avx512vl")]
unsafe fn test_mm_dpwssds_epi32() {
    let src = _mm_set1_epi32(1);
    let a = _mm_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm_dpwssds_epi32(src, a, b);
    let e = _mm_set1_epi32(3);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512vnni,avx512vl")]
unsafe fn test_mm_mask_dpwssds_epi32() {
    let src = _mm_set1_epi32(1);
    let a = _mm_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm_mask_dpwssds_epi32(src, 0b00000000, a, b);
    assert_eq_m128i(r, src);
    let r = _mm_mask_dpwssds_epi32(src, 0b00001111, a, b);
    let e = _mm_set1_epi32(3);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512vnni,avx512vl")]
unsafe fn test_mm_maskz_dpwssds_epi32() {
    let src = _mm_set1_epi32(1);
    let a = _mm_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm_maskz_dpwssds_epi32(0b00000000, src, a, b);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_dpwssds_epi32(0b00001111, src, a, b);
    let e = _mm_set1_epi32(3);
    assert_eq_m128i(r, e);
}
1311 | |
// dpbusd tests: each i32 lane holds four bytes equal to 1, so the dot product
// contributes 1*1 * 4 = 4, plus src (1) => 5 per lane.
#[simd_test(enable = "avx512vnni")]
unsafe fn test_mm512_dpbusd_epi32() {
    let src = _mm512_set1_epi32(1);
    let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm512_dpbusd_epi32(src, a, b);
    let e = _mm512_set1_epi32(5);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512vnni")]
unsafe fn test_mm512_mask_dpbusd_epi32() {
    let src = _mm512_set1_epi32(1);
    let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm512_mask_dpbusd_epi32(src, 0b00000000_00000000, a, b);
    assert_eq_m512i(r, src);
    let r = _mm512_mask_dpbusd_epi32(src, 0b11111111_11111111, a, b);
    let e = _mm512_set1_epi32(5);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512vnni")]
unsafe fn test_mm512_maskz_dpbusd_epi32() {
    let src = _mm512_set1_epi32(1);
    let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm512_maskz_dpbusd_epi32(0b00000000_00000000, src, a, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_dpbusd_epi32(0b11111111_11111111, src, a, b);
    let e = _mm512_set1_epi32(5);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avxvnni")]
unsafe fn test_mm256_dpbusd_avx_epi32() {
    let src = _mm256_set1_epi32(1);
    let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm256_dpbusd_avx_epi32(src, a, b);
    let e = _mm256_set1_epi32(5);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512vnni,avx512vl")]
unsafe fn test_mm256_dpbusd_epi32() {
    let src = _mm256_set1_epi32(1);
    let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm256_dpbusd_epi32(src, a, b);
    let e = _mm256_set1_epi32(5);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512vnni,avx512vl")]
unsafe fn test_mm256_mask_dpbusd_epi32() {
    let src = _mm256_set1_epi32(1);
    let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm256_mask_dpbusd_epi32(src, 0b00000000, a, b);
    assert_eq_m256i(r, src);
    let r = _mm256_mask_dpbusd_epi32(src, 0b11111111, a, b);
    let e = _mm256_set1_epi32(5);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512vnni,avx512vl")]
unsafe fn test_mm256_maskz_dpbusd_epi32() {
    let src = _mm256_set1_epi32(1);
    let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm256_maskz_dpbusd_epi32(0b00000000, src, a, b);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_dpbusd_epi32(0b11111111, src, a, b);
    let e = _mm256_set1_epi32(5);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avxvnni")]
unsafe fn test_mm_dpbusd_avx_epi32() {
    let src = _mm_set1_epi32(1);
    let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm_dpbusd_avx_epi32(src, a, b);
    let e = _mm_set1_epi32(5);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512vnni,avx512vl")]
unsafe fn test_mm_dpbusd_epi32() {
    let src = _mm_set1_epi32(1);
    let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm_dpbusd_epi32(src, a, b);
    let e = _mm_set1_epi32(5);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512vnni,avx512vl")]
unsafe fn test_mm_mask_dpbusd_epi32() {
    let src = _mm_set1_epi32(1);
    let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm_mask_dpbusd_epi32(src, 0b00000000, a, b);
    assert_eq_m128i(r, src);
    let r = _mm_mask_dpbusd_epi32(src, 0b00001111, a, b);
    let e = _mm_set1_epi32(5);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512vnni,avx512vl")]
unsafe fn test_mm_maskz_dpbusd_epi32() {
    let src = _mm_set1_epi32(1);
    let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm_maskz_dpbusd_epi32(0b00000000, src, a, b);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_dpbusd_epi32(0b00001111, src, a, b);
    let e = _mm_set1_epi32(5);
    assert_eq_m128i(r, e);
}
1433 | |
// dpbusds tests: saturating byte variant; small inputs never saturate, so the
// expected values match dpbusd (four 1*1 byte products + src = 5 per lane).
#[simd_test(enable = "avx512vnni")]
unsafe fn test_mm512_dpbusds_epi32() {
    let src = _mm512_set1_epi32(1);
    let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm512_dpbusds_epi32(src, a, b);
    let e = _mm512_set1_epi32(5);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512vnni")]
unsafe fn test_mm512_mask_dpbusds_epi32() {
    let src = _mm512_set1_epi32(1);
    let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm512_mask_dpbusds_epi32(src, 0b00000000_00000000, a, b);
    assert_eq_m512i(r, src);
    let r = _mm512_mask_dpbusds_epi32(src, 0b11111111_11111111, a, b);
    let e = _mm512_set1_epi32(5);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512vnni")]
unsafe fn test_mm512_maskz_dpbusds_epi32() {
    let src = _mm512_set1_epi32(1);
    let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm512_maskz_dpbusds_epi32(0b00000000_00000000, src, a, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_dpbusds_epi32(0b11111111_11111111, src, a, b);
    let e = _mm512_set1_epi32(5);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avxvnni")]
unsafe fn test_mm256_dpbusds_avx_epi32() {
    let src = _mm256_set1_epi32(1);
    let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm256_dpbusds_avx_epi32(src, a, b);
    let e = _mm256_set1_epi32(5);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512vnni,avx512vl")]
unsafe fn test_mm256_dpbusds_epi32() {
    let src = _mm256_set1_epi32(1);
    let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm256_dpbusds_epi32(src, a, b);
    let e = _mm256_set1_epi32(5);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512vnni,avx512vl")]
unsafe fn test_mm256_mask_dpbusds_epi32() {
    let src = _mm256_set1_epi32(1);
    let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm256_mask_dpbusds_epi32(src, 0b00000000, a, b);
    assert_eq_m256i(r, src);
    let r = _mm256_mask_dpbusds_epi32(src, 0b11111111, a, b);
    let e = _mm256_set1_epi32(5);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512vnni,avx512vl")]
unsafe fn test_mm256_maskz_dpbusds_epi32() {
    let src = _mm256_set1_epi32(1);
    let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm256_maskz_dpbusds_epi32(0b00000000, src, a, b);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_dpbusds_epi32(0b11111111, src, a, b);
    let e = _mm256_set1_epi32(5);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avxvnni")]
unsafe fn test_mm_dpbusds_avx_epi32() {
    let src = _mm_set1_epi32(1);
    let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm_dpbusds_avx_epi32(src, a, b);
    let e = _mm_set1_epi32(5);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512vnni,avx512vl")]
unsafe fn test_mm_dpbusds_epi32() {
    let src = _mm_set1_epi32(1);
    let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm_dpbusds_epi32(src, a, b);
    let e = _mm_set1_epi32(5);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512vnni,avx512vl")]
unsafe fn test_mm_mask_dpbusds_epi32() {
    let src = _mm_set1_epi32(1);
    let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm_mask_dpbusds_epi32(src, 0b00000000, a, b);
    assert_eq_m128i(r, src);
    let r = _mm_mask_dpbusds_epi32(src, 0b00001111, a, b);
    let e = _mm_set1_epi32(5);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512vnni,avx512vl")]
unsafe fn test_mm_maskz_dpbusds_epi32() {
    let src = _mm_set1_epi32(1);
    let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm_maskz_dpbusds_epi32(0b00000000, src, a, b);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_dpbusds_epi32(0b00001111, src, a, b);
    let e = _mm_set1_epi32(5);
    assert_eq_m128i(r, e);
}
1555 | |
// AVX-VNNI-INT8 byte dot-product tests (ss/su/uu sign combinations, with and
// without saturation): four 1*1 byte products + src (1) => 5 per lane.
#[simd_test(enable = "avxvnniint8")]
unsafe fn test_mm_dpbssd_epi32() {
    let src = _mm_set1_epi32(1);
    let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm_dpbssd_epi32(src, a, b);
    let e = _mm_set1_epi32(5);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avxvnniint8")]
unsafe fn test_mm256_dpbssd_epi32() {
    let src = _mm256_set1_epi32(1);
    let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm256_dpbssd_epi32(src, a, b);
    let e = _mm256_set1_epi32(5);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avxvnniint8")]
unsafe fn test_mm_dpbssds_epi32() {
    let src = _mm_set1_epi32(1);
    let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm_dpbssds_epi32(src, a, b);
    let e = _mm_set1_epi32(5);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avxvnniint8")]
unsafe fn test_mm256_dpbssds_epi32() {
    let src = _mm256_set1_epi32(1);
    let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm256_dpbssds_epi32(src, a, b);
    let e = _mm256_set1_epi32(5);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avxvnniint8")]
unsafe fn test_mm_dpbsud_epi32() {
    let src = _mm_set1_epi32(1);
    let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm_dpbsud_epi32(src, a, b);
    let e = _mm_set1_epi32(5);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avxvnniint8")]
unsafe fn test_mm256_dpbsud_epi32() {
    let src = _mm256_set1_epi32(1);
    let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm256_dpbsud_epi32(src, a, b);
    let e = _mm256_set1_epi32(5);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avxvnniint8")]
unsafe fn test_mm_dpbsuds_epi32() {
    let src = _mm_set1_epi32(1);
    let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm_dpbsuds_epi32(src, a, b);
    let e = _mm_set1_epi32(5);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avxvnniint8")]
unsafe fn test_mm256_dpbsuds_epi32() {
    let src = _mm256_set1_epi32(1);
    let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm256_dpbsuds_epi32(src, a, b);
    let e = _mm256_set1_epi32(5);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avxvnniint8")]
unsafe fn test_mm_dpbuud_epi32() {
    let src = _mm_set1_epi32(1);
    let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm_dpbuud_epi32(src, a, b);
    let e = _mm_set1_epi32(5);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avxvnniint8")]
unsafe fn test_mm256_dpbuud_epi32() {
    let src = _mm256_set1_epi32(1);
    let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm256_dpbuud_epi32(src, a, b);
    let e = _mm256_set1_epi32(5);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avxvnniint8")]
unsafe fn test_mm_dpbuuds_epi32() {
    let src = _mm_set1_epi32(1);
    let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm_dpbuuds_epi32(src, a, b);
    let e = _mm_set1_epi32(5);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avxvnniint8")]
unsafe fn test_mm256_dpbuuds_epi32() {
    let src = _mm256_set1_epi32(1);
    let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
    let r = _mm256_dpbuuds_epi32(src, a, b);
    let e = _mm256_set1_epi32(5);
    assert_eq_m256i(r, e);
}
1675 | |
// AVX-VNNI-INT16 word dot-product tests (su/us sign combinations): two 1*1
// word products + src (1) => 3 per lane.
#[simd_test(enable = "avxvnniint16")]
unsafe fn test_mm_dpwsud_epi32() {
    let src = _mm_set1_epi32(1);
    let a = _mm_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm_dpwsud_epi32(src, a, b);
    let e = _mm_set1_epi32(3);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avxvnniint16")]
unsafe fn test_mm256_dpwsud_epi32() {
    let src = _mm256_set1_epi32(1);
    let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm256_dpwsud_epi32(src, a, b);
    let e = _mm256_set1_epi32(3);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avxvnniint16")]
unsafe fn test_mm_dpwsuds_epi32() {
    let src = _mm_set1_epi32(1);
    let a = _mm_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm_dpwsuds_epi32(src, a, b);
    let e = _mm_set1_epi32(3);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avxvnniint16")]
unsafe fn test_mm256_dpwsuds_epi32() {
    let src = _mm256_set1_epi32(1);
    let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm256_dpwsuds_epi32(src, a, b);
    let e = _mm256_set1_epi32(3);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avxvnniint16")]
unsafe fn test_mm_dpwusd_epi32() {
    let src = _mm_set1_epi32(1);
    let a = _mm_set1_epi32(1 << 16 | 1 << 0);
    let b = _mm_set1_epi32(1 << 16 | 1 << 0);
    let r = _mm_dpwusd_epi32(src, a, b);
    let e = _mm_set1_epi32(3);
    assert_eq_m128i(r, e);
}
1725 | |
1726 | #[simd_test(enable = "avxvnniint16" )] |
1727 | unsafe fn test_mm256_dpwusd_epi32() { |
1728 | let src = _mm256_set1_epi32(1); |
1729 | let a = _mm256_set1_epi32(1 << 16 | 1 << 0); |
1730 | let b = _mm256_set1_epi32(1 << 16 | 1 << 0); |
1731 | let r = _mm256_dpwusd_epi32(src, a, b); |
1732 | let e = _mm256_set1_epi32(3); |
1733 | assert_eq_m256i(r, e); |
1734 | } |
1735 | |
1736 | #[simd_test(enable = "avxvnniint16" )] |
1737 | unsafe fn test_mm_dpwusds_epi32() { |
1738 | let src = _mm_set1_epi32(1); |
1739 | let a = _mm_set1_epi32(1 << 16 | 1 << 0); |
1740 | let b = _mm_set1_epi32(1 << 16 | 1 << 0); |
1741 | let r = _mm_dpwusds_epi32(src, a, b); |
1742 | let e = _mm_set1_epi32(3); |
1743 | assert_eq_m128i(r, e); |
1744 | } |
1745 | |
1746 | #[simd_test(enable = "avxvnniint16" )] |
1747 | unsafe fn test_mm256_dpwusds_epi32() { |
1748 | let src = _mm256_set1_epi32(1); |
1749 | let a = _mm256_set1_epi32(1 << 16 | 1 << 0); |
1750 | let b = _mm256_set1_epi32(1 << 16 | 1 << 0); |
1751 | let r = _mm256_dpwusds_epi32(src, a, b); |
1752 | let e = _mm256_set1_epi32(3); |
1753 | assert_eq_m256i(r, e); |
1754 | } |
1755 | |
1756 | #[simd_test(enable = "avxvnniint16" )] |
1757 | unsafe fn test_mm_dpwuud_epi32() { |
1758 | let src = _mm_set1_epi32(1); |
1759 | let a = _mm_set1_epi32(1 << 16 | 1 << 0); |
1760 | let b = _mm_set1_epi32(1 << 16 | 1 << 0); |
1761 | let r = _mm_dpwuud_epi32(src, a, b); |
1762 | let e = _mm_set1_epi32(3); |
1763 | assert_eq_m128i(r, e); |
1764 | } |
1765 | |
1766 | #[simd_test(enable = "avxvnniint16" )] |
1767 | unsafe fn test_mm256_dpwuud_epi32() { |
1768 | let src = _mm256_set1_epi32(1); |
1769 | let a = _mm256_set1_epi32(1 << 16 | 1 << 0); |
1770 | let b = _mm256_set1_epi32(1 << 16 | 1 << 0); |
1771 | let r = _mm256_dpwuud_epi32(src, a, b); |
1772 | let e = _mm256_set1_epi32(3); |
1773 | assert_eq_m256i(r, e); |
1774 | } |
1775 | |
1776 | #[simd_test(enable = "avxvnniint16" )] |
1777 | unsafe fn test_mm_dpwuuds_epi32() { |
1778 | let src = _mm_set1_epi32(1); |
1779 | let a = _mm_set1_epi32(1 << 16 | 1 << 0); |
1780 | let b = _mm_set1_epi32(1 << 16 | 1 << 0); |
1781 | let r = _mm_dpwuuds_epi32(src, a, b); |
1782 | let e = _mm_set1_epi32(3); |
1783 | assert_eq_m128i(r, e); |
1784 | } |
1785 | |
1786 | #[simd_test(enable = "avxvnniint16" )] |
1787 | unsafe fn test_mm256_dpwuuds_epi32() { |
1788 | let src = _mm256_set1_epi32(1); |
1789 | let a = _mm256_set1_epi32(1 << 16 | 1 << 0); |
1790 | let b = _mm256_set1_epi32(1 << 16 | 1 << 0); |
1791 | let r = _mm256_dpwuuds_epi32(src, a, b); |
1792 | let e = _mm256_set1_epi32(3); |
1793 | assert_eq_m256i(r, e); |
1794 | } |
1795 | } |
1796 | |