use crate::core_arch::{simd::*, x86::*};
use crate::intrinsics::simd::*;

#[cfg(test)]
use stdarch_test::assert_instr;

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the
/// corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssd_epi32&expand=2219)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub fn _mm512_dpwssd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpdpwssd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
}
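
// Scalar reference model for vpdpwssd (an illustrative sketch only, not used
// by the intrinsics in this module): each 32-bit lane of the result is the
// lane of src plus two widened 16-bit products, accumulated with wrapping
// arithmetic.
#[cfg(test)]
#[allow(dead_code)]
fn dpwssd_lane_ref(src: i32, a: [i16; 2], b: [i16; 2]) -> i32 {
    // Each individual product fits in i32 (|i16 * i16| <= 2^30).
    let p0 = a[0] as i32 * b[0] as i32;
    let p1 = a[1] as i32 * b[1] as i32;
    src.wrapping_add(p0).wrapping_add(p1)
}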

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the
/// corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssd_epi32&expand=2220)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub fn _mm512_mask_dpwssd_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let r: i32x16 = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
    }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the
/// corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssd_epi32&expand=2221)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub fn _mm512_maskz_dpwssd_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let r: i32x16 = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
    }
}
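
// Hedged usage sketch (hypothetical values, illustrative only): how the
// writemask and zeromask forms differ. With writemask k, lanes whose mask bit
// is clear keep the value from src; with zeromask k, those lanes are zeroed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512vnni")]
fn mask_forms_sketch() {
    let src = _mm512_set1_epi32(7);
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(1);
    // Low 8 lanes become 7 + 1*1 + 1*1 = 9; high 8 lanes keep src's 7.
    let _masked = _mm512_mask_dpwssd_epi32(src, 0x00ff, a, b);
    // Low 8 lanes become 9; high 8 lanes are zeroed instead.
    let _zeroed = _mm512_maskz_dpwssd_epi32(0x00ff, src, a, b);
}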

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the
/// corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssd_avx_epi32&expand=2713)
#[inline]
#[target_feature(enable = "avxvnni")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub fn _mm256_dpwssd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the
/// corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssd_epi32&expand=2216)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub fn _mm256_dpwssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the
/// corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssd_epi32&expand=2217)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub fn _mm256_mask_dpwssd_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i32x8 = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
    }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the
/// corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssd_epi32&expand=2218)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub fn _mm256_maskz_dpwssd_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i32x8 = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
    }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the
/// corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssd_avx_epi32&expand=2712)
#[inline]
#[target_feature(enable = "avxvnni")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub fn _mm_dpwssd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the
/// corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssd_epi32&expand=2213)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub fn _mm_dpwssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}
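
// Hedged runtime-dispatch sketch (illustrative only; real code would live
// outside this module and pass flags obtained from std's
// is_x86_feature_detected!): the AVX512VNNI+VL and AVX-VNNI intrinsics above
// compute the same vpdpwssd result on 128-bit vectors, so a caller can pick
// whichever encoding the CPU supports.
#[cfg(test)]
#[allow(dead_code)]
fn dpwssd_128_dispatch_sketch(
    has_avx512vnni_vl: bool, // assumed detected at runtime by the caller
    has_avxvnni: bool,       // assumed detected at runtime by the caller
    src: __m128i,
    a: __m128i,
    b: __m128i,
) -> Option<__m128i> {
    if has_avx512vnni_vl {
        // SAFETY: the caller verified avx512vnni and avx512vl are available.
        Some(unsafe { _mm_dpwssd_epi32(src, a, b) })
    } else if has_avxvnni {
        // SAFETY: the caller verified avxvnni is available.
        Some(unsafe { _mm_dpwssd_avx_epi32(src, a, b) })
    } else {
        None // the caller falls back to scalar code
    }
}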

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the
/// corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssd_epi32&expand=2214)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub fn _mm_mask_dpwssd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let r: i32x4 = _mm_dpwssd_epi32(src, a, b).as_i32x4();
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
    }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the
/// corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssd_epi32&expand=2215)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub fn _mm_maskz_dpwssd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let r: i32x4 = _mm_dpwssd_epi32(src, a, b).as_i32x4();
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
    }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the
/// corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results
/// in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssds_epi32&expand=2228)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub fn _mm512_dpwssds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpdpwssds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
}
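
// Scalar reference model for the saturating form vpdpwssds (an illustrative
// sketch only): same two products as vpdpwssd, but the accumulation clamps to
// the i32 range instead of wrapping. The sum is formed in i64 because the two
// products alone can exceed i32::MAX (both can equal (-32768)^2 = 2^30).
#[cfg(test)]
#[allow(dead_code)]
fn dpwssds_lane_ref(src: i32, a: [i16; 2], b: [i16; 2]) -> i32 {
    let sum = src as i64 + a[0] as i64 * b[0] as i64 + a[1] as i64 * b[1] as i64;
    sum.clamp(i32::MIN as i64, i32::MAX as i64) as i32
}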

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the
/// corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results
/// in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssds_epi32&expand=2229)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub fn _mm512_mask_dpwssds_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let r: i32x16 = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
    }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the
/// corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results
/// in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssds_epi32&expand=2230)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub fn _mm512_maskz_dpwssds_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let r: i32x16 = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
    }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the
/// corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results
/// in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssds_avx_epi32&expand=2726)
#[inline]
#[target_feature(enable = "avxvnni")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub fn _mm256_dpwssds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the
/// corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results
/// in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssds_epi32&expand=2225)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub fn _mm256_dpwssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the
/// corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results
/// in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssds_epi32&expand=2226)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub fn _mm256_mask_dpwssds_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i32x8 = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
    }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the
/// corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results
/// in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssds_epi32&expand=2227)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub fn _mm256_maskz_dpwssds_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i32x8 = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
    }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the
/// corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results
/// in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssds_avx_epi32&expand=2725)
#[inline]
#[target_feature(enable = "avxvnni")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub fn _mm_dpwssds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the
/// corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results
/// in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssds_epi32&expand=2222)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub fn _mm_dpwssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the
/// corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results
/// in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssds_epi32&expand=2223)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub fn _mm_mask_dpwssds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let r: i32x4 = _mm_dpwssds_epi32(src, a, b).as_i32x4();
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
    }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the
/// corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results
/// in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssds_epi32&expand=2224)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub fn _mm_maskz_dpwssds_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let r: i32x4 = _mm_dpwssds_epi32(src, a, b).as_i32x4();
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
    }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed
/// 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with
/// the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusd_epi32&expand=2201)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub fn _mm512_dpbusd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpdpbusd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
}
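
// Scalar reference model for vpdpbusd (an illustrative sketch only): four
// unsigned-by-signed byte products per 32-bit lane. Each product fits easily
// in i32 (at most |255 * -128| = 32640), so only the final accumulation can
// wrap.
#[cfg(test)]
#[allow(dead_code)]
fn dpbusd_lane_ref(src: i32, a: [u8; 4], b: [i8; 4]) -> i32 {
    let mut acc = src;
    for i in 0..4 {
        acc = acc.wrapping_add(a[i] as i32 * b[i] as i32);
    }
    acc
}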

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed
/// 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with
/// the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using
/// writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusd_epi32&expand=2202)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub fn _mm512_mask_dpbusd_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let r: i32x16 = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
    }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed
/// 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with
/// the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using
/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusd_epi32&expand=2203)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub fn _mm512_maskz_dpbusd_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let r: i32x16 = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
    }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed
/// 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with
/// the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusd_avx_epi32&expand=2683)
#[inline]
#[target_feature(enable = "avxvnni")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub fn _mm256_dpbusd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed
/// 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with
/// the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusd_epi32&expand=2198)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub fn _mm256_dpbusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed
/// 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with
/// the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using
/// writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusd_epi32&expand=2199)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub fn _mm256_mask_dpbusd_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i32x8 = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
    }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed
/// 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with
/// the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using
/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusd_epi32&expand=2200)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub fn _mm256_maskz_dpbusd_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i32x8 = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
    }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed
/// 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with
/// the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusd_avx_epi32&expand=2682)
#[inline]
#[target_feature(enable = "avxvnni")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub fn _mm_dpbusd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed
/// 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with
/// the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusd_epi32&expand=2195)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub fn _mm_dpbusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed
/// 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with
/// the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using
/// writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusd_epi32&expand=2196)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub fn _mm_mask_dpbusd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let r: i32x4 = _mm_dpbusd_epi32(src, a, b).as_i32x4();
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
    }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed
/// 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with
/// the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using
/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusd_epi32&expand=2197)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub fn _mm_maskz_dpbusd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let r: i32x4 = _mm_dpbusd_epi32(src, a, b).as_i32x4();
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
    }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed
/// 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with
/// the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit
/// results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusds_epi32&expand=2210)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub fn _mm512_dpbusds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpdpbusds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
}
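
// Scalar reference model for the saturating byte form vpdpbusds (an
// illustrative sketch only): as vpdpbusd, but the final accumulation clamps
// to the i32 range.
#[cfg(test)]
#[allow(dead_code)]
fn dpbusds_lane_ref(src: i32, a: [u8; 4], b: [i8; 4]) -> i32 {
    let sum: i64 = (0..4).map(|i| a[i] as i64 * b[i] as i64).sum::<i64>() + src as i64;
    sum.clamp(i32::MIN as i64, i32::MAX as i64) as i32
}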

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed
/// 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with
/// the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit
/// results in dst using writemask k (elements are copied from src when the corresponding mask bit
/// is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusds_epi32&expand=2211)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub fn _mm512_mask_dpbusds_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let r: i32x16 = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
    }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed
/// 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with
/// the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit
/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not
/// set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusds_epi32&expand=2212)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub fn _mm512_maskz_dpbusds_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let r: i32x16 = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
    }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed
/// 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with
/// the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit
/// results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusds_avx_epi32&expand=2696)
#[inline]
#[target_feature(enable = "avxvnni")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub fn _mm256_dpbusds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed
/// 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with
/// the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit
/// results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusds_epi32&expand=2207)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub fn _mm256_dpbusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed
/// 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with
/// the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit
/// results in dst using writemask k (elements are copied from src when the corresponding mask bit
/// is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusds_epi32&expand=2208)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub fn _mm256_mask_dpbusds_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i32x8 = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
    }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed
/// 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with
/// the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit
/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not
/// set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusds_epi32&expand=2209)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub fn _mm256_maskz_dpbusds_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i32x8 = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
    }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed
/// 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with
/// the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit
/// results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusds_avx_epi32&expand=2695)
#[inline]
#[target_feature(enable = "avxvnni")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub fn _mm_dpbusds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed
/// 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with
/// the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit
/// results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusds_epi32&expand=2204)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub fn _mm_dpbusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed
/// 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with
/// the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit
/// results in dst using writemask k (elements are copied from src when the corresponding mask bit
/// is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusds_epi32&expand=2205)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub fn _mm_mask_dpbusds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let r: i32x4 = _mm_dpbusds_epi32(src, a, b).as_i32x4();
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
    }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed
/// 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with
/// the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit
/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not
/// set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusds_epi32&expand=2206)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub fn _mm_maskz_dpbusds_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let r: i32x4 = _mm_dpbusds_epi32(src, a, b).as_i32x4();
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
    }
}

/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbssd_epi32&expand=2674)
#[inline]
#[target_feature(enable = "avxvnniint8")]
#[cfg_attr(test, assert_instr(vpdpbssd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_dpbssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpbssd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbssd_epi32&expand=2675)
#[inline]
#[target_feature(enable = "avxvnniint8")]
#[cfg_attr(test, assert_instr(vpdpbssd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_dpbssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpbssd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbssds_epi32&expand=2676)
#[inline]
#[target_feature(enable = "avxvnniint8")]
#[cfg_attr(test, assert_instr(vpdpbssds))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_dpbssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpbssds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbssds_epi32&expand=2677)
#[inline]
#[target_feature(enable = "avxvnniint8")]
#[cfg_attr(test, assert_instr(vpdpbssds))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_dpbssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpbssds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}
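
// Scalar reference model for the signed-by-signed byte forms (an illustrative
// sketch only): vpdpbssd wraps on accumulation, vpdpbssds clamps the final
// sum to the i32 range.
#[cfg(test)]
#[allow(dead_code)]
fn dpbssd_lane_ref(src: i32, a: [i8; 4], b: [i8; 4], saturate: bool) -> i32 {
    // The four products sum to at most 4 * 2^14 = 2^16, so `sum` fits in i32.
    let sum: i64 = (0..4).map(|i| a[i] as i64 * b[i] as i64).sum();
    if saturate {
        (src as i64 + sum).clamp(i32::MIN as i64, i32::MAX as i64) as i32
    } else {
        src.wrapping_add(sum as i32)
    }
}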

/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbsud_epi32&expand=2678)
#[inline]
#[target_feature(enable = "avxvnniint8")]
#[cfg_attr(test, assert_instr(vpdpbsud))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_dpbsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpbsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbsud_epi32&expand=2679)
#[inline]
#[target_feature(enable = "avxvnniint8")]
#[cfg_attr(test, assert_instr(vpdpbsud))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_dpbsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpbsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbsuds_epi32&expand=2680)
#[inline]
#[target_feature(enable = "avxvnniint8")]
#[cfg_attr(test, assert_instr(vpdpbsuds))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_dpbsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpbsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbsuds_epi32&expand=2681)
#[inline]
#[target_feature(enable = "avxvnniint8")]
#[cfg_attr(test, assert_instr(vpdpbsuds))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_dpbsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpbsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
/// integers in b, producing 4 intermediate unsigned 16-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbuud_epi32&expand=2708)
#[inline]
#[target_feature(enable = "avxvnniint8")]
#[cfg_attr(test, assert_instr(vpdpbuud))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_dpbuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpbuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
/// integers in b, producing 4 intermediate unsigned 16-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbuud_epi32&expand=2709)
#[inline]
#[target_feature(enable = "avxvnniint8")]
#[cfg_attr(test, assert_instr(vpdpbuud))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_dpbuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpbuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
/// integers in b, producing 4 intermediate unsigned 16-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in src with unsigned saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbuuds_epi32&expand=2710)
#[inline]
#[target_feature(enable = "avxvnniint8")]
#[cfg_attr(test, assert_instr(vpdpbuuds))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_dpbuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpbuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
/// integers in b, producing 4 intermediate unsigned 16-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in src with unsigned saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbuuds_epi32&expand=2711)
#[inline]
#[target_feature(enable = "avxvnniint8")]
#[cfg_attr(test, assert_instr(vpdpbuuds))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_dpbuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpbuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}
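
// Scalar reference model for the unsigned-by-unsigned byte forms (an
// illustrative sketch only): the intermediate products are unsigned
// (255 * 255 = 65025 does not fit in a signed 16-bit value, hence the
// unsigned 16-bit intermediates above), and vpdpbuuds saturates the lane as
// an unsigned 32-bit value. The lane's bit pattern is what the packed
// intrinsics store back into dst.
#[cfg(test)]
#[allow(dead_code)]
fn dpbuud_lane_ref(src: u32, a: [u8; 4], b: [u8; 4], saturate: bool) -> u32 {
    let sum: u64 = (0..4).map(|i| a[i] as u64 * b[i] as u64).sum();
    if saturate {
        (src as u64 + sum).min(u32::MAX as u64) as u32
    } else {
        src.wrapping_add(sum as u32)
    }
}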

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
/// 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwsud_epi32&expand=2738)
#[inline]
#[target_feature(enable = "avxvnniint16")]
#[cfg_attr(test, assert_instr(vpdpwsud))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_dpwsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpwsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
/// 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwsud_epi32&expand=2739)
#[inline]
#[target_feature(enable = "avxvnniint16")]
#[cfg_attr(test, assert_instr(vpdpwsud))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_dpwsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpwsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwsuds_epi32&expand=2740)
#[inline]
#[target_feature(enable = "avxvnniint16")]
#[cfg_attr(test, assert_instr(vpdpwsuds))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_dpwsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpwsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwsuds_epi32&expand=2741)
#[inline]
#[target_feature(enable = "avxvnniint16")]
#[cfg_attr(test, assert_instr(vpdpwsuds))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_dpwsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpwsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}
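
// Scalar reference model for the signed-by-unsigned word forms (an
// illustrative sketch only): each i16 * u16 product fits in i32
// (32767 * 65535 < i32::MAX), but the sum of two may not, so it is formed in
// i64; vpdpwsud then wraps (truncates) into the 32-bit lane while vpdpwsuds
// clamps to the i32 range.
#[cfg(test)]
#[allow(dead_code)]
fn dpwsud_lane_ref(src: i32, a: [i16; 2], b: [u16; 2], saturate: bool) -> i32 {
    let sum = a[0] as i64 * b[0] as i64 + a[1] as i64 * b[1] as i64;
    if saturate {
        (src as i64 + sum).clamp(i32::MIN as i64, i32::MAX as i64) as i32
    } else {
        src.wrapping_add(sum as i32)
    }
}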

/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
/// 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwusd_epi32&expand=2742)
#[inline]
#[target_feature(enable = "avxvnniint16")]
#[cfg_attr(test, assert_instr(vpdpwusd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_dpwusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpwusd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
/// 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwusd_epi32&expand=2743)
#[inline]
#[target_feature(enable = "avxvnniint16")]
#[cfg_attr(test, assert_instr(vpdpwusd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_dpwusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpwusd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwusds_epi32&expand=2744)
#[inline]
#[target_feature(enable = "avxvnniint16")]
#[cfg_attr(test, assert_instr(vpdpwusds))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_dpwusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpwusds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwusds_epi32&expand=2745)
#[inline]
#[target_feature(enable = "avxvnniint16")]
#[cfg_attr(test, assert_instr(vpdpwusds))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_dpwusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpwusds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}
822 | |
/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
/// integers in b, producing 2 intermediate unsigned 32-bit results. Sum these 2 results with the corresponding
/// 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwuud_epi32&expand=2746)
#[inline]
#[target_feature(enable = "avxvnniint16")]
#[cfg_attr(test, assert_instr(vpdpwuud))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_dpwuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpwuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
/// integers in b, producing 2 intermediate unsigned 32-bit results. Sum these 2 results with the corresponding
/// 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwuud_epi32&expand=2747)
#[inline]
#[target_feature(enable = "avxvnniint16")]
#[cfg_attr(test, assert_instr(vpdpwuud))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_dpwuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpwuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
/// integers in b, producing 2 intermediate unsigned 32-bit results. Sum these 2 results with the corresponding
/// 32-bit integer in src with unsigned saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwuuds_epi32&expand=2748)
#[inline]
#[target_feature(enable = "avxvnniint16")]
#[cfg_attr(test, assert_instr(vpdpwuuds))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_dpwuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpwuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
/// integers in b, producing 2 intermediate unsigned 32-bit results. Sum these 2 results with the corresponding
/// 32-bit integer in src with unsigned saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwuuds_epi32&expand=2749)
#[inline]
#[target_feature(enable = "avxvnniint16")]
#[cfg_attr(test, assert_instr(vpdpwuuds))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_dpwuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpwuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}
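
// A scalar sketch (ours) for the unsigned-by-unsigned word variants: both
// halves are zero-extended, each u16-by-u16 product fits in a u32, and
// `vpdpwuud` wraps on accumulate (the `vpdpwuuds` form instead clamps to the
// unsigned 32-bit range, mirroring the saturating sketch above).
#[cfg(test)]
#[allow(dead_code)]
fn dpwuud_lane_model(src: u32, a: u32, b: u32) -> u32 {
    let p0 = (a & 0xFFFF) * (b & 0xFFFF); // low halves, zero-extended
    let p1 = (a >> 16) * (b >> 16); // high halves, zero-extended
    src.wrapping_add(p0).wrapping_add(p1)
}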

#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.avx512.vpdpwssd.512"]
    unsafe fn vpdpwssd(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpdpwssd.256"]
    unsafe fn vpdpwssd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpdpwssd.128"]
    unsafe fn vpdpwssd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx512.vpdpwssds.512"]
    unsafe fn vpdpwssds(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpdpwssds.256"]
    unsafe fn vpdpwssds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpdpwssds.128"]
    unsafe fn vpdpwssds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx512.vpdpbusd.512"]
    unsafe fn vpdpbusd(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpdpbusd.256"]
    unsafe fn vpdpbusd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpdpbusd.128"]
    unsafe fn vpdpbusd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx512.vpdpbusds.512"]
    unsafe fn vpdpbusds(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpdpbusds.256"]
    unsafe fn vpdpbusds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpdpbusds.128"]
    unsafe fn vpdpbusds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx2.vpdpbssd.128"]
    unsafe fn vpdpbssd_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpbssd.256"]
    unsafe fn vpdpbssd_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpbssds.128"]
    unsafe fn vpdpbssds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpbssds.256"]
    unsafe fn vpdpbssds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpbsud.128"]
    unsafe fn vpdpbsud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpbsud.256"]
    unsafe fn vpdpbsud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpbsuds.128"]
    unsafe fn vpdpbsuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpbsuds.256"]
    unsafe fn vpdpbsuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpbuud.128"]
    unsafe fn vpdpbuud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpbuud.256"]
    unsafe fn vpdpbuud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpbuuds.128"]
    unsafe fn vpdpbuuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpbuuds.256"]
    unsafe fn vpdpbuuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpwsud.128"]
    unsafe fn vpdpwsud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpwsud.256"]
    unsafe fn vpdpwsud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpwsuds.128"]
    unsafe fn vpdpwsuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpwsuds.256"]
    unsafe fn vpdpwsuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpwusd.128"]
    unsafe fn vpdpwusd_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpwusd.256"]
    unsafe fn vpdpwusd_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpwusds.128"]
    unsafe fn vpdpwusds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpwusds.256"]
    unsafe fn vpdpwusds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpwuud.128"]
    unsafe fn vpdpwuud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpwuud.256"]
    unsafe fn vpdpwuud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpwuuds.128"]
    unsafe fn vpdpwuuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpwuuds.256"]
    unsafe fn vpdpwuuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
}

#[cfg(test)]
mod tests {

    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpwssd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_dpwssd_epi32(src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpwssd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_mask_dpwssd_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpwssd_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpwssd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_maskz_dpwssd_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpwssd_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm256_dpwssd_avx_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwssd_avx_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpwssd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwssd_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpwssd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_mask_dpwssd_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpwssd_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpwssd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_maskz_dpwssd_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpwssd_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm_dpwssd_avx_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwssd_avx_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpwssd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwssd_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpwssd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_mask_dpwssd_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpwssd_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpwssd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_maskz_dpwssd_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpwssd_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpwssds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_dpwssds_epi32(src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpwssds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_mask_dpwssds_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpwssds_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpwssds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_maskz_dpwssds_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpwssds_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm256_dpwssds_avx_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwssds_avx_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpwssds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwssds_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpwssds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_mask_dpwssds_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpwssds_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpwssds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_maskz_dpwssds_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpwssds_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm_dpwssds_avx_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwssds_avx_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpwssds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwssds_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpwssds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_mask_dpwssds_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpwssds_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpwssds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_maskz_dpwssds_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpwssds_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpbusd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_dpbusd_epi32(src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpbusd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_mask_dpbusd_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpbusd_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpbusd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_maskz_dpbusd_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpbusd_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm256_dpbusd_avx_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbusd_avx_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpbusd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbusd_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpbusd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_mask_dpbusd_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpbusd_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpbusd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_maskz_dpbusd_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpbusd_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm_dpbusd_avx_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbusd_avx_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpbusd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbusd_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpbusd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_mask_dpbusd_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpbusd_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpbusd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_maskz_dpbusd_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpbusd_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpbusds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_dpbusds_epi32(src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpbusds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_mask_dpbusds_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpbusds_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpbusds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_maskz_dpbusds_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpbusds_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm256_dpbusds_avx_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbusds_avx_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpbusds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbusds_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpbusds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_mask_dpbusds_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpbusds_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpbusds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_maskz_dpbusds_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpbusds_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm_dpbusds_avx_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbusds_avx_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpbusds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbusds_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpbusds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_mask_dpbusds_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpbusds_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpbusds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_maskz_dpbusds_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpbusds_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbssd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbssd_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbssd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbssd_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbssds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbssds_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }
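
    // Extra check beyond the original suite (our addition): with the
    // accumulator at i32::MIN and four byte products of -1 each, the
    // saturating form clamps to i32::MIN instead of wrapping around.
    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbssds_epi32_saturating() {
        let src = _mm_set1_epi32(i32::MIN);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(-1); // every byte is -1 as a signed 8-bit value
        let r = _mm_dpbssds_epi32(src, a, b);
        let e = _mm_set1_epi32(i32::MIN);
        assert_eq_m128i(r, e);
    }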

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbssds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbssds_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbsud_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbsud_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbsud_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbsud_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbsuds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbsuds_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbsuds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbsuds_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbuud_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbuud_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbuud_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbuud_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbuuds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbuuds_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbuuds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbuuds_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwsud_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwsud_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwsud_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwsud_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwsuds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwsuds_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }
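
    // Extra check beyond the original suite (our addition): with the
    // accumulator at i32::MAX and positive products (1*1 + 1*1 = 2), the
    // saturating form clamps the result to i32::MAX.
    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwsuds_epi32_saturating() {
        let src = _mm_set1_epi32(i32::MAX);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwsuds_epi32(src, a, b);
        let e = _mm_set1_epi32(i32::MAX);
        assert_eq_m128i(r, e);
    }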

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwsuds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwsuds_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwusd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwusd_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwusd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwusd_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwusds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwusds_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwusds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwusds_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwuud_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwuud_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwuud_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwuud_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwuuds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwuuds_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwuuds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwuuds_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }
}