1 | use crate::core_arch::x86::*; |
2 | use crate::intrinsics::simd::simd_select_bitmask; |
3 | |
#[cfg(test)]
5 | use stdarch_test::assert_instr; |
6 | |
7 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
8 | /// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit |
9 | /// unsigned integer from the intermediate result with the |
10 | /// corresponding unsigned 64-bit integer in `a`, and store the |
11 | /// results in `dst`. |
12 | /// |
13 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_madd52hi_epu64) |
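///
/// Informally, each 64-bit lane computes `a + (prod >> 52)`, where
/// `prod = (b & m) * (c & m)` is a 104-bit product and `m = (1 << 52) - 1`;
/// the addition wraps modulo 2^64. See Intel's pseudocode for the
/// authoritative definition.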
#[inline]
#[target_feature(enable = "avx512ifma")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
18 | pub fn _mm512_madd52hi_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
    unsafe { vpmadd52huq_512(a, b, c) }
20 | } |
21 | |
22 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
23 | /// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit |
24 | /// unsigned integer from the intermediate result with the |
25 | /// corresponding unsigned 64-bit integer in `a`, and store the |
26 | /// results in `dst` using writemask `k` (elements are copied |
/// from `a` when the corresponding mask bit is not set).
28 | /// |
29 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_mask_madd52hi_epu64) |
#[inline]
#[target_feature(enable = "avx512ifma")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
34 | pub fn _mm512_mask_madd52hi_epu64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i { |
    unsafe { simd_select_bitmask(k, vpmadd52huq_512(a, b, c), a) }
36 | } |
37 | |
38 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
39 | /// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit |
40 | /// unsigned integer from the intermediate result with the |
41 | /// corresponding unsigned 64-bit integer in `a`, and store the |
42 | /// results in `dst` using writemask `k` (elements are zeroed |
43 | /// out when the corresponding mask bit is not set). |
44 | /// |
45 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_maskz_madd52hi_epu64) |
#[inline]
#[target_feature(enable = "avx512ifma")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
50 | pub fn _mm512_maskz_madd52hi_epu64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
    unsafe { simd_select_bitmask(k, vpmadd52huq_512(a, b, c), _mm512_setzero_si512()) }
52 | } |
53 | |
54 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
55 | /// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit |
56 | /// unsigned integer from the intermediate result with the |
57 | /// corresponding unsigned 64-bit integer in `a`, and store the |
58 | /// results in `dst`. |
59 | /// |
60 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_madd52lo_epu64) |
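///
/// Informally, each 64-bit lane computes `a + (prod & m)`, where
/// `prod = (b & m) * (c & m)` is a 104-bit product and `m = (1 << 52) - 1`;
/// the addition wraps modulo 2^64. See Intel's pseudocode for the
/// authoritative definition.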
#[inline]
#[target_feature(enable = "avx512ifma")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
65 | pub fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
    unsafe { vpmadd52luq_512(a, b, c) }
67 | } |
68 | |
69 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
70 | /// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit |
71 | /// unsigned integer from the intermediate result with the |
72 | /// corresponding unsigned 64-bit integer in `a`, and store the |
73 | /// results in `dst` using writemask `k` (elements are copied |
/// from `a` when the corresponding mask bit is not set).
75 | /// |
76 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_mask_madd52lo_epu64) |
#[inline]
#[target_feature(enable = "avx512ifma")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
81 | pub fn _mm512_mask_madd52lo_epu64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i { |
    unsafe { simd_select_bitmask(k, vpmadd52luq_512(a, b, c), a) }
83 | } |
84 | |
85 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
86 | /// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit |
87 | /// unsigned integer from the intermediate result with the |
88 | /// corresponding unsigned 64-bit integer in `a`, and store the |
89 | /// results in `dst` using writemask `k` (elements are zeroed |
90 | /// out when the corresponding mask bit is not set). |
91 | /// |
92 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_maskz_madd52lo_epu64) |
#[inline]
#[target_feature(enable = "avx512ifma")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
97 | pub fn _mm512_maskz_madd52lo_epu64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
    unsafe { simd_select_bitmask(k, vpmadd52luq_512(a, b, c), _mm512_setzero_si512()) }
99 | } |
100 | |
101 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
102 | /// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit |
103 | /// unsigned integer from the intermediate result with the |
104 | /// corresponding unsigned 64-bit integer in `a`, and store the |
105 | /// results in `dst`. |
106 | /// |
107 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd52hi_avx_epu64) |
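///
/// Unlike [`_mm256_madd52hi_epu64`], this intrinsic requires only the
/// VEX-encoded AVX-IFMA feature rather than AVX512IFMA plus AVX512VL.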
#[inline]
#[target_feature(enable = "avxifma")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
112 | pub fn _mm256_madd52hi_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
    unsafe { vpmadd52huq_256(a, b, c) }
114 | } |
115 | |
116 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
117 | /// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit |
118 | /// unsigned integer from the intermediate result with the |
119 | /// corresponding unsigned 64-bit integer in `a`, and store the |
120 | /// results in `dst`. |
121 | /// |
122 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_madd52hi_epu64) |
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
127 | pub fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
    unsafe { vpmadd52huq_256(a, b, c) }
129 | } |
130 | |
131 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
132 | /// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit |
133 | /// unsigned integer from the intermediate result with the |
134 | /// corresponding unsigned 64-bit integer in `a`, and store the |
135 | /// results in `dst` using writemask `k` (elements are copied |
/// from `a` when the corresponding mask bit is not set).
137 | /// |
138 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_mask_madd52hi_epu64) |
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
143 | pub fn _mm256_mask_madd52hi_epu64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { |
    unsafe { simd_select_bitmask(k, vpmadd52huq_256(a, b, c), a) }
145 | } |
146 | |
147 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
148 | /// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit |
149 | /// unsigned integer from the intermediate result with the |
150 | /// corresponding unsigned 64-bit integer in `a`, and store the |
151 | /// results in `dst` using writemask `k` (elements are zeroed |
152 | /// out when the corresponding mask bit is not set). |
153 | /// |
154 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_maskz_madd52hi_epu64) |
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
159 | pub fn _mm256_maskz_madd52hi_epu64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
    unsafe { simd_select_bitmask(k, vpmadd52huq_256(a, b, c), _mm256_setzero_si256()) }
161 | } |
162 | |
163 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
164 | /// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit |
165 | /// unsigned integer from the intermediate result with the |
166 | /// corresponding unsigned 64-bit integer in `a`, and store the |
167 | /// results in `dst`. |
168 | /// |
169 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd52lo_avx_epu64) |
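///
/// Unlike [`_mm256_madd52lo_epu64`], this intrinsic requires only the
/// VEX-encoded AVX-IFMA feature rather than AVX512IFMA plus AVX512VL.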
#[inline]
#[target_feature(enable = "avxifma")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
174 | pub fn _mm256_madd52lo_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
    unsafe { vpmadd52luq_256(a, b, c) }
176 | } |
177 | |
178 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
179 | /// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit |
180 | /// unsigned integer from the intermediate result with the |
181 | /// corresponding unsigned 64-bit integer in `a`, and store the |
182 | /// results in `dst`. |
183 | /// |
184 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_madd52lo_epu64) |
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
189 | pub fn _mm256_madd52lo_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
    unsafe { vpmadd52luq_256(a, b, c) }
191 | } |
192 | |
193 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
194 | /// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit |
195 | /// unsigned integer from the intermediate result with the |
196 | /// corresponding unsigned 64-bit integer in `a`, and store the |
197 | /// results in `dst` using writemask `k` (elements are copied |
/// from `a` when the corresponding mask bit is not set).
199 | /// |
200 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_mask_madd52lo_epu64) |
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
205 | pub fn _mm256_mask_madd52lo_epu64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { |
    unsafe { simd_select_bitmask(k, vpmadd52luq_256(a, b, c), a) }
207 | } |
208 | |
209 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
210 | /// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit |
211 | /// unsigned integer from the intermediate result with the |
212 | /// corresponding unsigned 64-bit integer in `a`, and store the |
213 | /// results in `dst` using writemask `k` (elements are zeroed |
214 | /// out when the corresponding mask bit is not set). |
215 | /// |
216 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_maskz_madd52lo_epu64) |
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
221 | pub fn _mm256_maskz_madd52lo_epu64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
    unsafe { simd_select_bitmask(k, vpmadd52luq_256(a, b, c), _mm256_setzero_si256()) }
223 | } |
224 | |
225 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
226 | /// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit |
227 | /// unsigned integer from the intermediate result with the |
228 | /// corresponding unsigned 64-bit integer in `a`, and store the |
229 | /// results in `dst`. |
230 | /// |
231 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd52hi_avx_epu64) |
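///
/// Unlike [`_mm_madd52hi_epu64`], this intrinsic requires only the
/// VEX-encoded AVX-IFMA feature rather than AVX512IFMA plus AVX512VL.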
#[inline]
#[target_feature(enable = "avxifma")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
236 | pub fn _mm_madd52hi_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
    unsafe { vpmadd52huq_128(a, b, c) }
238 | } |
239 | |
240 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
241 | /// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit |
242 | /// unsigned integer from the intermediate result with the |
243 | /// corresponding unsigned 64-bit integer in `a`, and store the |
244 | /// results in `dst`. |
245 | /// |
246 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_madd52hi_epu64) |
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
251 | pub fn _mm_madd52hi_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
    unsafe { vpmadd52huq_128(a, b, c) }
253 | } |
254 | |
255 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
256 | /// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit |
257 | /// unsigned integer from the intermediate result with the |
258 | /// corresponding unsigned 64-bit integer in `a`, and store the |
259 | /// results in `dst` using writemask `k` (elements are copied |
/// from `a` when the corresponding mask bit is not set).
261 | /// |
262 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_mask_madd52hi_epu64) |
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
267 | pub fn _mm_mask_madd52hi_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { |
    unsafe { simd_select_bitmask(k, vpmadd52huq_128(a, b, c), a) }
269 | } |
270 | |
271 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
272 | /// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit |
273 | /// unsigned integer from the intermediate result with the |
274 | /// corresponding unsigned 64-bit integer in `a`, and store the |
275 | /// results in `dst` using writemask `k` (elements are zeroed |
276 | /// out when the corresponding mask bit is not set). |
277 | /// |
278 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_maskz_madd52hi_epu64) |
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
283 | pub fn _mm_maskz_madd52hi_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
    unsafe { simd_select_bitmask(k, vpmadd52huq_128(a, b, c), _mm_setzero_si128()) }
285 | } |
286 | |
287 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
288 | /// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit |
289 | /// unsigned integer from the intermediate result with the |
290 | /// corresponding unsigned 64-bit integer in `a`, and store the |
291 | /// results in `dst`. |
292 | /// |
293 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd52lo_avx_epu64) |
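///
/// Unlike [`_mm_madd52lo_epu64`], this intrinsic requires only the
/// VEX-encoded AVX-IFMA feature rather than AVX512IFMA plus AVX512VL.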
#[inline]
#[target_feature(enable = "avxifma")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
298 | pub fn _mm_madd52lo_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
    unsafe { vpmadd52luq_128(a, b, c) }
300 | } |
301 | |
302 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
303 | /// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit |
304 | /// unsigned integer from the intermediate result with the |
305 | /// corresponding unsigned 64-bit integer in `a`, and store the |
306 | /// results in `dst`. |
307 | /// |
308 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_madd52lo_epu64) |
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
313 | pub fn _mm_madd52lo_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
    unsafe { vpmadd52luq_128(a, b, c) }
315 | } |
316 | |
317 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
318 | /// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit |
319 | /// unsigned integer from the intermediate result with the |
320 | /// corresponding unsigned 64-bit integer in `a`, and store the |
321 | /// results in `dst` using writemask `k` (elements are copied |
/// from `a` when the corresponding mask bit is not set).
323 | /// |
324 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_mask_madd52lo_epu64) |
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
329 | pub fn _mm_mask_madd52lo_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { |
    unsafe { simd_select_bitmask(k, vpmadd52luq_128(a, b, c), a) }
331 | } |
332 | |
333 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
334 | /// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit |
335 | /// unsigned integer from the intermediate result with the |
336 | /// corresponding unsigned 64-bit integer in `a`, and store the |
337 | /// results in `dst` using writemask `k` (elements are zeroed |
338 | /// out when the corresponding mask bit is not set). |
339 | /// |
340 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_maskz_madd52lo_epu64) |
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
345 | pub fn _mm_maskz_madd52lo_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
    unsafe { simd_select_bitmask(k, vpmadd52luq_128(a, b, c), _mm_setzero_si128()) }
347 | } |
348 | |
#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.avx512.vpmadd52l.uq.128"]
    unsafe fn vpmadd52luq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
    #[link_name = "llvm.x86.avx512.vpmadd52h.uq.128"]
    unsafe fn vpmadd52huq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
    #[link_name = "llvm.x86.avx512.vpmadd52l.uq.256"]
    unsafe fn vpmadd52luq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
    #[link_name = "llvm.x86.avx512.vpmadd52h.uq.256"]
    unsafe fn vpmadd52huq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
    #[link_name = "llvm.x86.avx512.vpmadd52l.uq.512"]
    unsafe fn vpmadd52luq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
    #[link_name = "llvm.x86.avx512.vpmadd52h.uq.512"]
    unsafe fn vpmadd52huq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
363 | } |
364 | |
#[cfg(test)]
366 | mod tests { |
367 | |
368 | use stdarch_test::simd_test; |
369 | |
370 | use crate::core_arch::x86::*; |
371 | |
372 | const K: __mmask8 = 0b01101101; |
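
    // A minimal scalar model of the madd52 operation, used purely as a sanity
    // check for the hard-coded constants in the tests below. This is our own
    // sketch of the per-lane semantics, not part of Intel's API: multiply the
    // low 52 bits of `b` and `c` into a 104-bit product, then add either its
    // high or its low 52 bits to `a` (wrapping at 64 bits).
    const M52: u64 = (1 << 52) - 1;

    fn madd52hi_scalar(a: u64, b: u64, c: u64) -> u64 {
        let prod = u128::from(b & M52) * u128::from(c & M52);
        a.wrapping_add((prod >> 52) as u64)
    }

    fn madd52lo_scalar(a: u64, b: u64, c: u64) -> u64 {
        let prod = u128::from(b & M52) * u128::from(c & M52);
        a.wrapping_add((prod as u64) & M52)
    }

    #[test]
    fn scalar_model_matches_test_constants() {
        let (a, b, c) = (10u64 << 40, (11u64 << 40) + 4, (12u64 << 40) + 3);
        assert_eq!(madd52hi_scalar(a, b, c), 11030549757952);
        assert_eq!(madd52lo_scalar(a, b, c), 100055558127628);
    }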
373 | |
    #[simd_test(enable = "avx512ifma")]
375 | unsafe fn test_mm512_madd52hi_epu64() { |
376 | let a = _mm512_set1_epi64(10 << 40); |
377 | let b = _mm512_set1_epi64((11 << 40) + 4); |
378 | let c = _mm512_set1_epi64((12 << 40) + 3); |
379 | |
380 | let actual = _mm512_madd52hi_epu64(a, b, c); |
381 | |
382 | // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52) |
383 | let expected = _mm512_set1_epi64(11030549757952); |
384 | |
385 | assert_eq_m512i(expected, actual); |
386 | } |
387 | |
    #[simd_test(enable = "avx512ifma")]
389 | unsafe fn test_mm512_mask_madd52hi_epu64() { |
390 | let a = _mm512_set1_epi64(10 << 40); |
391 | let b = _mm512_set1_epi64((11 << 40) + 4); |
392 | let c = _mm512_set1_epi64((12 << 40) + 3); |
393 | |
394 | let actual = _mm512_mask_madd52hi_epu64(a, K, b, c); |
395 | |
396 | // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52) |
397 | let mut expected = _mm512_set1_epi64(11030549757952); |
398 | expected = _mm512_mask_blend_epi64(K, a, expected); |
399 | |
400 | assert_eq_m512i(expected, actual); |
401 | } |
402 | |
    #[simd_test(enable = "avx512ifma")]
404 | unsafe fn test_mm512_maskz_madd52hi_epu64() { |
405 | let a = _mm512_set1_epi64(10 << 40); |
406 | let b = _mm512_set1_epi64((11 << 40) + 4); |
407 | let c = _mm512_set1_epi64((12 << 40) + 3); |
408 | |
409 | let actual = _mm512_maskz_madd52hi_epu64(K, a, b, c); |
410 | |
411 | // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52) |
412 | let mut expected = _mm512_set1_epi64(11030549757952); |
413 | expected = _mm512_mask_blend_epi64(K, _mm512_setzero_si512(), expected); |
414 | |
415 | assert_eq_m512i(expected, actual); |
416 | } |
417 | |
    #[simd_test(enable = "avx512ifma")]
419 | unsafe fn test_mm512_madd52lo_epu64() { |
420 | let a = _mm512_set1_epi64(10 << 40); |
421 | let b = _mm512_set1_epi64((11 << 40) + 4); |
422 | let c = _mm512_set1_epi64((12 << 40) + 3); |
423 | |
424 | let actual = _mm512_madd52lo_epu64(a, b, c); |
425 | |
426 | // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52)) |
427 | let expected = _mm512_set1_epi64(100055558127628); |
428 | |
429 | assert_eq_m512i(expected, actual); |
430 | } |
431 | |
    #[simd_test(enable = "avx512ifma")]
433 | unsafe fn test_mm512_mask_madd52lo_epu64() { |
434 | let a = _mm512_set1_epi64(10 << 40); |
435 | let b = _mm512_set1_epi64((11 << 40) + 4); |
436 | let c = _mm512_set1_epi64((12 << 40) + 3); |
437 | |
438 | let actual = _mm512_mask_madd52lo_epu64(a, K, b, c); |
439 | |
440 | // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52)) |
441 | let mut expected = _mm512_set1_epi64(100055558127628); |
442 | expected = _mm512_mask_blend_epi64(K, a, expected); |
443 | |
444 | assert_eq_m512i(expected, actual); |
445 | } |
446 | |
    #[simd_test(enable = "avx512ifma")]
448 | unsafe fn test_mm512_maskz_madd52lo_epu64() { |
449 | let a = _mm512_set1_epi64(10 << 40); |
450 | let b = _mm512_set1_epi64((11 << 40) + 4); |
451 | let c = _mm512_set1_epi64((12 << 40) + 3); |
452 | |
453 | let actual = _mm512_maskz_madd52lo_epu64(K, a, b, c); |
454 | |
455 | // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52)) |
456 | let mut expected = _mm512_set1_epi64(100055558127628); |
457 | expected = _mm512_mask_blend_epi64(K, _mm512_setzero_si512(), expected); |
458 | |
459 | assert_eq_m512i(expected, actual); |
460 | } |
461 | |
    #[simd_test(enable = "avxifma")]
463 | unsafe fn test_mm256_madd52hi_avx_epu64() { |
464 | let a = _mm256_set1_epi64x(10 << 40); |
465 | let b = _mm256_set1_epi64x((11 << 40) + 4); |
466 | let c = _mm256_set1_epi64x((12 << 40) + 3); |
467 | |
468 | let actual = _mm256_madd52hi_avx_epu64(a, b, c); |
469 | |
470 | // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52) |
471 | let expected = _mm256_set1_epi64x(11030549757952); |
472 | |
473 | assert_eq_m256i(expected, actual); |
474 | } |
475 | |
    #[simd_test(enable = "avx512ifma,avx512vl")]
477 | unsafe fn test_mm256_madd52hi_epu64() { |
478 | let a = _mm256_set1_epi64x(10 << 40); |
479 | let b = _mm256_set1_epi64x((11 << 40) + 4); |
480 | let c = _mm256_set1_epi64x((12 << 40) + 3); |
481 | |
482 | let actual = _mm256_madd52hi_epu64(a, b, c); |
483 | |
484 | // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52) |
485 | let expected = _mm256_set1_epi64x(11030549757952); |
486 | |
487 | assert_eq_m256i(expected, actual); |
488 | } |
489 | |
    #[simd_test(enable = "avx512ifma,avx512vl")]
491 | unsafe fn test_mm256_mask_madd52hi_epu64() { |
492 | let a = _mm256_set1_epi64x(10 << 40); |
493 | let b = _mm256_set1_epi64x((11 << 40) + 4); |
494 | let c = _mm256_set1_epi64x((12 << 40) + 3); |
495 | |
496 | let actual = _mm256_mask_madd52hi_epu64(a, K, b, c); |
497 | |
498 | // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52) |
499 | let mut expected = _mm256_set1_epi64x(11030549757952); |
500 | expected = _mm256_mask_blend_epi64(K, a, expected); |
501 | |
502 | assert_eq_m256i(expected, actual); |
503 | } |
504 | |
    #[simd_test(enable = "avx512ifma,avx512vl")]
506 | unsafe fn test_mm256_maskz_madd52hi_epu64() { |
507 | let a = _mm256_set1_epi64x(10 << 40); |
508 | let b = _mm256_set1_epi64x((11 << 40) + 4); |
509 | let c = _mm256_set1_epi64x((12 << 40) + 3); |
510 | |
511 | let actual = _mm256_maskz_madd52hi_epu64(K, a, b, c); |
512 | |
513 | // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52) |
514 | let mut expected = _mm256_set1_epi64x(11030549757952); |
515 | expected = _mm256_mask_blend_epi64(K, _mm256_setzero_si256(), expected); |
516 | |
517 | assert_eq_m256i(expected, actual); |
518 | } |
519 | |
    #[simd_test(enable = "avxifma")]
521 | unsafe fn test_mm256_madd52lo_avx_epu64() { |
522 | let a = _mm256_set1_epi64x(10 << 40); |
523 | let b = _mm256_set1_epi64x((11 << 40) + 4); |
524 | let c = _mm256_set1_epi64x((12 << 40) + 3); |
525 | |
526 | let actual = _mm256_madd52lo_avx_epu64(a, b, c); |
527 | |
528 | // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52)) |
529 | let expected = _mm256_set1_epi64x(100055558127628); |
530 | |
531 | assert_eq_m256i(expected, actual); |
532 | } |
533 | |
    #[simd_test(enable = "avx512ifma,avx512vl")]
535 | unsafe fn test_mm256_madd52lo_epu64() { |
536 | let a = _mm256_set1_epi64x(10 << 40); |
537 | let b = _mm256_set1_epi64x((11 << 40) + 4); |
538 | let c = _mm256_set1_epi64x((12 << 40) + 3); |
539 | |
540 | let actual = _mm256_madd52lo_epu64(a, b, c); |
541 | |
542 | // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52)) |
543 | let expected = _mm256_set1_epi64x(100055558127628); |
544 | |
545 | assert_eq_m256i(expected, actual); |
546 | } |
547 | |
    #[simd_test(enable = "avx512ifma,avx512vl")]
549 | unsafe fn test_mm256_mask_madd52lo_epu64() { |
550 | let a = _mm256_set1_epi64x(10 << 40); |
551 | let b = _mm256_set1_epi64x((11 << 40) + 4); |
552 | let c = _mm256_set1_epi64x((12 << 40) + 3); |
553 | |
554 | let actual = _mm256_mask_madd52lo_epu64(a, K, b, c); |
555 | |
556 | // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52)) |
557 | let mut expected = _mm256_set1_epi64x(100055558127628); |
558 | expected = _mm256_mask_blend_epi64(K, a, expected); |
559 | |
560 | assert_eq_m256i(expected, actual); |
561 | } |
562 | |
    #[simd_test(enable = "avx512ifma,avx512vl")]
564 | unsafe fn test_mm256_maskz_madd52lo_epu64() { |
565 | let a = _mm256_set1_epi64x(10 << 40); |
566 | let b = _mm256_set1_epi64x((11 << 40) + 4); |
567 | let c = _mm256_set1_epi64x((12 << 40) + 3); |
568 | |
569 | let actual = _mm256_maskz_madd52lo_epu64(K, a, b, c); |
570 | |
571 | // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52)) |
572 | let mut expected = _mm256_set1_epi64x(100055558127628); |
573 | expected = _mm256_mask_blend_epi64(K, _mm256_setzero_si256(), expected); |
574 | |
575 | assert_eq_m256i(expected, actual); |
576 | } |
577 | |
    #[simd_test(enable = "avxifma")]
579 | unsafe fn test_mm_madd52hi_avx_epu64() { |
580 | let a = _mm_set1_epi64x(10 << 40); |
581 | let b = _mm_set1_epi64x((11 << 40) + 4); |
582 | let c = _mm_set1_epi64x((12 << 40) + 3); |
583 | |
584 | let actual = _mm_madd52hi_avx_epu64(a, b, c); |
585 | |
586 | // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52) |
587 | let expected = _mm_set1_epi64x(11030549757952); |
588 | |
589 | assert_eq_m128i(expected, actual); |
590 | } |
591 | |
    #[simd_test(enable = "avx512ifma,avx512vl")]
593 | unsafe fn test_mm_madd52hi_epu64() { |
594 | let a = _mm_set1_epi64x(10 << 40); |
595 | let b = _mm_set1_epi64x((11 << 40) + 4); |
596 | let c = _mm_set1_epi64x((12 << 40) + 3); |
597 | |
598 | let actual = _mm_madd52hi_epu64(a, b, c); |
599 | |
600 | // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52) |
601 | let expected = _mm_set1_epi64x(11030549757952); |
602 | |
603 | assert_eq_m128i(expected, actual); |
604 | } |
605 | |
    #[simd_test(enable = "avx512ifma,avx512vl")]
607 | unsafe fn test_mm_mask_madd52hi_epu64() { |
608 | let a = _mm_set1_epi64x(10 << 40); |
609 | let b = _mm_set1_epi64x((11 << 40) + 4); |
610 | let c = _mm_set1_epi64x((12 << 40) + 3); |
611 | |
612 | let actual = _mm_mask_madd52hi_epu64(a, K, b, c); |
613 | |
614 | // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52) |
615 | let mut expected = _mm_set1_epi64x(11030549757952); |
616 | expected = _mm_mask_blend_epi64(K, a, expected); |
617 | |
618 | assert_eq_m128i(expected, actual); |
619 | } |
620 | |
    #[simd_test(enable = "avx512ifma,avx512vl")]
622 | unsafe fn test_mm_maskz_madd52hi_epu64() { |
623 | let a = _mm_set1_epi64x(10 << 40); |
624 | let b = _mm_set1_epi64x((11 << 40) + 4); |
625 | let c = _mm_set1_epi64x((12 << 40) + 3); |
626 | |
627 | let actual = _mm_maskz_madd52hi_epu64(K, a, b, c); |
628 | |
629 | // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52) |
630 | let mut expected = _mm_set1_epi64x(11030549757952); |
631 | expected = _mm_mask_blend_epi64(K, _mm_setzero_si128(), expected); |
632 | |
633 | assert_eq_m128i(expected, actual); |
634 | } |
635 | |
    #[simd_test(enable = "avxifma")]
637 | unsafe fn test_mm_madd52lo_avx_epu64() { |
638 | let a = _mm_set1_epi64x(10 << 40); |
639 | let b = _mm_set1_epi64x((11 << 40) + 4); |
640 | let c = _mm_set1_epi64x((12 << 40) + 3); |
641 | |
642 | let actual = _mm_madd52lo_avx_epu64(a, b, c); |
643 | |
644 | // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52)) |
645 | let expected = _mm_set1_epi64x(100055558127628); |
646 | |
647 | assert_eq_m128i(expected, actual); |
648 | } |
649 | |
    #[simd_test(enable = "avx512ifma,avx512vl")]
651 | unsafe fn test_mm_madd52lo_epu64() { |
652 | let a = _mm_set1_epi64x(10 << 40); |
653 | let b = _mm_set1_epi64x((11 << 40) + 4); |
654 | let c = _mm_set1_epi64x((12 << 40) + 3); |
655 | |
656 | let actual = _mm_madd52lo_epu64(a, b, c); |
657 | |
658 | // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52)) |
659 | let expected = _mm_set1_epi64x(100055558127628); |
660 | |
661 | assert_eq_m128i(expected, actual); |
662 | } |
663 | |
    #[simd_test(enable = "avx512ifma,avx512vl")]
665 | unsafe fn test_mm_mask_madd52lo_epu64() { |
666 | let a = _mm_set1_epi64x(10 << 40); |
667 | let b = _mm_set1_epi64x((11 << 40) + 4); |
668 | let c = _mm_set1_epi64x((12 << 40) + 3); |
669 | |
670 | let actual = _mm_mask_madd52lo_epu64(a, K, b, c); |
671 | |
672 | // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52)) |
673 | let mut expected = _mm_set1_epi64x(100055558127628); |
674 | expected = _mm_mask_blend_epi64(K, a, expected); |
675 | |
676 | assert_eq_m128i(expected, actual); |
677 | } |
678 | |
    #[simd_test(enable = "avx512ifma,avx512vl")]
680 | unsafe fn test_mm_maskz_madd52lo_epu64() { |
681 | let a = _mm_set1_epi64x(10 << 40); |
682 | let b = _mm_set1_epi64x((11 << 40) + 4); |
683 | let c = _mm_set1_epi64x((12 << 40) + 3); |
684 | |
685 | let actual = _mm_maskz_madd52lo_epu64(K, a, b, c); |
686 | |
687 | // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52)) |
688 | let mut expected = _mm_set1_epi64x(100055558127628); |
689 | expected = _mm_mask_blend_epi64(K, _mm_setzero_si128(), expected); |
690 | |
691 | assert_eq_m128i(expected, actual); |
692 | } |
693 | } |
694 | |