// avx512ifma.rs source code [crates/core_arch/src/x86/avx512ifma.rs]

1	use crate::core_arch::x86::*;
2
3	#[cfg(test)]
4	use stdarch_test::assert_instr;
5
6	/// Multiply packed unsigned 52-bit integers in each 64-bit element of
7	/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
8	/// unsigned integer from the intermediate result with the
9	/// corresponding unsigned 64-bit integer in `a`, and store the
10	/// results in `dst`.
11	///
12	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512IFMA52&expand=3488)
13	#[inline]
14	#[target_feature(enable = "avx512ifma")]
15	#[cfg_attr(test, assert_instr(vpmadd52huq))]
16	pub unsafe fn _mm512_madd52hi_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
17	vpmadd52huq_512(z:a, x:b, y:c)
18	}
19
20	/// Multiply packed unsigned 52-bit integers in each 64-bit element of
21	/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
22	/// unsigned integer from the intermediate result with the
23	/// corresponding unsigned 64-bit integer in `a`, and store the
24	/// results in `dst`.
25	///
26	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=3497&avx512techs=AVX512IFMA52)
27	#[inline]
28	#[target_feature(enable = "avx512ifma")]
29	#[cfg_attr(test, assert_instr(vpmadd52luq))]
30	pub unsafe fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
31	vpmadd52luq_512(z:a, x:b, y:c)
32	}
33
34	/// Multiply packed unsigned 52-bit integers in each 64-bit element of
35	/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
36	/// unsigned integer from the intermediate result with the
37	/// corresponding unsigned 64-bit integer in `a`, and store the
38	/// results in `dst`.
39	///
40	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL&expand=3485)
41	#[inline]
42	#[target_feature(enable = "avx512ifma,avx512vl")]
43	#[cfg_attr(test, assert_instr(vpmadd52huq))]
44	pub unsafe fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
45	vpmadd52huq_256(z:a, x:b, y:c)
46	}
47
48	/// Multiply packed unsigned 52-bit integers in each 64-bit element of
49	/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
50	/// unsigned integer from the intermediate result with the
51	/// corresponding unsigned 64-bit integer in `a`, and store the
52	/// results in `dst`.
53	///
54	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL&expand=3494)
55	#[inline]
56	#[target_feature(enable = "avx512ifma,avx512vl")]
57	#[cfg_attr(test, assert_instr(vpmadd52luq))]
58	pub unsafe fn _mm256_madd52lo_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
59	vpmadd52luq_256(z:a, x:b, y:c)
60	}
61
62	/// Multiply packed unsigned 52-bit integers in each 64-bit element of
63	/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
64	/// unsigned integer from the intermediate result with the
65	/// corresponding unsigned 64-bit integer in `a`, and store the
66	/// results in `dst`.
67	///
68	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=3488,3482&text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL)
69	#[inline]
70	#[target_feature(enable = "avx512ifma,avx512vl")]
71	#[cfg_attr(test, assert_instr(vpmadd52huq))]
72	pub unsafe fn _mm_madd52hi_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
73	vpmadd52huq_128(z:a, x:b, y:c)
74	}
75
76	/// Multiply packed unsigned 52-bit integers in each 64-bit element of
77	/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
78	/// unsigned integer from the intermediate result with the
79	/// corresponding unsigned 64-bit integer in `a`, and store the
80	/// results in `dst`.
81	///
82	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=3488,3491&text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL)
83	#[inline]
84	#[target_feature(enable = "avx512ifma,avx512vl")]
85	#[cfg_attr(test, assert_instr(vpmadd52luq))]
86	pub unsafe fn _mm_madd52lo_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
87	vpmadd52luq_128(z:a, x:b, y:c)
88	}
89
90	#[allow(improper_ctypes)]
91	extern "C" {
92	#[link_name = "llvm.x86.avx512.vpmadd52l.uq.128"]
93	fn vpmadd52luq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
94	#[link_name = "llvm.x86.avx512.vpmadd52h.uq.128"]
95	fn vpmadd52huq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
96	#[link_name = "llvm.x86.avx512.vpmadd52l.uq.256"]
97	fn vpmadd52luq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
98	#[link_name = "llvm.x86.avx512.vpmadd52h.uq.256"]
99	fn vpmadd52huq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
100	#[link_name = "llvm.x86.avx512.vpmadd52l.uq.512"]
101	fn vpmadd52luq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
102	#[link_name = "llvm.x86.avx512.vpmadd52h.uq.512"]
103	fn vpmadd52huq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
104	}
105
#[cfg(test)]
mod tests {

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    // Every test uses the same per-lane operands:
    //   a = 10 << 40
    //   b = (11 << 40) + 4
    //   c = (12 << 40) + 3
    // so b * c = (132 << 80) + (81 << 40) + 12. The "hi" intrinsics add bits
    // 52..104 of that product to `a`; the "lo" intrinsics add bits 0..52.

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52hi_epu64() {
        let mut a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        a = _mm512_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm512_set1_epi64(11030549757952);

        assert_eq_m512i(a, expected);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52lo_epu64() {
        let mut a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        a = _mm512_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm512_set1_epi64(100055558127628);

        assert_eq_m512i(a, expected);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52hi_epu64() {
        let mut a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        a = _mm256_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm256_set1_epi64x(11030549757952);

        assert_eq_m256i(a, expected);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52lo_epu64() {
        let mut a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        a = _mm256_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm256_set1_epi64x(100055558127628);

        assert_eq_m256i(a, expected);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52hi_epu64() {
        let mut a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        a = _mm_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm_set1_epi64x(11030549757952);

        assert_eq_m128i(a, expected);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52lo_epu64() {
        let mut a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        // Exercise the low-half intrinsic this test is named after; calling
        // the `hi` variant here would leave `_mm_madd52lo_epu64` untested.
        a = _mm_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm_set1_epi64x(100055558127628);

        assert_eq_m128i(a, expected);
    }
}
197