1//! Vectorized AES Instructions (VAES)
2//!
3//! The intrinsics here correspond to those in the `immintrin.h` C header.
4//!
5//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
6//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
7//!
8//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
9
10use crate::core_arch::x86::__m256i;
11use crate::core_arch::x86::__m512i;
12
13#[cfg(test)]
14use stdarch_test::assert_instr;
15
16#[allow(improper_ctypes)]
17extern "C" {
18 #[link_name = "llvm.x86.aesni.aesenc.256"]
19 fn aesenc_256(a: __m256i, round_key: __m256i) -> __m256i;
20 #[link_name = "llvm.x86.aesni.aesenclast.256"]
21 fn aesenclast_256(a: __m256i, round_key: __m256i) -> __m256i;
22 #[link_name = "llvm.x86.aesni.aesdec.256"]
23 fn aesdec_256(a: __m256i, round_key: __m256i) -> __m256i;
24 #[link_name = "llvm.x86.aesni.aesdeclast.256"]
25 fn aesdeclast_256(a: __m256i, round_key: __m256i) -> __m256i;
26 #[link_name = "llvm.x86.aesni.aesenc.512"]
27 fn aesenc_512(a: __m512i, round_key: __m512i) -> __m512i;
28 #[link_name = "llvm.x86.aesni.aesenclast.512"]
29 fn aesenclast_512(a: __m512i, round_key: __m512i) -> __m512i;
30 #[link_name = "llvm.x86.aesni.aesdec.512"]
31 fn aesdec_512(a: __m512i, round_key: __m512i) -> __m512i;
32 #[link_name = "llvm.x86.aesni.aesdeclast.512"]
33 fn aesdeclast_512(a: __m512i, round_key: __m512i) -> __m512i;
34}
35
36/// Performs one round of an AES encryption flow on each 128-bit word (state) in `a` using
37/// the corresponding 128-bit word (key) in `round_key`.
38///
39/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_aesenc_epi128)
40#[inline]
41#[target_feature(enable = "vaes")]
42#[cfg_attr(test, assert_instr(vaesenc))]
43pub unsafe fn _mm256_aesenc_epi128(a: __m256i, round_key: __m256i) -> __m256i {
44 aesenc_256(a, round_key)
45}
46
47/// Performs the last round of an AES encryption flow on each 128-bit word (state) in `a` using
48/// the corresponding 128-bit word (key) in `round_key`.
49///
50/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_aesenclast_epi128)
51#[inline]
52#[target_feature(enable = "vaes")]
53#[cfg_attr(test, assert_instr(vaesenclast))]
54pub unsafe fn _mm256_aesenclast_epi128(a: __m256i, round_key: __m256i) -> __m256i {
55 aesenclast_256(a, round_key)
56}
57
58/// Performs one round of an AES decryption flow on each 128-bit word (state) in `a` using
59/// the corresponding 128-bit word (key) in `round_key`.
60///
61/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_aesdec_epi128)
62#[inline]
63#[target_feature(enable = "vaes")]
64#[cfg_attr(test, assert_instr(vaesdec))]
65pub unsafe fn _mm256_aesdec_epi128(a: __m256i, round_key: __m256i) -> __m256i {
66 aesdec_256(a, round_key)
67}
68
69/// Performs the last round of an AES decryption flow on each 128-bit word (state) in `a` using
70/// the corresponding 128-bit word (key) in `round_key`.
71///
72/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_aesdeclast_epi128)
73#[inline]
74#[target_feature(enable = "vaes")]
75#[cfg_attr(test, assert_instr(vaesdeclast))]
76pub unsafe fn _mm256_aesdeclast_epi128(a: __m256i, round_key: __m256i) -> __m256i {
77 aesdeclast_256(a, round_key)
78}
79
80/// Performs one round of an AES encryption flow on each 128-bit word (state) in `a` using
81/// the corresponding 128-bit word (key) in `round_key`.
82///
83/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_aesenc_epi128)
84#[inline]
85#[target_feature(enable = "vaes,avx512f")]
86#[cfg_attr(test, assert_instr(vaesenc))]
87pub unsafe fn _mm512_aesenc_epi128(a: __m512i, round_key: __m512i) -> __m512i {
88 aesenc_512(a, round_key)
89}
90
91/// Performs the last round of an AES encryption flow on each 128-bit word (state) in `a` using
92/// the corresponding 128-bit word (key) in `round_key`.
93///
94/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_aesenclast_epi128)
95#[inline]
96#[target_feature(enable = "vaes,avx512f")]
97#[cfg_attr(test, assert_instr(vaesenclast))]
98pub unsafe fn _mm512_aesenclast_epi128(a: __m512i, round_key: __m512i) -> __m512i {
99 aesenclast_512(a, round_key)
100}
101
102/// Performs one round of an AES decryption flow on each 128-bit word (state) in `a` using
103/// the corresponding 128-bit word (key) in `round_key`.
104///
105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_aesdec_epi128)
106#[inline]
107#[target_feature(enable = "vaes,avx512f")]
108#[cfg_attr(test, assert_instr(vaesdec))]
109pub unsafe fn _mm512_aesdec_epi128(a: __m512i, round_key: __m512i) -> __m512i {
110 aesdec_512(a, round_key)
111}
112
113/// Performs the last round of an AES decryption flow on each 128-bit word (state) in `a` using
114/// the corresponding 128-bit word (key) in `round_key`.
115///
116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_aesdeclast_epi128)
117#[inline]
118#[target_feature(enable = "vaes,avx512f")]
119#[cfg_attr(test, assert_instr(vaesdeclast))]
120pub unsafe fn _mm512_aesdeclast_epi128(a: __m512i, round_key: __m512i) -> __m512i {
121 aesdeclast_512(a, round_key)
122}
123
#[cfg(test)]
mod tests {
    // Every hex constant in this module is a plain bit pattern. Reading them
    // as integers (signed or otherwise) is meaningless; `__mXXXi` just happens
    // to be defined in terms of signed integers, hence the lint exemption.
    #![allow(overflowing_literals)]

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    // Each test has two halves:
    //   1. a straight port of the matching AES-NI test, with the state, key and
    //      expected value broadcast to every 128-bit lane;
    //   2. a direct comparison of the vectorized intrinsic against the AES-NI
    //      one, using inputs that differ between lanes and are "more random"
    //      than the standard test vectors.
    // Ideally quickcheck would be used for the second half instead.

    /// Builds the shared AES-NI test state and key, then widens both with `splat`.
    ///
    /// Returns `(state, key)`.
    #[target_feature(enable = "sse2")]
    unsafe fn broadcast_state_and_key<V>(splat: unsafe fn(__m128i) -> V) -> (V, V) {
        // Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx.
        let state = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
        let key = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee);
        (splat(state), splat(key))
    }

    /// 256-bit `(state, key)` pair with the shared vectors in every lane.
    #[target_feature(enable = "avx2")]
    unsafe fn state_and_key_256() -> (__m256i, __m256i) {
        broadcast_state_and_key(_mm256_broadcastsi128_si256)
    }

    /// 512-bit `(state, key)` pair with the shared vectors in every lane.
    #[target_feature(enable = "avx512f")]
    unsafe fn state_and_key_512() -> (__m512i, __m512i) {
        broadcast_state_and_key(_mm512_broadcast_i32x4)
    }

    /// Asserts that `vectorized` yields, in each 128-bit lane, exactly what
    /// `reference` yields for that lane's state and key.
    #[target_feature(enable = "avx2")]
    unsafe fn compare_lanes_256(
        reference: unsafe fn(__m128i, __m128i) -> __m128i,
        vectorized: unsafe fn(__m256i, __m256i) -> __m256i,
    ) {
        let state = _mm256_set_epi64x(
            0xDCB4DB3657BF0B7D,
            0x18DB0601068EDD9F,
            0xB76B908233200DC5,
            0xE478235FA8E22D5E,
        );
        let key = _mm256_set_epi64x(
            0x672F6F105A94CEA7,
            0x8298B8FFCA5F829C,
            0xA3927047B3FB61D8,
            0x978093862CDE7187,
        );

        let state_lanes = [
            _mm256_extracti128_si256::<0>(state),
            _mm256_extracti128_si256::<1>(state),
        ];
        let key_lanes = [
            _mm256_extracti128_si256::<0>(key),
            _mm256_extracti128_si256::<1>(key),
        ];

        let actual = vectorized(state, key);
        let actual_lanes = [
            _mm256_extracti128_si256::<0>(actual),
            _mm256_extracti128_si256::<1>(actual),
        ];

        let lanes = state_lanes.iter().zip(&key_lanes).zip(&actual_lanes);
        for ((&lane_state, &lane_key), &lane_actual) in lanes {
            assert_eq_m128i(lane_actual, reference(lane_state, lane_key));
        }
    }

    /// Asserts that `vectorized` yields, in each 128-bit lane, exactly what
    /// `reference` yields for that lane's state and key.
    #[target_feature(enable = "avx512f")]
    unsafe fn compare_lanes_512(
        reference: unsafe fn(__m128i, __m128i) -> __m128i,
        vectorized: unsafe fn(__m512i, __m512i) -> __m512i,
    ) {
        let state = _mm512_set_epi64(
            0xDCB4DB3657BF0B7D,
            0x18DB0601068EDD9F,
            0xB76B908233200DC5,
            0xE478235FA8E22D5E,
            0xAB05CFFA2621154C,
            0x1171B47A186174C9,
            0x8C6B6C0E7595CEC9,
            0xBE3E7D4934E961BD,
        );
        let key = _mm512_set_epi64(
            0x672F6F105A94CEA7,
            0x8298B8FFCA5F829C,
            0xA3927047B3FB61D8,
            0x978093862CDE7187,
            0xB1927AB22F31D0EC,
            0xA9A5DA619BE4D7AF,
            0xCA2590F56884FDC6,
            0x19BE9F660038BDB5,
        );

        let state_lanes = [
            _mm512_extracti32x4_epi32::<0>(state),
            _mm512_extracti32x4_epi32::<1>(state),
            _mm512_extracti32x4_epi32::<2>(state),
            _mm512_extracti32x4_epi32::<3>(state),
        ];
        let key_lanes = [
            _mm512_extracti32x4_epi32::<0>(key),
            _mm512_extracti32x4_epi32::<1>(key),
            _mm512_extracti32x4_epi32::<2>(key),
            _mm512_extracti32x4_epi32::<3>(key),
        ];

        let actual = vectorized(state, key);
        let actual_lanes = [
            _mm512_extracti32x4_epi32::<0>(actual),
            _mm512_extracti32x4_epi32::<1>(actual),
            _mm512_extracti32x4_epi32::<2>(actual),
            _mm512_extracti32x4_epi32::<3>(actual),
        ];

        let lanes = state_lanes.iter().zip(&key_lanes).zip(&actual_lanes);
        for ((&lane_state, &lane_key), &lane_actual) in lanes {
            assert_eq_m128i(lane_actual, reference(lane_state, lane_key));
        }
    }

    #[simd_test(enable = "vaes,avx512vl")]
    unsafe fn test_mm256_aesdec_epi128() {
        // Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx.
        let (state, key) = state_and_key_256();
        let expected =
            _mm256_broadcastsi128_si256(_mm_set_epi64x(0x044e4f5176fec48f, 0xb57ecfa381da39ee));
        assert_eq_m256i(_mm256_aesdec_epi128(state, key), expected);

        compare_lanes_256(_mm_aesdec_si128, _mm256_aesdec_epi128);
    }

    #[simd_test(enable = "vaes,avx512vl")]
    unsafe fn test_mm256_aesdeclast_epi128() {
        // Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx.
        let (state, key) = state_and_key_256();
        let expected =
            _mm256_broadcastsi128_si256(_mm_set_epi64x(0x36cad57d9072bf9e, 0xf210dd981fa4a493));
        assert_eq_m256i(_mm256_aesdeclast_epi128(state, key), expected);

        compare_lanes_256(_mm_aesdeclast_si128, _mm256_aesdeclast_epi128);
    }

    #[simd_test(enable = "vaes,avx512vl")]
    unsafe fn test_mm256_aesenc_epi128() {
        // Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx,
        // repeated across the lanes as appropriate.
        let (state, key) = state_and_key_256();
        let expected =
            _mm256_broadcastsi128_si256(_mm_set_epi64x(0x16ab0e57dfc442ed, 0x28e4ee1884504333));
        assert_eq_m256i(_mm256_aesenc_epi128(state, key), expected);

        compare_lanes_256(_mm_aesenc_si128, _mm256_aesenc_epi128);
    }

    #[simd_test(enable = "vaes,avx512vl")]
    unsafe fn test_mm256_aesenclast_epi128() {
        // Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx.
        let (state, key) = state_and_key_256();
        let expected =
            _mm256_broadcastsi128_si256(_mm_set_epi64x(0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8));
        assert_eq_m256i(_mm256_aesenclast_epi128(state, key), expected);

        compare_lanes_256(_mm_aesenclast_si128, _mm256_aesenclast_epi128);
    }

    #[simd_test(enable = "vaes,avx512f")]
    unsafe fn test_mm512_aesdec_epi128() {
        // Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx.
        let (state, key) = state_and_key_512();
        let expected =
            _mm512_broadcast_i32x4(_mm_set_epi64x(0x044e4f5176fec48f, 0xb57ecfa381da39ee));
        assert_eq_m512i(_mm512_aesdec_epi128(state, key), expected);

        compare_lanes_512(_mm_aesdec_si128, _mm512_aesdec_epi128);
    }

    #[simd_test(enable = "vaes,avx512f")]
    unsafe fn test_mm512_aesdeclast_epi128() {
        // Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx.
        let (state, key) = state_and_key_512();
        let expected =
            _mm512_broadcast_i32x4(_mm_set_epi64x(0x36cad57d9072bf9e, 0xf210dd981fa4a493));
        assert_eq_m512i(_mm512_aesdeclast_epi128(state, key), expected);

        compare_lanes_512(_mm_aesdeclast_si128, _mm512_aesdeclast_epi128);
    }

    #[simd_test(enable = "vaes,avx512f")]
    unsafe fn test_mm512_aesenc_epi128() {
        // Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx.
        let (state, key) = state_and_key_512();
        let expected =
            _mm512_broadcast_i32x4(_mm_set_epi64x(0x16ab0e57dfc442ed, 0x28e4ee1884504333));
        assert_eq_m512i(_mm512_aesenc_epi128(state, key), expected);

        compare_lanes_512(_mm_aesenc_si128, _mm512_aesenc_epi128);
    }

    #[simd_test(enable = "vaes,avx512f")]
    unsafe fn test_mm512_aesenclast_epi128() {
        // Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx.
        let (state, key) = state_and_key_512();
        let expected =
            _mm512_broadcast_i32x4(_mm_set_epi64x(0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8));
        assert_eq_m512i(_mm512_aesenclast_epi128(state, key), expected);

        compare_lanes_512(_mm_aesenclast_si128, _mm512_aesenclast_epi128);
    }
}
333