use crate::core_arch::{simd::*, x86::*};

#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.sha1msg1"]
    unsafe fn sha1msg1(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sha1msg2"]
    unsafe fn sha1msg2(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sha1nexte"]
    unsafe fn sha1nexte(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sha1rnds4"]
    unsafe fn sha1rnds4(a: i32x4, b: i32x4, c: i8) -> i32x4;
    #[link_name = "llvm.x86.sha256msg1"]
    unsafe fn sha256msg1(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sha256msg2"]
    unsafe fn sha256msg2(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sha256rnds2"]
    unsafe fn sha256rnds2(a: i32x4, b: i32x4, k: i32x4) -> i32x4;
    #[link_name = "llvm.x86.vsha512msg1"]
    unsafe fn vsha512msg1(a: i64x4, b: i64x2) -> i64x4;
    #[link_name = "llvm.x86.vsha512msg2"]
    unsafe fn vsha512msg2(a: i64x4, b: i64x4) -> i64x4;
    #[link_name = "llvm.x86.vsha512rnds2"]
    unsafe fn vsha512rnds2(a: i64x4, b: i64x4, k: i64x2) -> i64x4;
    #[link_name = "llvm.x86.vsm3msg1"]
    unsafe fn vsm3msg1(a: i32x4, b: i32x4, c: i32x4) -> i32x4;
    #[link_name = "llvm.x86.vsm3msg2"]
    unsafe fn vsm3msg2(a: i32x4, b: i32x4, c: i32x4) -> i32x4;
    #[link_name = "llvm.x86.vsm3rnds2"]
    unsafe fn vsm3rnds2(a: i32x4, b: i32x4, c: i32x4, d: i32) -> i32x4;
    #[link_name = "llvm.x86.vsm4key4128"]
    unsafe fn vsm4key4128(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.vsm4key4256"]
    unsafe fn vsm4key4256(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.vsm4rnds4128"]
    unsafe fn vsm4rnds4128(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.vsm4rnds4256"]
    unsafe fn vsm4rnds4256(a: i32x8, b: i32x8) -> i32x8;
}

#[cfg(test)]
use stdarch_test::assert_instr;

/// Performs an intermediate calculation for the next four SHA1 message values
/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
/// and returns the result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1msg1_epu32)
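///
/// # Examples
///
/// A minimal sketch (not from Intel's documentation) of one SHA1 message
/// scheduling step; the message words are arbitrary placeholder values:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("sha") {
///         // SAFETY: the `sha` feature was verified at runtime above.
///         unsafe {
///             let w0_3 = _mm_set_epi32(3, 2, 1, 0);
///             let w4_7 = _mm_set_epi32(7, 6, 5, 4);
///             let w8_11 = _mm_set_epi32(11, 10, 9, 8);
///             let w12_15 = _mm_set_epi32(15, 14, 13, 12);
///             // w16..w19 = sha1msg2(sha1msg1(w0..3, w4..7) ^ w8..11, w12..15)
///             let partial = _mm_xor_si128(_mm_sha1msg1_epu32(w0_3, w4_7), w8_11);
///             let _w16_19 = _mm_sha1msg2_epu32(partial, w12_15);
///         }
///     }
/// }
/// ```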
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha1msg1))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sha1msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(sha1msg1(a.as_i32x4(), b.as_i32x4())) }
}

/// Performs the final calculation for the next four SHA1 message values
/// (unsigned 32-bit integers) using the intermediate result in `a` and the
/// previous message values in `b`, and returns the result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1msg2_epu32)
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha1msg2))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sha1msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(sha1msg2(a.as_i32x4(), b.as_i32x4())) }
}

/// Calculates the SHA1 state variable E after four rounds of operation from
/// the current SHA1 state variable `a`, adds that value to the scheduled
/// values (unsigned 32-bit integers) in `b`, and returns the result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1nexte_epu32)
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha1nexte))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(sha1nexte(a.as_i32x4(), b.as_i32x4())) }
}

/// Performs four rounds of SHA1 operation using an initial SHA1 state (A,B,C,D)
/// from `a`, and a pre-computed sum of the next 4 round message values
/// (unsigned 32-bit integers) and state variable E from `b`, and returns the
/// updated SHA1 state (A,B,C,D). `FUNC` selects the logic functions and round
/// constants.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1rnds4_epu32)
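///
/// # Examples
///
/// A minimal sketch (not from Intel's documentation) of one four-round step,
/// starting from the standard SHA1 initial state with placeholder message
/// words:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("sha") {
///         // SAFETY: the `sha` feature was verified at runtime above.
///         unsafe {
///             let abcd = _mm_set_epi32(
///                 0x67452301,
///                 0xefcdab89u32 as i32,
///                 0x98badcfeu32 as i32,
///                 0x10325476,
///             );
///             // E goes in the highest dword of its operand.
///             let e = _mm_set_epi32(0xc3d2e1f0u32 as i32, 0, 0, 0);
///             let w0_3 = _mm_set_epi32(3, 2, 1, 0); // placeholder words
///             // Combine E with the scheduled words, then run four rounds;
///             // FUNC = 0 selects the logic/constants for rounds 0..=19.
///             let e_w = _mm_sha1nexte_epu32(e, w0_3);
///             let _abcd_next = _mm_sha1rnds4_epu32::<0>(abcd, e_w);
///         }
///     }
/// }
/// ```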
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha1rnds4, FUNC = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sha1rnds4_epu32<const FUNC: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(FUNC, 2);
    unsafe { transmute(sha1rnds4(a.as_i32x4(), b.as_i32x4(), FUNC as i8)) }
}

/// Performs an intermediate calculation for the next four SHA256 message values
/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
/// and returns the result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256msg1_epu32)
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha256msg1))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sha256msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(sha256msg1(a.as_i32x4(), b.as_i32x4())) }
}

/// Performs the final calculation for the next four SHA256 message values
/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
/// and returns the result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256msg2_epu32)
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha256msg2))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(sha256msg2(a.as_i32x4(), b.as_i32x4())) }
}

/// Performs 2 rounds of SHA256 operation using an initial SHA256 state
/// (C,D,G,H) from `a`, an initial SHA256 state (A,B,E,F) from `b`, and a
/// pre-computed sum of the next 2 round message values (unsigned 32-bit
/// integers) and the corresponding round constants from `k`, and returns the
/// updated SHA256 state (A,B,E,F).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256rnds2_epu32)
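///
/// # Examples
///
/// A minimal sketch (not from Intel's documentation) of four rounds done as
/// two calls; the state and the pre-summed message words are placeholders:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("sha") {
///         // SAFETY: the `sha` feature was verified at runtime above.
///         unsafe {
///             let abef = _mm_set_epi32(7, 6, 5, 4); // placeholder (A,B,E,F)
///             let cdgh = _mm_set_epi32(3, 2, 1, 0); // placeholder (C,D,G,H)
///             // Dwords 0..1 stand in for w0+K0 and w1+K1, dwords 2..3 for
///             // w2+K2 and w3+K3.
///             let wk = _mm_set_epi32(13, 12, 11, 10);
///             // Rounds 0 and 1.
///             let abef_01 = _mm_sha256rnds2_epu32(cdgh, abef, wk);
///             // Rounds 2 and 3: the old (A,B,E,F) becomes the new (C,D,G,H),
///             // and the upper half of `wk` is moved into the lower half.
///             let wk_hi = _mm_shuffle_epi32::<0x0E>(wk);
///             let _abef_23 = _mm_sha256rnds2_epu32(abef, abef_01, wk_hi);
///         }
///     }
/// }
/// ```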
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha256rnds2))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sha256rnds2_epu32(a: __m128i, b: __m128i, k: __m128i) -> __m128i {
    unsafe { transmute(sha256rnds2(a.as_i32x4(), b.as_i32x4(), k.as_i32x4())) }
}

/// This intrinsic is one of the two SHA512 message scheduling instructions.
/// It performs an intermediate calculation for the next four SHA512 message
/// qwords. The calculated results are stored in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sha512msg1_epi64)
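///
/// # Examples
///
/// A minimal sketch (not from Intel's documentation); the message qwords are
/// placeholder values, and CPU support is assumed to have been verified
/// beforehand (e.g. with `is_x86_feature_detected!`):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     // SAFETY: illustrative only; a real caller must first verify that
///     // both `sha512` and `avx` are available at runtime.
///     unsafe {
///         let w0_3 = _mm256_set_epi64x(3, 2, 1, 0); // placeholder w3..w0
///         let w4_5 = _mm_set_epi64x(5, 4); // placeholder w5, w4
///         let _partial = _mm256_sha512msg1_epi64(w0_3, w4_5);
///     }
/// }
/// ```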
#[inline]
#[target_feature(enable = "sha512,avx")]
#[cfg_attr(test, assert_instr(vsha512msg1))]
#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_sha512msg1_epi64(a: __m256i, b: __m128i) -> __m256i {
    unsafe { transmute(vsha512msg1(a.as_i64x4(), b.as_i64x2())) }
}

/// This intrinsic is one of the two SHA512 message scheduling instructions.
/// It performs the final calculation for the next four SHA512 message qwords.
/// The calculated results are stored in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sha512msg2_epi64)
#[inline]
#[target_feature(enable = "sha512,avx")]
#[cfg_attr(test, assert_instr(vsha512msg2))]
#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_sha512msg2_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vsha512msg2(a.as_i64x4(), b.as_i64x4())) }
}

/// This intrinsic performs two rounds of SHA512 operation using an initial
/// SHA512 state `(C,D,G,H)` from `a`, an initial SHA512 state `(A,B,E,F)` from
/// `b`, and a pre-computed sum of the next two round message qwords and the
/// corresponding round constants from `k` (only the two lower qwords of the
/// third operand are used). The updated SHA512 state `(A,B,E,F)` is written to
/// dst, and dst can be used as the updated state `(C,D,G,H)` in later rounds.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sha512rnds2_epi64)
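///
/// # Examples
///
/// A minimal sketch (not from Intel's documentation) with placeholder state
/// values; CPU support is assumed to have been verified beforehand:
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     // SAFETY: illustrative only; a real caller must first verify that
///     // both `sha512` and `avx` are available at runtime.
///     unsafe {
///         let cdgh = _mm256_set_epi64x(3, 2, 1, 0); // placeholder (C,D,G,H)
///         let abef = _mm256_set_epi64x(7, 6, 5, 4); // placeholder (A,B,E,F)
///         let wk = _mm_set_epi64x(11, 10); // placeholder w1+K1, w0+K0
///         let _abef_next = _mm256_sha512rnds2_epi64(cdgh, abef, wk);
///         // In a full implementation the old `abef` would serve as the
///         // (C,D,G,H) input for the next two rounds.
///     }
/// }
/// ```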
#[inline]
#[target_feature(enable = "sha512,avx")]
#[cfg_attr(test, assert_instr(vsha512rnds2))]
#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_sha512rnds2_epi64(a: __m256i, b: __m256i, k: __m128i) -> __m256i {
    unsafe { transmute(vsha512rnds2(a.as_i64x4(), b.as_i64x4(), k.as_i64x2())) }
}

/// This is one of the two SM3 message scheduling intrinsics. It performs an
/// initial calculation for the next four SM3 message words. The calculated
/// results are stored in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sm3msg1_epi32)
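///
/// # Examples
///
/// A minimal sketch (not from Intel's documentation); the inputs are
/// placeholder vectors rather than real SM3 message words, and CPU support is
/// assumed to have been verified beforehand:
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     // SAFETY: illustrative only; a real caller must first verify that
///     // both `sm3` and `avx` are available at runtime.
///     unsafe {
///         let a = _mm_set_epi32(3, 2, 1, 0);
///         let b = _mm_set_epi32(7, 6, 5, 4);
///         let c = _mm_set_epi32(11, 10, 9, 8);
///         // The partial result is normally fed into `_mm_sm3msg2_epi32` to
///         // finish the schedule for the next four words.
///         let _partial = _mm_sm3msg1_epi32(a, b, c);
///     }
/// }
/// ```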
#[inline]
#[target_feature(enable = "sm3,avx")]
#[cfg_attr(test, assert_instr(vsm3msg1))]
#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_sm3msg1_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { transmute(vsm3msg1(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) }
}

/// This is one of the two SM3 message scheduling intrinsics. It performs the
/// final calculation for the next four SM3 message words. The calculated
/// results are stored in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sm3msg2_epi32)
#[inline]
#[target_feature(enable = "sm3,avx")]
#[cfg_attr(test, assert_instr(vsm3msg2))]
#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_sm3msg2_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { transmute(vsm3msg2(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) }
}

/// This intrinsic performs two rounds of SM3 operation using an initial SM3
/// state `(C, D, G, H)` from `a`, an initial SM3 state `(A, B, E, F)` from
/// `b`, and pre-computed message words from `c`. The state in `a` is assumed
/// to hold the non-rotated variables from the previous state. `IMM8` must
/// contain the even round number for the first of the two rounds computed by
/// this instruction; the computation masks `IMM8` with `0x3E`, so only even
/// round numbers from 0 through 62 are used. The updated SM3 state
/// `(A, B, E, F)` is stored in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sm3rnds2_epi32)
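///
/// # Examples
///
/// A minimal sketch (not from Intel's documentation) of rounds 30 and 31 with
/// placeholder state and message words; CPU support is assumed to have been
/// verified beforehand:
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     // SAFETY: illustrative only; a real caller must first verify that
///     // both `sm3` and `avx` are available at runtime.
///     unsafe {
///         let cdgh = _mm_set_epi32(3, 2, 1, 0); // placeholder (C,D,G,H)
///         let abef = _mm_set_epi32(7, 6, 5, 4); // placeholder (A,B,E,F)
///         let w = _mm_set_epi32(11, 10, 9, 8); // placeholder message words
///         // IMM8 = 30 runs rounds 30 and 31 (the round number must be even).
///         let _next = _mm_sm3rnds2_epi32::<30>(cdgh, abef, w);
///     }
/// }
/// ```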
#[inline]
#[target_feature(enable = "sm3,avx")]
#[cfg_attr(test, assert_instr(vsm3rnds2, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_sm3rnds2_epi32<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    static_assert!(
        IMM8 == (IMM8 & 0x3e),
        "IMM8 must be an even number in the range `0..=62`"
    );
    unsafe { transmute(vsm3rnds2(a.as_i32x4(), b.as_i32x4(), c.as_i32x4(), IMM8)) }
}

/// This intrinsic performs four rounds of SM4 key expansion. The intrinsic
/// operates on independent 128-bit lanes. The calculated results are stored
/// in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sm4key4_epi32)
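///
/// # Examples
///
/// A minimal sketch (not from Intel's documentation): expanding four round
/// keys from the previous four keys and the matching `CK` constants (both
/// placeholder values here); CPU support is assumed to have been verified
/// beforehand:
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     // SAFETY: illustrative only; a real caller must first verify that
///     // both `sm4` and `avx` are available at runtime.
///     unsafe {
///         let prev_keys = _mm_set_epi32(3, 2, 1, 0); // previous round keys
///         let ck = _mm_set_epi32(7, 6, 5, 4); // CK constants for these rounds
///         let _next_keys = _mm_sm4key4_epi32(prev_keys, ck);
///     }
/// }
/// ```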
#[inline]
#[target_feature(enable = "sm4,avx")]
#[cfg_attr(test, assert_instr(vsm4key4))]
#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_sm4key4_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vsm4key4128(a.as_i32x4(), b.as_i32x4())) }
}

/// This intrinsic performs four rounds of SM4 key expansion. The intrinsic
/// operates on independent 128-bit lanes. The calculated results are stored
/// in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sm4key4_epi32)
#[inline]
#[target_feature(enable = "sm4,avx")]
#[cfg_attr(test, assert_instr(vsm4key4))]
#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_sm4key4_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vsm4key4256(a.as_i32x8(), b.as_i32x8())) }
}

/// This intrinsic performs four rounds of SM4 encryption. The intrinsic
/// operates on independent 128-bit lanes. The calculated results are stored
/// in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sm4rnds4_epi32)
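///
/// # Examples
///
/// A minimal sketch (not from Intel's documentation): running four rounds of
/// the SM4 round function on a placeholder block with placeholder round keys;
/// CPU support is assumed to have been verified beforehand:
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     // SAFETY: illustrative only; a real caller must first verify that
///     // both `sm4` and `avx` are available at runtime.
///     unsafe {
///         let block = _mm_set_epi32(3, 2, 1, 0); // state words X3..X0
///         let rk = _mm_set_epi32(7, 6, 5, 4); // four round keys
///         let _next = _mm_sm4rnds4_epi32(block, rk);
///     }
/// }
/// ```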
#[inline]
#[target_feature(enable = "sm4,avx")]
#[cfg_attr(test, assert_instr(vsm4rnds4))]
#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_sm4rnds4_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vsm4rnds4128(a.as_i32x4(), b.as_i32x4())) }
}

/// This intrinsic performs four rounds of SM4 encryption. The intrinsic
/// operates on independent 128-bit lanes. The calculated results are stored
/// in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sm4rnds4_epi32)
#[inline]
#[target_feature(enable = "sm4,avx")]
#[cfg_attr(test, assert_instr(vsm4rnds4))]
#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_sm4rnds4_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vsm4rnds4256(a.as_i32x8(), b.as_i32x8())) }
}

#[cfg(test)]
mod tests {
    use crate::{
        core_arch::{simd::*, x86::*},
        hint::black_box,
    };
    use stdarch_test::simd_test;

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1msg1_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0x98829f34f74ad457, 0xda2b1a44d0b5ad3c);
        let r = _mm_sha1msg1_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1msg2_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0xf714b202d863d47d, 0x90c30d946b3d3b35);
        let r = _mm_sha1msg2_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1nexte_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0x2589d5be923f82a4, 0x59f111f13956c25b);
        let r = _mm_sha1nexte_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1rnds4_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0x32b13cd8322f5268, 0xc54420862bd9246f);
        let r = _mm_sha1rnds4_epu32::<0>(a, b);
        assert_eq_m128i(r, expected);

        let expected = _mm_set_epi64x(0x6d4c43e56a3c25d9, 0xa7e00fb775cbd3fe);
        let r = _mm_sha1rnds4_epu32::<1>(a, b);
        assert_eq_m128i(r, expected);

        let expected = _mm_set_epi64x(0xb304e383c01222f4, 0x66f6b3b1f89d8001);
        let r = _mm_sha1rnds4_epu32::<2>(a, b);
        assert_eq_m128i(r, expected);

        let expected = _mm_set_epi64x(0x8189b758bfabfa79, 0xdb08f6e78cae098b);
        let r = _mm_sha1rnds4_epu32::<3>(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha256msg1_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0xeb84973fd5cda67d, 0x2857b88f406b09ee);
        let r = _mm_sha256msg1_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha256msg2_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0xb58777ce887fd851, 0x15d1ec8b73ac8450);
        let r = _mm_sha256msg2_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha256rnds2_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let k = _mm_set_epi64x(0, 0x12835b01d807aa98);
        let expected = _mm_set_epi64x(0xd3063037effb15ea, 0x187ee3db0d6d1d19);
        let r = _mm_sha256rnds2_epu32(a, b, k);
        assert_eq_m128i(r, expected);
    }

    static DATA_64: [u64; 10] = [
        0x0011223344556677,
        0x8899aabbccddeeff,
        0xffeeddccbbaa9988,
        0x7766554433221100,
        0x0123456789abcdef,
        0xfedcba9876543210,
        0x02468ace13579bdf,
        0xfdb97531eca86420,
        0x048c159d26ae37bf,
        0xfb73ea62d951c840,
    ];

    #[simd_test(enable = "sha512,avx")]
    unsafe fn test_mm256_sha512msg1_epi64() {
        fn s0(word: u64) -> u64 {
            word.rotate_right(1) ^ word.rotate_right(8) ^ (word >> 7)
        }

        let A = &DATA_64[0..4];
        let B = &DATA_64[4..6];

        let a = _mm256_loadu_si256(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());

        let r = _mm256_sha512msg1_epi64(a, b);

        let e = _mm256_setr_epi64x(
            A[0].wrapping_add(s0(A[1])) as _,
            A[1].wrapping_add(s0(A[2])) as _,
            A[2].wrapping_add(s0(A[3])) as _,
            A[3].wrapping_add(s0(B[0])) as _,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "sha512,avx")]
    unsafe fn test_mm256_sha512msg2_epi64() {
        fn s1(word: u64) -> u64 {
            word.rotate_right(19) ^ word.rotate_right(61) ^ (word >> 6)
        }

        let A = &DATA_64[0..4];
        let B = &DATA_64[4..8];

        let a = _mm256_loadu_si256(A.as_ptr().cast());
        let b = _mm256_loadu_si256(B.as_ptr().cast());

        let r = _mm256_sha512msg2_epi64(a, b);

        let e0 = A[0].wrapping_add(s1(B[2]));
        let e1 = A[1].wrapping_add(s1(B[3]));
        let e = _mm256_setr_epi64x(
            e0 as _,
            e1 as _,
            A[2].wrapping_add(s1(e0)) as _,
            A[3].wrapping_add(s1(e1)) as _,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "sha512,avx")]
    unsafe fn test_mm256_sha512rnds2_epi64() {
        fn cap_sigma0(word: u64) -> u64 {
            word.rotate_right(28) ^ word.rotate_right(34) ^ word.rotate_right(39)
        }

        fn cap_sigma1(word: u64) -> u64 {
            word.rotate_right(14) ^ word.rotate_right(18) ^ word.rotate_right(41)
        }

        fn maj(a: u64, b: u64, c: u64) -> u64 {
            (a & b) ^ (a & c) ^ (b & c)
        }

        fn ch(e: u64, f: u64, g: u64) -> u64 {
            (e & f) ^ (g & !e)
        }

        let A = &DATA_64[0..4];
        let B = &DATA_64[4..8];
        let K = &DATA_64[8..10];

        let a = _mm256_loadu_si256(A.as_ptr().cast());
        let b = _mm256_loadu_si256(B.as_ptr().cast());
        let k = _mm_loadu_si128(K.as_ptr().cast());

        let r = _mm256_sha512rnds2_epi64(a, b, k);

        let mut array = [B[3], B[2], A[3], A[2], B[1], B[0], A[1], A[0]];
        for i in 0..2 {
            let new_d = ch(array[4], array[5], array[6])
                .wrapping_add(cap_sigma1(array[4]))
                .wrapping_add(K[i])
                .wrapping_add(array[7]);
            array[7] = new_d
                .wrapping_add(maj(array[0], array[1], array[2]))
                .wrapping_add(cap_sigma0(array[0]));
            array[3] = new_d.wrapping_add(array[3]);
            array.rotate_right(1);
        }
        let e = _mm256_setr_epi64x(array[5] as _, array[4] as _, array[1] as _, array[0] as _);

        assert_eq_m256i(r, e);
    }

    static DATA_32: [u32; 16] = [
        0x00112233, 0x44556677, 0x8899aabb, 0xccddeeff, 0xffeeddcc, 0xbbaa9988, 0x77665544,
        0x33221100, 0x01234567, 0x89abcdef, 0xfedcba98, 0x76543210, 0x02468ace, 0x13579bdf,
        0xfdb97531, 0xeca86420,
    ];

    #[simd_test(enable = "sm3,avx")]
    unsafe fn test_mm_sm3msg1_epi32() {
        fn p1(x: u32) -> u32 {
            x ^ x.rotate_left(15) ^ x.rotate_left(23)
        }
        let A = &DATA_32[0..4];
        let B = &DATA_32[4..8];
        let C = &DATA_32[8..12];

        let a = _mm_loadu_si128(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());
        let c = _mm_loadu_si128(C.as_ptr().cast());

        let r = _mm_sm3msg1_epi32(a, b, c);

        let e = _mm_setr_epi32(
            p1(A[0] ^ C[0] ^ B[0].rotate_left(15)) as _,
            p1(A[1] ^ C[1] ^ B[1].rotate_left(15)) as _,
            p1(A[2] ^ C[2] ^ B[2].rotate_left(15)) as _,
            p1(A[3] ^ C[3]) as _,
        );

        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sm3,avx")]
    unsafe fn test_mm_sm3msg2_epi32() {
        let A = &DATA_32[0..4];
        let B = &DATA_32[4..8];
        let C = &DATA_32[8..12];

        let a = _mm_loadu_si128(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());
        let c = _mm_loadu_si128(C.as_ptr().cast());

        let r = _mm_sm3msg2_epi32(a, b, c);

        let e0 = B[0].rotate_left(7) ^ C[0] ^ A[0];
        let e = _mm_setr_epi32(
            e0 as _,
            (B[1].rotate_left(7) ^ C[1] ^ A[1]) as _,
            (B[2].rotate_left(7) ^ C[2] ^ A[2]) as _,
            (B[3].rotate_left(7)
                ^ C[3]
                ^ A[3]
                ^ e0.rotate_left(6)
                ^ e0.rotate_left(15)
                ^ e0.rotate_left(30)) as _,
        );

        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sm3,avx")]
    unsafe fn test_mm_sm3rnds2_epi32() {
        fn p0(x: u32) -> u32 {
            x ^ x.rotate_left(9) ^ x.rotate_left(17)
        }
        fn ff(x: u32, y: u32, z: u32, round: u32) -> u32 {
            if round < 16 {
                x ^ y ^ z
            } else {
                (x & y) | (x & z) | (y & z)
            }
        }
        fn gg(x: u32, y: u32, z: u32, round: u32) -> u32 {
            if round < 16 {
                x ^ y ^ z
            } else {
                (x & y) | (!x & z)
            }
        }

        const ROUND: u32 = 30;

        let A = &DATA_32[0..4];
        let B = &DATA_32[4..8];
        let C = &DATA_32[8..12];

        let a = _mm_loadu_si128(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());
        let c = _mm_loadu_si128(C.as_ptr().cast());

        let r = _mm_sm3rnds2_epi32::<{ ROUND as i32 }>(a, b, c);

        let CONST: u32 = if ROUND < 16 { 0x79cc4519 } else { 0x7a879d8a };

        let mut array = [
            B[3],
            B[2],
            A[3].rotate_left(9),
            A[2].rotate_left(9),
            B[1],
            B[0],
            A[1].rotate_left(19),
            A[0].rotate_left(19),
        ];

        for i in 0..2 {
            let s1 = array[0]
                .rotate_left(12)
                .wrapping_add(array[4])
                .wrapping_add(CONST.rotate_left(ROUND + i as u32))
                .rotate_left(7);
            let s2 = s1 ^ array[0].rotate_left(12);

            let t1 = ff(array[0], array[1], array[2], ROUND)
                .wrapping_add(array[3])
                .wrapping_add(s2)
                .wrapping_add(C[i] ^ C[i + 2]);
            let t2 = gg(array[4], array[5], array[6], ROUND)
                .wrapping_add(array[7])
                .wrapping_add(s1)
                .wrapping_add(C[i]);

            array[3] = array[2];
            array[2] = array[1].rotate_left(9);
            array[1] = array[0];
            array[0] = t1;
            array[7] = array[6];
            array[6] = array[5].rotate_left(19);
            array[5] = array[4];
            array[4] = p0(t2);
        }

        let e = _mm_setr_epi32(array[5] as _, array[4] as _, array[1] as _, array[0] as _);

        assert_eq_m128i(r, e);
    }

    fn lower_t(x: u32) -> u32 {
        static SBOX: [u8; 256] = [
            0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, 0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB,
            0x2C, 0x05, 0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3, 0xAA, 0x44, 0x13, 0x26,
            0x49, 0x86, 0x06, 0x99, 0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A, 0x33, 0x54,
            0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62, 0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95,
            0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6, 0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73,
            0x17, 0xBA, 0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8, 0x68, 0x6B, 0x81, 0xB2,
            0x71, 0x64, 0xDA, 0x8B, 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35, 0x1E, 0x24,
            0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2, 0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87,
            0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52, 0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4,
            0xC8, 0x9E, 0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5, 0xA3, 0xF7, 0xF2, 0xCE,
            0xF9, 0x61, 0x15, 0xA1, 0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55, 0xAD, 0x93,
            0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3, 0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60,
            0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F, 0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD,
            0x8E, 0x2F, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51, 0x8D, 0x1B, 0xAF, 0x92,
            0xBB, 0xDD, 0xBC, 0x7F, 0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8, 0x0A, 0xC1,
            0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD, 0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0,
            0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E,
            0xC6, 0x84, 0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E,
            0xD7, 0xCB, 0x39, 0x48,
        ];

        ((SBOX[(x >> 24) as usize] as u32) << 24)
            | ((SBOX[((x >> 16) & 0xff) as usize] as u32) << 16)
            | ((SBOX[((x >> 8) & 0xff) as usize] as u32) << 8)
            | (SBOX[(x & 0xff) as usize] as u32)
    }

    #[simd_test(enable = "sm4,avx")]
    unsafe fn test_mm_sm4key4_epi32() {
        fn l_key(x: u32) -> u32 {
            x ^ x.rotate_left(13) ^ x.rotate_left(23)
        }
        fn f_key(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 {
            x0 ^ l_key(lower_t(x1 ^ x2 ^ x3 ^ rk))
        }

        let A = &DATA_32[0..4];
        let B = &DATA_32[4..8];

        let a = _mm_loadu_si128(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());

        let r = _mm_sm4key4_epi32(a, b);

        let e0 = f_key(A[0], A[1], A[2], A[3], B[0]);
        let e1 = f_key(A[1], A[2], A[3], e0, B[1]);
        let e2 = f_key(A[2], A[3], e0, e1, B[2]);
        let e3 = f_key(A[3], e0, e1, e2, B[3]);
        let e = _mm_setr_epi32(e0 as _, e1 as _, e2 as _, e3 as _);

        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sm4,avx")]
    unsafe fn test_mm256_sm4key4_epi32() {
        let a_low = _mm_loadu_si128(DATA_32.as_ptr().cast());
        let a_high = _mm_loadu_si128(DATA_32[4..].as_ptr().cast());
        let b_low = _mm_loadu_si128(DATA_32[8..].as_ptr().cast());
        let b_high = _mm_loadu_si128(DATA_32[12..].as_ptr().cast());

        let a = _mm256_set_m128i(a_high, a_low);
        let b = _mm256_set_m128i(b_high, b_low);

        let r = _mm256_sm4key4_epi32(a, b);

        let e_low = _mm_sm4key4_epi32(a_low, b_low);
        let e_high = _mm_sm4key4_epi32(a_high, b_high);
        let e = _mm256_set_m128i(e_high, e_low);

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "sm4,avx")]
    unsafe fn test_mm_sm4rnds4_epi32() {
        fn l_rnd(x: u32) -> u32 {
            x ^ x.rotate_left(2) ^ x.rotate_left(10) ^ x.rotate_left(18) ^ x.rotate_left(24)
        }
        fn f_rnd(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 {
            x0 ^ l_rnd(lower_t(x1 ^ x2 ^ x3 ^ rk))
        }

        let A = &DATA_32[0..4];
        let B = &DATA_32[4..8];

        let a = _mm_loadu_si128(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());

        let r = _mm_sm4rnds4_epi32(a, b);

        let e0 = f_rnd(A[0], A[1], A[2], A[3], B[0]);
        let e1 = f_rnd(A[1], A[2], A[3], e0, B[1]);
        let e2 = f_rnd(A[2], A[3], e0, e1, B[2]);
        let e3 = f_rnd(A[3], e0, e1, e2, B[3]);
        let e = _mm_setr_epi32(e0 as _, e1 as _, e2 as _, e3 as _);

        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sm4,avx")]
    unsafe fn test_mm256_sm4rnds4_epi32() {
        let a_low = _mm_loadu_si128(DATA_32.as_ptr().cast());
        let a_high = _mm_loadu_si128(DATA_32[4..].as_ptr().cast());
        let b_low = _mm_loadu_si128(DATA_32[8..].as_ptr().cast());
        let b_high = _mm_loadu_si128(DATA_32[12..].as_ptr().cast());

        let a = _mm256_set_m128i(a_high, a_low);
        let b = _mm256_set_m128i(b_high, b_low);

        let r = _mm256_sm4rnds4_epi32(a, b);

        let e_low = _mm_sm4rnds4_epi32(a_low, b_low);
        let e_high = _mm_sm4rnds4_epi32(a_high, b_high);
        let e = _mm256_set_m128i(e_high, e_low);

        assert_eq_m256i(r, e);
    }
}