sha.rs source code [crates/core_arch/src/x86/sha.rs]

use crate::core_arch::{simd::*, x86::*};
2
3	#[allow(improper_ctypes)]
4	unsafe extern "C" {
5	#[link_name = "llvm.x86.sha1msg1"]
6	unsafefn sha1msg1(a: i32x4, b: i32x4) -> i32x4;
7	#[link_name = "llvm.x86.sha1msg2"]
8	unsafefn sha1msg2(a: i32x4, b: i32x4) -> i32x4;
9	#[link_name = "llvm.x86.sha1nexte"]
10	unsafefn sha1nexte(a: i32x4, b: i32x4) -> i32x4;
11	#[link_name = "llvm.x86.sha1rnds4"]
12	unsafefn sha1rnds4(a: i32x4, b: i32x4, c: i8) -> i32x4;
13	#[link_name = "llvm.x86.sha256msg1"]
14	unsafefn sha256msg1(a: i32x4, b: i32x4) -> i32x4;
15	#[link_name = "llvm.x86.sha256msg2"]
16	unsafefn sha256msg2(a: i32x4, b: i32x4) -> i32x4;
17	#[link_name = "llvm.x86.sha256rnds2"]
18	unsafefn sha256rnds2(a: i32x4, b: i32x4, k: i32x4) -> i32x4;
19	#[link_name = "llvm.x86.vsha512msg1"]
20	unsafefn vsha512msg1(a: i64x4, b: i64x2) -> i64x4;
21	#[link_name = "llvm.x86.vsha512msg2"]
22	unsafefn vsha512msg2(a: i64x4, b: i64x4) -> i64x4;
23	#[link_name = "llvm.x86.vsha512rnds2"]
24	unsafefn vsha512rnds2(a: i64x4, b: i64x4, k: i64x2) -> i64x4;
25	#[link_name = "llvm.x86.vsm3msg1"]
26	unsafefn vsm3msg1(a: i32x4, b: i32x4, c: i32x4) -> i32x4;
27	#[link_name = "llvm.x86.vsm3msg2"]
28	unsafefn vsm3msg2(a: i32x4, b: i32x4, c: i32x4) -> i32x4;
29	#[link_name = "llvm.x86.vsm3rnds2"]
30	unsafefn vsm3rnds2(a: i32x4, b: i32x4, c: i32x4, d: i32) -> i32x4;
31	#[link_name = "llvm.x86.vsm4key4128"]
32	unsafefn vsm4key4128(a: i32x4, b: i32x4) -> i32x4;
33	#[link_name = "llvm.x86.vsm4key4256"]
34	unsafefn vsm4key4256(a: i32x8, b: i32x8) -> i32x8;
35	#[link_name = "llvm.x86.vsm4rnds4128"]
36	unsafefn vsm4rnds4128(a: i32x4, b: i32x4) -> i32x4;
37	#[link_name = "llvm.x86.vsm4rnds4256"]
38	unsafefn vsm4rnds4256(a: i32x8, b: i32x8) -> i32x8;
39	}
40
41	#[cfg(test)]
42	use stdarch_test::assert_instr;
43
44	/// Performs an intermediate calculation for the next four SHA1 message values
45	/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
46	/// and returning the result.
47	///
48	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1msg1_epu32)
49	#[inline]
50	#[target_feature(enable = "sha")]
51	#[cfg_attr(test, assert_instr(sha1msg1))]
52	#[stable(feature = "simd_x86", since = "1.27.0")]
53	pub fn _mm_sha1msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
54	unsafe { transmute(src:sha1msg1(a.as_i32x4(), b.as_i32x4())) }
55	}
56
57	/// Performs the final calculation for the next four SHA1 message values
58	/// (unsigned 32-bit integers) using the intermediate result in `a` and the
59	/// previous message values in `b`, and returns the result.
60	///
61	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1msg2_epu32)
62	#[inline]
63	#[target_feature(enable = "sha")]
64	#[cfg_attr(test, assert_instr(sha1msg2))]
65	#[stable(feature = "simd_x86", since = "1.27.0")]
66	pub fn _mm_sha1msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
67	unsafe { transmute(src:sha1msg2(a.as_i32x4(), b.as_i32x4())) }
68	}
69
70	/// Calculate SHA1 state variable E after four rounds of operation from the
71	/// current SHA1 state variable `a`, add that value to the scheduled values
72	/// (unsigned 32-bit integers) in `b`, and returns the result.
73	///
74	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1nexte_epu32)
75	#[inline]
76	#[target_feature(enable = "sha")]
77	#[cfg_attr(test, assert_instr(sha1nexte))]
78	#[stable(feature = "simd_x86", since = "1.27.0")]
79	pub fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i {
80	unsafe { transmute(src:sha1nexte(a.as_i32x4(), b.as_i32x4())) }
81	}
82
83	/// Performs four rounds of SHA1 operation using an initial SHA1 state (A,B,C,D)
84	/// from `a` and some pre-computed sum of the next 4 round message values
85	/// (unsigned 32-bit integers), and state variable E from `b`, and return the
86	/// updated SHA1 state (A,B,C,D). `FUNC` contains the logic functions and round
87	/// constants.
88	///
89	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1rnds4_epu32)
90	#[inline]
91	#[target_feature(enable = "sha")]
92	#[cfg_attr(test, assert_instr(sha1rnds4, FUNC = `0`))]
93	#[rustc_legacy_const_generics(`2`)]
94	#[stable(feature = "simd_x86", since = "1.27.0")]
95	pub fn _mm_sha1rnds4_epu32<const FUNC: i32>(a: __m128i, b: __m128i) -> __m128i {
96	static_assert_uimm_bits!(FUNC, `2`);
97	unsafe { transmute(src:sha1rnds4(a.as_i32x4(), b.as_i32x4(), FUNC as i8)) }
98	}
99
100	/// Performs an intermediate calculation for the next four SHA256 message values
101	/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
102	/// and return the result.
103	///
104	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256msg1_epu32)
105	#[inline]
106	#[target_feature(enable = "sha")]
107	#[cfg_attr(test, assert_instr(sha256msg1))]
108	#[stable(feature = "simd_x86", since = "1.27.0")]
109	pub fn _mm_sha256msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
110	unsafe { transmute(src:sha256msg1(a.as_i32x4(), b.as_i32x4())) }
111	}
112
113	/// Performs the final calculation for the next four SHA256 message values
114	/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
115	/// and return the result.
116	///
117	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256msg2_epu32)
118	#[inline]
119	#[target_feature(enable = "sha")]
120	#[cfg_attr(test, assert_instr(sha256msg2))]
121	#[stable(feature = "simd_x86", since = "1.27.0")]
122	pub fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
123	unsafe { transmute(src:sha256msg2(a.as_i32x4(), b.as_i32x4())) }
124	}
125
126	/// Performs 2 rounds of SHA256 operation using an initial SHA256 state
127	/// (C,D,G,H) from `a`, an initial SHA256 state (A,B,E,F) from `b`, and a
128	/// pre-computed sum of the next 2 round message values (unsigned 32-bit
129	/// integers) and the corresponding round constants from `k`, and store the
130	/// updated SHA256 state (A,B,E,F) in dst.
131	///
132	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256rnds2_epu32)
133	#[inline]
134	#[target_feature(enable = "sha")]
135	#[cfg_attr(test, assert_instr(sha256rnds2))]
136	#[stable(feature = "simd_x86", since = "1.27.0")]
137	pub fn _mm_sha256rnds2_epu32(a: __m128i, b: __m128i, k: __m128i) -> __m128i {
138	unsafe { transmute(src:sha256rnds2(a.as_i32x4(), b.as_i32x4(), k.as_i32x4())) }
139	}
140
141	/// This intrinsic is one of the two SHA512 message scheduling instructions.
142	/// The intrinsic performs an intermediate calculation for the next four SHA512
143	/// message qwords. The calculated results are stored in dst.
144	///
145	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sha512msg1_epi64)
146	#[inline]
147	#[target_feature(enable = "sha512,avx")]
148	#[cfg_attr(test, assert_instr(vsha512msg1))]
149	#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
150	pub fn _mm256_sha512msg1_epi64(a: __m256i, b: __m128i) -> __m256i {
151	unsafe { transmute(src:vsha512msg1(a.as_i64x4(), b.as_i64x2())) }
152	}
153
154	/// This intrinsic is one of the two SHA512 message scheduling instructions.
155	/// The intrinsic performs the final calculation for the next four SHA512 message
156	/// qwords. The calculated results are stored in dst.
157	///
158	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sha512msg2_epi64)
159	#[inline]
160	#[target_feature(enable = "sha512,avx")]
161	#[cfg_attr(test, assert_instr(vsha512msg2))]
162	#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
163	pub fn _mm256_sha512msg2_epi64(a: __m256i, b: __m256i) -> __m256i {
164	unsafe { transmute(src:vsha512msg2(a.as_i64x4(), b.as_i64x4())) }
165	}
166
167	/// This intrinsic performs two rounds of SHA512 operation using initial SHA512 state
168	/// `(C,D,G,H)` from `a`, an initial SHA512 state `(A,B,E,F)` from `b`, and a
169	/// pre-computed sum of the next two round message qwords and the corresponding
170	/// round constants from `c` (only the two lower qwords of the third operand). The
171	/// updated SHA512 state `(A,B,E,F)` is written to dst, and dst can be used as the
172	/// updated state `(C,D,G,H)` in later rounds.
173	///
174	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sha512rnds2_epi64)
175	#[inline]
176	#[target_feature(enable = "sha512,avx")]
177	#[cfg_attr(test, assert_instr(vsha512rnds2))]
178	#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
179	pub fn _mm256_sha512rnds2_epi64(a: __m256i, b: __m256i, k: __m128i) -> __m256i {
180	unsafe { transmute(src:vsha512rnds2(a.as_i64x4(), b.as_i64x4(), k.as_i64x2())) }
181	}
182
183	/// This is one of the two SM3 message scheduling intrinsics. The intrinsic performs
184	/// an initial calculation for the next four SM3 message words. The calculated results
185	/// are stored in dst.
186	///
187	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sm3msg1_epi32)
188	#[inline]
189	#[target_feature(enable = "sm3,avx")]
190	#[cfg_attr(test, assert_instr(vsm3msg1))]
191	#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
192	pub fn _mm_sm3msg1_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
193	unsafe { transmute(src:vsm3msg1(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) }
194	}
195
196	/// This is one of the two SM3 message scheduling intrinsics. The intrinsic performs
197	/// the final calculation for the next four SM3 message words. The calculated results
198	/// are stored in dst.
199	///
200	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sm3msg2_epi32)
201	#[inline]
202	#[target_feature(enable = "sm3,avx")]
203	#[cfg_attr(test, assert_instr(vsm3msg2))]
204	#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
205	pub fn _mm_sm3msg2_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
206	unsafe { transmute(src:vsm3msg2(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) }
207	}
208
209	/// The intrinsic performs two rounds of SM3 operation using initial SM3 state `(C, D, G, H)`
210	/// from `a`, an initial SM3 states `(A, B, E, F)` from `b` and a pre-computed words from the
211	/// `c`. `a` with initial SM3 state of `(C, D, G, H)` assumes input of non-rotated left variables
212	/// from previous state. The updated SM3 state `(A, B, E, F)` is written to `a`. The `imm8`
213	/// should contain the even round number for the first of the two rounds computed by this instruction.
214	/// The computation masks the `imm8` value by ANDing it with `0x3E` so that only even round numbers
215	/// from 0 through 62 are used for this operation. The calculated results are stored in dst.
216	///
217	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sm3rnds2_epi32)
218	#[inline]
219	#[target_feature(enable = "sm3,avx")]
220	#[cfg_attr(test, assert_instr(vsm3rnds2, IMM8 = `0`))]
221	#[rustc_legacy_const_generics(`3`)]
222	#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
223	pub fn _mm_sm3rnds2_epi32<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
224	static_assert!(
225	IMM8 == (IMM8 & `0x3e`),
226	"IMM8 must be an even number in the range `0..=62`"
227	);
228	unsafe { transmute(src:vsm3rnds2(a.as_i32x4(), b.as_i32x4(), c.as_i32x4(), IMM8)) }
229	}
230
231	/// This intrinsic performs four rounds of SM4 key expansion. The intrinsic operates on independent
232	/// 128-bit lanes. The calculated results are stored in dst.
233	///
234	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sm4key4_epi32)
235	#[inline]
236	#[target_feature(enable = "sm4,avx")]
237	#[cfg_attr(test, assert_instr(vsm4key4))]
238	#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
239	pub fn _mm_sm4key4_epi32(a: __m128i, b: __m128i) -> __m128i {
240	unsafe { transmute(src:vsm4key4128(a.as_i32x4(), b.as_i32x4())) }
241	}
242
243	/// This intrinsic performs four rounds of SM4 key expansion. The intrinsic operates on independent
244	/// 128-bit lanes. The calculated results are stored in dst.
245	///
246	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sm4key4_epi32)
247	#[inline]
248	#[target_feature(enable = "sm4,avx")]
249	#[cfg_attr(test, assert_instr(vsm4key4))]
250	#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
251	pub fn _mm256_sm4key4_epi32(a: __m256i, b: __m256i) -> __m256i {
252	unsafe { transmute(src:vsm4key4256(a.as_i32x8(), b.as_i32x8())) }
253	}
254
255	/// This intrinsic performs four rounds of SM4 encryption. The intrinsic operates on independent
256	/// 128-bit lanes. The calculated results are stored in dst.
257	///
258	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sm4rnds4_epi32)
259	#[inline]
260	#[target_feature(enable = "sm4,avx")]
261	#[cfg_attr(test, assert_instr(vsm4rnds4))]
262	#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
263	pub fn _mm_sm4rnds4_epi32(a: __m128i, b: __m128i) -> __m128i {
264	unsafe { transmute(src:vsm4rnds4128(a.as_i32x4(), b.as_i32x4())) }
265	}
266
267	/// This intrinsic performs four rounds of SM4 encryption. The intrinsic operates on independent
268	/// 128-bit lanes. The calculated results are stored in dst.
269	///
270	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sm4rnds4_epi32)
271	#[inline]
272	#[target_feature(enable = "sm4,avx")]
273	#[cfg_attr(test, assert_instr(vsm4rnds4))]
274	#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
275	pub fn _mm256_sm4rnds4_epi32(a: __m256i, b: __m256i) -> __m256i {
276	unsafe { transmute(src:vsm4rnds4256(a.as_i32x8(), b.as_i32x8())) }
277	}
278
#[cfg(test)]
mod tests {
    use crate::{
        core_arch::{simd::*, x86::*},
        hint::black_box,
    };
    use stdarch_test::simd_test;

    // The SHA1/SHA256 tests below compare each intrinsic against fixed expected
    // vectors. `overflowing_literals` is allowed because the 64-bit hex
    // constants exceed `i64::MAX` and are meant as raw bit patterns.

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1msg1_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0x98829f34f74ad457, 0xda2b1a44d0b5ad3c);
        let r = _mm_sha1msg1_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1msg2_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0xf714b202d863d47d, 0x90c30d946b3d3b35);
        let r = _mm_sha1msg2_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1nexte_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0x2589d5be923f82a4, 0x59f111f13956c25b);
        let r = _mm_sha1nexte_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1rnds4_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);

        // Exercise every accepted value of `FUNC` (0..=3).
        let expected = _mm_set_epi64x(0x32b13cd8322f5268, 0xc54420862bd9246f);
        let r = _mm_sha1rnds4_epu32::<0>(a, b);
        assert_eq_m128i(r, expected);

        let expected = _mm_set_epi64x(0x6d4c43e56a3c25d9, 0xa7e00fb775cbd3fe);
        let r = _mm_sha1rnds4_epu32::<1>(a, b);
        assert_eq_m128i(r, expected);

        let expected = _mm_set_epi64x(0xb304e383c01222f4, 0x66f6b3b1f89d8001);
        let r = _mm_sha1rnds4_epu32::<2>(a, b);
        assert_eq_m128i(r, expected);

        let expected = _mm_set_epi64x(0x8189b758bfabfa79, 0xdb08f6e78cae098b);
        let r = _mm_sha1rnds4_epu32::<3>(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha256msg1_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0xeb84973fd5cda67d, 0x2857b88f406b09ee);
        let r = _mm_sha256msg1_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha256msg2_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0xb58777ce887fd851, 0x15d1ec8b73ac8450);
        let r = _mm_sha256msg2_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha256rnds2_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let k = _mm_set_epi64x(0, 0x12835b01d807aa98);
        let expected = _mm_set_epi64x(0xd3063037effb15ea, 0x187ee3db0d6d1d19);
        let r = _mm_sha256rnds2_epu32(a, b, k);
        assert_eq_m128i(r, expected);
    }

    // Shared 64-bit input words for the SHA512 tests, which check the
    // intrinsics against scalar models written out in each test.
    static DATA_64: [u64; 10] = [
        0x0011223344556677,
        0x8899aabbccddeeff,
        0xffeeddccbbaa9988,
        0x7766554433221100,
        0x0123456789abcdef,
        0xfedcba9876543210,
        0x02468ace13579bdf,
        0xfdb97531eca86420,
        0x048c159d26ae37bf,
        0xfb73ea62d951c840,
    ];

    #[simd_test(enable = "sha512,avx")]
    unsafe fn test_mm256_sha512msg1_epi64() {
        fn s0(word: u64) -> u64 {
            word.rotate_right(1) ^ word.rotate_right(8) ^ (word >> 7)
        }

        let A = &DATA_64[0..4];
        let B = &DATA_64[4..6];

        let a = _mm256_loadu_si256(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());

        let r = _mm256_sha512msg1_epi64(a, b);

        // Each output word is the input word plus `s0` of the following word;
        // the last one takes its successor from the low word of `b`.
        let e = _mm256_setr_epi64x(
            A[0].wrapping_add(s0(A[1])) as _,
            A[1].wrapping_add(s0(A[2])) as _,
            A[2].wrapping_add(s0(A[3])) as _,
            A[3].wrapping_add(s0(B[0])) as _,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "sha512,avx")]
    unsafe fn test_mm256_sha512msg2_epi64() {
        fn s1(word: u64) -> u64 {
            word.rotate_right(19) ^ word.rotate_right(61) ^ (word >> 6)
        }

        let A = &DATA_64[0..4];
        let B = &DATA_64[4..8];

        let a = _mm256_loadu_si256(A.as_ptr().cast());
        let b = _mm256_loadu_si256(B.as_ptr().cast());

        let r = _mm256_sha512msg2_epi64(a, b);

        // The upper two results feed on the lower two results just computed.
        let e0 = A[0].wrapping_add(s1(B[2]));
        let e1 = A[1].wrapping_add(s1(B[3]));
        let e = _mm256_setr_epi64x(
            e0 as _,
            e1 as _,
            A[2].wrapping_add(s1(e0)) as _,
            A[3].wrapping_add(s1(e1)) as _,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "sha512,avx")]
    unsafe fn test_mm256_sha512rnds2_epi64() {
        fn cap_sigma0(word: u64) -> u64 {
            word.rotate_right(28) ^ word.rotate_right(34) ^ word.rotate_right(39)
        }

        fn cap_sigma1(word: u64) -> u64 {
            word.rotate_right(14) ^ word.rotate_right(18) ^ word.rotate_right(41)
        }

        fn maj(a: u64, b: u64, c: u64) -> u64 {
            (a & b) ^ (a & c) ^ (b & c)
        }

        fn ch(e: u64, f: u64, g: u64) -> u64 {
            (e & f) ^ (g & !e)
        }

        let A = &DATA_64[0..4];
        let B = &DATA_64[4..8];
        let K = &DATA_64[8..10];

        let a = _mm256_loadu_si256(A.as_ptr().cast());
        let b = _mm256_loadu_si256(B.as_ptr().cast());
        let k = _mm_loadu_si128(K.as_ptr().cast());

        let r = _mm256_sha512rnds2_epi64(a, b, k);

        // Scalar model: eight working variables, two rounds, rotating the
        // array by one position after each round.
        let mut array = [B[3], B[2], A[3], A[2], B[1], B[0], A[1], A[0]];
        for i in 0..2 {
            let new_d = ch(array[4], array[5], array[6])
                .wrapping_add(cap_sigma1(array[4]))
                .wrapping_add(K[i])
                .wrapping_add(array[7]);
            array[7] = new_d
                .wrapping_add(maj(array[0], array[1], array[2]))
                .wrapping_add(cap_sigma0(array[0]));
            array[3] = new_d.wrapping_add(array[3]);
            array.rotate_right(1);
        }
        let e = _mm256_setr_epi64x(array[5] as _, array[4] as _, array[1] as _, array[0] as _);

        assert_eq_m256i(r, e);
    }

    // Shared 32-bit input words for the SM3 and SM4 tests.
    static DATA_32: [u32; 16] = [
        0x00112233, 0x44556677, 0x8899aabb, 0xccddeeff, 0xffeeddcc, 0xbbaa9988, 0x77665544,
        0x33221100, 0x01234567, 0x89abcdef, 0xfedcba98, 0x76543210, 0x02468ace, 0x13579bdf,
        0xfdb97531, 0xeca86420,
    ];

    #[simd_test(enable = "sm3,avx")]
    unsafe fn test_mm_sm3msg1_epi32() {
        fn p1(x: u32) -> u32 {
            x ^ x.rotate_left(15) ^ x.rotate_left(23)
        }
        let A = &DATA_32[0..4];
        let B = &DATA_32[4..8];
        let C = &DATA_32[8..12];

        let a = _mm_loadu_si128(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());
        let c = _mm_loadu_si128(C.as_ptr().cast());

        let r = _mm_sm3msg1_epi32(a, b, c);

        // The last lane has no `B` contribution in this model.
        let e = _mm_setr_epi32(
            p1(A[0] ^ C[0] ^ B[0].rotate_left(15)) as _,
            p1(A[1] ^ C[1] ^ B[1].rotate_left(15)) as _,
            p1(A[2] ^ C[2] ^ B[2].rotate_left(15)) as _,
            p1(A[3] ^ C[3]) as _,
        );

        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sm3,avx")]
    unsafe fn test_mm_sm3msg2_epi32() {
        let A = &DATA_32[0..4];
        let B = &DATA_32[4..8];
        let C = &DATA_32[8..12];

        let a = _mm_loadu_si128(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());
        let c = _mm_loadu_si128(C.as_ptr().cast());

        let r = _mm_sm3msg2_epi32(a, b, c);

        // The last lane additionally mixes in rotations of the first result.
        let e0 = B[0].rotate_left(7) ^ C[0] ^ A[0];
        let e = _mm_setr_epi32(
            e0 as _,
            (B[1].rotate_left(7) ^ C[1] ^ A[1]) as _,
            (B[2].rotate_left(7) ^ C[2] ^ A[2]) as _,
            (B[3].rotate_left(7)
                ^ C[3]
                ^ A[3]
                ^ e0.rotate_left(6)
                ^ e0.rotate_left(15)
                ^ e0.rotate_left(30)) as _,
        );

        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sm3,avx")]
    unsafe fn test_mm_sm3rnds2_epi32() {
        fn p0(x: u32) -> u32 {
            x ^ x.rotate_left(9) ^ x.rotate_left(17)
        }
        fn ff(x: u32, y: u32, z: u32, round: u32) -> u32 {
            if round < 16 {
                x ^ y ^ z
            } else {
                (x & y) | (x & z) | (y & z)
            }
        }
        fn gg(x: u32, y: u32, z: u32, round: u32) -> u32 {
            if round < 16 {
                x ^ y ^ z
            } else {
                (x & y) | (!x & z)
            }
        }

        const ROUND: u32 = 30;

        let A = &DATA_32[0..4];
        let B = &DATA_32[4..8];
        let C = &DATA_32[8..12];

        let a = _mm_loadu_si128(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());
        let c = _mm_loadu_si128(C.as_ptr().cast());

        let r = _mm_sm3rnds2_epi32::<{ ROUND as i32 }>(a, b, c);

        // Round constant selection depends on which half of the rounds we are in.
        let CONST: u32 = if ROUND < 16 { 0x79cc4519 } else { 0x7a879d8a };

        // Scalar model state; the words taken from `a` are rotated up front.
        let mut array = [
            B[3],
            B[2],
            A[3].rotate_left(9),
            A[2].rotate_left(9),
            B[1],
            B[0],
            A[1].rotate_left(19),
            A[0].rotate_left(19),
        ];

        for i in 0..2 {
            let s1 = array[0]
                .rotate_left(12)
                .wrapping_add(array[4])
                .wrapping_add(CONST.rotate_left(ROUND as u32 + i as u32))
                .rotate_left(7);
            let s2 = s1 ^ array[0].rotate_left(12);

            let t1 = ff(array[0], array[1], array[2], ROUND)
                .wrapping_add(array[3])
                .wrapping_add(s2)
                .wrapping_add(C[i] ^ C[i + 2]);
            let t2 = gg(array[4], array[5], array[6], ROUND)
                .wrapping_add(array[7])
                .wrapping_add(s1)
                .wrapping_add(C[i]);

            array[3] = array[2];
            array[2] = array[1].rotate_left(9);
            array[1] = array[0];
            array[0] = t1;
            array[7] = array[6];
            array[6] = array[5].rotate_left(19);
            array[5] = array[4];
            array[4] = p0(t2);
        }

        let e = _mm_setr_epi32(array[5] as _, array[4] as _, array[1] as _, array[0] as _);

        assert_eq_m128i(r, e);
    }

    /// Applies the byte substitution table `SBOX` to each of the four bytes of
    /// `x` independently and reassembles them in the same positions.
    fn lower_t(x: u32) -> u32 {
        static SBOX: [u8; 256] = [
            0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, 0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB,
            0x2C, 0x05, 0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3, 0xAA, 0x44, 0x13, 0x26,
            0x49, 0x86, 0x06, 0x99, 0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A, 0x33, 0x54,
            0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62, 0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95,
            0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6, 0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73,
            0x17, 0xBA, 0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8, 0x68, 0x6B, 0x81, 0xB2,
            0x71, 0x64, 0xDA, 0x8B, 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35, 0x1E, 0x24,
            0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2, 0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87,
            0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52, 0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4,
            0xC8, 0x9E, 0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5, 0xA3, 0xF7, 0xF2, 0xCE,
            0xF9, 0x61, 0x15, 0xA1, 0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55, 0xAD, 0x93,
            0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3, 0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60,
            0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F, 0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD,
            0x8E, 0x2F, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51, 0x8D, 0x1B, 0xAF, 0x92,
            0xBB, 0xDD, 0xBC, 0x7F, 0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8, 0x0A, 0xC1,
            0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD, 0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0,
            0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E,
            0xC6, 0x84, 0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E,
            0xD7, 0xCB, 0x39, 0x48,
        ];

        ((SBOX[(x >> 24) as usize] as u32) << 24)
            | ((SBOX[((x >> 16) & 0xff) as usize] as u32) << 16)
            | ((SBOX[((x >> 8) & 0xff) as usize] as u32) << 8)
            | (SBOX[(x & 0xff) as usize] as u32)
    }

    #[simd_test(enable = "sm4,avx")]
    unsafe fn test_mm_sm4key4_epi32() {
        fn l_key(x: u32) -> u32 {
            x ^ x.rotate_left(13) ^ x.rotate_left(23)
        }
        fn f_key(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 {
            x0 ^ l_key(lower_t(x1 ^ x2 ^ x3 ^ rk))
        }

        let A = &DATA_32[0..4];
        let B = &DATA_32[4..8];

        let a = _mm_loadu_si128(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());

        let r = _mm_sm4key4_epi32(a, b);

        // Four chained steps: each one consumes the results of the previous ones.
        let e0 = f_key(A[0], A[1], A[2], A[3], B[0]);
        let e1 = f_key(A[1], A[2], A[3], e0, B[1]);
        let e2 = f_key(A[2], A[3], e0, e1, B[2]);
        let e3 = f_key(A[3], e0, e1, e2, B[3]);
        let e = _mm_setr_epi32(e0 as _, e1 as _, e2 as _, e3 as _);

        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sm4,avx")]
    unsafe fn test_mm256_sm4key4_epi32() {
        let a_low = _mm_loadu_si128(DATA_32.as_ptr().cast());
        let a_high = _mm_loadu_si128(DATA_32[4..].as_ptr().cast());
        let b_low = _mm_loadu_si128(DATA_32[8..].as_ptr().cast());
        let b_high = _mm_loadu_si128(DATA_32[12..].as_ptr().cast());

        let a = _mm256_set_m128i(a_high, a_low);
        let b = _mm256_set_m128i(b_high, b_low);

        let r = _mm256_sm4key4_epi32(a, b);

        // The 256-bit form must match the 128-bit form applied to each half.
        let e_low = _mm_sm4key4_epi32(a_low, b_low);
        let e_high = _mm_sm4key4_epi32(a_high, b_high);
        let e = _mm256_set_m128i(e_high, e_low);

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "sm4,avx")]
    unsafe fn test_mm_sm4rnds4_epi32() {
        fn l_rnd(x: u32) -> u32 {
            x ^ x.rotate_left(2) ^ x.rotate_left(10) ^ x.rotate_left(18) ^ x.rotate_left(24)
        }
        fn f_rnd(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 {
            x0 ^ l_rnd(lower_t(x1 ^ x2 ^ x3 ^ rk))
        }

        let A = &DATA_32[0..4];
        let B = &DATA_32[4..8];

        let a = _mm_loadu_si128(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());

        let r = _mm_sm4rnds4_epi32(a, b);

        // Four chained steps: each one consumes the results of the previous ones.
        let e0 = f_rnd(A[0], A[1], A[2], A[3], B[0]);
        let e1 = f_rnd(A[1], A[2], A[3], e0, B[1]);
        let e2 = f_rnd(A[2], A[3], e0, e1, B[2]);
        let e3 = f_rnd(A[3], e0, e1, e2, B[3]);
        let e = _mm_setr_epi32(e0 as _, e1 as _, e2 as _, e3 as _);

        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sm4,avx")]
    unsafe fn test_mm256_sm4rnds4_epi32() {
        let a_low = _mm_loadu_si128(DATA_32.as_ptr().cast());
        let a_high = _mm_loadu_si128(DATA_32[4..].as_ptr().cast());
        let b_low = _mm_loadu_si128(DATA_32[8..].as_ptr().cast());
        let b_high = _mm_loadu_si128(DATA_32[12..].as_ptr().cast());

        let a = _mm256_set_m128i(a_high, a_low);
        let b = _mm256_set_m128i(b_high, b_low);

        let r = _mm256_sm4rnds4_epi32(a, b);

        // The 256-bit form must match the 128-bit form applied to each half.
        let e_low = _mm_sm4rnds4_epi32(a_low, b_low);
        let e_high = _mm_sm4rnds4_epi32(a_high, b_high);
        let e = _mm256_set_m128i(e_high, e_low);

        assert_eq_m256i(r, e);
    }
}
733