use crate::core_arch::{simd::*, x86::*};

#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.sha1msg1"]
    unsafe fn sha1msg1(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sha1msg2"]
    unsafe fn sha1msg2(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sha1nexte"]
    unsafe fn sha1nexte(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sha1rnds4"]
    unsafe fn sha1rnds4(a: i32x4, b: i32x4, c: i8) -> i32x4;
    #[link_name = "llvm.x86.sha256msg1"]
    unsafe fn sha256msg1(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sha256msg2"]
    unsafe fn sha256msg2(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sha256rnds2"]
    unsafe fn sha256rnds2(a: i32x4, b: i32x4, k: i32x4) -> i32x4;
    #[link_name = "llvm.x86.vsha512msg1"]
    unsafe fn vsha512msg1(a: i64x4, b: i64x2) -> i64x4;
    #[link_name = "llvm.x86.vsha512msg2"]
    unsafe fn vsha512msg2(a: i64x4, b: i64x4) -> i64x4;
    #[link_name = "llvm.x86.vsha512rnds2"]
    unsafe fn vsha512rnds2(a: i64x4, b: i64x4, k: i64x2) -> i64x4;
    #[link_name = "llvm.x86.vsm3msg1"]
    unsafe fn vsm3msg1(a: i32x4, b: i32x4, c: i32x4) -> i32x4;
    #[link_name = "llvm.x86.vsm3msg2"]
    unsafe fn vsm3msg2(a: i32x4, b: i32x4, c: i32x4) -> i32x4;
    #[link_name = "llvm.x86.vsm3rnds2"]
    unsafe fn vsm3rnds2(a: i32x4, b: i32x4, c: i32x4, d: i32) -> i32x4;
    #[link_name = "llvm.x86.vsm4key4128"]
    unsafe fn vsm4key4128(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.vsm4key4256"]
    unsafe fn vsm4key4256(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.vsm4rnds4128"]
    unsafe fn vsm4rnds4128(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.vsm4rnds4256"]
    unsafe fn vsm4rnds4256(a: i32x8, b: i32x8) -> i32x8;
}

#[cfg(test)]
use stdarch_test::assert_instr;

/// Performs an intermediate calculation for the next four SHA1 message values
/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
/// and returns the result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1msg1_epu32)
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha1msg1))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sha1msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(sha1msg1(a.as_i32x4(), b.as_i32x4())) }
}

/// Performs the final calculation for the next four SHA1 message values
/// (unsigned 32-bit integers) using the intermediate result in `a` and the
/// previous message values in `b`, and returns the result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1msg2_epu32)
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha1msg2))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sha1msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(sha1msg2(a.as_i32x4(), b.as_i32x4())) }
}
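
// A minimal usage sketch (hypothetical variable names, not part of this module):
// the two message-schedule intrinsics are normally chained with one XOR to derive
// the next four schedule values, W[16..20] here, from the previous sixteen:
//
//     let t = _mm_sha1msg1_epu32(w0_3, w4_7);       // W[t-16] ^ W[t-14], per lane
//     let t = _mm_xor_si128(t, w8_11);              // ^ W[t-8]
//     let w16_19 = _mm_sha1msg2_epu32(t, w12_15);   // ^ W[t-3], then rotate left by 1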

/// Calculates SHA1 state variable E after four rounds of operation from the
/// current SHA1 state variable `a`, adds that value to the scheduled values
/// (unsigned 32-bit integers) in `b`, and returns the result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1nexte_epu32)
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha1nexte))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(sha1nexte(a.as_i32x4(), b.as_i32x4())) }
}

/// Performs four rounds of SHA1 operation using an initial SHA1 state (A,B,C,D)
/// from `a`, and a pre-computed sum of the next 4 round message values (unsigned
/// 32-bit integers) and state variable E from `b`, and returns the updated SHA1
/// state (A,B,C,D). `FUNC` contains the logic functions and round constants.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1rnds4_epu32)
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha1rnds4, FUNC = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sha1rnds4_epu32<const FUNC: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(FUNC, 2);
    unsafe { transmute(sha1rnds4(a.as_i32x4(), b.as_i32x4(), FUNC as i8)) }
}
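
// A minimal sketch of the usual round loop (hypothetical variable names, mirroring
// Intel's reference flow): `abcd` is the working state, `e_w` holds E folded into
// the next four message dwords, and `FUNC` picks the round group (0: rounds 0-19,
// 1: 20-39, 2: 40-59, 3: 60-79):
//
//     let prev_abcd = abcd;                          // its A becomes E four rounds later
//     abcd = _mm_sha1rnds4_epu32::<0>(abcd, e_w);    // rounds 0-3
//     e_w = _mm_sha1nexte_epu32(prev_abcd, w4_7);    // E for the next group + W[4..8]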

/// Performs an intermediate calculation for the next four SHA256 message values
/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
/// and returns the result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256msg1_epu32)
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha256msg1))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sha256msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(sha256msg1(a.as_i32x4(), b.as_i32x4())) }
}

/// Performs the final calculation for the next four SHA256 message values
/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
/// and returns the result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256msg2_epu32)
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha256msg2))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(sha256msg2(a.as_i32x4(), b.as_i32x4())) }
}

/// Performs 2 rounds of SHA256 operation using an initial SHA256 state
/// (C,D,G,H) from `a`, an initial SHA256 state (A,B,E,F) from `b`, and a
/// pre-computed sum of the next 2 round message values (unsigned 32-bit
/// integers) and the corresponding round constants from `k`, and stores the
/// updated SHA256 state (A,B,E,F) in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256rnds2_epu32)
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha256rnds2))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sha256rnds2_epu32(a: __m128i, b: __m128i, k: __m128i) -> __m128i {
    unsafe { transmute(sha256rnds2(a.as_i32x4(), b.as_i32x4(), k.as_i32x4())) }
}
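
// A minimal sketch of the usual four-round pattern (hypothetical variable names):
// `abef` holds (A,B,E,F), `cdgh` holds (C,D,G,H), and `wk` holds W[i..i+4] already
// added to the round constants K[i..i+4]; note how the two registers swap roles:
//
//     cdgh = _mm_sha256rnds2_epu32(cdgh, abef, wk);      // rounds i, i+1
//     let wk_hi = _mm_shuffle_epi32::<0x0E>(wk);         // move the upper two W+K sums down
//     abef = _mm_sha256rnds2_epu32(abef, cdgh, wk_hi);   // rounds i+2, i+3
//
// After both calls, `abef` and `cdgh` once again hold (A,B,E,F) and (C,D,G,H).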

/// This intrinsic is one of the two SHA512 message scheduling instructions. The
/// intrinsic performs an intermediate calculation for the next four SHA512
/// message qwords. The calculated results are stored in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sha512msg1_epi64)
#[inline]
#[target_feature(enable = "sha512,avx")]
#[cfg_attr(test, assert_instr(vsha512msg1))]
#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_sha512msg1_epi64(a: __m256i, b: __m128i) -> __m256i {
    unsafe { transmute(vsha512msg1(a.as_i64x4(), b.as_i64x2())) }
}

/// This intrinsic is one of the two SHA512 message scheduling instructions. The
/// intrinsic performs the final calculation for the next four SHA512 message
/// qwords. The calculated results are stored in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sha512msg2_epi64)
#[inline]
#[target_feature(enable = "sha512,avx")]
#[cfg_attr(test, assert_instr(vsha512msg2))]
#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_sha512msg2_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vsha512msg2(a.as_i64x4(), b.as_i64x4())) }
}

/// This intrinsic performs two rounds of SHA512 operation using an initial SHA512
/// state `(C,D,G,H)` from `a`, an initial SHA512 state `(A,B,E,F)` from `b`, and a
/// pre-computed sum of the next two round message qwords and the corresponding
/// round constants from `k` (only the two lower qwords of the third operand). The
/// updated SHA512 state `(A,B,E,F)` is written to dst, and dst can be used as the
/// updated state `(C,D,G,H)` in later rounds.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sha512rnds2_epi64)
#[inline]
#[target_feature(enable = "sha512,avx")]
#[cfg_attr(test, assert_instr(vsha512rnds2))]
#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_sha512rnds2_epi64(a: __m256i, b: __m256i, k: __m128i) -> __m256i {
    unsafe { transmute(vsha512rnds2(a.as_i64x4(), b.as_i64x4(), k.as_i64x2())) }
}
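
// A minimal sketch of the usual four-round pattern (hypothetical variable names),
// mirroring the SHA-256 flow at qword granularity: `abef` holds (A,B,E,F), `cdgh`
// holds (C,D,G,H), and each call consumes two pre-summed W+K qwords in the low
// half of a 128-bit vector:
//
//     cdgh = _mm256_sha512rnds2_epi64(cdgh, abef, wk_lo);   // rounds i, i+1
//     abef = _mm256_sha512rnds2_epi64(abef, cdgh, wk_hi);   // rounds i+2, i+3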

/// This is one of the two SM3 message scheduling intrinsics. The intrinsic performs
/// an initial calculation for the next four SM3 message words. The calculated results
/// are stored in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sm3msg1_epi32)
#[inline]
#[target_feature(enable = "sm3,avx")]
#[cfg_attr(test, assert_instr(vsm3msg1))]
#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_sm3msg1_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { transmute(vsm3msg1(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) }
}

/// This is one of the two SM3 message scheduling intrinsics. The intrinsic performs
/// the final calculation for the next four SM3 message words. The calculated results
/// are stored in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sm3msg2_epi32)
#[inline]
#[target_feature(enable = "sm3,avx")]
#[cfg_attr(test, assert_instr(vsm3msg2))]
#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_sm3msg2_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { transmute(vsm3msg2(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) }
}

/// This intrinsic performs two rounds of SM3 operation using an initial SM3 state
/// `(C, D, G, H)` from `a`, an initial SM3 state `(A, B, E, F)` from `b`, and
/// pre-computed message words from `c`. The state in `a` is assumed to hold the
/// non-rotated left variables from the previous state. The updated SM3 state
/// `(A, B, E, F)` is written to dst. `IMM8` should contain the even round number
/// for the first of the two rounds computed by this instruction; the computation
/// masks the `IMM8` value by ANDing it with `0x3E` so that only even round numbers
/// from 0 through 62 are used for this operation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sm3rnds2_epi32)
#[inline]
#[target_feature(enable = "sm3,avx")]
#[cfg_attr(test, assert_instr(vsm3rnds2, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_sm3rnds2_epi32<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    static_assert!(
        IMM8 == (IMM8 & 0x3e),
        "IMM8 must be an even number in the range `0..=62`"
    );
    unsafe { transmute(vsm3rnds2(a.as_i32x4(), b.as_i32x4(), c.as_i32x4(), IMM8)) }
}
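
// A rough usage sketch (hypothetical variable names, operand roles inferred from the
// documentation above): each call advances the state by two rounds, so the round
// constant is stepped by 2 and the two state registers swap roles between calls:
//
//     let s1 = _mm_sm3rnds2_epi32::<0>(cdgh, abef, w0);   // rounds 0 and 1 -> new (A, B, E, F)
//     let s2 = _mm_sm3rnds2_epi32::<2>(abef, s1, w1);     // rounds 2 and 3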

/// This intrinsic performs four rounds of SM4 key expansion. The intrinsic operates on independent
/// 128-bit lanes. The calculated results are stored in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sm4key4_epi32)
#[inline]
#[target_feature(enable = "sm4,avx")]
#[cfg_attr(test, assert_instr(vsm4key4))]
#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_sm4key4_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vsm4key4128(a.as_i32x4(), b.as_i32x4())) }
}
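
// A minimal sketch (hypothetical variable names): `a` carries the four most recent
// key words and `b` the four matching CK constants, so starting from the master key
// XORed with the SM4 family key FK, eight chained calls yield all 32 round keys:
//
//     let rk0_3 = _mm_sm4key4_epi32(k0_3, ck0_3);
//     let rk4_7 = _mm_sm4key4_epi32(rk0_3, ck4_7);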

/// This intrinsic performs four rounds of SM4 key expansion. The intrinsic operates on independent
/// 128-bit lanes. The calculated results are stored in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sm4key4_epi32)
#[inline]
#[target_feature(enable = "sm4,avx")]
#[cfg_attr(test, assert_instr(vsm4key4))]
#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_sm4key4_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vsm4key4256(a.as_i32x8(), b.as_i32x8())) }
}

/// This intrinsic performs four rounds of SM4 encryption. The intrinsic operates on independent
/// 128-bit lanes. The calculated results are stored in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sm4rnds4_epi32)
#[inline]
#[target_feature(enable = "sm4,avx")]
#[cfg_attr(test, assert_instr(vsm4rnds4))]
#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm_sm4rnds4_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vsm4rnds4128(a.as_i32x4(), b.as_i32x4())) }
}
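
// A minimal sketch (hypothetical variable names): `a` carries the four working words
// of the block being encrypted and `b` the four round keys for these rounds, so eight
// chained calls with rk[0..4], rk[4..8], and so on run the full 32-round SM4 cipher:
//
//     let x4_7 = _mm_sm4rnds4_epi32(x0_3, rk0_3);
//     let x8_11 = _mm_sm4rnds4_epi32(x4_7, rk4_7);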

/// This intrinsic performs four rounds of SM4 encryption. The intrinsic operates on independent
/// 128-bit lanes. The calculated results are stored in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sm4rnds4_epi32)
#[inline]
#[target_feature(enable = "sm4,avx")]
#[cfg_attr(test, assert_instr(vsm4rnds4))]
#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
pub fn _mm256_sm4rnds4_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vsm4rnds4256(a.as_i32x8(), b.as_i32x8())) }
}

#[cfg(test)]
mod tests {
    use crate::{
        core_arch::{simd::*, x86::*},
        hint::black_box,
    };
    use stdarch_test::simd_test;

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1msg1_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0x98829f34f74ad457, 0xda2b1a44d0b5ad3c);
        let r = _mm_sha1msg1_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1msg2_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0xf714b202d863d47d, 0x90c30d946b3d3b35);
        let r = _mm_sha1msg2_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1nexte_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0x2589d5be923f82a4, 0x59f111f13956c25b);
        let r = _mm_sha1nexte_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1rnds4_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0x32b13cd8322f5268, 0xc54420862bd9246f);
        let r = _mm_sha1rnds4_epu32::<0>(a, b);
        assert_eq_m128i(r, expected);

        let expected = _mm_set_epi64x(0x6d4c43e56a3c25d9, 0xa7e00fb775cbd3fe);
        let r = _mm_sha1rnds4_epu32::<1>(a, b);
        assert_eq_m128i(r, expected);

        let expected = _mm_set_epi64x(0xb304e383c01222f4, 0x66f6b3b1f89d8001);
        let r = _mm_sha1rnds4_epu32::<2>(a, b);
        assert_eq_m128i(r, expected);

        let expected = _mm_set_epi64x(0x8189b758bfabfa79, 0xdb08f6e78cae098b);
        let r = _mm_sha1rnds4_epu32::<3>(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha256msg1_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0xeb84973fd5cda67d, 0x2857b88f406b09ee);
        let r = _mm_sha256msg1_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha256msg2_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0xb58777ce887fd851, 0x15d1ec8b73ac8450);
        let r = _mm_sha256msg2_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha256rnds2_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let k = _mm_set_epi64x(0, 0x12835b01d807aa98);
        let expected = _mm_set_epi64x(0xd3063037effb15ea, 0x187ee3db0d6d1d19);
        let r = _mm_sha256rnds2_epu32(a, b, k);
        assert_eq_m128i(r, expected);
    }

    static DATA_64: [u64; 10] = [
        0x0011223344556677,
        0x8899aabbccddeeff,
        0xffeeddccbbaa9988,
        0x7766554433221100,
        0x0123456789abcdef,
        0xfedcba9876543210,
        0x02468ace13579bdf,
        0xfdb97531eca86420,
        0x048c159d26ae37bf,
        0xfb73ea62d951c840,
    ];

    #[simd_test(enable = "sha512,avx")]
    unsafe fn test_mm256_sha512msg1_epi64() {
        fn s0(word: u64) -> u64 {
            word.rotate_right(1) ^ word.rotate_right(8) ^ (word >> 7)
        }

        let A = &DATA_64[0..4];
        let B = &DATA_64[4..6];

        let a = _mm256_loadu_si256(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());

        let r = _mm256_sha512msg1_epi64(a, b);

        let e = _mm256_setr_epi64x(
            A[0].wrapping_add(s0(A[1])) as _,
            A[1].wrapping_add(s0(A[2])) as _,
            A[2].wrapping_add(s0(A[3])) as _,
            A[3].wrapping_add(s0(B[0])) as _,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "sha512,avx")]
    unsafe fn test_mm256_sha512msg2_epi64() {
        fn s1(word: u64) -> u64 {
            word.rotate_right(19) ^ word.rotate_right(61) ^ (word >> 6)
        }

        let A = &DATA_64[0..4];
        let B = &DATA_64[4..8];

        let a = _mm256_loadu_si256(A.as_ptr().cast());
        let b = _mm256_loadu_si256(B.as_ptr().cast());

        let r = _mm256_sha512msg2_epi64(a, b);

        let e0 = A[0].wrapping_add(s1(B[2]));
        let e1 = A[1].wrapping_add(s1(B[3]));
        let e = _mm256_setr_epi64x(
            e0 as _,
            e1 as _,
            A[2].wrapping_add(s1(e0)) as _,
            A[3].wrapping_add(s1(e1)) as _,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "sha512,avx")]
    unsafe fn test_mm256_sha512rnds2_epi64() {
        fn cap_sigma0(word: u64) -> u64 {
            word.rotate_right(28) ^ word.rotate_right(34) ^ word.rotate_right(39)
        }

        fn cap_sigma1(word: u64) -> u64 {
            word.rotate_right(14) ^ word.rotate_right(18) ^ word.rotate_right(41)
        }

        fn maj(a: u64, b: u64, c: u64) -> u64 {
            (a & b) ^ (a & c) ^ (b & c)
        }

        fn ch(e: u64, f: u64, g: u64) -> u64 {
            (e & f) ^ (g & !e)
        }

        let A = &DATA_64[0..4];
        let B = &DATA_64[4..8];
        let K = &DATA_64[8..10];

        let a = _mm256_loadu_si256(A.as_ptr().cast());
        let b = _mm256_loadu_si256(B.as_ptr().cast());
        let k = _mm_loadu_si128(K.as_ptr().cast());

        let r = _mm256_sha512rnds2_epi64(a, b, k);

        let mut array = [B[3], B[2], A[3], A[2], B[1], B[0], A[1], A[0]];
        for i in 0..2 {
            let new_d = ch(array[4], array[5], array[6])
                .wrapping_add(cap_sigma1(array[4]))
                .wrapping_add(K[i])
                .wrapping_add(array[7]);
            array[7] = new_d
                .wrapping_add(maj(array[0], array[1], array[2]))
                .wrapping_add(cap_sigma0(array[0]));
            array[3] = new_d.wrapping_add(array[3]);
            array.rotate_right(1);
        }
        let e = _mm256_setr_epi64x(array[5] as _, array[4] as _, array[1] as _, array[0] as _);

        assert_eq_m256i(r, e);
    }

    static DATA_32: [u32; 16] = [
        0x00112233, 0x44556677, 0x8899aabb, 0xccddeeff, 0xffeeddcc, 0xbbaa9988, 0x77665544,
        0x33221100, 0x01234567, 0x89abcdef, 0xfedcba98, 0x76543210, 0x02468ace, 0x13579bdf,
        0xfdb97531, 0xeca86420,
    ];

    #[simd_test(enable = "sm3,avx")]
    unsafe fn test_mm_sm3msg1_epi32() {
        fn p1(x: u32) -> u32 {
            x ^ x.rotate_left(15) ^ x.rotate_left(23)
        }
        let A = &DATA_32[0..4];
        let B = &DATA_32[4..8];
        let C = &DATA_32[8..12];

        let a = _mm_loadu_si128(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());
        let c = _mm_loadu_si128(C.as_ptr().cast());

        let r = _mm_sm3msg1_epi32(a, b, c);

        let e = _mm_setr_epi32(
            p1(A[0] ^ C[0] ^ B[0].rotate_left(15)) as _,
            p1(A[1] ^ C[1] ^ B[1].rotate_left(15)) as _,
            p1(A[2] ^ C[2] ^ B[2].rotate_left(15)) as _,
            p1(A[3] ^ C[3]) as _,
        );

        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sm3,avx")]
    unsafe fn test_mm_sm3msg2_epi32() {
        let A = &DATA_32[0..4];
        let B = &DATA_32[4..8];
        let C = &DATA_32[8..12];

        let a = _mm_loadu_si128(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());
        let c = _mm_loadu_si128(C.as_ptr().cast());

        let r = _mm_sm3msg2_epi32(a, b, c);

        let e0 = B[0].rotate_left(7) ^ C[0] ^ A[0];
        let e = _mm_setr_epi32(
            e0 as _,
            (B[1].rotate_left(7) ^ C[1] ^ A[1]) as _,
            (B[2].rotate_left(7) ^ C[2] ^ A[2]) as _,
            (B[3].rotate_left(7)
                ^ C[3]
                ^ A[3]
                ^ e0.rotate_left(6)
                ^ e0.rotate_left(15)
                ^ e0.rotate_left(30)) as _,
        );

        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sm3,avx")]
    unsafe fn test_mm_sm3rnds2_epi32() {
        fn p0(x: u32) -> u32 {
            x ^ x.rotate_left(9) ^ x.rotate_left(17)
        }
        fn ff(x: u32, y: u32, z: u32, round: u32) -> u32 {
            if round < 16 {
                x ^ y ^ z
            } else {
                (x & y) | (x & z) | (y & z)
            }
        }
        fn gg(x: u32, y: u32, z: u32, round: u32) -> u32 {
            if round < 16 {
                x ^ y ^ z
            } else {
                (x & y) | (!x & z)
            }
        }

        const ROUND: u32 = 30;

        let A = &DATA_32[0..4];
        let B = &DATA_32[4..8];
        let C = &DATA_32[8..12];

        let a = _mm_loadu_si128(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());
        let c = _mm_loadu_si128(C.as_ptr().cast());

        let r = _mm_sm3rnds2_epi32::<{ ROUND as i32 }>(a, b, c);

        let CONST: u32 = if ROUND < 16 { 0x79cc4519 } else { 0x7a879d8a };

        let mut array = [
            B[3],
            B[2],
            A[3].rotate_left(9),
            A[2].rotate_left(9),
            B[1],
            B[0],
            A[1].rotate_left(19),
            A[0].rotate_left(19),
        ];

        for i in 0..2 {
            let s1 = array[0]
                .rotate_left(12)
                .wrapping_add(array[4])
                .wrapping_add(CONST.rotate_left(ROUND as u32 + i as u32))
                .rotate_left(7);
            let s2 = s1 ^ array[0].rotate_left(12);

            let t1 = ff(array[0], array[1], array[2], ROUND)
                .wrapping_add(array[3])
                .wrapping_add(s2)
                .wrapping_add(C[i] ^ C[i + 2]);
            let t2 = gg(array[4], array[5], array[6], ROUND)
                .wrapping_add(array[7])
                .wrapping_add(s1)
                .wrapping_add(C[i]);

            array[3] = array[2];
            array[2] = array[1].rotate_left(9);
            array[1] = array[0];
            array[0] = t1;
            array[7] = array[6];
            array[6] = array[5].rotate_left(19);
            array[5] = array[4];
            array[4] = p0(t2);
        }

        let e = _mm_setr_epi32(array[5] as _, array[4] as _, array[1] as _, array[0] as _);

        assert_eq_m128i(r, e);
    }

    // The SM4 nonlinear transform tau: apply the SM4 S-box to each byte of the word.
    fn lower_t(x: u32) -> u32 {
        static SBOX: [u8; 256] = [
            0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, 0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB,
            0x2C, 0x05, 0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3, 0xAA, 0x44, 0x13, 0x26,
            0x49, 0x86, 0x06, 0x99, 0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A, 0x33, 0x54,
            0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62, 0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95,
            0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6, 0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73,
            0x17, 0xBA, 0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8, 0x68, 0x6B, 0x81, 0xB2,
            0x71, 0x64, 0xDA, 0x8B, 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35, 0x1E, 0x24,
            0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2, 0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87,
            0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52, 0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4,
            0xC8, 0x9E, 0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5, 0xA3, 0xF7, 0xF2, 0xCE,
            0xF9, 0x61, 0x15, 0xA1, 0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55, 0xAD, 0x93,
            0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3, 0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60,
            0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F, 0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD,
            0x8E, 0x2F, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51, 0x8D, 0x1B, 0xAF, 0x92,
            0xBB, 0xDD, 0xBC, 0x7F, 0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8, 0x0A, 0xC1,
            0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD, 0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0,
            0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E,
            0xC6, 0x84, 0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E,
            0xD7, 0xCB, 0x39, 0x48,
        ];

        ((SBOX[(x >> 24) as usize] as u32) << 24)
            | ((SBOX[((x >> 16) & 0xff) as usize] as u32) << 16)
            | ((SBOX[((x >> 8) & 0xff) as usize] as u32) << 8)
            | (SBOX[(x & 0xff) as usize] as u32)
    }

    #[simd_test(enable = "sm4,avx")]
    unsafe fn test_mm_sm4key4_epi32() {
        fn l_key(x: u32) -> u32 {
            x ^ x.rotate_left(13) ^ x.rotate_left(23)
        }
        fn f_key(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 {
            x0 ^ l_key(lower_t(x1 ^ x2 ^ x3 ^ rk))
        }

        let A = &DATA_32[0..4];
        let B = &DATA_32[4..8];

        let a = _mm_loadu_si128(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());

        let r = _mm_sm4key4_epi32(a, b);

        let e0 = f_key(A[0], A[1], A[2], A[3], B[0]);
        let e1 = f_key(A[1], A[2], A[3], e0, B[1]);
        let e2 = f_key(A[2], A[3], e0, e1, B[2]);
        let e3 = f_key(A[3], e0, e1, e2, B[3]);
        let e = _mm_setr_epi32(e0 as _, e1 as _, e2 as _, e3 as _);

        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sm4,avx")]
    unsafe fn test_mm256_sm4key4_epi32() {
        let a_low = _mm_loadu_si128(DATA_32.as_ptr().cast());
        let a_high = _mm_loadu_si128(DATA_32[4..].as_ptr().cast());
        let b_low = _mm_loadu_si128(DATA_32[8..].as_ptr().cast());
        let b_high = _mm_loadu_si128(DATA_32[12..].as_ptr().cast());

        let a = _mm256_set_m128i(a_high, a_low);
        let b = _mm256_set_m128i(b_high, b_low);

        let r = _mm256_sm4key4_epi32(a, b);

        let e_low = _mm_sm4key4_epi32(a_low, b_low);
        let e_high = _mm_sm4key4_epi32(a_high, b_high);
        let e = _mm256_set_m128i(e_high, e_low);

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "sm4,avx")]
    unsafe fn test_mm_sm4rnds4_epi32() {
        fn l_rnd(x: u32) -> u32 {
            x ^ x.rotate_left(2) ^ x.rotate_left(10) ^ x.rotate_left(18) ^ x.rotate_left(24)
        }
        fn f_rnd(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 {
            x0 ^ l_rnd(lower_t(x1 ^ x2 ^ x3 ^ rk))
        }

        let A = &DATA_32[0..4];
        let B = &DATA_32[4..8];

        let a = _mm_loadu_si128(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());

        let r = _mm_sm4rnds4_epi32(a, b);

        let e0 = f_rnd(A[0], A[1], A[2], A[3], B[0]);
        let e1 = f_rnd(A[1], A[2], A[3], e0, B[1]);
        let e2 = f_rnd(A[2], A[3], e0, e1, B[2]);
        let e3 = f_rnd(A[3], e0, e1, e2, B[3]);
        let e = _mm_setr_epi32(e0 as _, e1 as _, e2 as _, e3 as _);

        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sm4,avx")]
    unsafe fn test_mm256_sm4rnds4_epi32() {
        let a_low = _mm_loadu_si128(DATA_32.as_ptr().cast());
        let a_high = _mm_loadu_si128(DATA_32[4..].as_ptr().cast());
        let b_low = _mm_loadu_si128(DATA_32[8..].as_ptr().cast());
        let b_high = _mm_loadu_si128(DATA_32[12..].as_ptr().cast());

        let a = _mm256_set_m128i(a_high, a_low);
        let b = _mm256_set_m128i(b_high, b_low);

        let r = _mm256_sm4rnds4_epi32(a, b);

        let e_low = _mm_sm4rnds4_epi32(a_low, b_low);
        let e_high = _mm_sm4rnds4_epi32(a_high, b_high);
        let e = _mm256_set_m128i(e_high, e_low);

        assert_eq_m256i(r, e);
    }
}