| 1 | //! SHA-1 `x86`/`x86_64` backend |
| 2 | |
| 3 | #![cfg (any(target_arch = "x86" , target_arch = "x86_64" ))] |
| 4 | |
| 5 | #[cfg (target_arch = "x86" )] |
| 6 | use core::arch::x86::*; |
| 7 | #[cfg (target_arch = "x86_64" )] |
| 8 | use core::arch::x86_64::*; |
| 9 | |
/// Runs four SHA-1 rounds on the packed `A, B, C, D` vector `$abcd`.
///
/// `$prev` and `$msg` are first combined with `_mm_sha1nexte_epu32`, and the
/// result is handed to `_mm_sha1rnds4_epu32` together with `$abcd`.
/// `$func` is the immediate passed to `_mm_sha1rnds4_epu32` to choose the
/// round function/constant. The macro evaluates to the resulting vector.
macro_rules! rounds4 {
    ($abcd:ident, $prev:ident, $msg:expr, $func:expr) => {{
        let e_plus_w = _mm_sha1nexte_epu32($prev, $msg);
        _mm_sha1rnds4_epu32($abcd, e_plus_w, $func)
    }};
}
| 15 | |
/// Produces the next four message-schedule words from the previous sixteen.
///
/// The four arguments are the four most recent schedule vectors, oldest
/// first. They are combined as
/// `sha1msg2(sha1msg1($w_a, $w_b) ^ $w_c, $w_d)`; the macro evaluates to the
/// resulting vector.
macro_rules! schedule {
    ($w_a:expr, $w_b:expr, $w_c:expr, $w_d:expr) => {{
        let partial = _mm_sha1msg1_epu32($w_a, $w_b);
        let mixed = _mm_xor_si128(partial, $w_c);
        _mm_sha1msg2_epu32(mixed, $w_d)
    }};
}
| 21 | |
/// Expands one schedule step followed by four rounds.
///
/// First assigns `schedule!($m0, $m1, $m2, $m3)` to `$next`, then overwrites
/// `$e` with `rounds4!($abcd, $e, $next, $func)`. Both `$next` and `$e` must
/// therefore be assignable places in the calling scope.
macro_rules! schedule_rounds4 {
    (
        $abcd:ident, $e:ident,
        $m0:expr, $m1:expr, $m2:expr, $m3:expr, $next:expr,
        $func:expr
    ) => {
        // Freshly scheduled words are stored before being consumed by the rounds.
        $next = schedule!($m0, $m1, $m2, $m3);
        $e = rounds4!($abcd, $e, $next, $func);
    };
}
| 32 | |
| 33 | #[target_feature (enable = "sha,sse2,ssse3,sse4.1" )] |
| 34 | unsafe fn digest_blocks(state: &mut [u32; 5], blocks: &[[u8; 64]]) { |
| 35 | #[allow (non_snake_case)] |
| 36 | let MASK: __m128i = _mm_set_epi64x(0x0001_0203_0405_0607, 0x0809_0A0B_0C0D_0E0F); |
| 37 | |
| 38 | let mut state_abcd = _mm_set_epi32( |
| 39 | state[0] as i32, |
| 40 | state[1] as i32, |
| 41 | state[2] as i32, |
| 42 | state[3] as i32, |
| 43 | ); |
| 44 | let mut state_e = _mm_set_epi32(state[4] as i32, 0, 0, 0); |
| 45 | |
| 46 | for block in blocks { |
| 47 | // SAFETY: we use only unaligned loads with this pointer |
| 48 | #[allow (clippy::cast_ptr_alignment)] |
| 49 | let block_ptr = block.as_ptr() as *const __m128i; |
| 50 | |
| 51 | let mut w0 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.offset(0)), MASK); |
| 52 | let mut w1 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.offset(1)), MASK); |
| 53 | let mut w2 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.offset(2)), MASK); |
| 54 | let mut w3 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.offset(3)), MASK); |
| 55 | #[allow (clippy::needless_late_init)] |
| 56 | let mut w4; |
| 57 | |
| 58 | let mut h0 = state_abcd; |
| 59 | let mut h1 = _mm_add_epi32(state_e, w0); |
| 60 | |
| 61 | // Rounds 0..20 |
| 62 | h1 = _mm_sha1rnds4_epu32(h0, h1, 0); |
| 63 | h0 = rounds4!(h1, h0, w1, 0); |
| 64 | h1 = rounds4!(h0, h1, w2, 0); |
| 65 | h0 = rounds4!(h1, h0, w3, 0); |
| 66 | schedule_rounds4!(h0, h1, w0, w1, w2, w3, w4, 0); |
| 67 | |
| 68 | // Rounds 20..40 |
| 69 | schedule_rounds4!(h1, h0, w1, w2, w3, w4, w0, 1); |
| 70 | schedule_rounds4!(h0, h1, w2, w3, w4, w0, w1, 1); |
| 71 | schedule_rounds4!(h1, h0, w3, w4, w0, w1, w2, 1); |
| 72 | schedule_rounds4!(h0, h1, w4, w0, w1, w2, w3, 1); |
| 73 | schedule_rounds4!(h1, h0, w0, w1, w2, w3, w4, 1); |
| 74 | |
| 75 | // Rounds 40..60 |
| 76 | schedule_rounds4!(h0, h1, w1, w2, w3, w4, w0, 2); |
| 77 | schedule_rounds4!(h1, h0, w2, w3, w4, w0, w1, 2); |
| 78 | schedule_rounds4!(h0, h1, w3, w4, w0, w1, w2, 2); |
| 79 | schedule_rounds4!(h1, h0, w4, w0, w1, w2, w3, 2); |
| 80 | schedule_rounds4!(h0, h1, w0, w1, w2, w3, w4, 2); |
| 81 | |
| 82 | // Rounds 60..80 |
| 83 | schedule_rounds4!(h1, h0, w1, w2, w3, w4, w0, 3); |
| 84 | schedule_rounds4!(h0, h1, w2, w3, w4, w0, w1, 3); |
| 85 | schedule_rounds4!(h1, h0, w3, w4, w0, w1, w2, 3); |
| 86 | schedule_rounds4!(h0, h1, w4, w0, w1, w2, w3, 3); |
| 87 | schedule_rounds4!(h1, h0, w0, w1, w2, w3, w4, 3); |
| 88 | |
| 89 | state_abcd = _mm_add_epi32(state_abcd, h0); |
| 90 | state_e = _mm_sha1nexte_epu32(h1, state_e); |
| 91 | } |
| 92 | |
| 93 | state[0] = _mm_extract_epi32(state_abcd, 3) as u32; |
| 94 | state[1] = _mm_extract_epi32(state_abcd, 2) as u32; |
| 95 | state[2] = _mm_extract_epi32(state_abcd, 1) as u32; |
| 96 | state[3] = _mm_extract_epi32(state_abcd, 0) as u32; |
| 97 | state[4] = _mm_extract_epi32(state_e, 3) as u32; |
| 98 | } |
| 99 | |
| 100 | cpufeatures::new!(shani_cpuid, "sha" , "sse2" , "ssse3" , "sse4.1" ); |
| 101 | |
| 102 | pub fn compress(state: &mut [u32; 5], blocks: &[[u8; 64]]) { |
| 103 | // TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725 |
| 104 | // after stabilization |
| 105 | if shani_cpuid::get() { |
| 106 | unsafe { |
| 107 | digest_blocks(state, blocks); |
| 108 | } |
| 109 | } else { |
| 110 | super::soft::compress(state, blocks); |
| 111 | } |
| 112 | } |
| 113 | |