| 1 | use super::{arch::*, utils::*}; |
| 2 | use crate::{Block, Block8}; |
| 3 | use cipher::inout::InOut; |
| 4 | use core::mem; |
| 5 | |
| 6 | /// AES-192 round keys |
| 7 | pub(super) type RoundKeys = [__m128i; 15]; |
| 8 | |
| 9 | #[inline ] |
| 10 | #[target_feature (enable = "aes" )] |
| 11 | pub(super) unsafe fn encrypt1(keys: &RoundKeys, block: InOut<'_, '_, Block>) { |
| 12 | let (in_ptr: *const GenericArray>, out_ptr: *mut GenericArray>) = block.into_raw(); |
| 13 | let mut b: __m128i = _mm_loadu_si128(mem_addr:in_ptr as *const __m128i); |
| 14 | b = _mm_xor_si128(a:b, b:keys[0]); |
| 15 | b = _mm_aesenc_si128(a:b, round_key:keys[1]); |
| 16 | b = _mm_aesenc_si128(a:b, round_key:keys[2]); |
| 17 | b = _mm_aesenc_si128(a:b, round_key:keys[3]); |
| 18 | b = _mm_aesenc_si128(a:b, round_key:keys[4]); |
| 19 | b = _mm_aesenc_si128(a:b, round_key:keys[5]); |
| 20 | b = _mm_aesenc_si128(a:b, round_key:keys[6]); |
| 21 | b = _mm_aesenc_si128(a:b, round_key:keys[7]); |
| 22 | b = _mm_aesenc_si128(a:b, round_key:keys[8]); |
| 23 | b = _mm_aesenc_si128(a:b, round_key:keys[9]); |
| 24 | b = _mm_aesenc_si128(a:b, round_key:keys[10]); |
| 25 | b = _mm_aesenc_si128(a:b, round_key:keys[11]); |
| 26 | b = _mm_aesenc_si128(a:b, round_key:keys[12]); |
| 27 | b = _mm_aesenc_si128(a:b, round_key:keys[13]); |
| 28 | b = _mm_aesenclast_si128(a:b, round_key:keys[14]); |
| 29 | _mm_storeu_si128(mem_addr:out_ptr as *mut __m128i, a:b); |
| 30 | } |
| 31 | |
| 32 | #[inline ] |
| 33 | #[target_feature (enable = "aes" )] |
| 34 | pub(super) unsafe fn encrypt8(keys: &RoundKeys, blocks: InOut<'_, '_, Block8>) { |
| 35 | let (in_ptr: *const GenericArray, …>, out_ptr: *mut GenericArray, …>) = blocks.into_raw(); |
| 36 | let mut b: [__m128i; 8] = load8(blocks:in_ptr); |
| 37 | xor8(&mut b, key:keys[0]); |
| 38 | aesenc8(&mut b, key:keys[1]); |
| 39 | aesenc8(&mut b, key:keys[2]); |
| 40 | aesenc8(&mut b, key:keys[3]); |
| 41 | aesenc8(&mut b, key:keys[4]); |
| 42 | aesenc8(&mut b, key:keys[5]); |
| 43 | aesenc8(&mut b, key:keys[6]); |
| 44 | aesenc8(&mut b, key:keys[7]); |
| 45 | aesenc8(&mut b, key:keys[8]); |
| 46 | aesenc8(&mut b, key:keys[9]); |
| 47 | aesenc8(&mut b, key:keys[10]); |
| 48 | aesenc8(&mut b, key:keys[11]); |
| 49 | aesenc8(&mut b, key:keys[12]); |
| 50 | aesenc8(&mut b, key:keys[13]); |
| 51 | aesenclast8(&mut b, key:keys[14]); |
| 52 | store8(blocks:out_ptr, b); |
| 53 | } |
| 54 | |
| 55 | #[inline ] |
| 56 | #[target_feature (enable = "aes" )] |
| 57 | pub(super) unsafe fn decrypt1(keys: &RoundKeys, block: InOut<'_, '_, Block>) { |
| 58 | let (in_ptr: *const GenericArray>, out_ptr: *mut GenericArray>) = block.into_raw(); |
| 59 | let mut b: __m128i = _mm_loadu_si128(mem_addr:in_ptr as *const __m128i); |
| 60 | b = _mm_xor_si128(a:b, b:keys[14]); |
| 61 | b = _mm_aesdec_si128(a:b, round_key:keys[13]); |
| 62 | b = _mm_aesdec_si128(a:b, round_key:keys[12]); |
| 63 | b = _mm_aesdec_si128(a:b, round_key:keys[11]); |
| 64 | b = _mm_aesdec_si128(a:b, round_key:keys[10]); |
| 65 | b = _mm_aesdec_si128(a:b, round_key:keys[9]); |
| 66 | b = _mm_aesdec_si128(a:b, round_key:keys[8]); |
| 67 | b = _mm_aesdec_si128(a:b, round_key:keys[7]); |
| 68 | b = _mm_aesdec_si128(a:b, round_key:keys[6]); |
| 69 | b = _mm_aesdec_si128(a:b, round_key:keys[5]); |
| 70 | b = _mm_aesdec_si128(a:b, round_key:keys[4]); |
| 71 | b = _mm_aesdec_si128(a:b, round_key:keys[3]); |
| 72 | b = _mm_aesdec_si128(a:b, round_key:keys[2]); |
| 73 | b = _mm_aesdec_si128(a:b, round_key:keys[1]); |
| 74 | b = _mm_aesdeclast_si128(a:b, round_key:keys[0]); |
| 75 | _mm_storeu_si128(mem_addr:out_ptr as *mut __m128i, a:b); |
| 76 | } |
| 77 | |
| 78 | #[inline ] |
| 79 | #[target_feature (enable = "aes" )] |
| 80 | pub(super) unsafe fn decrypt8(keys: &RoundKeys, blocks: InOut<'_, '_, Block8>) { |
| 81 | let (in_ptr: *const GenericArray, …>, out_ptr: *mut GenericArray, …>) = blocks.into_raw(); |
| 82 | let mut b: [__m128i; 8] = load8(blocks:in_ptr); |
| 83 | xor8(&mut b, key:keys[14]); |
| 84 | aesdec8(&mut b, key:keys[13]); |
| 85 | aesdec8(&mut b, key:keys[12]); |
| 86 | aesdec8(&mut b, key:keys[11]); |
| 87 | aesdec8(&mut b, key:keys[10]); |
| 88 | aesdec8(&mut b, key:keys[9]); |
| 89 | aesdec8(&mut b, key:keys[8]); |
| 90 | aesdec8(&mut b, key:keys[7]); |
| 91 | aesdec8(&mut b, key:keys[6]); |
| 92 | aesdec8(&mut b, key:keys[5]); |
| 93 | aesdec8(&mut b, key:keys[4]); |
| 94 | aesdec8(&mut b, key:keys[3]); |
| 95 | aesdec8(&mut b, key:keys[2]); |
| 96 | aesdec8(&mut b, key:keys[1]); |
| 97 | aesdeclast8(&mut b, key:keys[0]); |
| 98 | store8(blocks:out_ptr, b); |
| 99 | } |
| 100 | |
// Derives two consecutive round keys, `$keys[$pos]` and `$keys[$pos + 1]`,
// from the two preceding ones (`$keys[$pos - 2]` and `$keys[$pos - 1]`).
//
// `$round` is the round constant handed to `_mm_aeskeygenassist_si128` for
// the first of the two derived keys; the second one always uses `0x00`.
macro_rules! expand_round {
    ($keys:expr, $pos:expr, $round:expr) => {{
        let older = $keys[$pos - 2];
        let newer = $keys[$pos - 1];

        // First derived key: key-generation assist on the newest key,
        // broadcast of its top 32-bit lane (selector 0xff).
        let assist = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(newer, $round), 0xff);
        // XOR `older` with itself shifted left by 4, 8 and 12 bytes.
        let mut shifted = _mm_slli_si128(older, 0x4);
        let mut first = _mm_xor_si128(older, shifted);
        shifted = _mm_slli_si128(shifted, 0x4);
        first = _mm_xor_si128(first, shifted);
        shifted = _mm_slli_si128(shifted, 0x4);
        first = _mm_xor_si128(first, shifted);
        first = _mm_xor_si128(first, assist);

        $keys[$pos] = first;

        // Second derived key: assist on the key just produced, broadcast of
        // lane 2 (selector 0xaa), no round constant.
        let assist = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(first, 0x00), 0xaa);
        // Same shift-and-XOR chain, this time applied to `newer`.
        let mut shifted = _mm_slli_si128(newer, 0x4);
        let mut second = _mm_xor_si128(newer, shifted);
        shifted = _mm_slli_si128(shifted, 0x4);
        second = _mm_xor_si128(second, shifted);
        shifted = _mm_slli_si128(shifted, 0x4);
        second = _mm_xor_si128(second, shifted);
        second = _mm_xor_si128(second, assist);

        $keys[$pos + 1] = second;
    }};
}
| 133 | |
// Derives a single round key, `$keys[$pos]`, from the two preceding ones
// (`$keys[$pos - 2]` and `$keys[$pos - 1]`).
//
// This is the first half of `expand_round!`, used when only one more key
// is needed to complete the schedule. `$round` is the round constant handed
// to `_mm_aeskeygenassist_si128`.
macro_rules! expand_round_last {
    ($keys:expr, $pos:expr, $round:expr) => {{
        let older = $keys[$pos - 2];
        let newer = $keys[$pos - 1];

        // Key-generation assist on the newest key, broadcast of its top
        // 32-bit lane (selector 0xff).
        let assist = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(newer, $round), 0xff);
        // XOR `older` with itself shifted left by 4, 8 and 12 bytes.
        let mut shifted = _mm_slli_si128(older, 0x4);
        let mut derived = _mm_xor_si128(older, shifted);
        shifted = _mm_slli_si128(shifted, 0x4);
        derived = _mm_xor_si128(derived, shifted);
        shifted = _mm_slli_si128(shifted, 0x4);
        derived = _mm_xor_si128(derived, shifted);
        derived = _mm_xor_si128(derived, assist);

        $keys[$pos] = derived;
    }};
}
| 154 | |
| 155 | #[inline (always)] |
| 156 | pub(super) unsafe fn expand_key(key: &[u8; 32]) -> RoundKeys { |
| 157 | // SAFETY: `RoundKeys` is a `[__m128i; 15]` which can be initialized |
| 158 | // with all zeroes. |
| 159 | let mut keys: RoundKeys = mem::zeroed(); |
| 160 | |
| 161 | let kp: *const __m128i = key.as_ptr() as *const __m128i; |
| 162 | keys[0] = _mm_loadu_si128(mem_addr:kp); |
| 163 | keys[1] = _mm_loadu_si128(mem_addr:kp.add(count:1)); |
| 164 | |
| 165 | expand_round!(keys, 2, 0x01); |
| 166 | expand_round!(keys, 4, 0x02); |
| 167 | expand_round!(keys, 6, 0x04); |
| 168 | expand_round!(keys, 8, 0x08); |
| 169 | expand_round!(keys, 10, 0x10); |
| 170 | expand_round!(keys, 12, 0x20); |
| 171 | expand_round_last!(keys, 14, 0x40); |
| 172 | |
| 173 | keys |
| 174 | } |
| 175 | |
| 176 | #[inline ] |
| 177 | #[target_feature (enable = "aes" )] |
| 178 | pub(super) unsafe fn inv_expanded_keys(keys: &RoundKeys) -> RoundKeys { |
| 179 | [ |
| 180 | keys[0], |
| 181 | _mm_aesimc_si128(keys[1]), |
| 182 | _mm_aesimc_si128(keys[2]), |
| 183 | _mm_aesimc_si128(keys[3]), |
| 184 | _mm_aesimc_si128(keys[4]), |
| 185 | _mm_aesimc_si128(keys[5]), |
| 186 | _mm_aesimc_si128(keys[6]), |
| 187 | _mm_aesimc_si128(keys[7]), |
| 188 | _mm_aesimc_si128(keys[8]), |
| 189 | _mm_aesimc_si128(keys[9]), |
| 190 | _mm_aesimc_si128(keys[10]), |
| 191 | _mm_aesimc_si128(keys[11]), |
| 192 | _mm_aesimc_si128(keys[12]), |
| 193 | _mm_aesimc_si128(keys[13]), |
| 194 | keys[14], |
| 195 | ] |
| 196 | } |
| 197 | |