| 1 | use super::{arch::*, utils::*}; |
| 2 | use crate::{Block, Block8}; |
| 3 | use cipher::inout::InOut; |
| 4 | use core::{mem, ptr}; |
| 5 | |
| 6 | /// AES-192 round keys |
| 7 | pub(super) type RoundKeys = [__m128i; 13]; |
| 8 | |
| 9 | #[inline ] |
| 10 | #[target_feature (enable = "aes" )] |
| 11 | pub(super) unsafe fn encrypt1(keys: &RoundKeys, block: InOut<'_, '_, Block>) { |
| 12 | let (in_ptr: *const GenericArray>, out_ptr: *mut GenericArray>) = block.into_raw(); |
| 13 | let mut b: __m128i = _mm_loadu_si128(mem_addr:in_ptr as *const __m128i); |
| 14 | b = _mm_xor_si128(a:b, b:keys[0]); |
| 15 | b = _mm_aesenc_si128(a:b, round_key:keys[1]); |
| 16 | b = _mm_aesenc_si128(a:b, round_key:keys[2]); |
| 17 | b = _mm_aesenc_si128(a:b, round_key:keys[3]); |
| 18 | b = _mm_aesenc_si128(a:b, round_key:keys[4]); |
| 19 | b = _mm_aesenc_si128(a:b, round_key:keys[5]); |
| 20 | b = _mm_aesenc_si128(a:b, round_key:keys[6]); |
| 21 | b = _mm_aesenc_si128(a:b, round_key:keys[7]); |
| 22 | b = _mm_aesenc_si128(a:b, round_key:keys[8]); |
| 23 | b = _mm_aesenc_si128(a:b, round_key:keys[9]); |
| 24 | b = _mm_aesenc_si128(a:b, round_key:keys[10]); |
| 25 | b = _mm_aesenc_si128(a:b, round_key:keys[11]); |
| 26 | b = _mm_aesenclast_si128(a:b, round_key:keys[12]); |
| 27 | _mm_storeu_si128(mem_addr:out_ptr as *mut __m128i, a:b); |
| 28 | } |
| 29 | |
| 30 | #[inline ] |
| 31 | #[target_feature (enable = "aes" )] |
| 32 | pub(super) unsafe fn encrypt8(keys: &RoundKeys, blocks: InOut<'_, '_, Block8>) { |
| 33 | let (in_ptr: *const GenericArray, …>, out_ptr: *mut GenericArray, …>) = blocks.into_raw(); |
| 34 | let mut b: [__m128i; 8] = load8(blocks:in_ptr); |
| 35 | xor8(&mut b, key:keys[0]); |
| 36 | aesenc8(&mut b, key:keys[1]); |
| 37 | aesenc8(&mut b, key:keys[2]); |
| 38 | aesenc8(&mut b, key:keys[3]); |
| 39 | aesenc8(&mut b, key:keys[4]); |
| 40 | aesenc8(&mut b, key:keys[5]); |
| 41 | aesenc8(&mut b, key:keys[6]); |
| 42 | aesenc8(&mut b, key:keys[7]); |
| 43 | aesenc8(&mut b, key:keys[8]); |
| 44 | aesenc8(&mut b, key:keys[9]); |
| 45 | aesenc8(&mut b, key:keys[10]); |
| 46 | aesenc8(&mut b, key:keys[11]); |
| 47 | aesenclast8(&mut b, key:keys[12]); |
| 48 | store8(blocks:out_ptr, b); |
| 49 | } |
| 50 | |
| 51 | #[inline ] |
| 52 | #[target_feature (enable = "aes" )] |
| 53 | pub(super) unsafe fn decrypt1(keys: &RoundKeys, block: InOut<'_, '_, Block>) { |
| 54 | let (in_ptr: *const GenericArray>, out_ptr: *mut GenericArray>) = block.into_raw(); |
| 55 | let mut b: __m128i = _mm_loadu_si128(mem_addr:in_ptr as *const __m128i); |
| 56 | b = _mm_xor_si128(a:b, b:keys[12]); |
| 57 | b = _mm_aesdec_si128(a:b, round_key:keys[11]); |
| 58 | b = _mm_aesdec_si128(a:b, round_key:keys[10]); |
| 59 | b = _mm_aesdec_si128(a:b, round_key:keys[9]); |
| 60 | b = _mm_aesdec_si128(a:b, round_key:keys[8]); |
| 61 | b = _mm_aesdec_si128(a:b, round_key:keys[7]); |
| 62 | b = _mm_aesdec_si128(a:b, round_key:keys[6]); |
| 63 | b = _mm_aesdec_si128(a:b, round_key:keys[5]); |
| 64 | b = _mm_aesdec_si128(a:b, round_key:keys[4]); |
| 65 | b = _mm_aesdec_si128(a:b, round_key:keys[3]); |
| 66 | b = _mm_aesdec_si128(a:b, round_key:keys[2]); |
| 67 | b = _mm_aesdec_si128(a:b, round_key:keys[1]); |
| 68 | b = _mm_aesdeclast_si128(a:b, round_key:keys[0]); |
| 69 | _mm_storeu_si128(mem_addr:out_ptr as *mut __m128i, a:b); |
| 70 | } |
| 71 | |
| 72 | #[inline ] |
| 73 | #[target_feature (enable = "aes" )] |
| 74 | pub(super) unsafe fn decrypt8(keys: &RoundKeys, blocks: InOut<'_, '_, Block8>) { |
| 75 | let (in_ptr: *const GenericArray, …>, out_ptr: *mut GenericArray, …>) = blocks.into_raw(); |
| 76 | let mut b: [__m128i; 8] = load8(blocks:in_ptr); |
| 77 | xor8(&mut b, key:keys[12]); |
| 78 | aesdec8(&mut b, key:keys[11]); |
| 79 | aesdec8(&mut b, key:keys[10]); |
| 80 | aesdec8(&mut b, key:keys[9]); |
| 81 | aesdec8(&mut b, key:keys[8]); |
| 82 | aesdec8(&mut b, key:keys[7]); |
| 83 | aesdec8(&mut b, key:keys[6]); |
| 84 | aesdec8(&mut b, key:keys[5]); |
| 85 | aesdec8(&mut b, key:keys[4]); |
| 86 | aesdec8(&mut b, key:keys[3]); |
| 87 | aesdec8(&mut b, key:keys[2]); |
| 88 | aesdec8(&mut b, key:keys[1]); |
| 89 | aesdeclast8(&mut b, key:keys[0]); |
| 90 | store8(blocks:out_ptr, b); |
| 91 | } |
| 92 | |
/// One step of the AES-192 key schedule on 128-bit registers.
///
/// * `$t1` — four consecutive 32-bit key words.
/// * `$t3` — the two key words that follow `$t1`, held in the low 64 bits
///   (the upper 64 bits only influence the upper 64 bits of the second
///   result).
/// * `$round` — immediate round constant handed to `AESKEYGENASSIST`.
///
/// Evaluates to `(next, next_tail)`: `next` holds the following four key
/// words and the low 64 bits of `next_tail` hold the two words after those.
/// The upper 64 bits of `next_tail` are scratch data and must not be used
/// as key material.
///
/// Must be expanded inside an `unsafe` context where the `aes` target
/// feature is enabled, with the x86 SIMD intrinsics in scope.
macro_rules! expand_round {
    ($t1:expr, $t3:expr, $round:expr) => {{
        let prev = $t1;
        let tail = $t3;

        // Key-schedule core applied to dword 1 of `tail`; `0x55` broadcasts
        // that dword of the assist result to every lane.
        let assist = _mm_aeskeygenassist_si128(tail, $round);
        let core = _mm_shuffle_epi32(assist, 0x55);

        // Running XOR across the four dwords of `prev`: lane i ends up as
        // prev[0] ^ ... ^ prev[i], built from three successive byte shifts.
        let shift1 = _mm_slli_si128(prev, 0x4);
        let shift2 = _mm_slli_si128(shift1, 0x4);
        let shift3 = _mm_slli_si128(shift2, 0x4);
        let mut next = _mm_xor_si128(prev, shift1);
        next = _mm_xor_si128(next, shift2);
        next = _mm_xor_si128(next, shift3);
        next = _mm_xor_si128(next, core);

        // Chain the last freshly produced word (`0xff` broadcasts dword 3)
        // into the two-word tail.
        let carry = _mm_shuffle_epi32(next, 0xff);
        let mut next_tail = _mm_xor_si128(tail, _mm_slli_si128(tail, 0x4));
        next_tail = _mm_xor_si128(next_tail, carry);

        (next, next_tail)
    }};
}
| 117 | |
/// Combines 64-bit halves of two `__m128i` values via `_mm_shuffle_pd`.
///
/// Bit 0 of `$imm` selects which half of `$a` becomes the low 64 bits of the
/// result (0 = low half, 1 = high half); bit 1 selects which half of `$b`
/// becomes the high 64 bits. The integer/double casts are bit-pattern
/// reinterpretations only.
///
/// Must be expanded inside an `unsafe` context with the x86 SIMD intrinsics
/// in scope.
macro_rules! shuffle {
    ($a:expr, $b:expr, $imm:expr) => {
        _mm_castpd_si128(_mm_shuffle_pd(
            _mm_castsi128_pd($a),
            _mm_castsi128_pd($b),
            $imm,
        ))
    };
}
| 123 | |
| 124 | #[inline ] |
| 125 | #[target_feature (enable = "aes" )] |
| 126 | pub(super) unsafe fn expand_key(key: &[u8; 24]) -> RoundKeys { |
| 127 | // SAFETY: `RoundKeys` is a `[__m128i; 13]` which can be initialized |
| 128 | // with all zeroes. |
| 129 | let mut keys: RoundKeys = mem::zeroed(); |
| 130 | // we are being extra pedantic here to remove out-of-bound access. |
| 131 | // this should be optimized out into movups, movsd sequence |
| 132 | // note that unaligned load MUST be used here, even though we read |
| 133 | // from the array (compiler missoptimizes aligned load) |
| 134 | let (k0, k1l) = { |
| 135 | let mut t = [0u8; 32]; |
| 136 | ptr::write(t.as_mut_ptr() as *mut [u8; 24], *key); |
| 137 | |
| 138 | ( |
| 139 | _mm_loadu_si128(t.as_ptr() as *const __m128i), |
| 140 | _mm_loadu_si128(t.as_ptr().offset(16) as *const __m128i), |
| 141 | ) |
| 142 | }; |
| 143 | |
| 144 | keys[0] = k0; |
| 145 | |
| 146 | let (k1_2, k2r) = expand_round!(k0, k1l, 0x01); |
| 147 | keys[1] = shuffle!(k1l, k1_2, 0); |
| 148 | keys[2] = shuffle!(k1_2, k2r, 1); |
| 149 | |
| 150 | let (k3, k4l) = expand_round!(k1_2, k2r, 0x02); |
| 151 | keys[3] = k3; |
| 152 | |
| 153 | let (k4_5, k5r) = expand_round!(k3, k4l, 0x04); |
| 154 | let k4 = shuffle!(k4l, k4_5, 0); |
| 155 | let k5 = shuffle!(k4_5, k5r, 1); |
| 156 | keys[4] = k4; |
| 157 | keys[5] = k5; |
| 158 | |
| 159 | let (k6, k7l) = expand_round!(k4_5, k5r, 0x08); |
| 160 | keys[6] = k6; |
| 161 | |
| 162 | let (k7_8, k8r) = expand_round!(k6, k7l, 0x10); |
| 163 | keys[7] = shuffle!(k7l, k7_8, 0); |
| 164 | keys[8] = shuffle!(k7_8, k8r, 1); |
| 165 | |
| 166 | let (k9, k10l) = expand_round!(k7_8, k8r, 0x20); |
| 167 | keys[9] = k9; |
| 168 | |
| 169 | let (k10_11, k11r) = expand_round!(k9, k10l, 0x40); |
| 170 | keys[10] = shuffle!(k10l, k10_11, 0); |
| 171 | keys[11] = shuffle!(k10_11, k11r, 1); |
| 172 | |
| 173 | let (k12, _) = expand_round!(k10_11, k11r, 0x80); |
| 174 | keys[12] = k12; |
| 175 | |
| 176 | keys |
| 177 | } |
| 178 | |
| 179 | #[inline ] |
| 180 | #[target_feature (enable = "aes" )] |
| 181 | pub(super) unsafe fn inv_expanded_keys(keys: &RoundKeys) -> RoundKeys { |
| 182 | [ |
| 183 | keys[0], |
| 184 | _mm_aesimc_si128(keys[1]), |
| 185 | _mm_aesimc_si128(keys[2]), |
| 186 | _mm_aesimc_si128(keys[3]), |
| 187 | _mm_aesimc_si128(keys[4]), |
| 188 | _mm_aesimc_si128(keys[5]), |
| 189 | _mm_aesimc_si128(keys[6]), |
| 190 | _mm_aesimc_si128(keys[7]), |
| 191 | _mm_aesimc_si128(keys[8]), |
| 192 | _mm_aesimc_si128(keys[9]), |
| 193 | _mm_aesimc_si128(keys[10]), |
| 194 | _mm_aesimc_si128(keys[11]), |
| 195 | keys[12], |
| 196 | ] |
| 197 | } |
| 198 | |